diff options
Diffstat (limited to 'arch/parisc/lib')
-rw-r--r-- | arch/parisc/lib/Makefile | 7 | ||||
-rw-r--r-- | arch/parisc/lib/bitops.c | 82 | ||||
-rw-r--r-- | arch/parisc/lib/checksum.c | 149 | ||||
-rw-r--r-- | arch/parisc/lib/fixup.S | 92 | ||||
-rw-r--r-- | arch/parisc/lib/io.c | 488 | ||||
-rw-r--r-- | arch/parisc/lib/iomap.c | 464 | ||||
-rw-r--r-- | arch/parisc/lib/lusercopy.S | 180 | ||||
-rw-r--r-- | arch/parisc/lib/memcpy.c | 506 | ||||
-rw-r--r-- | arch/parisc/lib/memset.c | 91 |
9 files changed, 2059 insertions, 0 deletions
diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile new file mode 100644 index 00000000..5f2e6904 --- /dev/null +++ b/arch/parisc/lib/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for parisc-specific library files +# + +lib-y := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o + +obj-y := iomap.o diff --git a/arch/parisc/lib/bitops.c b/arch/parisc/lib/bitops.c new file mode 100644 index 00000000..18711884 --- /dev/null +++ b/arch/parisc/lib/bitops.c @@ -0,0 +1,82 @@ +/* + * bitops.c: atomic operations which got too long to be inlined all over + * the place. + * + * Copyright 1999 Philipp Rumpf (prumpf@tux.org) + * Copyright 2000 Grant Grundler (grundler@cup.hp.com) + */ + +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/atomic.h> + +#ifdef CONFIG_SMP +arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = { + [0 ... (ATOMIC_HASH_SIZE-1)] = __ARCH_SPIN_LOCK_UNLOCKED +}; +#endif + +#ifdef CONFIG_64BIT +unsigned long __xchg64(unsigned long x, unsigned long *ptr) +{ + unsigned long temp, flags; + + _atomic_spin_lock_irqsave(ptr, flags); + temp = *ptr; + *ptr = x; + _atomic_spin_unlock_irqrestore(ptr, flags); + return temp; +} +#endif + +unsigned long __xchg32(int x, int *ptr) +{ + unsigned long flags; + long temp; + + _atomic_spin_lock_irqsave(ptr, flags); + temp = (long) *ptr; /* XXX - sign extension wanted? */ + *ptr = x; + _atomic_spin_unlock_irqrestore(ptr, flags); + return (unsigned long)temp; +} + + +unsigned long __xchg8(char x, char *ptr) +{ + unsigned long flags; + long temp; + + _atomic_spin_lock_irqsave(ptr, flags); + temp = (long) *ptr; /* XXX - sign extension wanted? 
*/ + *ptr = x; + _atomic_spin_unlock_irqrestore(ptr, flags); + return (unsigned long)temp; +} + + +#ifdef CONFIG_64BIT +unsigned long __cmpxchg_u64(volatile unsigned long *ptr, unsigned long old, unsigned long new) +{ + unsigned long flags; + unsigned long prev; + + _atomic_spin_lock_irqsave(ptr, flags); + if ((prev = *ptr) == old) + *ptr = new; + _atomic_spin_unlock_irqrestore(ptr, flags); + return prev; +} +#endif + +unsigned long __cmpxchg_u32(volatile unsigned int *ptr, unsigned int old, unsigned int new) +{ + unsigned long flags; + unsigned int prev; + + _atomic_spin_lock_irqsave(ptr, flags); + if ((prev = *ptr) == old) + *ptr = new; + _atomic_spin_unlock_irqrestore(ptr, flags); + return (unsigned long)prev; +} diff --git a/arch/parisc/lib/checksum.c b/arch/parisc/lib/checksum.c new file mode 100644 index 00000000..ae66d31f --- /dev/null +++ b/arch/parisc/lib/checksum.c @@ -0,0 +1,149 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * MIPS specific IP/TCP/UDP checksumming routines + * + * Authors: Ralf Baechle, <ralf@waldorf-gmbh.de> + * Lots of code moved from tcp.c and ip.c; see those files + * for more names. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include <linux/module.h> +#include <linux/types.h> + +#include <net/checksum.h> +#include <asm/byteorder.h> +#include <asm/string.h> +#include <asm/uaccess.h> + +#define addc(_t,_r) \ + __asm__ __volatile__ ( \ +" add %0, %1, %0\n" \ +" addc %0, %%r0, %0\n" \ + : "=r"(_t) \ + : "r"(_r), "0"(_t)); + +static inline unsigned short from32to16(unsigned int x) +{ + /* 32 bits --> 16 bits + carry */ + x = (x & 0xffff) + (x >> 16); + /* 16 bits + carry --> 16 bits including carry */ + x = (x & 0xffff) + (x >> 16); + return (unsigned short)x; +} + +static inline unsigned int do_csum(const unsigned char * buff, int len) +{ + int odd, count; + unsigned int result = 0; + + if (len <= 0) + goto out; + odd = 1 & (unsigned long) buff; + if (odd) { + result = be16_to_cpu(*buff); + len--; + buff++; + } + count = len >> 1; /* nr of 16-bit words.. */ + if (count) { + if (2 & (unsigned long) buff) { + result += *(unsigned short *) buff; + count--; + len -= 2; + buff += 2; + } + count >>= 1; /* nr of 32-bit words.. */ + if (count) { + while (count >= 4) { + unsigned int r1, r2, r3, r4; + r1 = *(unsigned int *)(buff + 0); + r2 = *(unsigned int *)(buff + 4); + r3 = *(unsigned int *)(buff + 8); + r4 = *(unsigned int *)(buff + 12); + addc(result, r1); + addc(result, r2); + addc(result, r3); + addc(result, r4); + count -= 4; + buff += 16; + } + while (count) { + unsigned int w = *(unsigned int *) buff; + count--; + buff += 4; + addc(result, w); + } + result = (result & 0xffff) + (result >> 16); + } + if (len & 2) { + result += *(unsigned short *) buff; + buff += 2; + } + } + if (len & 1) + result += le16_to_cpu(*buff); + result = from32to16(result); + if (odd) + result = swab16(result); +out: + return result; +} + +/* + * computes a partial checksum, e.g. for TCP/UDP fragments + */ +/* + * why bother folding? 
+ */ +__wsum csum_partial(const void *buff, int len, __wsum sum) +{ + unsigned int result = do_csum(buff, len); + addc(result, sum); + return (__force __wsum)from32to16(result); +} + +EXPORT_SYMBOL(csum_partial); + +/* + * copy while checksumming, otherwise like csum_partial + */ +__wsum csum_partial_copy_nocheck(const void *src, void *dst, + int len, __wsum sum) +{ + /* + * It's 2:30 am and I don't feel like doing it real ... + * This is lots slower than the real thing (tm) + */ + sum = csum_partial(src, len, sum); + memcpy(dst, src, len); + + return sum; +} +EXPORT_SYMBOL(csum_partial_copy_nocheck); + +/* + * Copy from userspace and compute checksum. If we catch an exception + * then zero the rest of the buffer. + */ +__wsum csum_partial_copy_from_user(const void __user *src, + void *dst, int len, + __wsum sum, int *err_ptr) +{ + int missing; + + missing = copy_from_user(dst, src, len); + if (missing) { + memset(dst + len - missing, 0, missing); + *err_ptr = -EFAULT; + } + + return csum_partial(dst, len, sum); +} +EXPORT_SYMBOL(csum_partial_copy_from_user); diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S new file mode 100644 index 00000000..f8c45cc2 --- /dev/null +++ b/arch/parisc/lib/fixup.S @@ -0,0 +1,92 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * + * Copyright (C) 2004 Randolph Chung <tausq@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Fixup routines for kernel exception handling. + */ +#include <asm/asm-offsets.h> +#include <asm/assembly.h> +#include <asm/errno.h> +#include <linux/linkage.h> + +#ifdef CONFIG_SMP + .macro get_fault_ip t1 t2 + addil LT%__per_cpu_offset,%r27 + LDREG RT%__per_cpu_offset(%r1),\t1 + /* t2 = smp_processor_id() */ + mfctl 30,\t2 + ldw TI_CPU(\t2),\t2 +#ifdef CONFIG_64BIT + extrd,u \t2,63,32,\t2 +#endif + /* t2 = &__per_cpu_offset[smp_processor_id()]; */ + LDREGX \t2(\t1),\t2 + addil LT%exception_data,%r27 + LDREG RT%exception_data(%r1),\t1 + /* t1 = &__get_cpu_var(exception_data) */ + add,l \t1,\t2,\t1 + /* t1 = t1->fault_ip */ + LDREG EXCDATA_IP(\t1), \t1 + .endm +#else + .macro get_fault_ip t1 t2 + /* t1 = &__get_cpu_var(exception_data) */ + addil LT%exception_data,%r27 + LDREG RT%exception_data(%r1),\t2 + /* t1 = t2->fault_ip */ + LDREG EXCDATA_IP(\t2), \t1 + .endm +#endif + + .level LEVEL + + .text + .section .fixup, "ax" + + /* get_user() fixups, store -EFAULT in r8, and 0 in r9 */ +ENTRY(fixup_get_user_skip_1) + get_fault_ip %r1,%r8 + ldo 4(%r1), %r1 + ldi -EFAULT, %r8 + bv %r0(%r1) + copy %r0, %r9 +ENDPROC(fixup_get_user_skip_1) + +ENTRY(fixup_get_user_skip_2) + get_fault_ip %r1,%r8 + ldo 8(%r1), %r1 + ldi -EFAULT, %r8 + bv %r0(%r1) + copy %r0, %r9 +ENDPROC(fixup_get_user_skip_2) + + /* put_user() fixups, store -EFAULT in r8 */ +ENTRY(fixup_put_user_skip_1) + get_fault_ip %r1,%r8 + ldo 4(%r1), %r1 + bv %r0(%r1) + ldi -EFAULT, %r8 +ENDPROC(fixup_put_user_skip_1) + +ENTRY(fixup_put_user_skip_2) + get_fault_ip %r1,%r8 + ldo 8(%r1), %r1 + bv %r0(%r1) + ldi -EFAULT, %r8 +ENDPROC(fixup_put_user_skip_2) + diff --git a/arch/parisc/lib/io.c b/arch/parisc/lib/io.c new file mode 100644 index 00000000..7c1406ff --- /dev/null +++ b/arch/parisc/lib/io.c @@ -0,0 +1,488 @@ 
+/*
+ * arch/parisc/lib/io.c
+ *
+ * Copyright (c) Matthew Wilcox 2001 for Hewlett-Packard
+ * Copyright (c) Randolph Chung 2001 <tausq@debian.org>
+ *
+ * IO accessing functions which shouldn't be inlined because they're too big
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/io.h>
+
+/* Copies a block of memory to a device in an efficient manner.
+ * Assumes the device can cope with 32-bit transfers.  If it can't,
+ * don't use this function.
+ *
+ * dst:   destination in I/O memory
+ * src:   source buffer in ordinary memory
+ * count: number of bytes to copy; zero or a negative value copies nothing
+ *
+ * When dst and src have the same alignment modulo 4, leading bytes are
+ * written one at a time until dst is word aligned, the bulk is written
+ * with 32-bit __raw_writel() stores and the tail is written bytewise.
+ * When the alignments differ the whole transfer is done bytewise.
+ */
+void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
+{
+	if (((unsigned long)dst & 3) != ((unsigned long)src & 3))
+		goto bytecopy;
+	/*
+	 * Stop aligning as soon as count is used up: a transfer shorter
+	 * than the distance to the next word boundary must not drive
+	 * count negative, or the tail loop would run far past the buffer.
+	 */
+	while (count > 0 && ((unsigned long)dst & 3)) {
+		writeb(*(char *)src, dst++);
+		src++;
+		count--;
+	}
+	while (count > 3) {
+		__raw_writel(*(u32 *)src, dst);
+		src += 4;
+		dst += 4;
+		count -= 4;
+	}
+ bytecopy:
+	/* "> 0" rather than "!= 0" so a negative count is a no-op */
+	while (count-- > 0) {
+		writeb(*(char *)src, dst++);
+		src++;
+	}
+}
+
+/*
+** Copies a block of memory from a device in an efficient manner.
+** Assumes the device can cope with 32-bit transfers.  If it can't,
+** don't use this function.
+**
+** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM:
+**	27341/64    = 427 cyc per int
+**	61311/128   = 478 cyc per short
+**	122637/256  = 479 cyc per byte
+** Ergo bus latencies dominant (not transfer size).
+**      Minimize total number of transfers at cost of CPU cycles.
+** TODO: only look at src alignment and adjust the stores to dest.
+*/
+/*
+ * dst:   destination buffer in ordinary memory
+ * src:   source in I/O memory
+ * count: number of bytes to copy; zero or a negative value copies nothing
+ *
+ * Uses the widest access (8, 16 or 32 bit) that the relative alignment
+ * of src and dst allows.
+ */
+void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
+{
+	/* alignments differ in bit 0 (or almost nothing to do): bytes only */
+	if ( (((unsigned long)dst ^ (unsigned long)src) & 1) || (count < 2) )
+		goto bytecopy;
+
+	/*
+	 * src and dst agree in bit 0, so a single byte brings both onto a
+	 * 16-bit boundary.  This has to happen before choosing between the
+	 * short and word loops, otherwise the short loop could be entered
+	 * with odd addresses and perform misaligned 16-bit accesses.
+	 */
+	if ((unsigned long)src & 1) {
+		*(u8 *)dst = readb(src);
+		src++;
+		dst++;
+		count--;
+	}
+
+	/* alignments differ in bit 1, or too short for a word: shorts */
+	if ( (((unsigned long)dst ^ (unsigned long)src) & 2) || (count < 4) )
+		goto shortcopy;
+
+	/* count >= 4 here, so at least two bytes remain after this step */
+	if ((unsigned long)src & 2) {
+		*(u16 *)dst = __raw_readw(src);
+		src += 2;
+		dst += 2;
+		count -= 2;
+	}
+
+	while (count > 3) {
+		*(u32 *)dst = __raw_readl(src);
+		dst += 4;
+		src += 4;
+		count -= 4;
+	}
+
+ shortcopy:
+	while (count > 1) {
+		*(u16 *)dst = __raw_readw(src);
+		src += 2;
+		dst += 2;
+		count -= 2;
+	}
+
+ bytecopy:
+	/* "> 0" rather than "!= 0" so a negative count is a no-op */
+	while (count-- > 0) {
+		*(char *)dst = readb(src);
+		src++;
+		dst++;
+	}
+}
+
+/* Sets a block of memory on a device to a given value.
+ * Assumes the device can cope with 32-bit transfers.  If it can't,
+ * don't use this function.
+ *
+ * addr:  start of the region in I/O memory
+ * val:   byte value to store
+ * count: number of bytes to set; zero or a negative value sets nothing
+ */
+void memset_io(volatile void __iomem *addr, unsigned char val, int count)
+{
+	/* replicate val into all four bytes without shifting into the sign bit */
+	u32 val32 = 0x01010101U * val;
+
+	/* never align past the end of a short region */
+	while (count > 0 && ((unsigned long)addr & 3)) {
+		writeb(val, addr++);
+		count--;
+	}
+	while (count > 3) {
+		__raw_writel(val32, addr);
+		addr += 4;
+		count -= 4;
+	}
+	while (count-- > 0) {
+		writeb(val, addr++);
+	}
+}
+
+/*
+ * Read COUNT 8-bit bytes from port PORT into memory starting at
+ * DST.
+ */ +void insb (unsigned long port, void *dst, unsigned long count) +{ + unsigned char *p; + + p = (unsigned char *)dst; + + while (((unsigned long)p) & 0x3) { + if (!count) + return; + count--; + *p = inb(port); + p++; + } + + while (count >= 4) { + unsigned int w; + count -= 4; + w = inb(port) << 24; + w |= inb(port) << 16; + w |= inb(port) << 8; + w |= inb(port); + *(unsigned int *) p = w; + p += 4; + } + + while (count) { + --count; + *p = inb(port); + p++; + } +} + + +/* + * Read COUNT 16-bit words from port PORT into memory starting at + * SRC. SRC must be at least short aligned. This is used by the + * IDE driver to read disk sectors. Performance is important, but + * the interfaces seems to be slow: just using the inlined version + * of the inw() breaks things. + */ +void insw (unsigned long port, void *dst, unsigned long count) +{ + unsigned int l = 0, l2; + unsigned char *p; + + p = (unsigned char *)dst; + + if (!count) + return; + + switch (((unsigned long)p) & 0x3) + { + case 0x00: /* Buffer 32-bit aligned */ + while (count>=2) { + + count -= 2; + l = cpu_to_le16(inw(port)) << 16; + l |= cpu_to_le16(inw(port)); + *(unsigned int *)p = l; + p += 4; + } + if (count) { + *(unsigned short *)p = cpu_to_le16(inw(port)); + } + break; + + case 0x02: /* Buffer 16-bit aligned */ + *(unsigned short *)p = cpu_to_le16(inw(port)); + p += 2; + count--; + while (count>=2) { + + count -= 2; + l = cpu_to_le16(inw(port)) << 16; + l |= cpu_to_le16(inw(port)); + *(unsigned int *)p = l; + p += 4; + } + if (count) { + *(unsigned short *)p = cpu_to_le16(inw(port)); + } + break; + + case 0x01: /* Buffer 8-bit aligned */ + case 0x03: + /* I don't bother with 32bit transfers + * in this case, 16bit will have to do -- DE */ + --count; + + l = cpu_to_le16(inw(port)); + *p = l >> 8; + p++; + while (count--) + { + l2 = cpu_to_le16(inw(port)); + *(unsigned short *)p = (l & 0xff) << 8 | (l2 >> 8); + p += 2; + l = l2; + } + *p = l & 0xff; + break; + } +} + + + +/* + * Read COUNT 32-bit 
words from port PORT into memory starting at + * SRC. Now works with any alignment in SRC. Performance is important, + * but the interfaces seems to be slow: just using the inlined version + * of the inl() breaks things. + */ +void insl (unsigned long port, void *dst, unsigned long count) +{ + unsigned int l = 0, l2; + unsigned char *p; + + p = (unsigned char *)dst; + + if (!count) + return; + + switch (((unsigned long) dst) & 0x3) + { + case 0x00: /* Buffer 32-bit aligned */ + while (count--) + { + *(unsigned int *)p = cpu_to_le32(inl(port)); + p += 4; + } + break; + + case 0x02: /* Buffer 16-bit aligned */ + --count; + + l = cpu_to_le32(inl(port)); + *(unsigned short *)p = l >> 16; + p += 2; + + while (count--) + { + l2 = cpu_to_le32(inl(port)); + *(unsigned int *)p = (l & 0xffff) << 16 | (l2 >> 16); + p += 4; + l = l2; + } + *(unsigned short *)p = l & 0xffff; + break; + case 0x01: /* Buffer 8-bit aligned */ + --count; + + l = cpu_to_le32(inl(port)); + *(unsigned char *)p = l >> 24; + p++; + *(unsigned short *)p = (l >> 8) & 0xffff; + p += 2; + while (count--) + { + l2 = cpu_to_le32(inl(port)); + *(unsigned int *)p = (l & 0xff) << 24 | (l2 >> 8); + p += 4; + l = l2; + } + *p = l & 0xff; + break; + case 0x03: /* Buffer 8-bit aligned */ + --count; + + l = cpu_to_le32(inl(port)); + *p = l >> 24; + p++; + while (count--) + { + l2 = cpu_to_le32(inl(port)); + *(unsigned int *)p = (l & 0xffffff) << 8 | l2 >> 24; + p += 4; + l = l2; + } + *(unsigned short *)p = (l >> 8) & 0xffff; + p += 2; + *p = l & 0xff; + break; + } +} + + +/* + * Like insb but in the opposite direction. + * Don't worry as much about doing aligned memory transfers: + * doing byte reads the "slow" way isn't nearly as slow as + * doing byte writes the slow way (no r-m-w cycle). 
+ */
+void outsb(unsigned long port, const void * src, unsigned long count)
+{
+	const unsigned char *p;
+
+	p = (const unsigned char *)src;
+	while (count) {
+		count--;
+		outb(*p, port);
+		p++;
+	}
+}
+
+/*
+ * Like insw but in the opposite direction.  This is used by the IDE
+ * driver to write disk sectors.  Performance is important, but the
+ * interfaces seems to be slow: just using the inlined version of the
+ * outw() breaks things.
+ *
+ * COUNT is the number of 16-bit words to write; SRC may have any
+ * alignment.  Every memory access below is naturally aligned: the
+ * switch picks a strategy from the low two bits of SRC.
+ */
+void outsw (unsigned long port, const void *src, unsigned long count)
+{
+	unsigned int l = 0, l2;
+	const unsigned char *p;
+
+	p = (const unsigned char *)src;
+
+	if (!count)
+		return;
+
+	switch (((unsigned long)p) & 0x3)
+	{
+	 case 0x00:			/* Buffer 32-bit aligned */
+		while (count>=2) {
+			count -= 2;
+			l = *(unsigned int *)p;
+			p += 4;
+			outw(le16_to_cpu(l >> 16), port);
+			outw(le16_to_cpu(l & 0xffff), port);
+		}
+		if (count) {
+			outw(le16_to_cpu(*(unsigned short*)p), port);
+		}
+		break;
+
+	 case 0x02:			/* Buffer 16-bit aligned */
+
+		outw(le16_to_cpu(*(unsigned short*)p), port);
+		p += 2;
+		count--;
+
+		while (count>=2) {
+			count -= 2;
+			l = *(unsigned int *)p;
+			p += 4;
+			outw(le16_to_cpu(l >> 16), port);
+			outw(le16_to_cpu(l & 0xffff), port);
+		}
+		if (count) {
+			outw(le16_to_cpu(*(unsigned short *)p), port);
+		}
+		break;
+
+	 case 0x01:			/* Buffer 8-bit aligned */
+	 case 0x03:
+		/* I don't bother with 32bit transfers
+		 * in this case, 16bit will have to do -- DE */
+
+		/*
+		 * After the first byte p is 16-bit aligned for either odd
+		 * starting offset, so one loop serves both cases.  l always
+		 * carries the byte that has been read but not yet written,
+		 * already shifted into the upper half of the next word.
+		 */
+		l  = *p << 8;
+		p++;
+		count--;
+		while (count)
+		{
+			count--;
+			l2 = *(unsigned short *)p;
+			p += 2;
+			outw(le16_to_cpu(l | l2 >> 8), port);
+			/* keep only the byte that has not been written yet */
+			l = (l2 & 0xff) << 8;
+		}
+		/* last word: carried byte on top, final buffer byte below */
+		l2 = *(unsigned char *)p;
+		outw (le16_to_cpu(l | l2), port);
+		break;
+
+	}
+}
+
+
+/*
+ * Like insl but in the opposite direction.  This is used by the IDE
+ * driver to write disk sectors.  Works with any alignment in SRC.
+ *  Performance is important, but the interfaces seems to be slow:
+ * just using the inlined version of the outl() breaks things.
+ */
+/*
+ * COUNT is the number of 32-bit words to write.  For unaligned buffers
+ * the leading bytes of the first word are read separately and every
+ * following output word is assembled from two neighbouring aligned
+ * loads, so memory is only ever accessed with natural alignment.
+ */
+void outsl (unsigned long port, const void *src, unsigned long count)
+{
+	unsigned int l = 0, l2;
+	const unsigned char *p;
+
+	p = (const unsigned char *)src;
+
+	if (!count)
+		return;
+
+	switch (((unsigned long)p) & 0x3)
+	{
+	 case 0x00:			/* Buffer 32-bit aligned */
+		while (count--)
+		{
+			outl(le32_to_cpu(*(unsigned int *)p), port);
+			p += 4;
+		}
+		break;
+
+	 case 0x02:			/* Buffer 16-bit aligned */
+		--count;
+
+		l = *(unsigned short *)p;
+		p += 2;
+
+		while (count--)
+		{
+			l2 = *(unsigned int *)p;
+			p += 4;
+			outl (le32_to_cpu(l << 16 | l2 >> 16), port);
+			l = l2;
+		}
+		l2 = *(unsigned short *)p;
+		outl (le32_to_cpu(l << 16 | l2), port);
+		break;
+	 case 0x01:			/* Buffer 8-bit aligned */
+		--count;
+
+		/* l holds the three bytes read so far in its upper 24 bits */
+		l  = (unsigned int)*p << 24;
+		p++;
+		l |= *(unsigned short *)p << 8;
+		p += 2;
+
+		while (count--)
+		{
+			l2 = *(unsigned int *)p;
+			p += 4;
+			outl (le32_to_cpu(l | l2 >> 24), port);
+			l = l2 << 8;
+		}
+		l2 = *p;
+		outl (le32_to_cpu(l | l2), port);
+		break;
+	 case 0x03:			/* Buffer 8-bit aligned */
+		--count;
+
+		/* l holds the single byte read so far in its top 8 bits */
+		l = (unsigned int)*p << 24;
+		p++;
+
+		while (count--)
+		{
+			l2 = *(unsigned int *)p;
+			p += 4;
+			outl (le32_to_cpu(l | l2 >> 8), port);
+			l = l2 << 24;
+		}
+		/*
+		 * Three bytes remain: a half-word for bits 23..8 and a
+		 * byte for bits 7..0.  The half-word must not reach into
+		 * the top byte already held in l.
+		 */
+		l2 = *(unsigned short *)p << 8;
+		p += 2;
+		l2 |= *p;
+		outl (le32_to_cpu(l | l2), port);
+		break;
+	}
+}
+
+EXPORT_SYMBOL(insb);
+EXPORT_SYMBOL(insw);
+EXPORT_SYMBOL(insl);
+EXPORT_SYMBOL(outsb);
+EXPORT_SYMBOL(outsw);
+EXPORT_SYMBOL(outsl);
diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c
new file mode 100644
index 00000000..fb8e10a4
--- /dev/null
+++ b/arch/parisc/lib/iomap.c
@@ -0,0 +1,464 @@
+/*
+ * iomap.c - Implement iomap interface for PA-RISC
+ * Copyright (c) 2004 Matthew Wilcox
+ */
+
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <linux/export.h>
+#include <asm/io.h>
+
+/*
+ * The iomap space on 32-bit PA-RISC is intended to look like this:
+ * 00000000-7fffffff virtual mapped IO
+ * 80000000-8fffffff ISA/EISA port space that can't be virtually mapped
+ * 90000000-9fffffff Dino port
space
+ * a0000000-afffffff Astro port space
+ * b0000000-bfffffff PAT port space
+ * c0000000-cfffffff non-swapped memory IO
+ * f0000000-ffffffff legacy IO memory pointers
+ *
+ * For the moment, here's what it looks like:
+ * 80000000-8fffffff All ISA/EISA port space
+ * f0000000-ffffffff legacy IO memory pointers
+ *
+ * On 64-bit, everything is extended, so:
+ * 8000000000000000-8fffffffffffffff All ISA/EISA port space
+ * f000000000000000-ffffffffffffffff legacy IO memory pointers
+ */
+
+/*
+ * Technically, this should be 'if (VMALLOC_START < addr < VMALLOC_END)',
+ * but that's slow and we know it'll be within the first 2GB.
+ *
+ * INDIRECT_ADDR()   - true when the top address bit is set
+ * ADDR_TO_REGION()  - the three address bits below the top bit
+ * IOPORT_MAP_BASE   - top bit set, region bits zero
+ *
+ * The macro arguments are fully parenthesised so that an expression
+ * such as "base + offset" is cast as a whole.
+ */
+#ifdef CONFIG_64BIT
+#define INDIRECT_ADDR(addr)	(((unsigned long)(addr) & (1UL << 63)) != 0)
+#define ADDR_TO_REGION(addr)	(((unsigned long)(addr) >> 60) & 7)
+#define IOPORT_MAP_BASE		(8UL << 60)
+#else
+#define INDIRECT_ADDR(addr)	(((unsigned long)(addr) & (1UL << 31)) != 0)
+#define ADDR_TO_REGION(addr)	(((unsigned long)(addr) >> 28) & 7)
+#define IOPORT_MAP_BASE		(8UL << 28)
+#endif
+
+/*
+ * Table of accessors: single reads and writes for 8, 16 and 32 bits
+ * (with "be" variants for 16 and 32 bits), followed by the repeating
+ * ("r") forms that take a buffer and an element count.
+ */
+struct iomap_ops {
+	unsigned int (*read8)(void __iomem *);
+	unsigned int (*read16)(void __iomem *);
+	unsigned int (*read16be)(void __iomem *);
+	unsigned int (*read32)(void __iomem *);
+	unsigned int (*read32be)(void __iomem *);
+	void (*write8)(u8, void __iomem *);
+	void (*write16)(u16, void __iomem *);
+	void (*write16be)(u16, void __iomem *);
+	void (*write32)(u32, void __iomem *);
+	void (*write32be)(u32, void __iomem *);
+	void (*read8r)(void __iomem *, void *, unsigned long);
+	void (*read16r)(void __iomem *, void *, unsigned long);
+	void (*read32r)(void __iomem *, void *, unsigned long);
+	void (*write8r)(void __iomem *, const void *, unsigned long);
+	void (*write16r)(void __iomem *, const void *, unsigned long);
+	void (*write32r)(void __iomem *, const void *, unsigned long);
+};
+
+/* Generic ioport ops.
To be replaced later by specific dino/elroy/wax code */ + +#define ADDR2PORT(addr) ((unsigned long __force)(addr) & 0xffffff) + +static unsigned int ioport_read8(void __iomem *addr) +{ + return inb(ADDR2PORT(addr)); +} + +static unsigned int ioport_read16(void __iomem *addr) +{ + return inw(ADDR2PORT(addr)); +} + +static unsigned int ioport_read32(void __iomem *addr) +{ + return inl(ADDR2PORT(addr)); +} + +static void ioport_write8(u8 datum, void __iomem *addr) +{ + outb(datum, ADDR2PORT(addr)); +} + +static void ioport_write16(u16 datum, void __iomem *addr) +{ + outw(datum, ADDR2PORT(addr)); +} + +static void ioport_write32(u32 datum, void __iomem *addr) +{ + outl(datum, ADDR2PORT(addr)); +} + +static void ioport_read8r(void __iomem *addr, void *dst, unsigned long count) +{ + insb(ADDR2PORT(addr), dst, count); +} + +static void ioport_read16r(void __iomem *addr, void *dst, unsigned long count) +{ + insw(ADDR2PORT(addr), dst, count); +} + +static void ioport_read32r(void __iomem *addr, void *dst, unsigned long count) +{ + insl(ADDR2PORT(addr), dst, count); +} + +static void ioport_write8r(void __iomem *addr, const void *s, unsigned long n) +{ + outsb(ADDR2PORT(addr), s, n); +} + +static void ioport_write16r(void __iomem *addr, const void *s, unsigned long n) +{ + outsw(ADDR2PORT(addr), s, n); +} + +static void ioport_write32r(void __iomem *addr, const void *s, unsigned long n) +{ + outsl(ADDR2PORT(addr), s, n); +} + +static const struct iomap_ops ioport_ops = { + ioport_read8, + ioport_read16, + ioport_read16, + ioport_read32, + ioport_read32, + ioport_write8, + ioport_write16, + ioport_write16, + ioport_write32, + ioport_write32, + ioport_read8r, + ioport_read16r, + ioport_read32r, + ioport_write8r, + ioport_write16r, + ioport_write32r, +}; + +/* Legacy I/O memory ops */ + +static unsigned int iomem_read8(void __iomem *addr) +{ + return readb(addr); +} + +static unsigned int iomem_read16(void __iomem *addr) +{ + return readw(addr); +} + +static unsigned int 
iomem_read16be(void __iomem *addr) +{ + return __raw_readw(addr); +} + +static unsigned int iomem_read32(void __iomem *addr) +{ + return readl(addr); +} + +static unsigned int iomem_read32be(void __iomem *addr) +{ + return __raw_readl(addr); +} + +static void iomem_write8(u8 datum, void __iomem *addr) +{ + writeb(datum, addr); +} + +static void iomem_write16(u16 datum, void __iomem *addr) +{ + writew(datum, addr); +} + +static void iomem_write16be(u16 datum, void __iomem *addr) +{ + __raw_writew(datum, addr); +} + +static void iomem_write32(u32 datum, void __iomem *addr) +{ + writel(datum, addr); +} + +static void iomem_write32be(u32 datum, void __iomem *addr) +{ + __raw_writel(datum, addr); +} + +static void iomem_read8r(void __iomem *addr, void *dst, unsigned long count) +{ + while (count--) { + *(u8 *)dst = __raw_readb(addr); + dst++; + } +} + +static void iomem_read16r(void __iomem *addr, void *dst, unsigned long count) +{ + while (count--) { + *(u16 *)dst = __raw_readw(addr); + dst += 2; + } +} + +static void iomem_read32r(void __iomem *addr, void *dst, unsigned long count) +{ + while (count--) { + *(u32 *)dst = __raw_readl(addr); + dst += 4; + } +} + +static void iomem_write8r(void __iomem *addr, const void *s, unsigned long n) +{ + while (n--) { + __raw_writeb(*(u8 *)s, addr); + s++; + } +} + +static void iomem_write16r(void __iomem *addr, const void *s, unsigned long n) +{ + while (n--) { + __raw_writew(*(u16 *)s, addr); + s += 2; + } +} + +static void iomem_write32r(void __iomem *addr, const void *s, unsigned long n) +{ + while (n--) { + __raw_writel(*(u32 *)s, addr); + s += 4; + } +} + +static const struct iomap_ops iomem_ops = { + iomem_read8, + iomem_read16, + iomem_read16be, + iomem_read32, + iomem_read32be, + iomem_write8, + iomem_write16, + iomem_write16be, + iomem_write32, + iomem_write32be, + iomem_read8r, + iomem_read16r, + iomem_read32r, + iomem_write8r, + iomem_write16r, + iomem_write32r, +}; + +static const struct iomap_ops *iomap_ops[8] = { + 
[0] = &ioport_ops, + [7] = &iomem_ops +}; + + +unsigned int ioread8(void __iomem *addr) +{ + if (unlikely(INDIRECT_ADDR(addr))) + return iomap_ops[ADDR_TO_REGION(addr)]->read8(addr); + return *((u8 *)addr); +} + +unsigned int ioread16(void __iomem *addr) +{ + if (unlikely(INDIRECT_ADDR(addr))) + return iomap_ops[ADDR_TO_REGION(addr)]->read16(addr); + return le16_to_cpup((u16 *)addr); +} + +unsigned int ioread16be(void __iomem *addr) +{ + if (unlikely(INDIRECT_ADDR(addr))) + return iomap_ops[ADDR_TO_REGION(addr)]->read16be(addr); + return *((u16 *)addr); +} + +unsigned int ioread32(void __iomem *addr) +{ + if (unlikely(INDIRECT_ADDR(addr))) + return iomap_ops[ADDR_TO_REGION(addr)]->read32(addr); + return le32_to_cpup((u32 *)addr); +} + +unsigned int ioread32be(void __iomem *addr) +{ + if (unlikely(INDIRECT_ADDR(addr))) + return iomap_ops[ADDR_TO_REGION(addr)]->read32be(addr); + return *((u32 *)addr); +} + +void iowrite8(u8 datum, void __iomem *addr) +{ + if (unlikely(INDIRECT_ADDR(addr))) { + iomap_ops[ADDR_TO_REGION(addr)]->write8(datum, addr); + } else { + *((u8 *)addr) = datum; + } +} + +void iowrite16(u16 datum, void __iomem *addr) +{ + if (unlikely(INDIRECT_ADDR(addr))) { + iomap_ops[ADDR_TO_REGION(addr)]->write16(datum, addr); + } else { + *((u16 *)addr) = cpu_to_le16(datum); + } +} + +void iowrite16be(u16 datum, void __iomem *addr) +{ + if (unlikely(INDIRECT_ADDR(addr))) { + iomap_ops[ADDR_TO_REGION(addr)]->write16be(datum, addr); + } else { + *((u16 *)addr) = datum; + } +} + +void iowrite32(u32 datum, void __iomem *addr) +{ + if (unlikely(INDIRECT_ADDR(addr))) { + iomap_ops[ADDR_TO_REGION(addr)]->write32(datum, addr); + } else { + *((u32 *)addr) = cpu_to_le32(datum); + } +} + +void iowrite32be(u32 datum, void __iomem *addr) +{ + if (unlikely(INDIRECT_ADDR(addr))) { + iomap_ops[ADDR_TO_REGION(addr)]->write32be(datum, addr); + } else { + *((u32 *)addr) = datum; + } +} + +/* Repeating interfaces */ + +void ioread8_rep(void __iomem *addr, void *dst, unsigned 
long count) +{ + if (unlikely(INDIRECT_ADDR(addr))) { + iomap_ops[ADDR_TO_REGION(addr)]->read8r(addr, dst, count); + } else { + while (count--) { + *(u8 *)dst = *(u8 *)addr; + dst++; + } + } +} + +void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +{ + if (unlikely(INDIRECT_ADDR(addr))) { + iomap_ops[ADDR_TO_REGION(addr)]->read16r(addr, dst, count); + } else { + while (count--) { + *(u16 *)dst = *(u16 *)addr; + dst += 2; + } + } +} + +void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +{ + if (unlikely(INDIRECT_ADDR(addr))) { + iomap_ops[ADDR_TO_REGION(addr)]->read32r(addr, dst, count); + } else { + while (count--) { + *(u32 *)dst = *(u32 *)addr; + dst += 4; + } + } +} + +void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count) +{ + if (unlikely(INDIRECT_ADDR(addr))) { + iomap_ops[ADDR_TO_REGION(addr)]->write8r(addr, src, count); + } else { + while (count--) { + *(u8 *)addr = *(u8 *)src; + src++; + } + } +} + +void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count) +{ + if (unlikely(INDIRECT_ADDR(addr))) { + iomap_ops[ADDR_TO_REGION(addr)]->write16r(addr, src, count); + } else { + while (count--) { + *(u16 *)addr = *(u16 *)src; + src += 2; + } + } +} + +void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count) +{ + if (unlikely(INDIRECT_ADDR(addr))) { + iomap_ops[ADDR_TO_REGION(addr)]->write32r(addr, src, count); + } else { + while (count--) { + *(u32 *)addr = *(u32 *)src; + src += 4; + } + } +} + +/* Mapping interfaces */ + +void __iomem *ioport_map(unsigned long port, unsigned int nr) +{ + return (void __iomem *)(IOPORT_MAP_BASE | port); +} + +void ioport_unmap(void __iomem *addr) +{ + if (!INDIRECT_ADDR(addr)) { + iounmap(addr); + } +} + +void pci_iounmap(struct pci_dev *dev, void __iomem * addr) +{ + if (!INDIRECT_ADDR(addr)) { + iounmap(addr); + } +} + +EXPORT_SYMBOL(ioread8); +EXPORT_SYMBOL(ioread16); +EXPORT_SYMBOL(ioread16be); +EXPORT_SYMBOL(ioread32); 
+EXPORT_SYMBOL(ioread32be); +EXPORT_SYMBOL(iowrite8); +EXPORT_SYMBOL(iowrite16); +EXPORT_SYMBOL(iowrite16be); +EXPORT_SYMBOL(iowrite32); +EXPORT_SYMBOL(iowrite32be); +EXPORT_SYMBOL(ioread8_rep); +EXPORT_SYMBOL(ioread16_rep); +EXPORT_SYMBOL(ioread32_rep); +EXPORT_SYMBOL(iowrite8_rep); +EXPORT_SYMBOL(iowrite16_rep); +EXPORT_SYMBOL(iowrite32_rep); +EXPORT_SYMBOL(ioport_map); +EXPORT_SYMBOL(ioport_unmap); +EXPORT_SYMBOL(pci_iounmap); diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S new file mode 100644 index 00000000..1bd23cce --- /dev/null +++ b/arch/parisc/lib/lusercopy.S @@ -0,0 +1,180 @@ +/* + * User Space Access Routines + * + * Copyright (C) 2000-2002 Hewlett-Packard (John Marvin) + * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org> + * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr> + * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * These routines still have plenty of room for optimization + * (word & doubleword load/store, dual issue, store hints, etc.). + */ + +/* + * The following routines assume that space register 3 (sr3) contains + * the space id associated with the current users address space. 
+ */ + + + .text + +#include <asm/assembly.h> +#include <asm/errno.h> +#include <linux/linkage.h> + + /* + * get_sr gets the appropriate space value into + * sr1 for kernel/user space access, depending + * on the flag stored in the task structure. + */ + + .macro get_sr + mfctl %cr30,%r1 + ldw TI_SEGMENT(%r1),%r22 + mfsp %sr3,%r1 + or,<> %r22,%r0,%r0 + copy %r0,%r1 + mtsp %r1,%sr1 + .endm + + .macro fixup_branch lbl + ldil L%\lbl, %r1 + ldo R%\lbl(%r1), %r1 + bv %r0(%r1) + .endm + + /* + * long lstrncpy_from_user(char *dst, const char *src, long n) + * + * Returns -EFAULT if exception before terminator, + * N if the entire buffer filled, + * otherwise strlen (i.e. excludes zero byte) + */ + +ENTRY(lstrncpy_from_user) + .proc + .callinfo NO_CALLS + .entry + comib,= 0,%r24,$lsfu_done + copy %r24,%r23 + get_sr +1: ldbs,ma 1(%sr1,%r25),%r1 +$lsfu_loop: + stbs,ma %r1,1(%r26) + comib,=,n 0,%r1,$lsfu_done + addib,<>,n -1,%r24,$lsfu_loop +2: ldbs,ma 1(%sr1,%r25),%r1 +$lsfu_done: + sub %r23,%r24,%r28 +$lsfu_exit: + bv %r0(%r2) + nop + .exit +ENDPROC(lstrncpy_from_user) + + .section .fixup,"ax" +3: fixup_branch $lsfu_exit + ldi -EFAULT,%r28 + .previous + + .section __ex_table,"aw" + ASM_ULONG_INSN 1b,3b + ASM_ULONG_INSN 2b,3b + .previous + + .procend + + /* + * unsigned long lclear_user(void *to, unsigned long n) + * + * Returns 0 for success. + * otherwise, returns number of bytes not transferred. + */ + +ENTRY(lclear_user) + .proc + .callinfo NO_CALLS + .entry + comib,=,n 0,%r25,$lclu_done + get_sr +$lclu_loop: + addib,<> -1,%r25,$lclu_loop +1: stbs,ma %r0,1(%sr1,%r26) + +$lclu_done: + bv %r0(%r2) + copy %r25,%r28 + .exit +ENDPROC(lclear_user) + + .section .fixup,"ax" +2: fixup_branch $lclu_done + ldo 1(%r25),%r25 + .previous + + .section __ex_table,"aw" + ASM_ULONG_INSN 1b,2b + .previous + + .procend + + /* + * long lstrnlen_user(char *s, long n) + * + * Returns 0 if exception before zero byte or reaching N, + * N+1 if N would be exceeded, + * else strlen + 1 (i.e. 
includes zero byte).
+	 *
+	 * Register use, as read from the code below: %r26 = s (advanced
+	 * past every byte read through %sr1), %r25 = remaining count,
+	 * %r24 = copy of the original s, %r28 = result = %r26 - %r24.
+	 */
+
+ENTRY(lstrnlen_user)
+	.proc
+	.callinfo NO_CALLS
+	.entry
+	comib,= 0,%r25,$lslen_nzero
+	copy %r26,%r24	/* remember the start of the string */
+	get_sr
+1:	ldbs,ma 1(%sr1,%r26),%r1	/* first byte; faults go to 3: */
+$lslen_loop:
+	comib,=,n 0,%r1,$lslen_done	/* zero byte found */
+	addib,<> -1,%r25,$lslen_loop	/* count down; loop while non-zero */
+2:	ldbs,ma 1(%sr1,%r26),%r1	/* next byte; faults go to 3: */
+$lslen_done:
+	bv %r0(%r2)
+	sub %r26,%r24,%r28	/* result = bytes stepped over */
+	.exit
+
+$lslen_nzero:
+	b $lslen_done
+	ldo 1(%r26),%r26 /* special case for N == 0 */
+ENDPROC(lstrnlen_user)
+
+	.section .fixup,"ax"
+3:	fixup_branch $lslen_done
+	copy %r24,%r26 /* reset r26 so 0 is returned on fault */
+	.previous
+
+	.section __ex_table,"aw"
+	ASM_ULONG_INSN 1b,3b
+	ASM_ULONG_INSN 2b,3b
+	.previous
+
+	.procend
+
+	.end
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
new file mode 100644
index 00000000..1dbca5c3
--- /dev/null
+++ b/arch/parisc/lib/memcpy.c
@@ -0,0 +1,506 @@
+/*
+ * Optimized memory copy routines.
+ *
+ * Copyright (C) 2004 Randolph Chung <tausq@debian.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Portions derived from the GNU C Library
+ * Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
+ *
+ * Several strategies are tried to get the best performance for various
+ * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using
+ * fp regs.
This is followed by loops that copy 32 or 16 bytes at a time using + * general registers. Unaligned copies are handled either by aligning the + * destination and then using a shift-and-write method, or in a few cases by + * falling back to a byte-at-a-time copy. + * + * I chose to implement this in C because it is easier to maintain and debug, + * and in my experiments it appears that the C code generated by gcc (3.3/3.4 + * at the time of writing) is fairly optimal. Unfortunately some of the + * semantics of the copy routine (exception handling) are difficult to express + * in C, so we have to play some tricks to get it to work. + * + * All the loads and stores are done via explicit asm() code in order to use + * the right space registers. + * + * Testing with various alignments and buffer sizes shows that this code is + * often >10x faster than a simple byte-at-a-time copy, even for strangely + * aligned operands. It is interesting to note that the glibc version + * of memcpy (written in C) is actually quite fast already. This routine is + * able to beat it by 30-40% for aligned copies because of the loop unrolling, + * but in some cases the glibc version is still slightly faster. This lends + * more credibility to the idea that gcc can generate very good code as long as we are + * careful.
+ *
+ * TODO:
+ * - cache prefetching needs more experimentation to get optimal settings
+ * - try not to use the post-increment address modifiers; they create additional
+ * interlocks
+ * - replace byte-copy loops with stybs sequences
+ */
+/*
+ * s_space / d_space are string fragments spliced into the load/store asm
+ * templates further down: loads name s_space and stores name d_space.
+ * The kernel build uses %sr1 / %sr2; the non-kernel build uses %sr0 for
+ * both and renames pa_memcpy to new2_copy.
+ */
+#ifdef __KERNEL__
+#include <linux/module.h>
+#include <linux/compiler.h>
+#include <asm/uaccess.h>
+#define s_space "%%sr1"
+#define d_space "%%sr2"
+#else
+#include "memcpy.h"
+#define s_space "%%sr0"
+#define d_space "%%sr0"
+#define pa_memcpy new2_copy
+#endif
+/* Per-CPU record; the fault paths in this file read its fault_addr field. */
+DECLARE_PER_CPU(struct exception_data, exception_data);
+/*
+ * preserve_branch(label): a conditional goto to a label that is otherwise
+ * reached only through the exception table, so the compiler still sees a
+ * path to it.  The condition compares a volatile local with itself.
+ */
+#define preserve_branch(label) do { \
+	volatile int dummy; \
+	/* The following branch is never taken, it's just here to */ \
+	/* prevent gcc from optimizing away our exception code. */ \
+	if (unlikely(dummy != dummy)) \
+		goto label; \
+} while (0)
+/* Space id for user accesses: 0 when get_fs() is KERNEL_DS, else mfsp(3). */
+#define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3))
+#define get_kernel_space() (0)
+/*
+ * MERGE(w0, sh_1, w1, sh_2): loads sh_2 into %sar with mtsar, then applies
+ * shrpw to w0 and w1 with %sar as the shift amount and yields the result.
+ * sh_1 is accepted for symmetry with the glibc original but is not
+ * referenced by the asm.
+ * NOTE(review): the asm has no clobber list although mtsar writes %sar --
+ * confirm this is safe with the compilers in use.
+ */
+#define MERGE(w0, sh_1, w1, sh_2) ({ \
+	unsigned int _r; \
+	asm volatile ( \
+	"mtsar %3\n" \
+	"shrpw %1, %2, %%sar, %0\n" \
+	: "=r"(_r) \
+	: "r"(w0), "r"(w1), "r"(sh_2) \
+	); \
+	_r; \
+})
+#define THRESHOLD 16 /* pa_memcpy byte-copies anything shorter than this */
+/* DPRINTF: debug trace, active only when DEBUG_MEMCPY is defined; otherwise it expands to nothing. */
+#ifdef DEBUG_MEMCPY
+#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __func__ ); printk(KERN_DEBUG fmt, ##args ); } while (0)
+#else
+#define DPRINTF(fmt, args...)
+#endif
+/*
+ * def_load_ai_insn / def_store_ai_insn: emit one load or store using the
+ * ",ma" completer with displacement _sz in space _s.  The address _a is a
+ * read-write ("+r") operand, so the caller's pointer variable is updated
+ * by the instruction.  _t is the data operand with constraint _tt.  _e is
+ * the label recorded in the exception table for the instruction at local
+ * label 1.
+ * NOTE(review): r8 is listed as clobbered although the template does not
+ * name it -- presumably the fault path writes it; confirm.
+ */
+#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \
+	__asm__ __volatile__ ( \
+	"1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n\t" \
+	ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
+	: _tt(_t), "+r"(_a) \
+	: \
+	: "r8")
+
+#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \
+	__asm__ __volatile__ ( \
+	"1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n\t" \
+	ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
+	: "+r"(_a) \
+	: _tt(_t) \
+	: "r8")
+/* Byte, word and double-word wrappers; loads take (space, addr, target, label), stores take (space, value, addr, label). */
+#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e)
+#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e)
+#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e)
+#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e)
+#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e)
+#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e)
+/*
+ * def_load_insn / def_store_insn: same idea with a fixed displacement _o
+ * and no address update; _a is an input-only operand here.
+ */
+#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) \
+	__asm__ __volatile__ ( \
+	"1:\t" #_insn " " #_o "(" _s ",%1), %0\n\t" \
+	ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
+	: _tt(_t) \
+	: "r"(_a) \
+	: "r8")
+
+#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) \
+	__asm__ __volatile__ ( \
+	"1:\t" #_insn " %0, " #_o "(" _s ",%1)\n\t" \
+	ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
+	: \
+	: _tt(_t), "r"(_a) \
+	: "r8")
+
+#define ldw(_s,_o,_a,_t,_e) def_load_insn(ldw,"=r",_s,_o,_a,_t,_e)
+#define stw(_s,_t,_o,_a,_e) def_store_insn(stw,"r",_s,_t,_o,_a,_e)
+/*
+ * prefetch_src / prefetch_dst: with CONFIG_PREFETCH each issues a load
+ * from addr (ldw in s_space, ldd in d_space) whose target is %r0;
+ * without it both are empty statements.  Every call site visible in this
+ * file is commented out.
+ */
+#ifdef CONFIG_PREFETCH
+static inline void prefetch_src(const void *addr)
+{
+	__asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr));
+}
+
+static inline void prefetch_dst(const void *addr)
+{
+	__asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr));
+}
+#else
+#define prefetch_src(addr) do { } while(0)
+#define prefetch_dst(addr) do { } while(0)
+#endif
+
+/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words
+ * per loop. This code is derived from glibc.
+ */
+/*
+ * Parameters, as used by the caller pa_memcpy():
+ *   dst, src - current destination and source addresses; src is rounded
+ *              down to a word boundary here and the bytes are re-assembled
+ *              with MERGE().
+ *   len      - number of 32-bit words to copy (the caller passes its
+ *              remaining byte count divided by sizeof(unsigned int)).
+ *   o_dst, o_src, o_len - destination, source and length of the original
+ *              pa_memcpy() request; o_len is in BYTES.
+ *
+ * Returns 0 on success.  If a load or store faults, control arrives at
+ * the cda_ldw_exc / cda_stw_exc asm labels (registered through the
+ * ldw()/stw() macros) and the function returns the number of bytes of the
+ * original request that were not copied:
+ * o_len - (fault_addr - o_src) or o_len - (fault_addr - o_dst).
+ * o_len is already a byte count, so it is not scaled; this matches the
+ * DPRINTF output below and the handlers in pa_memcpy().
+ */
+static inline unsigned long copy_dstaligned(unsigned long dst, unsigned long src, unsigned long len, unsigned long o_dst, unsigned long o_src, unsigned long o_len)
+{
+	/* gcc complains that a2 and a3 may be uninitialized, but actually
+	 * they cannot be. Initialize a2/a3 to shut gcc up.
+	 */
+	register unsigned int a0, a1, a2 = 0, a3 = 0;
+	int sh_1, sh_2;
+	struct exception_data *d;
+
+	/* prefetch_src((const void *)src); */
+
+	/* Calculate how to shift a word read at the memory operation
+	   aligned srcp to make it aligned for copy. */
+	sh_1 = 8 * (src % sizeof(unsigned int));
+	sh_2 = 8 * sizeof(unsigned int) - sh_1;
+
+	/* Make src aligned by rounding it down. */
+	src &= -sizeof(unsigned int);
+
+	/*
+	 * Enter the 4-way unrolled loop at the point that matches len % 4.
+	 * Each case preloads two source words and biases src, dst and len so
+	 * that the fixed offsets used inside the loop line up.
+	 */
+	switch (len % 4)
+	{
+	case 2:
+		/* a1 = ((unsigned int *) src)[0];
+		   a2 = ((unsigned int *) src)[1]; */
+		ldw(s_space, 0, src, a1, cda_ldw_exc);
+		ldw(s_space, 4, src, a2, cda_ldw_exc);
+		src -= 1 * sizeof(unsigned int);
+		dst -= 3 * sizeof(unsigned int);
+		len += 2;
+		goto do1;
+	case 3:
+		/* a0 = ((unsigned int *) src)[0];
+		   a1 = ((unsigned int *) src)[1]; */
+		ldw(s_space, 0, src, a0, cda_ldw_exc);
+		ldw(s_space, 4, src, a1, cda_ldw_exc);
+		src -= 0 * sizeof(unsigned int);
+		dst -= 2 * sizeof(unsigned int);
+		len += 1;
+		goto do2;
+	case 0:
+		if (len == 0)
+			return 0;
+		/* a3 = ((unsigned int *) src)[0];
+		   a0 = ((unsigned int *) src)[1]; */
+		ldw(s_space, 0, src, a3, cda_ldw_exc);
+		ldw(s_space, 4, src, a0, cda_ldw_exc);
+		src -=-1 * sizeof(unsigned int);
+		dst -= 1 * sizeof(unsigned int);
+		len += 0;
+		goto do3;
+	case 1:
+		/* a2 = ((unsigned int *) src)[0];
+		   a3 = ((unsigned int *) src)[1]; */
+		ldw(s_space, 0, src, a2, cda_ldw_exc);
+		ldw(s_space, 4, src, a3, cda_ldw_exc);
+		src -=-2 * sizeof(unsigned int);
+		dst -= 0 * sizeof(unsigned int);
+		len -= 1;
+		if (len == 0)
+			goto do0;
+		goto do4; /* No-op. */
+	}
+
+	do
+	{
+		/* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */
+do4:
+		/* a0 = ((unsigned int *) src)[0]; */
+		ldw(s_space, 0, src, a0, cda_ldw_exc);
+		/* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
+		stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
+do3:
+		/* a1 = ((unsigned int *) src)[1]; */
+		ldw(s_space, 4, src, a1, cda_ldw_exc);
+		/* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */
+		stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc);
+do2:
+		/* a2 = ((unsigned int *) src)[2]; */
+		ldw(s_space, 8, src, a2, cda_ldw_exc);
+		/* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */
+		stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc);
+do1:
+		/* a3 = ((unsigned int *) src)[3]; */
+		ldw(s_space, 12, src, a3, cda_ldw_exc);
+		/* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */
+		stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc);
+
+		src += 4 * sizeof(unsigned int);
+		dst += 4 * sizeof(unsigned int);
+		len -= 4;
+	}
+	while (len != 0);
+
+do0:
+	/* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
+	stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
+
+	/* Keep the two handlers below reachable as far as gcc can tell. */
+	preserve_branch(handle_load_error);
+	preserve_branch(handle_store_error);
+
+	return 0;
+
+handle_load_error:
+	__asm__ __volatile__ ("cda_ldw_exc:\n");
+	d = &__get_cpu_var(exception_data);
+	DPRINTF("cda_ldw_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n",
+		o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src);
+	/* o_len is in bytes already: do not scale it by the word size. */
+	return o_len - d->fault_addr + o_src;
+
+handle_store_error:
+	__asm__ __volatile__ ("cda_stw_exc:\n");
+	d = &__get_cpu_var(exception_data);
+	DPRINTF("cda_stw_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n",
+		o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst);
+	/* o_len is in bytes already: do not scale it by the word size. */
+	return o_len - d->fault_addr + o_dst;
+}
+
+
+/* Returns 0 for success, otherwise, returns number of bytes not transferred.
*/
+static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
+{
+	register unsigned long src, dst, t1, t2, t3;
+	register unsigned char *pcs, *pcd;
+	register unsigned int *pws, *pwd;
+	register double *pds, *pdd;
+	unsigned long ret = 0;
+	unsigned long o_dst, o_src, o_len;
+	struct exception_data *d;
+	/*
+	 * Overview of the paths below:
+	 *  - len < THRESHOLD: straight to the byte loop.
+	 *  - src and dst agree modulo sizeof(double): byte-copy up to a
+	 *    double boundary, then the unrolled word loops.
+	 *  - they agree only modulo sizeof(unsigned int): byte-copy up to a
+	 *    word boundary, then the same word loops.
+	 *  - otherwise: word-align dst and let copy_dstaligned() do the
+	 *    shifted copy.
+	 * Every path finishes the tail in the byte loop.  Loads go through
+	 * s_space and stores through d_space; the callers set up the
+	 * corresponding space registers before calling.
+	 */
+	src = (unsigned long)srcp;
+	dst = (unsigned long)dstp;
+	pcs = (unsigned char *)srcp;
+	pcd = (unsigned char *)dstp;
+	/* Remember the original request; the fault handlers at the bottom derive the return value from it. */
+	o_dst = dst; o_src = src; o_len = len;
+
+	/* prefetch_src((const void *)srcp); */
+
+	if (len < THRESHOLD)
+		goto byte_copy;
+
+	/* Check alignment */
+	t1 = (src ^ dst);
+	if (unlikely(t1 & (sizeof(double)-1)))
+		goto unaligned_copy;
+
+	/* src and dst have same alignment. */
+
+	/* Copy bytes till we are double-aligned. */
+	t2 = src & (sizeof(double) - 1);
+	if (unlikely(t2 != 0)) {
+		t2 = sizeof(double) - t2;
+		while (t2 && len) {
+			/* *pcd++ = *pcs++; */
+			ldbma(s_space, pcs, t3, pmc_load_exc);
+			len--;
+			stbma(d_space, t3, pcd, pmc_store_exc);
+			t2--;
+		}
+	}
+
+	pds = (double *)pcs;
+	pdd = (double *)pcd;
+
+#if 0
+	/* Copy 8 doubles at a time */
+	while (len >= 8*sizeof(double)) {
+		register double r1, r2, r3, r4, r5, r6, r7, r8;
+		/* prefetch_src((char *)pds + L1_CACHE_BYTES); */
+		flddma(s_space, pds, r1, pmc_load_exc);
+		flddma(s_space, pds, r2, pmc_load_exc);
+		flddma(s_space, pds, r3, pmc_load_exc);
+		flddma(s_space, pds, r4, pmc_load_exc);
+		fstdma(d_space, r1, pdd, pmc_store_exc);
+		fstdma(d_space, r2, pdd, pmc_store_exc);
+		fstdma(d_space, r3, pdd, pmc_store_exc);
+		fstdma(d_space, r4, pdd, pmc_store_exc);
+
+#if 0
+		if (L1_CACHE_BYTES <= 32)
+			prefetch_src((char *)pds + L1_CACHE_BYTES);
+#endif
+		flddma(s_space, pds, r5, pmc_load_exc);
+		flddma(s_space, pds, r6, pmc_load_exc);
+		flddma(s_space, pds, r7, pmc_load_exc);
+		flddma(s_space, pds, r8, pmc_load_exc);
+		fstdma(d_space, r5, pdd, pmc_store_exc);
+		fstdma(d_space, r6, pdd, pmc_store_exc);
+		fstdma(d_space, r7, pdd, pmc_store_exc);
+		fstdma(d_space, r8, pdd, pmc_store_exc);
+		len -= 8*sizeof(double);
+	}
+#endif
+	/* The floating-point loop above is compiled out, so the word loops do all aligned bulk copying. */
+	pws = (unsigned int *)pds;
+	pwd = (unsigned int *)pdd;
+
+word_copy:
+	while (len >= 8*sizeof(unsigned int)) {
+		register unsigned int r1,r2,r3,r4,r5,r6,r7,r8;
+		/* prefetch_src((char *)pws + L1_CACHE_BYTES); */
+		ldwma(s_space, pws, r1, pmc_load_exc);
+		ldwma(s_space, pws, r2, pmc_load_exc);
+		ldwma(s_space, pws, r3, pmc_load_exc);
+		ldwma(s_space, pws, r4, pmc_load_exc);
+		stwma(d_space, r1, pwd, pmc_store_exc);
+		stwma(d_space, r2, pwd, pmc_store_exc);
+		stwma(d_space, r3, pwd, pmc_store_exc);
+		stwma(d_space, r4, pwd, pmc_store_exc);
+
+		ldwma(s_space, pws, r5, pmc_load_exc);
+		ldwma(s_space, pws, r6, pmc_load_exc);
+		ldwma(s_space, pws, r7, pmc_load_exc);
+		ldwma(s_space, pws, r8, pmc_load_exc);
+		stwma(d_space, r5, pwd, pmc_store_exc);
+		stwma(d_space, r6, pwd, pmc_store_exc);
+		stwma(d_space, r7, pwd, pmc_store_exc);
+		stwma(d_space, r8, pwd, pmc_store_exc);
+		len -= 8*sizeof(unsigned int);
+	}
+	/* At most one 4-word group can remain after the 8-word loop. */
+	while (len >= 4*sizeof(unsigned int)) {
+		register unsigned int r1,r2,r3,r4;
+		ldwma(s_space, pws, r1, pmc_load_exc);
+		ldwma(s_space, pws, r2, pmc_load_exc);
+		ldwma(s_space, pws, r3, pmc_load_exc);
+		ldwma(s_space, pws, r4, pmc_load_exc);
+		stwma(d_space, r1, pwd, pmc_store_exc);
+		stwma(d_space, r2, pwd, pmc_store_exc);
+		stwma(d_space, r3, pwd, pmc_store_exc);
+		stwma(d_space, r4, pwd, pmc_store_exc);
+		len -= 4*sizeof(unsigned int);
+	}
+
+	pcs = (unsigned char *)pws;
+	pcd = (unsigned char *)pwd;
+
+byte_copy:
+	while (len) {
+		/* *pcd++ = *pcs++; */
+		ldbma(s_space, pcs, t3, pmc_load_exc);
+		stbma(d_space, t3, pcd, pmc_store_exc);
+		len--;
+	}
+
+	return 0;
+
+unaligned_copy:
+	/* possibly we are aligned on a word, but not on a double... */
+	if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) {
+		t2 = src & (sizeof(unsigned int) - 1);
+
+		if (unlikely(t2 != 0)) {
+			t2 = sizeof(unsigned int) - t2;
+			while (t2) {
+				/* *pcd++ = *pcs++; */
+				ldbma(s_space, pcs, t3, pmc_load_exc);
+				stbma(d_space, t3, pcd, pmc_store_exc);
+				len--;
+				t2--;
+			}
+		}
+
+		pws = (unsigned int *)pcs;
+		pwd = (unsigned int *)pcd;
+		goto word_copy;
+	}
+
+	/* Align the destination. */
+	if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) {
+		t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1));
+		while (t2) {
+			/* *pcd++ = *pcs++; */
+			ldbma(s_space, pcs, t3, pmc_load_exc);
+			stbma(d_space, t3, pcd, pmc_store_exc);
+			len--;
+			t2--;
+		}
+		dst = (unsigned long)pcd;
+		src = (unsigned long)pcs;
+	}
+	/* Whole words go through the shifting copy; it is given the original request so it can report a fault itself. */
+	ret = copy_dstaligned(dst, src, len / sizeof(unsigned int),
+		o_dst, o_src, o_len);
+	if (ret)
+		return ret;
+	/* Step past the words just copied; the remaining 0..3 bytes go to the byte loop. */
+	pcs += (len & -sizeof(unsigned int));
+	pcd += (len & -sizeof(unsigned int));
+	len %= sizeof(unsigned int);
+	/* Keep the two handlers below reachable as far as gcc can tell. */
+	preserve_branch(handle_load_error);
+	preserve_branch(handle_store_error);
+
+	goto byte_copy;
+	/* Reached via the pmc_load_exc label named in every load macro above: bytes not copied = o_len - (fault_addr - o_src). */
+handle_load_error:
+	__asm__ __volatile__ ("pmc_load_exc:\n");
+	d = &__get_cpu_var(exception_data);
+	DPRINTF("pmc_load_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n",
+		o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src);
+	return o_len - d->fault_addr + o_src;
+	/* Reached via the pmc_store_exc label named in every store macro above: bytes not copied = o_len - (fault_addr - o_dst). */
+handle_store_error:
+	__asm__ __volatile__ ("pmc_store_exc:\n");
+	d = &__get_cpu_var(exception_data);
+	DPRINTF("pmc_store_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n",
+		o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst);
+	return o_len - d->fault_addr + o_dst;
+}
+/*
+ * Kernel entry points.  Each passes a space id to mtsp() for registers 1
+ * and 2 -- the registers named by s_space (source) and d_space
+ * (destination) in the kernel build -- and then calls pa_memcpy().  The
+ * three user-copy wrappers return pa_memcpy()'s result unchanged.
+ */
+#ifdef __KERNEL__
+unsigned long copy_to_user(void __user *dst, const void *src, unsigned long len)
+{
+	mtsp(get_kernel_space(), 1);	/* source: kernel */
+	mtsp(get_user_space(), 2);	/* destination: user */
+	return pa_memcpy((void __force *)dst, src, len);
+}
+
+EXPORT_SYMBOL(__copy_from_user);
+unsigned long __copy_from_user(void *dst, const void __user *src, unsigned long len)
+{
+	mtsp(get_user_space(), 1);	/* source: user */
+	mtsp(get_kernel_space(), 2);	/* destination: kernel */
+	return pa_memcpy(dst, (void __force *)src, len);
+}
+
+unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned long len)
+{
+	mtsp(get_user_space(), 1);	/* source: user */
+	mtsp(get_user_space(), 2);	/* destination: user */
+	return pa_memcpy((void __force *)dst, (void __force *)src, len);
+}
+
+/* Kernel-to-kernel copy; pa_memcpy()'s return value is ignored and dst is returned. */
+void * memcpy(void * dst,const void *src, size_t count)
+{
+	mtsp(get_kernel_space(), 1);
+	mtsp(get_kernel_space(), 2);
+	pa_memcpy(dst, src, count);
+	return dst;
+}
+/* NOTE(review): copy_from_user is exported below but only __copy_from_user is defined in this file -- presumably it is provided elsewhere; confirm. */
+EXPORT_SYMBOL(copy_to_user);
+EXPORT_SYMBOL(copy_from_user);
+EXPORT_SYMBOL(copy_in_user);
+EXPORT_SYMBOL(memcpy);
+#endif
diff --git a/arch/parisc/lib/memset.c b/arch/parisc/lib/memset.c
new file mode 100644
index 00000000..1d7929bd
--- /dev/null
+++ b/arch/parisc/lib/memset.c
@@ -0,0 +1,91 @@
+/* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA.
*/ + +/* Slight modifications for pa-risc linux - Paul Bame <bame@debian.org> */ + +#include <linux/types.h> +#include <asm/string.h> + +#define OPSIZ (BITS_PER_LONG/8) +typedef unsigned long op_t; + +void * +memset (void *dstpp, int sc, size_t len) +{ + unsigned int c = sc; + long int dstp = (long int) dstpp; + + if (len >= 8) + { + size_t xlen; + op_t cccc; + + cccc = (unsigned char) c; + cccc |= cccc << 8; + cccc |= cccc << 16; + if (OPSIZ > 4) + /* Do the shift in two steps to avoid warning if long has 32 bits. */ + cccc |= (cccc << 16) << 16; + + /* There are at least some bytes to set. + No need to test for LEN == 0 in this alignment loop. */ + while (dstp % OPSIZ != 0) + { + ((unsigned char *) dstp)[0] = c; + dstp += 1; + len -= 1; + } + + /* Write 8 `op_t' per iteration until less than 8 `op_t' remain. */ + xlen = len / (OPSIZ * 8); + while (xlen > 0) + { + ((op_t *) dstp)[0] = cccc; + ((op_t *) dstp)[1] = cccc; + ((op_t *) dstp)[2] = cccc; + ((op_t *) dstp)[3] = cccc; + ((op_t *) dstp)[4] = cccc; + ((op_t *) dstp)[5] = cccc; + ((op_t *) dstp)[6] = cccc; + ((op_t *) dstp)[7] = cccc; + dstp += 8 * OPSIZ; + xlen -= 1; + } + len %= OPSIZ * 8; + + /* Write 1 `op_t' per iteration until less than OPSIZ bytes remain. */ + xlen = len / OPSIZ; + while (xlen > 0) + { + ((op_t *) dstp)[0] = cccc; + dstp += OPSIZ; + xlen -= 1; + } + len %= OPSIZ; + } + + /* Write the last few bytes. */ + while (len > 0) + { + ((unsigned char *) dstp)[0] = c; + dstp += 1; + len -= 1; + } + + return dstpp; +} |