summaryrefslogtreecommitdiff
path: root/ANDROID_3.4.5/arch/parisc/lib/memcpy.c
diff options
context:
space:
mode:
Diffstat (limited to 'ANDROID_3.4.5/arch/parisc/lib/memcpy.c')
-rw-r--r--ANDROID_3.4.5/arch/parisc/lib/memcpy.c506
1 files changed, 0 insertions, 506 deletions
diff --git a/ANDROID_3.4.5/arch/parisc/lib/memcpy.c b/ANDROID_3.4.5/arch/parisc/lib/memcpy.c
deleted file mode 100644
index 1dbca5c3..00000000
--- a/ANDROID_3.4.5/arch/parisc/lib/memcpy.c
+++ /dev/null
@@ -1,506 +0,0 @@
-/*
- * Optimized memory copy routines.
- *
- * Copyright (C) 2004 Randolph Chung <tausq@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Portions derived from the GNU C Library
- * Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
- *
- * Several strategies are tried to try to get the best performance for various
- * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using
- * fp regs. This is followed by loops that copy 32- or 16-bytes at a time using
- * general registers. Unaligned copies are handled either by aligning the
- * destination and then using shift-and-write method, or in a few cases by
- * falling back to a byte-at-a-time copy.
- *
- * I chose to implement this in C because it is easier to maintain and debug,
- * and in my experiments it appears that the C code generated by gcc (3.3/3.4
- * at the time of writing) is fairly optimal. Unfortunately some of the
- * semantics of the copy routine (exception handling) is difficult to express
- * in C, so we have to play some tricks to get it to work.
- *
- * All the loads and stores are done via explicit asm() code in order to use
- * the right space registers.
- *
- * Testing with various alignments and buffer sizes shows that this code is
- * often >10x faster than a simple byte-at-a-time copy, even for strangely
- * aligned operands. It is interesting to note that the glibc version
- * of memcpy (written in C) is actually quite fast already. This routine is
- * able to beat it by 30-40% for aligned copies because of the loop unrolling,
- * but in some cases the glibc version is still slightly faster. This lends
- * more credibility that gcc can generate very good code as long as we are
- * careful.
- *
- * TODO:
- * - cache prefetching needs more experimentation to get optimal settings
- * - try not to use the post-increment address modifiers; they create additional
- * interlocks
- * - replace byte-copy loops with stybs sequences
- */
-
-#ifdef __KERNEL__
-#include <linux/module.h>
-#include <linux/compiler.h>
-#include <asm/uaccess.h>
-#define s_space "%%sr1"
-#define d_space "%%sr2"
-#else
-#include "memcpy.h"
-#define s_space "%%sr0"
-#define d_space "%%sr0"
-#define pa_memcpy new2_copy
-#endif
-
-DECLARE_PER_CPU(struct exception_data, exception_data);
-
-#define preserve_branch(label) do { \
- volatile int dummy; \
- /* The following branch is never taken, it's just here to */ \
- /* prevent gcc from optimizing away our exception code. */ \
- if (unlikely(dummy != dummy)) \
- goto label; \
-} while (0)
-
-#define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3))
-#define get_kernel_space() (0)
-
-#define MERGE(w0, sh_1, w1, sh_2) ({ \
- unsigned int _r; \
- asm volatile ( \
- "mtsar %3\n" \
- "shrpw %1, %2, %%sar, %0\n" \
- : "=r"(_r) \
- : "r"(w0), "r"(w1), "r"(sh_2) \
- ); \
- _r; \
-})
-#define THRESHOLD 16
-
-#ifdef DEBUG_MEMCPY
-#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __func__ ); printk(KERN_DEBUG fmt, ##args ); } while (0)
-#else
-#define DPRINTF(fmt, args...)
-#endif
-
-#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \
- __asm__ __volatile__ ( \
- "1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n\t" \
- ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
- : _tt(_t), "+r"(_a) \
- : \
- : "r8")
-
-#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \
- __asm__ __volatile__ ( \
- "1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n\t" \
- ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
- : "+r"(_a) \
- : _tt(_t) \
- : "r8")
-
-#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e)
-#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e)
-#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e)
-#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e)
-#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e)
-#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e)
-
-#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) \
- __asm__ __volatile__ ( \
- "1:\t" #_insn " " #_o "(" _s ",%1), %0\n\t" \
- ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
- : _tt(_t) \
- : "r"(_a) \
- : "r8")
-
-#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) \
- __asm__ __volatile__ ( \
- "1:\t" #_insn " %0, " #_o "(" _s ",%1)\n\t" \
- ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
- : \
- : _tt(_t), "r"(_a) \
- : "r8")
-
-#define ldw(_s,_o,_a,_t,_e) def_load_insn(ldw,"=r",_s,_o,_a,_t,_e)
-#define stw(_s,_t,_o,_a,_e) def_store_insn(stw,"r",_s,_t,_o,_a,_e)
-
-#ifdef CONFIG_PREFETCH
-static inline void prefetch_src(const void *addr)
-{
- __asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr));
-}
-
-static inline void prefetch_dst(const void *addr)
-{
- __asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr));
-}
-#else
-#define prefetch_src(addr) do { } while(0)
-#define prefetch_dst(addr) do { } while(0)
-#endif
-
-/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words
- * per loop. This code is derived from glibc.
- */
-static inline unsigned long copy_dstaligned(unsigned long dst, unsigned long src, unsigned long len, unsigned long o_dst, unsigned long o_src, unsigned long o_len)
-{
- /* gcc complains that a2 and a3 may be uninitialized, but actually
- * they cannot be. Initialize a2/a3 to shut gcc up.
- */
- register unsigned int a0, a1, a2 = 0, a3 = 0;
- int sh_1, sh_2;
- struct exception_data *d;
-
- /* prefetch_src((const void *)src); */
-
- /* Calculate how to shift a word read at the memory operation
- aligned srcp to make it aligned for copy. */
- sh_1 = 8 * (src % sizeof(unsigned int));
- sh_2 = 8 * sizeof(unsigned int) - sh_1;
-
- /* Make src aligned by rounding it down. */
- src &= -sizeof(unsigned int);
-
- switch (len % 4)
- {
- case 2:
- /* a1 = ((unsigned int *) src)[0];
- a2 = ((unsigned int *) src)[1]; */
- ldw(s_space, 0, src, a1, cda_ldw_exc);
- ldw(s_space, 4, src, a2, cda_ldw_exc);
- src -= 1 * sizeof(unsigned int);
- dst -= 3 * sizeof(unsigned int);
- len += 2;
- goto do1;
- case 3:
- /* a0 = ((unsigned int *) src)[0];
- a1 = ((unsigned int *) src)[1]; */
- ldw(s_space, 0, src, a0, cda_ldw_exc);
- ldw(s_space, 4, src, a1, cda_ldw_exc);
- src -= 0 * sizeof(unsigned int);
- dst -= 2 * sizeof(unsigned int);
- len += 1;
- goto do2;
- case 0:
- if (len == 0)
- return 0;
- /* a3 = ((unsigned int *) src)[0];
- a0 = ((unsigned int *) src)[1]; */
- ldw(s_space, 0, src, a3, cda_ldw_exc);
- ldw(s_space, 4, src, a0, cda_ldw_exc);
- src -=-1 * sizeof(unsigned int);
- dst -= 1 * sizeof(unsigned int);
- len += 0;
- goto do3;
- case 1:
- /* a2 = ((unsigned int *) src)[0];
- a3 = ((unsigned int *) src)[1]; */
- ldw(s_space, 0, src, a2, cda_ldw_exc);
- ldw(s_space, 4, src, a3, cda_ldw_exc);
- src -=-2 * sizeof(unsigned int);
- dst -= 0 * sizeof(unsigned int);
- len -= 1;
- if (len == 0)
- goto do0;
- goto do4; /* No-op. */
- }
-
- do
- {
- /* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */
-do4:
- /* a0 = ((unsigned int *) src)[0]; */
- ldw(s_space, 0, src, a0, cda_ldw_exc);
- /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
- stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
-do3:
- /* a1 = ((unsigned int *) src)[1]; */
- ldw(s_space, 4, src, a1, cda_ldw_exc);
- /* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */
- stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc);
-do2:
- /* a2 = ((unsigned int *) src)[2]; */
- ldw(s_space, 8, src, a2, cda_ldw_exc);
- /* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */
- stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc);
-do1:
- /* a3 = ((unsigned int *) src)[3]; */
- ldw(s_space, 12, src, a3, cda_ldw_exc);
- /* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */
- stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc);
-
- src += 4 * sizeof(unsigned int);
- dst += 4 * sizeof(unsigned int);
- len -= 4;
- }
- while (len != 0);
-
-do0:
- /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
- stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
-
- preserve_branch(handle_load_error);
- preserve_branch(handle_store_error);
-
- return 0;
-
-handle_load_error:
- __asm__ __volatile__ ("cda_ldw_exc:\n");
- d = &__get_cpu_var(exception_data);
- DPRINTF("cda_ldw_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n",
- o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src);
- return o_len * 4 - d->fault_addr + o_src;
-
-handle_store_error:
- __asm__ __volatile__ ("cda_stw_exc:\n");
- d = &__get_cpu_var(exception_data);
- DPRINTF("cda_stw_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n",
- o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst);
- return o_len * 4 - d->fault_addr + o_dst;
-}
-
-
-/* Returns 0 for success, otherwise, returns number of bytes not transferred. */
-static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
-{
- register unsigned long src, dst, t1, t2, t3;
- register unsigned char *pcs, *pcd;
- register unsigned int *pws, *pwd;
- register double *pds, *pdd;
- unsigned long ret = 0;
- unsigned long o_dst, o_src, o_len;
- struct exception_data *d;
-
- src = (unsigned long)srcp;
- dst = (unsigned long)dstp;
- pcs = (unsigned char *)srcp;
- pcd = (unsigned char *)dstp;
-
- o_dst = dst; o_src = src; o_len = len;
-
- /* prefetch_src((const void *)srcp); */
-
- if (len < THRESHOLD)
- goto byte_copy;
-
- /* Check alignment */
- t1 = (src ^ dst);
- if (unlikely(t1 & (sizeof(double)-1)))
- goto unaligned_copy;
-
- /* src and dst have same alignment. */
-
- /* Copy bytes till we are double-aligned. */
- t2 = src & (sizeof(double) - 1);
- if (unlikely(t2 != 0)) {
- t2 = sizeof(double) - t2;
- while (t2 && len) {
- /* *pcd++ = *pcs++; */
- ldbma(s_space, pcs, t3, pmc_load_exc);
- len--;
- stbma(d_space, t3, pcd, pmc_store_exc);
- t2--;
- }
- }
-
- pds = (double *)pcs;
- pdd = (double *)pcd;
-
-#if 0
- /* Copy 8 doubles at a time */
- while (len >= 8*sizeof(double)) {
- register double r1, r2, r3, r4, r5, r6, r7, r8;
- /* prefetch_src((char *)pds + L1_CACHE_BYTES); */
- flddma(s_space, pds, r1, pmc_load_exc);
- flddma(s_space, pds, r2, pmc_load_exc);
- flddma(s_space, pds, r3, pmc_load_exc);
- flddma(s_space, pds, r4, pmc_load_exc);
- fstdma(d_space, r1, pdd, pmc_store_exc);
- fstdma(d_space, r2, pdd, pmc_store_exc);
- fstdma(d_space, r3, pdd, pmc_store_exc);
- fstdma(d_space, r4, pdd, pmc_store_exc);
-
-#if 0
- if (L1_CACHE_BYTES <= 32)
- prefetch_src((char *)pds + L1_CACHE_BYTES);
-#endif
- flddma(s_space, pds, r5, pmc_load_exc);
- flddma(s_space, pds, r6, pmc_load_exc);
- flddma(s_space, pds, r7, pmc_load_exc);
- flddma(s_space, pds, r8, pmc_load_exc);
- fstdma(d_space, r5, pdd, pmc_store_exc);
- fstdma(d_space, r6, pdd, pmc_store_exc);
- fstdma(d_space, r7, pdd, pmc_store_exc);
- fstdma(d_space, r8, pdd, pmc_store_exc);
- len -= 8*sizeof(double);
- }
-#endif
-
- pws = (unsigned int *)pds;
- pwd = (unsigned int *)pdd;
-
-word_copy:
- while (len >= 8*sizeof(unsigned int)) {
- register unsigned int r1,r2,r3,r4,r5,r6,r7,r8;
- /* prefetch_src((char *)pws + L1_CACHE_BYTES); */
- ldwma(s_space, pws, r1, pmc_load_exc);
- ldwma(s_space, pws, r2, pmc_load_exc);
- ldwma(s_space, pws, r3, pmc_load_exc);
- ldwma(s_space, pws, r4, pmc_load_exc);
- stwma(d_space, r1, pwd, pmc_store_exc);
- stwma(d_space, r2, pwd, pmc_store_exc);
- stwma(d_space, r3, pwd, pmc_store_exc);
- stwma(d_space, r4, pwd, pmc_store_exc);
-
- ldwma(s_space, pws, r5, pmc_load_exc);
- ldwma(s_space, pws, r6, pmc_load_exc);
- ldwma(s_space, pws, r7, pmc_load_exc);
- ldwma(s_space, pws, r8, pmc_load_exc);
- stwma(d_space, r5, pwd, pmc_store_exc);
- stwma(d_space, r6, pwd, pmc_store_exc);
- stwma(d_space, r7, pwd, pmc_store_exc);
- stwma(d_space, r8, pwd, pmc_store_exc);
- len -= 8*sizeof(unsigned int);
- }
-
- while (len >= 4*sizeof(unsigned int)) {
- register unsigned int r1,r2,r3,r4;
- ldwma(s_space, pws, r1, pmc_load_exc);
- ldwma(s_space, pws, r2, pmc_load_exc);
- ldwma(s_space, pws, r3, pmc_load_exc);
- ldwma(s_space, pws, r4, pmc_load_exc);
- stwma(d_space, r1, pwd, pmc_store_exc);
- stwma(d_space, r2, pwd, pmc_store_exc);
- stwma(d_space, r3, pwd, pmc_store_exc);
- stwma(d_space, r4, pwd, pmc_store_exc);
- len -= 4*sizeof(unsigned int);
- }
-
- pcs = (unsigned char *)pws;
- pcd = (unsigned char *)pwd;
-
-byte_copy:
- while (len) {
- /* *pcd++ = *pcs++; */
- ldbma(s_space, pcs, t3, pmc_load_exc);
- stbma(d_space, t3, pcd, pmc_store_exc);
- len--;
- }
-
- return 0;
-
-unaligned_copy:
- /* possibly we are aligned on a word, but not on a double... */
- if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) {
- t2 = src & (sizeof(unsigned int) - 1);
-
- if (unlikely(t2 != 0)) {
- t2 = sizeof(unsigned int) - t2;
- while (t2) {
- /* *pcd++ = *pcs++; */
- ldbma(s_space, pcs, t3, pmc_load_exc);
- stbma(d_space, t3, pcd, pmc_store_exc);
- len--;
- t2--;
- }
- }
-
- pws = (unsigned int *)pcs;
- pwd = (unsigned int *)pcd;
- goto word_copy;
- }
-
- /* Align the destination. */
- if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) {
- t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1));
- while (t2) {
- /* *pcd++ = *pcs++; */
- ldbma(s_space, pcs, t3, pmc_load_exc);
- stbma(d_space, t3, pcd, pmc_store_exc);
- len--;
- t2--;
- }
- dst = (unsigned long)pcd;
- src = (unsigned long)pcs;
- }
-
- ret = copy_dstaligned(dst, src, len / sizeof(unsigned int),
- o_dst, o_src, o_len);
- if (ret)
- return ret;
-
- pcs += (len & -sizeof(unsigned int));
- pcd += (len & -sizeof(unsigned int));
- len %= sizeof(unsigned int);
-
- preserve_branch(handle_load_error);
- preserve_branch(handle_store_error);
-
- goto byte_copy;
-
-handle_load_error:
- __asm__ __volatile__ ("pmc_load_exc:\n");
- d = &__get_cpu_var(exception_data);
- DPRINTF("pmc_load_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n",
- o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src);
- return o_len - d->fault_addr + o_src;
-
-handle_store_error:
- __asm__ __volatile__ ("pmc_store_exc:\n");
- d = &__get_cpu_var(exception_data);
- DPRINTF("pmc_store_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n",
- o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst);
- return o_len - d->fault_addr + o_dst;
-}
-
-#ifdef __KERNEL__
-unsigned long copy_to_user(void __user *dst, const void *src, unsigned long len)
-{
- mtsp(get_kernel_space(), 1);
- mtsp(get_user_space(), 2);
- return pa_memcpy((void __force *)dst, src, len);
-}
-
-EXPORT_SYMBOL(__copy_from_user);
-unsigned long __copy_from_user(void *dst, const void __user *src, unsigned long len)
-{
- mtsp(get_user_space(), 1);
- mtsp(get_kernel_space(), 2);
- return pa_memcpy(dst, (void __force *)src, len);
-}
-
-unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned long len)
-{
- mtsp(get_user_space(), 1);
- mtsp(get_user_space(), 2);
- return pa_memcpy((void __force *)dst, (void __force *)src, len);
-}
-
-
-void * memcpy(void * dst,const void *src, size_t count)
-{
- mtsp(get_kernel_space(), 1);
- mtsp(get_kernel_space(), 2);
- pa_memcpy(dst, src, count);
- return dst;
-}
-
-EXPORT_SYMBOL(copy_to_user);
-EXPORT_SYMBOL(copy_from_user);
-EXPORT_SYMBOL(copy_in_user);
-EXPORT_SYMBOL(memcpy);
-#endif