summaryrefslogtreecommitdiff
path: root/arch/c6x/lib/csum_64plus.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/c6x/lib/csum_64plus.S')
-rw-r--r--arch/c6x/lib/csum_64plus.S419
1 files changed, 419 insertions, 0 deletions
diff --git a/arch/c6x/lib/csum_64plus.S b/arch/c6x/lib/csum_64plus.S
new file mode 100644
index 00000000..6d258964
--- /dev/null
+++ b/arch/c6x/lib/csum_64plus.S
@@ -0,0 +1,419 @@
+;
+; linux/arch/c6x/lib/csum_64plus.s
+;
+; Port on Texas Instruments TMS320C6x architecture
+;
+; Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
+; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
+;
+; This program is free software; you can redistribute it and/or modify
+; it under the terms of the GNU General Public License version 2 as
+; published by the Free Software Foundation.
+;
+#include <linux/linkage.h>
+
+;
+;unsigned int csum_partial_copy(const char *src, char * dst,
+; int len, int sum)
+;
+; A4: src
+; B4: dst
+; A6: len
+; B6: sum
+; return csum in A4
+;
+
+ .text
+ENTRY(csum_partial_copy)
+ MVC .S2 ILC,B30
+
+ MV .D1X B6,A31 ; given csum
+ ZERO .D1 A9 ; csum (a side)
+|| ZERO .D2 B9 ; csum (b side)
+|| SHRU .S2X A6,2,B5 ; len / 4
+
+ ;; Check alignment and size
+ AND .S1 3,A4,A1
+|| AND .S2 3,B4,B0
+ OR .L2X B0,A1,B0 ; non aligned condition
+|| MVC .S2 B5,ILC
+|| MVK .D2 1,B2
+|| MV .D1X B5,A1 ; words condition
+ [!A1] B .S1 L8
+ [B0] BNOP .S1 L6,5
+
+ SPLOOP 1
+
+ ;; Main loop for aligned words
+ LDW .D1T1 *A4++,A7
+ NOP 4
+ MV .S2X A7,B7
+|| EXTU .S1 A7,0,16,A16
+ STW .D2T2 B7,*B4++
+|| MPYU .M2 B7,B2,B8
+|| ADD .L1 A16,A9,A9
+ NOP
+ SPKERNEL 8,0
+|| ADD .L2 B8,B9,B9
+
+ ZERO .D1 A1
+|| ADD .L1X A9,B9,A9 ; add csum from a and b sides
+
+L6:
+ [!A1] BNOP .S1 L8,5
+
+ ;; Main loop for non-aligned words
+ SPLOOP 2
+ || MVK .L1 1,A2
+
+ LDNW .D1T1 *A4++,A7
+ NOP 3
+
+ NOP
+ MV .S2X A7,B7
+ || EXTU .S1 A7,0,16,A16
+ || MPYU .M1 A7,A2,A8
+
+ ADD .L1 A16,A9,A9
+ SPKERNEL 6,0
+ || STNW .D2T2 B7,*B4++
+ || ADD .L1 A8,A9,A9
+
+L8: AND .S2X 2,A6,B5
+ CMPGT .L2 B5,0,B0
+ [!B0] BNOP .S1 L82,4
+
+ ;; Manage half-word
+ ZERO .L1 A7
+|| ZERO .D1 A8
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+
+ LDBU .D1T1 *A4++,A7
+ LDBU .D1T1 *A4++,A8
+ NOP 3
+ SHL .S1 A7,8,A0
+ ADD .S1 A8,A9,A9
+ STB .D2T1 A7,*B4++
+|| ADD .S1 A0,A9,A9
+ STB .D2T1 A8,*B4++
+
+#else
+
+ LDBU .D1T1 *A4++,A7
+ LDBU .D1T1 *A4++,A8
+ NOP 3
+ ADD .S1 A7,A9,A9
+ SHL .S1 A8,8,A0
+
+ STB .D2T1 A7,*B4++
+|| ADD .S1 A0,A9,A9
+ STB .D2T1 A8,*B4++
+
+#endif
+
+ ;; Manage eventually the last byte
+L82: AND .S2X 1,A6,B0
+ [!B0] BNOP .S1 L9,5
+
+|| ZERO .L1 A7
+
+L83: LDBU .D1T1 *A4++,A7
+ NOP 4
+
+ MV .L2X A7,B7
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+
+ STB .D2T2 B7,*B4++
+|| SHL .S1 A7,8,A7
+ ADD .S1 A7,A9,A9
+
+#else
+
+ STB .D2T2 B7,*B4++
+|| ADD .S1 A7,A9,A9
+
+#endif
+
+ ;; Fold the csum
+L9: SHRU .S2X A9,16,B0
+ [!B0] BNOP .S1 L10,5
+
+L91: SHRU .S2X A9,16,B4
+|| EXTU .S1 A9,16,16,A3
+ ADD .D1X A3,B4,A9
+
+ SHRU .S1 A9,16,A0
+ [A0] BNOP .S1 L91,5
+
+L10: ADD .D1 A31,A9,A9
+ MV .D1 A9,A4
+
+ BNOP .S2 B3,4
+ MVC .S2 B30,ILC
+ENDPROC(csum_partial_copy)
+
+;
+;unsigned short
+;ip_fast_csum(unsigned char *iph, unsigned int ihl)
+;{
+; unsigned int checksum = 0;
+; unsigned short *tosum = (unsigned short *) iph;
+; int len;
+;
+; len = ihl*4;
+;
+; if (len <= 0)
+; return 0;
+;
+; while(len) {
+; len -= 2;
+; checksum += *tosum++;
+; }
+; if (len & 1)
+; checksum += *(unsigned char*) tosum;
+;
+; while(checksum >> 16)
+; checksum = (checksum & 0xffff) + (checksum >> 16);
+;
+; return ~checksum;
+;}
+;
+; A4: iph
+; B4: ihl
+; return checksum in A4
+;
+ .text
+
+ENTRY(ip_fast_csum)
+ ZERO .D1 A5
+ || MVC .S2 ILC,B30
+ SHL .S2 B4,2,B0
+ CMPGT .L2 B0,0,B1
+ [!B1] BNOP .S1 L15,4
+ [!B1] ZERO .D1 A3
+
+ [!B0] B .S1 L12
+ SHRU .S2 B0,1,B0
+ MVC .S2 B0,ILC
+ NOP 3
+
+ SPLOOP 1
+ LDHU .D1T1 *A4++,A3
+ NOP 3
+ NOP
+ SPKERNEL 5,0
+ || ADD .L1 A3,A5,A5
+
+L12: SHRU .S1 A5,16,A0
+ [!A0] BNOP .S1 L14,5
+
+L13: SHRU .S2X A5,16,B4
+ EXTU .S1 A5,16,16,A3
+ ADD .D1X A3,B4,A5
+ SHRU .S1 A5,16,A0
+ [A0] BNOP .S1 L13,5
+
+L14: NOT .D1 A5,A3
+ EXTU .S1 A3,16,16,A3
+
+L15: BNOP .S2 B3,3
+ MVC .S2 B30,ILC
+ MV .D1 A3,A4
+ENDPROC(ip_fast_csum)
+
+;
+;unsigned short
+;do_csum(unsigned char *buff, unsigned int len)
+;{
+; int odd, count;
+; unsigned int result = 0;
+;
+; if (len <= 0)
+; goto out;
+; odd = 1 & (unsigned long) buff;
+; if (odd) {
+;#ifdef __LITTLE_ENDIAN
+; result += (*buff << 8);
+;#else
+; result = *buff;
+;#endif
+; len--;
+; buff++;
+; }
+; count = len >> 1; /* nr of 16-bit words.. */
+; if (count) {
+; if (2 & (unsigned long) buff) {
+; result += *(unsigned short *) buff;
+; count--;
+; len -= 2;
+; buff += 2;
+; }
+; count >>= 1; /* nr of 32-bit words.. */
+; if (count) {
+; unsigned int carry = 0;
+; do {
+; unsigned int w = *(unsigned int *) buff;
+; count--;
+; buff += 4;
+; result += carry;
+; result += w;
+; carry = (w > result);
+; } while (count);
+; result += carry;
+; result = (result & 0xffff) + (result >> 16);
+; }
+; if (len & 2) {
+; result += *(unsigned short *) buff;
+; buff += 2;
+; }
+; }
+; if (len & 1)
+;#ifdef __LITTLE_ENDIAN
+; result += *buff;
+;#else
+; result += (*buff << 8);
+;#endif
+; result = (result & 0xffff) + (result >> 16);
+; /* add up carry.. */
+; result = (result & 0xffff) + (result >> 16);
+; if (odd)
+; result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+;out:
+; return result;
+;}
+;
+; A4: buff
+; B4: len
+; return checksum in A4
+;
+
+ENTRY(do_csum)
+ CMPGT .L2 B4,0,B0
+ [!B0] BNOP .S1 L26,3
+ EXTU .S1 A4,31,31,A0
+
+ MV .L1 A0,A3
+|| MV .S1X B3,A5
+|| MV .L2 B4,B3
+|| ZERO .D1 A1
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ [A0] SUB .L2 B3,1,B3
+|| [A0] LDBU .D1T1 *A4++,A1
+#else
+ [!A0] BNOP .S1 L21,5
+|| [A0] LDBU .D1T1 *A4++,A0
+ SUB .L2 B3,1,B3
+|| SHL .S1 A0,8,A1
+L21:
+#endif
+ SHR .S2 B3,1,B0
+ [!B0] BNOP .S1 L24,3
+ MVK .L1 2,A0
+ AND .L1 A4,A0,A0
+
+ [!A0] BNOP .S1 L22,5
+|| [A0] LDHU .D1T1 *A4++,A0
+ SUB .L2 B0,1,B0
+|| SUB .S2 B3,2,B3
+|| ADD .L1 A0,A1,A1
+L22:
+ SHR .S2 B0,1,B0
+|| ZERO .L1 A0
+
+ [!B0] BNOP .S1 L23,5
+|| [B0] MVC .S2 B0,ILC
+
+ SPLOOP 3
+ SPMASK L1
+|| MV .L1 A1,A2
+|| LDW .D1T1 *A4++,A1
+
+ NOP 4
+ ADD .L1 A0,A1,A0
+ ADD .L1 A2,A0,A2
+
+ SPKERNEL 1,2
+|| CMPGTU .L1 A1,A2,A0
+
+ ADD .L1 A0,A2,A6
+ EXTU .S1 A6,16,16,A7
+ SHRU .S2X A6,16,B0
+ NOP 1
+ ADD .L1X A7,B0,A1
+L23:
+ MVK .L2 2,B0
+ AND .L2 B3,B0,B0
+ [B0] LDHU .D1T1 *A4++,A0
+ NOP 4
+ [B0] ADD .L1 A0,A1,A1
+L24:
+ EXTU .S2 B3,31,31,B0
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ [!B0] BNOP .S1 L25,4
+|| [B0] LDBU .D1T1 *A4,A0
+ SHL .S1 A0,8,A0
+ ADD .L1 A0,A1,A1
+L25:
+#else
+ [B0] LDBU .D1T1 *A4,A0
+ NOP 4
+ [B0] ADD .L1 A0,A1,A1
+#endif
+ EXTU .S1 A1,16,16,A0
+ SHRU .S2X A1,16,B0
+ NOP 1
+ ADD .L1X A0,B0,A0
+ SHRU .S1 A0,16,A1
+ ADD .L1 A0,A1,A0
+ EXTU .S1 A0,16,16,A1
+ EXTU .S1 A1,16,24,A2
+
+ EXTU .S1 A1,24,16,A0
+|| MV .L2X A3,B0
+
+ [B0] OR .L1 A0,A2,A1
+L26:
+ NOP 1
+ BNOP .S2X A5,4
+ MV .L1 A1,A4
+ENDPROC(do_csum)
+
+;__wsum csum_partial(const void *buff, int len, __wsum wsum)
+;{
+; unsigned int sum = (__force unsigned int)wsum;
+; unsigned int result = do_csum(buff, len);
+;
+; /* add in old sum, and carry.. */
+; result += sum;
+; if (sum > result)
+; result += 1;
+; return (__force __wsum)result;
+;}
+;
+ENTRY(csum_partial)
+ MV .L1X B3,A9
+|| CALLP .S2 do_csum,B3
+|| MV .S1 A6,A8
+ BNOP .S2X A9,2
+ ADD .L1 A8,A4,A1
+ CMPGTU .L1 A8,A1,A0
+ ADD .L1 A1,A0,A4
+ENDPROC(csum_partial)
+
+;unsigned short
+;ip_compute_csum(unsigned char *buff, unsigned int len)
+;
+; A4: buff
+; B4: len
+; return checksum in A4
+
+ENTRY(ip_compute_csum)
+ MV .L1X B3,A9
+|| CALLP .S2 do_csum,B3
+ BNOP .S2X A9,3
+ NOT .S1 A4,A4
+ CLR .S1 A4,16,31,A4
+ENDPROC(ip_compute_csum)