Diffstat (limited to 'ANDROID_3.4.5/arch/x86/crypto')
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/Makefile | 45
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/aes-i586-asm_32.S | 367
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/aes-x86_64-asm_64.S | 188
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/aes_glue.c | 71
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/aesni-intel_asm.S | 2618
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/aesni-intel_glue.c | 1389
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/blowfish-x86_64-asm_64.S | 390
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/blowfish_glue.c | 489
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/camellia-x86_64-asm_64.S | 520
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/camellia_glue.c | 1952
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/crc32c-intel.c | 203
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/fpu.c | 161
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/ghash-clmulni-intel_asm.S | 157
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/ghash-clmulni-intel_glue.c | 338
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/salsa20-i586-asm_32.S | 1114
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/salsa20-x86_64-asm_64.S | 920
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/salsa20_glue.c | 129
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/serpent-sse2-i586-asm_32.S | 635
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | 758
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/serpent_sse2_glue.c | 944
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/sha1_ssse3_asm.S | 558
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/sha1_ssse3_glue.c | 240
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/twofish-i586-asm_32.S | 335
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 316
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/twofish-x86_64-asm_64.S | 322
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/twofish_glue.c | 101
-rw-r--r--  ANDROID_3.4.5/arch/x86/crypto/twofish_glue_3way.c | 695
27 files changed, 0 insertions, 15955 deletions
diff --git a/ANDROID_3.4.5/arch/x86/crypto/Makefile b/ANDROID_3.4.5/arch/x86/crypto/Makefile
deleted file mode 100644
index e191ac04..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/Makefile
+++ /dev/null
@@ -1,45 +0,0 @@
-#
-# Arch-specific CryptoAPI modules.
-#
-
-obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
-obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
-obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
-obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
-
-obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
-obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
-obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
-obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
-obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
-obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
-obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
-obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
-obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
-
-obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
-obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
-
-aes-i586-y := aes-i586-asm_32.o aes_glue.o
-twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
-salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
-serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
-
-aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
-camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
-blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
-twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
-twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
-salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
-serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
-
-aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
-
-ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
-
-# enable AVX support only when $(AS) can actually assemble the instructions
-ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes)
-AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT
-CFLAGS_sha1_ssse3_glue.o += -DSHA1_ENABLE_AVX_SUPPORT
-endif
-sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
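[Editor's note] The as-instr guard above only settles the build-time question of whether $(AS) can emit AVX instructions; the glue code must still confirm at runtime that the CPU actually supports AVX before registering the accelerated transform. A minimal userspace analogue of that runtime dispatch, using GCC's __builtin_cpu_supports rather than the kernel's cpufeature machinery (an illustrative sketch, not kernel code):

#include <stdio.h>

/* Runtime feature dispatch, the userspace cousin of what
 * sha1_ssse3_glue.c does before picking the AVX code path.
 * __builtin_cpu_supports is a GCC/Clang builtin, not a kernel API. */
int main(void)
{
        __builtin_cpu_init();
        if (__builtin_cpu_supports("avx"))
                printf("AVX available: would select the AVX transform\n");
        else
                printf("no AVX: would fall back to the SSSE3 transform\n");
        return 0;
}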
diff --git a/ANDROID_3.4.5/arch/x86/crypto/aes-i586-asm_32.S b/ANDROID_3.4.5/arch/x86/crypto/aes-i586-asm_32.S
deleted file mode 100644
index b949ec2f..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/aes-i586-asm_32.S
+++ /dev/null
@@ -1,367 +0,0 @@
-// -------------------------------------------------------------------------
-// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
-// All rights reserved.
-//
-// LICENSE TERMS
-//
-// The free distribution and use of this software in both source and binary
-// form is allowed (with or without changes) provided that:
-//
-// 1. distributions of this source code include the above copyright
-// notice, this list of conditions and the following disclaimer;
-//
-// 2. distributions in binary form include the above copyright
-// notice, this list of conditions and the following disclaimer
-// in the documentation and/or other associated materials;
-//
-// 3. the copyright holder's name is not used to endorse products
-// built using this software without specific written permission.
-//
-//
-// ALTERNATIVELY, provided that this notice is retained in full, this product
-// may be distributed under the terms of the GNU General Public License (GPL),
-// in which case the provisions of the GPL apply INSTEAD OF those given above.
-//
-// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
-// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
-
-// DISCLAIMER
-//
-// This software is provided 'as is' with no explicit or implied warranties
-// in respect of its properties including, but not limited to, correctness
-// and fitness for purpose.
-// -------------------------------------------------------------------------
-// Issue Date: 29/07/2002
-
-.file "aes-i586-asm.S"
-.text
-
-#include <asm/asm-offsets.h>
-
-#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
-
-/* offsets to parameters with one register pushed onto stack */
-#define ctx 8
-#define out_blk 12
-#define in_blk 16
-
-/* offsets in crypto_aes_ctx structure */
-#define klen (480)
-#define ekey (0)
-#define dkey (240)
-
-// register mapping for encrypt and decrypt subroutines
-
-#define r0 eax
-#define r1 ebx
-#define r2 ecx
-#define r3 edx
-#define r4 esi
-#define r5 edi
-
-#define eaxl al
-#define eaxh ah
-#define ebxl bl
-#define ebxh bh
-#define ecxl cl
-#define ecxh ch
-#define edxl dl
-#define edxh dh
-
-#define _h(reg) reg##h
-#define h(reg) _h(reg)
-
-#define _l(reg) reg##l
-#define l(reg) _l(reg)
-
-// This macro takes a 32-bit word representing a column and uses
-// each of its four bytes to index into four tables of 256 32-bit
-// words to obtain values that are then xored into the appropriate
-// output registers r0, r1, r4 or r5.
-
-// Parameters:
-// table table base address
-// %1 out_state[0]
-// %2 out_state[1]
-// %3 out_state[2]
-// %4 out_state[3]
-// idx input register for the round (destroyed)
-// tmp scratch register for the round
-// sched key schedule
-
-#define do_col(table, a1,a2,a3,a4, idx, tmp) \
- movzx %l(idx),%tmp; \
- xor table(,%tmp,4),%a1; \
- movzx %h(idx),%tmp; \
- shr $16,%idx; \
- xor table+tlen(,%tmp,4),%a2; \
- movzx %l(idx),%tmp; \
- movzx %h(idx),%idx; \
- xor table+2*tlen(,%tmp,4),%a3; \
- xor table+3*tlen(,%idx,4),%a4;
-
-// initialise output registers from the key schedule
-// NB1: original value of a3 is in idx on exit
-// NB2: original values of a1,a2,a4 aren't used
-#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
- mov 0 sched,%a1; \
- movzx %l(idx),%tmp; \
- mov 12 sched,%a2; \
- xor table(,%tmp,4),%a1; \
- mov 4 sched,%a4; \
- movzx %h(idx),%tmp; \
- shr $16,%idx; \
- xor table+tlen(,%tmp,4),%a2; \
- movzx %l(idx),%tmp; \
- movzx %h(idx),%idx; \
- xor table+3*tlen(,%idx,4),%a4; \
- mov %a3,%idx; \
- mov 8 sched,%a3; \
- xor table+2*tlen(,%tmp,4),%a3;
-
-// initialise output registers from the key schedule
-// NB1: original value of a3 is in idx on exit
-// NB2: original values of a1,a2,a4 aren't used
-#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
- mov 0 sched,%a1; \
- movzx %l(idx),%tmp; \
- mov 4 sched,%a2; \
- xor table(,%tmp,4),%a1; \
- mov 12 sched,%a4; \
- movzx %h(idx),%tmp; \
- shr $16,%idx; \
- xor table+tlen(,%tmp,4),%a2; \
- movzx %l(idx),%tmp; \
- movzx %h(idx),%idx; \
- xor table+3*tlen(,%idx,4),%a4; \
- mov %a3,%idx; \
- mov 8 sched,%a3; \
- xor table+2*tlen(,%tmp,4),%a3;
-
-
-// original Gladman had conditional saves to MMX regs.
-#define save(a1, a2) \
- mov %a2,4*a1(%esp)
-
-#define restore(a1, a2) \
- mov 4*a2(%esp),%a1
-
-// These macros perform a forward encryption cycle. They are entered with
-// the first previous round column values in r0,r1,r4,r5 and
-// exit with the final values in the same registers, using stack
-// for temporary storage.
-
-// round column values
-// on entry: r0,r1,r4,r5
-// on exit: r2,r1,r4,r5
-#define fwd_rnd1(arg, table) \
- save (0,r1); \
- save (1,r5); \
- \
- /* compute new column values */ \
- do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \
- do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \
- restore(r0,0); \
- do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \
- restore(r0,1); \
- do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */
-
-// round column values
-// on entry: r2,r1,r4,r5
-// on exit: r0,r1,r4,r5
-#define fwd_rnd2(arg, table) \
- save (0,r1); \
- save (1,r5); \
- \
- /* compute new column values */ \
- do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \
- do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \
- restore(r2,0); \
- do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \
- restore(r2,1); \
- do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */
-
-// These macros perform an inverse encryption cycle. They are entered with
-// the first previous round column values in r0,r1,r4,r5 and
-// exit with the final values in the same registers, using stack
-// for temporary storage.
-
-// round column values
-// on entry: r0,r1,r4,r5
-// on exit: r2,r1,r4,r5
-#define inv_rnd1(arg, table) \
- save (0,r1); \
- save (1,r5); \
- \
- /* compute new column values */ \
- do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \
- do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \
- restore(r0,0); \
- do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \
- restore(r0,1); \
- do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */
-
-// round column values
-// on entry: r2,r1,r4,r5
-// on exit: r0,r1,r4,r5
-#define inv_rnd2(arg, table) \
- save (0,r1); \
- save (1,r5); \
- \
- /* compute new column values */ \
- do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \
- do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \
- restore(r2,0); \
- do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \
- restore(r2,1); \
- do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
-
-// AES (Rijndael) Encryption Subroutine
-/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
-
-.global aes_enc_blk
-
-.extern crypto_ft_tab
-.extern crypto_fl_tab
-
-.align 4
-
-aes_enc_blk:
- push %ebp
- mov ctx(%esp),%ebp
-
-// CAUTION: the order and the values used in these assigns
-// rely on the register mappings
-
-1: push %ebx
- mov in_blk+4(%esp),%r2
- push %esi
- mov klen(%ebp),%r3 // key size
- push %edi
-#if ekey != 0
- lea ekey(%ebp),%ebp // key pointer
-#endif
-
-// input four columns and xor in first round key
-
- mov (%r2),%r0
- mov 4(%r2),%r1
- mov 8(%r2),%r4
- mov 12(%r2),%r5
- xor (%ebp),%r0
- xor 4(%ebp),%r1
- xor 8(%ebp),%r4
- xor 12(%ebp),%r5
-
- sub $8,%esp // space for register saves on stack
- add $16,%ebp // increment to next round key
- cmp $24,%r3
- jb 4f // 10 rounds for 128-bit key
- lea 32(%ebp),%ebp
- je 3f // 12 rounds for 192-bit key
- lea 32(%ebp),%ebp
-
-2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key
- fwd_rnd2( -48(%ebp), crypto_ft_tab)
-3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key
- fwd_rnd2( -16(%ebp), crypto_ft_tab)
-4: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key
- fwd_rnd2( +16(%ebp), crypto_ft_tab)
- fwd_rnd1( +32(%ebp), crypto_ft_tab)
- fwd_rnd2( +48(%ebp), crypto_ft_tab)
- fwd_rnd1( +64(%ebp), crypto_ft_tab)
- fwd_rnd2( +80(%ebp), crypto_ft_tab)
- fwd_rnd1( +96(%ebp), crypto_ft_tab)
- fwd_rnd2(+112(%ebp), crypto_ft_tab)
- fwd_rnd1(+128(%ebp), crypto_ft_tab)
- fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table
-
-// move final values to the output array. CAUTION: the
-// order of these assigns relies on the register mappings
-
- add $8,%esp
- mov out_blk+12(%esp),%ebp
- mov %r5,12(%ebp)
- pop %edi
- mov %r4,8(%ebp)
- pop %esi
- mov %r1,4(%ebp)
- pop %ebx
- mov %r0,(%ebp)
- pop %ebp
- ret
-
-// AES (Rijndael) Decryption Subroutine
-/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
-
-.global aes_dec_blk
-
-.extern crypto_it_tab
-.extern crypto_il_tab
-
-.align 4
-
-aes_dec_blk:
- push %ebp
- mov ctx(%esp),%ebp
-
-// CAUTION: the order and the values used in these assigns
-// rely on the register mappings
-
-1: push %ebx
- mov in_blk+4(%esp),%r2
- push %esi
- mov klen(%ebp),%r3 // key size
- push %edi
-#if dkey != 0
- lea dkey(%ebp),%ebp // key pointer
-#endif
-
-// input four columns and xor in first round key
-
- mov (%r2),%r0
- mov 4(%r2),%r1
- mov 8(%r2),%r4
- mov 12(%r2),%r5
- xor (%ebp),%r0
- xor 4(%ebp),%r1
- xor 8(%ebp),%r4
- xor 12(%ebp),%r5
-
- sub $8,%esp // space for register saves on stack
- add $16,%ebp // increment to next round key
- cmp $24,%r3
- jb 4f // 10 rounds for 128-bit key
- lea 32(%ebp),%ebp
- je 3f // 12 rounds for 192-bit key
- lea 32(%ebp),%ebp
-
-2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key
- inv_rnd2( -48(%ebp), crypto_it_tab)
-3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key
- inv_rnd2( -16(%ebp), crypto_it_tab)
-4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key
- inv_rnd2( +16(%ebp), crypto_it_tab)
- inv_rnd1( +32(%ebp), crypto_it_tab)
- inv_rnd2( +48(%ebp), crypto_it_tab)
- inv_rnd1( +64(%ebp), crypto_it_tab)
- inv_rnd2( +80(%ebp), crypto_it_tab)
- inv_rnd1( +96(%ebp), crypto_it_tab)
- inv_rnd2(+112(%ebp), crypto_it_tab)
- inv_rnd1(+128(%ebp), crypto_it_tab)
- inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table
-
-// move final values to the output array. CAUTION: the
-// order of these assigns relies on the register mappings
-
- add $8,%esp
- mov out_blk+12(%esp),%ebp
- mov %r5,12(%ebp)
- pop %edi
- mov %r4,8(%ebp)
- pop %esi
- mov %r1,4(%ebp)
- pop %ebx
- mov %r0,(%ebp)
- pop %ebp
- ret
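[Editor's note] For readers decoding the do_col macro above: each output column is built by splitting a 32-bit input word into its four bytes, using each byte to index one of four 1 KB tables (tlen = 1024, i.e. 256 u32 entries apiece), and XOR-ing the four fetched words into the round state. The same step in portable C, with ft[4][256] standing in for the kernel's crypto_ft_tab (an assumed table layout, shown only to make the addressing explicit):

#include <stdint.h>

/* One T-table column step, the C shape of the do_col macro: the four
 * bytes of `in` select entries from four 256-entry u32 tables, which
 * are XORed into the four output words. */
static void do_col_c(const uint32_t ft[4][256], uint32_t in,
                     uint32_t *a1, uint32_t *a2, uint32_t *a3, uint32_t *a4)
{
        *a1 ^= ft[0][in & 0xff];          /* movzx %l(idx) -> table       */
        *a2 ^= ft[1][(in >> 8) & 0xff];   /* movzx %h(idx) -> table+tlen  */
        *a3 ^= ft[2][(in >> 16) & 0xff];  /* after shr $16 -> table+2*tlen */
        *a4 ^= ft[3][in >> 24];           /* top byte      -> table+3*tlen */
}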
diff --git a/ANDROID_3.4.5/arch/x86/crypto/aes-x86_64-asm_64.S b/ANDROID_3.4.5/arch/x86/crypto/aes-x86_64-asm_64.S
deleted file mode 100644
index 5b577d5a..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/aes-x86_64-asm_64.S
+++ /dev/null
@@ -1,188 +0,0 @@
-/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
- *
- * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
- *
- * License:
- * This code can be distributed under the terms of the GNU General Public
- * License (GPL) Version 2 provided that the above header down to and
- * including this sentence is retained in full.
- */
-
-.extern crypto_ft_tab
-.extern crypto_it_tab
-.extern crypto_fl_tab
-.extern crypto_il_tab
-
-.text
-
-#include <asm/asm-offsets.h>
-
-#define R1 %rax
-#define R1E %eax
-#define R1X %ax
-#define R1H %ah
-#define R1L %al
-#define R2 %rbx
-#define R2E %ebx
-#define R2X %bx
-#define R2H %bh
-#define R2L %bl
-#define R3 %rcx
-#define R3E %ecx
-#define R3X %cx
-#define R3H %ch
-#define R3L %cl
-#define R4 %rdx
-#define R4E %edx
-#define R4X %dx
-#define R4H %dh
-#define R4L %dl
-#define R5 %rsi
-#define R5E %esi
-#define R6 %rdi
-#define R6E %edi
-#define R7 %rbp
-#define R7E %ebp
-#define R8 %r8
-#define R9 %r9
-#define R10 %r10
-#define R11 %r11
-
-#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
- .global FUNC; \
- .type FUNC,@function; \
- .align 8; \
-FUNC: movq r1,r2; \
- movq r3,r4; \
- leaq KEY+48(r8),r9; \
- movq r10,r11; \
- movl (r7),r5 ## E; \
- movl 4(r7),r1 ## E; \
- movl 8(r7),r6 ## E; \
- movl 12(r7),r7 ## E; \
- movl 480(r8),r10 ## E; \
- xorl -48(r9),r5 ## E; \
- xorl -44(r9),r1 ## E; \
- xorl -40(r9),r6 ## E; \
- xorl -36(r9),r7 ## E; \
- cmpl $24,r10 ## E; \
- jb B128; \
- leaq 32(r9),r9; \
- je B192; \
- leaq 32(r9),r9;
-
-#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
- movq r1,r2; \
- movq r3,r4; \
- movl r5 ## E,(r9); \
- movl r6 ## E,4(r9); \
- movl r7 ## E,8(r9); \
- movl r8 ## E,12(r9); \
- ret;
-
-#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
- movzbl r2 ## H,r5 ## E; \
- movzbl r2 ## L,r6 ## E; \
- movl TAB+1024(,r5,4),r5 ## E;\
- movw r4 ## X,r2 ## X; \
- movl TAB(,r6,4),r6 ## E; \
- roll $16,r2 ## E; \
- shrl $16,r4 ## E; \
- movzbl r4 ## H,r7 ## E; \
- movzbl r4 ## L,r4 ## E; \
- xorl OFFSET(r8),ra ## E; \
- xorl OFFSET+4(r8),rb ## E; \
- xorl TAB+3072(,r7,4),r5 ## E;\
- xorl TAB+2048(,r4,4),r6 ## E;\
- movzbl r1 ## L,r7 ## E; \
- movzbl r1 ## H,r4 ## E; \
- movl TAB+1024(,r4,4),r4 ## E;\
- movw r3 ## X,r1 ## X; \
- roll $16,r1 ## E; \
- shrl $16,r3 ## E; \
- xorl TAB(,r7,4),r5 ## E; \
- movzbl r3 ## H,r7 ## E; \
- movzbl r3 ## L,r3 ## E; \
- xorl TAB+3072(,r7,4),r4 ## E;\
- xorl TAB+2048(,r3,4),r5 ## E;\
- movzbl r1 ## H,r7 ## E; \
- movzbl r1 ## L,r3 ## E; \
- shrl $16,r1 ## E; \
- xorl TAB+3072(,r7,4),r6 ## E;\
- movl TAB+2048(,r3,4),r3 ## E;\
- movzbl r1 ## H,r7 ## E; \
- movzbl r1 ## L,r1 ## E; \
- xorl TAB+1024(,r7,4),r6 ## E;\
- xorl TAB(,r1,4),r3 ## E; \
- movzbl r2 ## H,r1 ## E; \
- movzbl r2 ## L,r7 ## E; \
- shrl $16,r2 ## E; \
- xorl TAB+3072(,r1,4),r3 ## E;\
- xorl TAB+2048(,r7,4),r4 ## E;\
- movzbl r2 ## H,r1 ## E; \
- movzbl r2 ## L,r2 ## E; \
- xorl OFFSET+8(r8),rc ## E; \
- xorl OFFSET+12(r8),rd ## E; \
- xorl TAB+1024(,r1,4),r3 ## E;\
- xorl TAB(,r2,4),r4 ## E;
-
-#define move_regs(r1,r2,r3,r4) \
- movl r3 ## E,r1 ## E; \
- movl r4 ## E,r2 ## E;
-
-#define entry(FUNC,KEY,B128,B192) \
- prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
-
-#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
-
-#define encrypt_round(TAB,OFFSET) \
- round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
- move_regs(R1,R2,R5,R6)
-
-#define encrypt_final(TAB,OFFSET) \
- round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
-
-#define decrypt_round(TAB,OFFSET) \
- round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
- move_regs(R1,R2,R5,R6)
-
-#define decrypt_final(TAB,OFFSET) \
- round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
-
-/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in) */
-
- entry(aes_enc_blk,0,enc128,enc192)
- encrypt_round(crypto_ft_tab,-96)
- encrypt_round(crypto_ft_tab,-80)
-enc192: encrypt_round(crypto_ft_tab,-64)
- encrypt_round(crypto_ft_tab,-48)
-enc128: encrypt_round(crypto_ft_tab,-32)
- encrypt_round(crypto_ft_tab,-16)
- encrypt_round(crypto_ft_tab, 0)
- encrypt_round(crypto_ft_tab, 16)
- encrypt_round(crypto_ft_tab, 32)
- encrypt_round(crypto_ft_tab, 48)
- encrypt_round(crypto_ft_tab, 64)
- encrypt_round(crypto_ft_tab, 80)
- encrypt_round(crypto_ft_tab, 96)
- encrypt_final(crypto_fl_tab,112)
- return
-
-/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in) */
-
- entry(aes_dec_blk,240,dec128,dec192)
- decrypt_round(crypto_it_tab,-96)
- decrypt_round(crypto_it_tab,-80)
-dec192: decrypt_round(crypto_it_tab,-64)
- decrypt_round(crypto_it_tab,-48)
-dec128: decrypt_round(crypto_it_tab,-32)
- decrypt_round(crypto_it_tab,-16)
- decrypt_round(crypto_it_tab, 0)
- decrypt_round(crypto_it_tab, 16)
- decrypt_round(crypto_it_tab, 32)
- decrypt_round(crypto_it_tab, 48)
- decrypt_round(crypto_it_tab, 64)
- decrypt_round(crypto_it_tab, 80)
- decrypt_round(crypto_it_tab, 96)
- decrypt_final(crypto_il_tab,112)
- return
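[Editor's note] Both entry stubs above branch on the stored key length (the cmpl $24 / jb / je sequence): 16-byte keys jump straight to the 10-round ladder, 24-byte keys enter at 12 rounds, and 32-byte keys fall through to the full 14 rounds. The same dispatch in plain C, for reference (a sketch, not kernel code):

/* Round count by AES key length in bytes, matching the cmp $24 / jb / je
 * dispatch used by both the i586 and x86_64 entry code. */
static int aes_rounds(unsigned int key_len)
{
        if (key_len < 24)
                return 10;      /* 128-bit key */
        if (key_len == 24)
                return 12;      /* 192-bit key */
        return 14;              /* 256-bit key */
}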
diff --git a/ANDROID_3.4.5/arch/x86/crypto/aes_glue.c b/ANDROID_3.4.5/arch/x86/crypto/aes_glue.c
deleted file mode 100644
index 8efcf42a..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/aes_glue.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Glue Code for the asm optimized version of the AES Cipher Algorithm
- *
- */
-
-#include <linux/module.h>
-#include <crypto/aes.h>
-#include <asm/aes.h>
-
-asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
-asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
-
-void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
-{
- aes_enc_blk(ctx, dst, src);
-}
-EXPORT_SYMBOL_GPL(crypto_aes_encrypt_x86);
-
-void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
-{
- aes_dec_blk(ctx, dst, src);
-}
-EXPORT_SYMBOL_GPL(crypto_aes_decrypt_x86);
-
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- aes_enc_blk(crypto_tfm_ctx(tfm), dst, src);
-}
-
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- aes_dec_blk(crypto_tfm_ctx(tfm), dst, src);
-}
-
-static struct crypto_alg aes_alg = {
- .cra_name = "aes",
- .cra_driver_name = "aes-asm",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx),
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = AES_MIN_KEY_SIZE,
- .cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = crypto_aes_set_key,
- .cia_encrypt = aes_encrypt,
- .cia_decrypt = aes_decrypt
- }
- }
-};
-
-static int __init aes_init(void)
-{
- return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_fini(void)
-{
- crypto_unregister_alg(&aes_alg);
-}
-
-module_init(aes_init);
-module_exit(aes_fini);
-
-MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("aes");
-MODULE_ALIAS("aes-asm");
diff --git a/ANDROID_3.4.5/arch/x86/crypto/aesni-intel_asm.S b/ANDROID_3.4.5/arch/x86/crypto/aesni-intel_asm.S
deleted file mode 100644
index 3470624d..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/aesni-intel_asm.S
+++ /dev/null
@@ -1,2618 +0,0 @@
-/*
- * Implement AES algorithm in Intel AES-NI instructions.
- *
- * The white paper of AES-NI instructions can be downloaded from:
- * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
- *
- * Copyright (C) 2008, Intel Corp.
- * Author: Huang Ying <ying.huang@intel.com>
- * Vinodh Gopal <vinodh.gopal@intel.com>
- * Kahraman Akdemir
- *
- * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
- * interface for 64-bit kernels.
- * Authors: Erdinc Ozturk (erdinc.ozturk@intel.com)
- * Aidan O'Mahony (aidan.o.mahony@intel.com)
- * Adrian Hoban <adrian.hoban@intel.com>
- * James Guilford (james.guilford@intel.com)
- * Gabriele Paoloni <gabriele.paoloni@intel.com>
- * Tadeusz Struk (tadeusz.struk@intel.com)
- * Wajdi Feghali (wajdi.k.feghali@intel.com)
- * Copyright (c) 2010, Intel Corporation.
- *
- * Ported x86_64 version to x86:
- * Author: Mathias Krause <minipli@googlemail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/linkage.h>
-#include <asm/inst.h>
-
-#ifdef __x86_64__
-.data
-POLY: .octa 0xC2000000000000000000000000000001
-TWOONE: .octa 0x00000001000000000000000000000001
-
-# order of these constants should not change.
-# more specifically, ALL_F should follow SHIFT_MASK,
-# and ZERO should follow ALL_F
-
-SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F
-MASK1: .octa 0x0000000000000000ffffffffffffffff
-MASK2: .octa 0xffffffffffffffff0000000000000000
-SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
-ALL_F: .octa 0xffffffffffffffffffffffffffffffff
-ZERO: .octa 0x00000000000000000000000000000000
-ONE: .octa 0x00000000000000000000000000000001
-F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
-dec: .octa 0x1
-enc: .octa 0x2
-
-
-.text
-
-
-#define STACK_OFFSET 8*3
-#define HashKey 16*0 // store HashKey <<1 mod poly here
-#define HashKey_2 16*1 // store HashKey^2 <<1 mod poly here
-#define HashKey_3 16*2 // store HashKey^3 <<1 mod poly here
-#define HashKey_4 16*3 // store HashKey^4 <<1 mod poly here
-#define HashKey_k 16*4 // store XOR of High 64 bits and Low 64
- // bits of HashKey <<1 mod poly here
- //(for Karatsuba purposes)
-#define HashKey_2_k 16*5 // store XOR of High 64 bits and Low 64
- // bits of HashKey^2 <<1 mod poly here
- // (for Karatsuba purposes)
-#define HashKey_3_k 16*6 // store XOR of High 64 bits and Low 64
- // bits of HashKey^3 <<1 mod poly here
- // (for Karatsuba purposes)
-#define HashKey_4_k 16*7 // store XOR of High 64 bits and Low 64
- // bits of HashKey^4 <<1 mod poly here
- // (for Karatsuba purposes)
-#define VARIABLE_OFFSET 16*8
-
-#define arg1 rdi
-#define arg2 rsi
-#define arg3 rdx
-#define arg4 rcx
-#define arg5 r8
-#define arg6 r9
-#define arg7 STACK_OFFSET+8(%r14)
-#define arg8 STACK_OFFSET+16(%r14)
-#define arg9 STACK_OFFSET+24(%r14)
-#define arg10 STACK_OFFSET+32(%r14)
-#endif
-
-
-#define STATE1 %xmm0
-#define STATE2 %xmm4
-#define STATE3 %xmm5
-#define STATE4 %xmm6
-#define STATE STATE1
-#define IN1 %xmm1
-#define IN2 %xmm7
-#define IN3 %xmm8
-#define IN4 %xmm9
-#define IN IN1
-#define KEY %xmm2
-#define IV %xmm3
-
-#define BSWAP_MASK %xmm10
-#define CTR %xmm11
-#define INC %xmm12
-
-#ifdef __x86_64__
-#define AREG %rax
-#define KEYP %rdi
-#define OUTP %rsi
-#define UKEYP OUTP
-#define INP %rdx
-#define LEN %rcx
-#define IVP %r8
-#define KLEN %r9d
-#define T1 %r10
-#define TKEYP T1
-#define T2 %r11
-#define TCTR_LOW T2
-#else
-#define AREG %eax
-#define KEYP %edi
-#define OUTP AREG
-#define UKEYP OUTP
-#define INP %edx
-#define LEN %esi
-#define IVP %ebp
-#define KLEN %ebx
-#define T1 %ecx
-#define TKEYP T1
-#endif
-
-
-#ifdef __x86_64__
-/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
-*
-*
-* Input: A and B (128-bits each, bit-reflected)
-* Output: C = A*B*x mod poly (i.e. >>1)
-* To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
-* GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
-*
-*/
-.macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5
- movdqa \GH, \TMP1
- pshufd $78, \GH, \TMP2
- pshufd $78, \HK, \TMP3
- pxor \GH, \TMP2 # TMP2 = a1+a0
- pxor \HK, \TMP3 # TMP3 = b1+b0
- PCLMULQDQ 0x11, \HK, \TMP1 # TMP1 = a1*b1
- PCLMULQDQ 0x00, \HK, \GH # GH = a0*b0
- PCLMULQDQ 0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0)
- pxor \GH, \TMP2
- pxor \TMP1, \TMP2 # TMP2 = (a0*b0)+(a1*b0)
- movdqa \TMP2, \TMP3
- pslldq $8, \TMP3 # left shift TMP3 2 DWs
- psrldq $8, \TMP2 # right shift TMP2 2 DWs
- pxor \TMP3, \GH
- pxor \TMP2, \TMP1 # TMP2:GH holds the result of GH*HK
-
- # first phase of the reduction
-
- movdqa \GH, \TMP2
- movdqa \GH, \TMP3
- movdqa \GH, \TMP4 # copy GH into TMP2,TMP3 and TMP4
- # in order to perform
- # independent shifts
- pslld $31, \TMP2 # packed right shift <<31
- pslld $30, \TMP3 # packed right shift <<30
- pslld $25, \TMP4 # packed right shift <<25
- pxor \TMP3, \TMP2 # xor the shifted versions
- pxor \TMP4, \TMP2
- movdqa \TMP2, \TMP5
- psrldq $4, \TMP5 # right shift TMP5 1 DW
- pslldq $12, \TMP2 # left shift TMP2 3 DWs
- pxor \TMP2, \GH
-
- # second phase of the reduction
-
- movdqa \GH,\TMP2 # copy GH into TMP2,TMP3 and TMP4
- # in order to perform
- # independent shifts
- movdqa \GH,\TMP3
- movdqa \GH,\TMP4
- psrld $1,\TMP2 # packed left shift >>1
- psrld $2,\TMP3 # packed left shift >>2
- psrld $7,\TMP4 # packed left shift >>7
- pxor \TMP3,\TMP2 # xor the shifted versions
- pxor \TMP4,\TMP2
- pxor \TMP5, \TMP2
- pxor \TMP2, \GH
- pxor \TMP1, \GH # result is in GH
-.endm
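[Editor's note] The three PCLMULQDQ invocations in GHASH_MUL are the textbook Karatsuba split: a 128x128 carry-less product is assembled from a1*b1, a0*b0, and (a1^a0)*(b1^b0), with the middle term recovered by XOR-ing the outer products back out. A self-contained C illustration of that combination step, with a bit-loop standing in for PCLMULQDQ (illustrative only; it omits the bit reflection and the reduction that the macro also performs):

#include <stdint.h>
#include <stdio.h>

/* 64x64 -> 128 carry-less multiply, a software stand-in for PCLMULQDQ. */
static void clmul64(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
{
        uint64_t h = 0, l = 0;
        int i;

        for (i = 0; i < 64; i++)
                if ((b >> i) & 1) {
                        l ^= a << i;
                        if (i)
                                h ^= a >> (64 - i);
                }
        *hi = h;
        *lo = l;
}

/* Karatsuba: a 128x128 carry-less product from three 64x64 products,
 * as in GHASH_MUL. r[0..3] receives the 256-bit result, low word first. */
static void clmul128_karatsuba(const uint64_t a[2], const uint64_t b[2],
                               uint64_t r[4])
{
        uint64_t hh[2], ll[2], mm[2];

        clmul64(a[1], b[1], &hh[1], &hh[0]);                  /* a1*b1 */
        clmul64(a[0], b[0], &ll[1], &ll[0]);                  /* a0*b0 */
        clmul64(a[1] ^ a[0], b[1] ^ b[0], &mm[1], &mm[0]);    /* (a1+a0)*(b1+b0) */
        mm[0] ^= hh[0] ^ ll[0];         /* recover the middle term */
        mm[1] ^= hh[1] ^ ll[1];
        r[0] = ll[0];
        r[1] = ll[1] ^ mm[0];
        r[2] = hh[0] ^ mm[1];
        r[3] = hh[1];
}

int main(void)
{
        uint64_t a[2] = { 0x0123456789abcdefULL, 0xfedcba9876543210ULL };
        uint64_t b[2] = { 0x1111111111111111ULL, 0x2222222222222222ULL };
        uint64_t r[4];

        clmul128_karatsuba(a, b, r);
        printf("%016llx %016llx %016llx %016llx\n",
               (unsigned long long)r[3], (unsigned long long)r[2],
               (unsigned long long)r[1], (unsigned long long)r[0]);
        return 0;
}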
-
-/*
-* if a = number of total plaintext bytes
-* b = floor(a/16)
-* num_initial_blocks = b mod 4
-* encrypt the initial num_initial_blocks blocks and apply ghash on
-* the ciphertext
-* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
-* are clobbered
-* %arg1, %arg2, %arg3, %r14 are used as pointers only, not modified
-*/
-
-
-.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
-XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
- mov arg7, %r10 # %r10 = AAD
- mov arg8, %r12 # %r12 = aadLen
- mov %r12, %r11
- pxor %xmm\i, %xmm\i
-_get_AAD_loop\num_initial_blocks\operation:
- movd (%r10), \TMP1
- pslldq $12, \TMP1
- psrldq $4, %xmm\i
- pxor \TMP1, %xmm\i
- add $4, %r10
- sub $4, %r12
- jne _get_AAD_loop\num_initial_blocks\operation
- cmp $16, %r11
- je _get_AAD_loop2_done\num_initial_blocks\operation
- mov $16, %r12
-_get_AAD_loop2\num_initial_blocks\operation:
- psrldq $4, %xmm\i
- sub $4, %r12
- cmp %r11, %r12
- jne _get_AAD_loop2\num_initial_blocks\operation
-_get_AAD_loop2_done\num_initial_blocks\operation:
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
-
- xor %r11, %r11 # initialise the data pointer offset as zero
-
- # start AES for num_initial_blocks blocks
-
- mov %arg5, %rax # %rax = *Y0
- movdqu (%rax), \XMM0 # XMM0 = Y0
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM0
-
-.if (\i == 5) || (\i == 6) || (\i == 7)
-.irpc index, \i_seq
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, %xmm\index
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
-
-.endr
-.irpc index, \i_seq
- pxor 16*0(%arg1), %xmm\index
-.endr
-.irpc index, \i_seq
- movaps 0x10(%rdi), \TMP1
- AESENC \TMP1, %xmm\index # Round 1
-.endr
-.irpc index, \i_seq
- movaps 0x20(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
-.endr
-.irpc index, \i_seq
- movaps 0x30(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 3
-.endr
-.irpc index, \i_seq
- movaps 0x40(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 4
-.endr
-.irpc index, \i_seq
- movaps 0x50(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 5
-.endr
-.irpc index, \i_seq
- movaps 0x60(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 6
-.endr
-.irpc index, \i_seq
- movaps 0x70(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 7
-.endr
-.irpc index, \i_seq
- movaps 0x80(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 8
-.endr
-.irpc index, \i_seq
- movaps 0x90(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 9
-.endr
-.irpc index, \i_seq
- movaps 0xa0(%arg1), \TMP1
- AESENCLAST \TMP1, %xmm\index # Round 10
-.endr
-.irpc index, \i_seq
- movdqu (%arg3 , %r11, 1), \TMP1
- pxor \TMP1, %xmm\index
- movdqu %xmm\index, (%arg2 , %r11, 1)
- # write back plaintext/ciphertext for num_initial_blocks
- add $16, %r11
-
- movdqa \TMP1, %xmm\index
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, %xmm\index
-
- # prepare plaintext/ciphertext for GHASH computation
-.endr
-.endif
- GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
- # apply GHASH on num_initial_blocks blocks
-
-.if \i == 5
- pxor %xmm5, %xmm6
- GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
- pxor %xmm6, %xmm7
- GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
- pxor %xmm7, %xmm8
- GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-.elseif \i == 6
- pxor %xmm6, %xmm7
- GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
- pxor %xmm7, %xmm8
- GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-.elseif \i == 7
- pxor %xmm7, %xmm8
- GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-.endif
- cmp $64, %r13
- jl _initial_blocks_done\num_initial_blocks\operation
- # no need for precomputed values
-/*
-*
-* Precomputations for HashKey parallel with encryption of first 4 blocks.
-* HashKey_i_k holds XORed values of the low and high parts of HashKey^i
-*/
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, \XMM1
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
-
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, \XMM2
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
-
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, \XMM3
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
-
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, \XMM4
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
-
- pxor 16*0(%arg1), \XMM1
- pxor 16*0(%arg1), \XMM2
- pxor 16*0(%arg1), \XMM3
- pxor 16*0(%arg1), \XMM4
- movdqa \TMP3, \TMP5
- pshufd $78, \TMP3, \TMP1
- pxor \TMP3, \TMP1
- movdqa \TMP1, HashKey_k(%rsp)
- GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^2<<1 (mod poly)
- movdqa \TMP5, HashKey_2(%rsp)
-# HashKey_2 = HashKey^2<<1 (mod poly)
- pshufd $78, \TMP5, \TMP1
- pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_2_k(%rsp)
-.irpc index, 1234 # do 4 rounds
- movaps 0x10*\index(%arg1), \TMP1
- AESENC \TMP1, \XMM1
- AESENC \TMP1, \XMM2
- AESENC \TMP1, \XMM3
- AESENC \TMP1, \XMM4
-.endr
- GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
- movdqa \TMP5, HashKey_3(%rsp)
- pshufd $78, \TMP5, \TMP1
- pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_3_k(%rsp)
-.irpc index, 56789 # do next 5 rounds
- movaps 0x10*\index(%arg1), \TMP1
- AESENC \TMP1, \XMM1
- AESENC \TMP1, \XMM2
- AESENC \TMP1, \XMM3
- AESENC \TMP1, \XMM4
-.endr
- GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^4<<1 (mod poly)
- movdqa \TMP5, HashKey_4(%rsp)
- pshufd $78, \TMP5, \TMP1
- pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_4_k(%rsp)
- movaps 0xa0(%arg1), \TMP2
- AESENCLAST \TMP2, \XMM1
- AESENCLAST \TMP2, \XMM2
- AESENCLAST \TMP2, \XMM3
- AESENCLAST \TMP2, \XMM4
- movdqu 16*0(%arg3 , %r11 , 1), \TMP1
- pxor \TMP1, \XMM1
- movdqu \XMM1, 16*0(%arg2 , %r11 , 1)
- movdqa \TMP1, \XMM1
- movdqu 16*1(%arg3 , %r11 , 1), \TMP1
- pxor \TMP1, \XMM2
- movdqu \XMM2, 16*1(%arg2 , %r11 , 1)
- movdqa \TMP1, \XMM2
- movdqu 16*2(%arg3 , %r11 , 1), \TMP1
- pxor \TMP1, \XMM3
- movdqu \XMM3, 16*2(%arg2 , %r11 , 1)
- movdqa \TMP1, \XMM3
- movdqu 16*3(%arg3 , %r11 , 1), \TMP1
- pxor \TMP1, \XMM4
- movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
- movdqa \TMP1, \XMM4
- add $64, %r11
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
- pxor \XMMDst, \XMM1
-# combine GHASHed value with the corresponding ciphertext
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
-
-_initial_blocks_done\num_initial_blocks\operation:
-
-.endm
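[Editor's note] The num_initial_blocks parameter threaded through the macro above comes from the relation spelled out in its header comment: with a = total plaintext bytes and b = floor(a/16) blocks, b mod 4 blocks are peeled off before the four-block main loop. As caller-side C (trivial, but it pins down the arithmetic; an illustrative sketch):

#include <stdint.h>

/* How the prologue is sized: the 4-way main loop handles groups of
 * four blocks, so up to three leading blocks go through
 * INITIAL_BLOCKS_* first. */
static unsigned int num_initial_blocks(uint64_t plaintext_len)
{
        uint64_t blocks = plaintext_len / 16;   /* b = floor(a/16) */

        return (unsigned int)(blocks % 4);      /* b mod 4 */
}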
-
-
-/*
-* if a = number of total plaintext bytes
-* b = floor(a/16)
-* num_initial_blocks = b mod 4
-* encrypt the initial num_initial_blocks blocks and apply ghash on
-* the ciphertext
-* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
-* are clobbered
-* %arg1, %arg2, %arg3, %r14 are used as pointers only, not modified
-*/
-
-
-.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
-XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
- mov arg7, %r10 # %r10 = AAD
- mov arg8, %r12 # %r12 = aadLen
- mov %r12, %r11
- pxor %xmm\i, %xmm\i
-_get_AAD_loop\num_initial_blocks\operation:
- movd (%r10), \TMP1
- pslldq $12, \TMP1
- psrldq $4, %xmm\i
- pxor \TMP1, %xmm\i
- add $4, %r10
- sub $4, %r12
- jne _get_AAD_loop\num_initial_blocks\operation
- cmp $16, %r11
- je _get_AAD_loop2_done\num_initial_blocks\operation
- mov $16, %r12
-_get_AAD_loop2\num_initial_blocks\operation:
- psrldq $4, %xmm\i
- sub $4, %r12
- cmp %r11, %r12
- jne _get_AAD_loop2\num_initial_blocks\operation
-_get_AAD_loop2_done\num_initial_blocks\operation:
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
-
- xor %r11, %r11 # initialise the data pointer offset as zero
-
- # start AES for num_initial_blocks blocks
-
- mov %arg5, %rax # %rax = *Y0
- movdqu (%rax), \XMM0 # XMM0 = Y0
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM0
-
-.if (\i == 5) || (\i == 6) || (\i == 7)
-.irpc index, \i_seq
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, %xmm\index
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
-
-.endr
-.irpc index, \i_seq
- pxor 16*0(%arg1), %xmm\index
-.endr
-.irpc index, \i_seq
- movaps 0x10(%rdi), \TMP1
- AESENC \TMP1, %xmm\index # Round 1
-.endr
-.irpc index, \i_seq
- movaps 0x20(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
-.endr
-.irpc index, \i_seq
- movaps 0x30(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 3
-.endr
-.irpc index, \i_seq
- movaps 0x40(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 4
-.endr
-.irpc index, \i_seq
- movaps 0x50(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 5
-.endr
-.irpc index, \i_seq
- movaps 0x60(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 6
-.endr
-.irpc index, \i_seq
- movaps 0x70(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 7
-.endr
-.irpc index, \i_seq
- movaps 0x80(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 8
-.endr
-.irpc index, \i_seq
- movaps 0x90(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 9
-.endr
-.irpc index, \i_seq
- movaps 0xa0(%arg1), \TMP1
- AESENCLAST \TMP1, %xmm\index # Round 10
-.endr
-.irpc index, \i_seq
- movdqu (%arg3 , %r11, 1), \TMP1
- pxor \TMP1, %xmm\index
- movdqu %xmm\index, (%arg2 , %r11, 1)
- # write back plaintext/ciphertext for num_initial_blocks
- add $16, %r11
-
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, %xmm\index
-
- # prepare plaintext/ciphertext for GHASH computation
-.endr
-.endif
- GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
- # apply GHASH on num_initial_blocks blocks
-
-.if \i == 5
- pxor %xmm5, %xmm6
- GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
- pxor %xmm6, %xmm7
- GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
- pxor %xmm7, %xmm8
- GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-.elseif \i == 6
- pxor %xmm6, %xmm7
- GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
- pxor %xmm7, %xmm8
- GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-.elseif \i == 7
- pxor %xmm7, %xmm8
- GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-.endif
- cmp $64, %r13
- jl _initial_blocks_done\num_initial_blocks\operation
- # no need for precomputed values
-/*
-*
-* Precomputations for HashKey parallel with encryption of first 4 blocks.
-* HashKey_i_k holds XORed values of the low and high parts of HashKey^i
-*/
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, \XMM1
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
-
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, \XMM2
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
-
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, \XMM3
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
-
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, \XMM4
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
-
- pxor 16*0(%arg1), \XMM1
- pxor 16*0(%arg1), \XMM2
- pxor 16*0(%arg1), \XMM3
- pxor 16*0(%arg1), \XMM4
- movdqa \TMP3, \TMP5
- pshufd $78, \TMP3, \TMP1
- pxor \TMP3, \TMP1
- movdqa \TMP1, HashKey_k(%rsp)
- GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^2<<1 (mod poly)
- movdqa \TMP5, HashKey_2(%rsp)
-# HashKey_2 = HashKey^2<<1 (mod poly)
- pshufd $78, \TMP5, \TMP1
- pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_2_k(%rsp)
-.irpc index, 1234 # do 4 rounds
- movaps 0x10*\index(%arg1), \TMP1
- AESENC \TMP1, \XMM1
- AESENC \TMP1, \XMM2
- AESENC \TMP1, \XMM3
- AESENC \TMP1, \XMM4
-.endr
- GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
- movdqa \TMP5, HashKey_3(%rsp)
- pshufd $78, \TMP5, \TMP1
- pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_3_k(%rsp)
-.irpc index, 56789 # do next 5 rounds
- movaps 0x10*\index(%arg1), \TMP1
- AESENC \TMP1, \XMM1
- AESENC \TMP1, \XMM2
- AESENC \TMP1, \XMM3
- AESENC \TMP1, \XMM4
-.endr
- GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^4<<1 (mod poly)
- movdqa \TMP5, HashKey_4(%rsp)
- pshufd $78, \TMP5, \TMP1
- pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_4_k(%rsp)
- movaps 0xa0(%arg1), \TMP2
- AESENCLAST \TMP2, \XMM1
- AESENCLAST \TMP2, \XMM2
- AESENCLAST \TMP2, \XMM3
- AESENCLAST \TMP2, \XMM4
- movdqu 16*0(%arg3 , %r11 , 1), \TMP1
- pxor \TMP1, \XMM1
- movdqu 16*1(%arg3 , %r11 , 1), \TMP1
- pxor \TMP1, \XMM2
- movdqu 16*2(%arg3 , %r11 , 1), \TMP1
- pxor \TMP1, \XMM3
- movdqu 16*3(%arg3 , %r11 , 1), \TMP1
- pxor \TMP1, \XMM4
- movdqu \XMM1, 16*0(%arg2 , %r11 , 1)
- movdqu \XMM2, 16*1(%arg2 , %r11 , 1)
- movdqu \XMM3, 16*2(%arg2 , %r11 , 1)
- movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
-
- add $64, %r11
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
- pxor \XMMDst, \XMM1
-# combine GHASHed value with the corresponding ciphertext
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
-
-_initial_blocks_done\num_initial_blocks\operation:
-
-.endm
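[Editor's note] Throughout both INITIAL_BLOCKS_* macros the counter is kept byte-reflected so paddd ONE(%rip) can bump it with a single integer add, and PSHUFB restores the big-endian wire order before encryption. The portable equivalent of that counter step on a 16-byte block (a sketch; GCM increments only the last 32 bits, big-endian):

#include <stdint.h>

/* inc32: bump the last four bytes of the counter block as a big-endian
 * 32-bit integer -- what paddd ONE + PSHUFB achieve in the asm. */
static void gcm_inc32(uint8_t ctr[16])
{
        uint32_t c = ((uint32_t)ctr[12] << 24) | ((uint32_t)ctr[13] << 16) |
                     ((uint32_t)ctr[14] << 8)  |  (uint32_t)ctr[15];

        c++;
        ctr[12] = (uint8_t)(c >> 24);
        ctr[13] = (uint8_t)(c >> 16);
        ctr[14] = (uint8_t)(c >> 8);
        ctr[15] = (uint8_t)c;
}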
-
-/*
-* encrypt 4 blocks at a time
-* ghash the 4 previously encrypted ciphertext blocks
-* %arg1, %arg2, %arg3 are used as pointers only, not modified
-* %r11 is the data offset value
-*/
-.macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \
-TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
-
- movdqa \XMM1, \XMM5
- movdqa \XMM2, \XMM6
- movdqa \XMM3, \XMM7
- movdqa \XMM4, \XMM8
-
- movdqa SHUF_MASK(%rip), %xmm15
- # multiply TMP5 * HashKey using karatsuba
-
- movdqa \XMM5, \TMP4
- pshufd $78, \XMM5, \TMP6
- pxor \XMM5, \TMP6
- paddd ONE(%rip), \XMM0 # INCR CNT
- movdqa HashKey_4(%rsp), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
- movdqa \XMM0, \XMM1
- paddd ONE(%rip), \XMM0 # INCR CNT
- movdqa \XMM0, \XMM2
- paddd ONE(%rip), \XMM0 # INCR CNT
- movdqa \XMM0, \XMM3
- paddd ONE(%rip), \XMM0 # INCR CNT
- movdqa \XMM0, \XMM4
- PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
- PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0
- PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
-
- pxor (%arg1), \XMM1
- pxor (%arg1), \XMM2
- pxor (%arg1), \XMM3
- pxor (%arg1), \XMM4
- movdqa HashKey_4_k(%rsp), \TMP5
- PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
- movaps 0x10(%arg1), \TMP1
- AESENC \TMP1, \XMM1 # Round 1
- AESENC \TMP1, \XMM2
- AESENC \TMP1, \XMM3
- AESENC \TMP1, \XMM4
- movaps 0x20(%arg1), \TMP1
- AESENC \TMP1, \XMM1 # Round 2
- AESENC \TMP1, \XMM2
- AESENC \TMP1, \XMM3
- AESENC \TMP1, \XMM4
- movdqa \XMM6, \TMP1
- pshufd $78, \XMM6, \TMP2
- pxor \XMM6, \TMP2
- movdqa HashKey_3(%rsp), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
- movaps 0x30(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 3
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0
- movaps 0x40(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 4
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- movdqa HashKey_3_k(%rsp), \TMP5
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
- movaps 0x50(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 5
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- pxor \TMP1, \TMP4
-# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
- pxor \XMM6, \XMM5
- pxor \TMP2, \TMP6
- movdqa \XMM7, \TMP1
- pshufd $78, \XMM7, \TMP2
- pxor \XMM7, \TMP2
- movdqa HashKey_2(%rsp ), \TMP5
-
- # Multiply TMP5 * HashKey using karatsuba
-
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
- movaps 0x60(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 6
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0
- movaps 0x70(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 7
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- movdqa HashKey_2_k(%rsp), \TMP5
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
- movaps 0x80(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 8
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- pxor \TMP1, \TMP4
-# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
- pxor \XMM7, \XMM5
- pxor \TMP2, \TMP6
-
- # Multiply XMM8 * HashKey
- # XMM8 and TMP5 hold the values for the two operands
-
- movdqa \XMM8, \TMP1
- pshufd $78, \XMM8, \TMP2
- pxor \XMM8, \TMP2
- movdqa HashKey(%rsp), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
- movaps 0x90(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 9
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
- movaps 0xa0(%arg1), \TMP3
- AESENCLAST \TMP3, \XMM1 # Round 10
- AESENCLAST \TMP3, \XMM2
- AESENCLAST \TMP3, \XMM3
- AESENCLAST \TMP3, \XMM4
- movdqa HashKey_k(%rsp), \TMP5
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
- movdqu (%arg3,%r11,1), \TMP3
- pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
- movdqu 16(%arg3,%r11,1), \TMP3
- pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK
- movdqu 32(%arg3,%r11,1), \TMP3
- pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK
- movdqu 48(%arg3,%r11,1), \TMP3
- pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK
- movdqu \XMM1, (%arg2,%r11,1) # Write to the ciphertext buffer
- movdqu \XMM2, 16(%arg2,%r11,1) # Write to the ciphertext buffer
- movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer
- movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer
- PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
-
- pxor \TMP4, \TMP1
- pxor \XMM8, \XMM5
- pxor \TMP6, \TMP2
- pxor \TMP1, \TMP2
- pxor \XMM5, \TMP2
- movdqa \TMP2, \TMP3
- pslldq $8, \TMP3 # left shift TMP3 2 DWs
- psrldq $8, \TMP2 # right shift TMP2 2 DWs
- pxor \TMP3, \XMM5
- pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5
-
- # first phase of reduction
-
- movdqa \XMM5, \TMP2
- movdqa \XMM5, \TMP3
- movdqa \XMM5, \TMP4
-# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
- pslld $31, \TMP2 # packed right shift << 31
- pslld $30, \TMP3 # packed right shift << 30
- pslld $25, \TMP4 # packed right shift << 25
- pxor \TMP3, \TMP2 # xor the shifted versions
- pxor \TMP4, \TMP2
- movdqa \TMP2, \TMP5
- psrldq $4, \TMP5 # right shift T5 1 DW
- pslldq $12, \TMP2 # left shift T2 3 DWs
- pxor \TMP2, \XMM5
-
- # second phase of reduction
-
- movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4
- movdqa \XMM5,\TMP3
- movdqa \XMM5,\TMP4
- psrld $1, \TMP2 # packed left shift >>1
- psrld $2, \TMP3 # packed left shift >>2
- psrld $7, \TMP4 # packed left shift >>7
- pxor \TMP3,\TMP2 # xor the shifted versions
- pxor \TMP4,\TMP2
- pxor \TMP5, \TMP2
- pxor \TMP2, \XMM5
- pxor \TMP1, \XMM5 # result is in XMM5
-
- pxor \XMM5, \XMM1
-.endm
-
-/*
-* decrypt 4 blocks at a time
-* ghash the 4 previously decrypted ciphertext blocks
-* %arg1, %arg2, %arg3 are used as pointers only, not modified
-* %r11 is the data offset value
-*/
-.macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \
-TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
-
- movdqa \XMM1, \XMM5
- movdqa \XMM2, \XMM6
- movdqa \XMM3, \XMM7
- movdqa \XMM4, \XMM8
-
- movdqa SHUF_MASK(%rip), %xmm15
- # multiply TMP5 * HashKey using karatsuba
-
- movdqa \XMM5, \TMP4
- pshufd $78, \XMM5, \TMP6
- pxor \XMM5, \TMP6
- paddd ONE(%rip), \XMM0 # INCR CNT
- movdqa HashKey_4(%rsp), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
- movdqa \XMM0, \XMM1
- paddd ONE(%rip), \XMM0 # INCR CNT
- movdqa \XMM0, \XMM2
- paddd ONE(%rip), \XMM0 # INCR CNT
- movdqa \XMM0, \XMM3
- paddd ONE(%rip), \XMM0 # INCR CNT
- movdqa \XMM0, \XMM4
- PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
- PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0
- PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
-
- pxor (%arg1), \XMM1
- pxor (%arg1), \XMM2
- pxor (%arg1), \XMM3
- pxor (%arg1), \XMM4
- movdqa HashKey_4_k(%rsp), \TMP5
- PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
- movaps 0x10(%arg1), \TMP1
- AESENC \TMP1, \XMM1 # Round 1
- AESENC \TMP1, \XMM2
- AESENC \TMP1, \XMM3
- AESENC \TMP1, \XMM4
- movaps 0x20(%arg1), \TMP1
- AESENC \TMP1, \XMM1 # Round 2
- AESENC \TMP1, \XMM2
- AESENC \TMP1, \XMM3
- AESENC \TMP1, \XMM4
- movdqa \XMM6, \TMP1
- pshufd $78, \XMM6, \TMP2
- pxor \XMM6, \TMP2
- movdqa HashKey_3(%rsp), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
- movaps 0x30(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 3
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0
- movaps 0x40(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 4
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- movdqa HashKey_3_k(%rsp), \TMP5
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
- movaps 0x50(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 5
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- pxor \TMP1, \TMP4
-# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
- pxor \XMM6, \XMM5
- pxor \TMP2, \TMP6
- movdqa \XMM7, \TMP1
- pshufd $78, \XMM7, \TMP2
- pxor \XMM7, \TMP2
- movdqa HashKey_2(%rsp ), \TMP5
-
- # Multiply TMP5 * HashKey using karatsuba
-
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
- movaps 0x60(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 6
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0
- movaps 0x70(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 7
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- movdqa HashKey_2_k(%rsp), \TMP5
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
- movaps 0x80(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 8
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- pxor \TMP1, \TMP4
-# accumulate the results in TMP4:XMM5, TMP6 holds the middle part
- pxor \XMM7, \XMM5
- pxor \TMP2, \TMP6
-
- # Multiply XMM8 * HashKey
- # XMM8 and TMP5 hold the values for the two operands
-
- movdqa \XMM8, \TMP1
- pshufd $78, \XMM8, \TMP2
- pxor \XMM8, \TMP2
- movdqa HashKey(%rsp), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
- movaps 0x90(%arg1), \TMP3
- AESENC \TMP3, \XMM1 # Round 9
- AESENC \TMP3, \XMM2
- AESENC \TMP3, \XMM3
- AESENC \TMP3, \XMM4
- PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
- movaps 0xa0(%arg1), \TMP3
- AESENCLAST \TMP3, \XMM1 # Round 10
- AESENCLAST \TMP3, \XMM2
- AESENCLAST \TMP3, \XMM3
- AESENCLAST \TMP3, \XMM4
- movdqa HashKey_k(%rsp), \TMP5
- PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
- movdqu (%arg3,%r11,1), \TMP3
- pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
- movdqu \XMM1, (%arg2,%r11,1) # Write to plaintext buffer
- movdqa \TMP3, \XMM1
- movdqu 16(%arg3,%r11,1), \TMP3
- pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK
- movdqu \XMM2, 16(%arg2,%r11,1) # Write to plaintext buffer
- movdqa \TMP3, \XMM2
- movdqu 32(%arg3,%r11,1), \TMP3
- pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK
- movdqu \XMM3, 32(%arg2,%r11,1) # Write to plaintext buffer
- movdqa \TMP3, \XMM3
- movdqu 48(%arg3,%r11,1), \TMP3
- pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK
- movdqu \XMM4, 48(%arg2,%r11,1) # Write to plaintext buffer
- movdqa \TMP3, \XMM4
- PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
- PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap
-
- pxor \TMP4, \TMP1
- pxor \XMM8, \XMM5
- pxor \TMP6, \TMP2
- pxor \TMP1, \TMP2
- pxor \XMM5, \TMP2
- movdqa \TMP2, \TMP3
- pslldq $8, \TMP3 # left shift TMP3 2 DWs
- psrldq $8, \TMP2 # right shift TMP2 2 DWs
- pxor \TMP3, \XMM5
- pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5
-
- # first phase of reduction
-
- movdqa \XMM5, \TMP2
- movdqa \XMM5, \TMP3
- movdqa \XMM5, \TMP4
-# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
- pslld $31, \TMP2 # packed right shift << 31
- pslld $30, \TMP3 # packed right shift << 30
- pslld $25, \TMP4 # packed right shift << 25
- pxor \TMP3, \TMP2 # xor the shifted versions
- pxor \TMP4, \TMP2
- movdqa \TMP2, \TMP5
- psrldq $4, \TMP5 # right shift T5 1 DW
- pslldq $12, \TMP2 # left shift T2 3 DWs
- pxor \TMP2, \XMM5
-
- # second phase of reduction
-
- movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4
- movdqa \XMM5,\TMP3
- movdqa \XMM5,\TMP4
- psrld $1, \TMP2 # packed left shift >>1
- psrld $2, \TMP3 # packed left shift >>2
- psrld $7, \TMP4 # packed left shift >>7
- pxor \TMP3,\TMP2 # xor the shifted versions
- pxor \TMP4,\TMP2
- pxor \TMP5, \TMP2
- pxor \TMP2, \XMM5
- pxor \TMP1, \XMM5 # result is in XMM5
-
- pxor \XMM5, \XMM1
-.endm
-
-/* GHASH the last 4 ciphertext blocks. */
-.macro GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \
-TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
-
- # Multiply TMP6 * HashKey (using Karatsuba)
-
- movdqa \XMM1, \TMP6
- pshufd $78, \XMM1, \TMP2
- pxor \XMM1, \TMP2
- movdqa HashKey_4(%rsp), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1
- PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0
- movdqa HashKey_4_k(%rsp), \TMP4
- PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
- movdqa \XMM1, \XMMDst
- movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1
-
- # Multiply TMP1 * HashKey (using Karatsuba)
-
- movdqa \XMM2, \TMP1
- pshufd $78, \XMM2, \TMP2
- pxor \XMM2, \TMP2
- movdqa HashKey_3(%rsp), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
- PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0
- movdqa HashKey_3_k(%rsp), \TMP4
- PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
- pxor \TMP1, \TMP6
- pxor \XMM2, \XMMDst
- pxor \TMP2, \XMM1
-# results accumulated in TMP6, XMMDst, XMM1
-
- # Multiply TMP1 * HashKey (using Karatsuba)
-
- movdqa \XMM3, \TMP1
- pshufd $78, \XMM3, \TMP2
- pxor \XMM3, \TMP2
- movdqa HashKey_2(%rsp), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
- PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0
- movdqa HashKey_2_k(%rsp), \TMP4
- PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
- pxor \TMP1, \TMP6
- pxor \XMM3, \XMMDst
- pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1
-
- # Multiply TMP1 * HashKey (using Karatsuba)
- movdqa \XMM4, \TMP1
- pshufd $78, \XMM4, \TMP2
- pxor \XMM4, \TMP2
- movdqa HashKey(%rsp), \TMP5
- PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
- PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0
- movdqa HashKey_k(%rsp), \TMP4
- PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
- pxor \TMP1, \TMP6
- pxor \XMM4, \XMMDst
- pxor \XMM1, \TMP2
- pxor \TMP6, \TMP2
- pxor \XMMDst, \TMP2
- # middle section of the temp results combined as in karatsuba algorithm
- movdqa \TMP2, \TMP4
- pslldq $8, \TMP4 # left shift TMP4 2 DWs
- psrldq $8, \TMP2 # right shift TMP2 2 DWs
- pxor \TMP4, \XMMDst
- pxor \TMP2, \TMP6
-# TMP6:XMMDst holds the result of the accumulated carry-less multiplications
- # first phase of the reduction
- movdqa \XMMDst, \TMP2
- movdqa \XMMDst, \TMP3
- movdqa \XMMDst, \TMP4
-# move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently
- pslld $31, \TMP2 # packed right shifting << 31
- pslld $30, \TMP3 # packed right shifting << 30
- pslld $25, \TMP4 # packed right shifting << 25
- pxor \TMP3, \TMP2 # xor the shifted versions
- pxor \TMP4, \TMP2
- movdqa \TMP2, \TMP7
- psrldq $4, \TMP7 # right shift TMP7 1 DW
- pslldq $12, \TMP2 # left shift TMP2 3 DWs
- pxor \TMP2, \XMMDst
-
- # second phase of the reduction
- movdqa \XMMDst, \TMP2
- # make 3 copies of XMMDst for doing 3 shift operations
- movdqa \XMMDst, \TMP3
- movdqa \XMMDst, \TMP4
- psrld $1, \TMP2 # packed left shift >> 1
- psrld $2, \TMP3 # packed left shift >> 2
- psrld $7, \TMP4 # packed left shift >> 7
- pxor \TMP3, \TMP2 # xor the shifted versions
- pxor \TMP4, \TMP2
- pxor \TMP7, \TMP2
- pxor \TMP2, \XMMDst
- pxor \TMP6, \XMMDst # reduced result is in XMMDst
-.endm
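[Editor's note] The two "phase" blocks above fold the high half of the 256-bit Karatsuba product back into 128 bits modulo the GHASH polynomial x^128 + x^127 + x^126 + x^121 + 1, operating on bit-reflected data (which is why the shift directions in the comments read inverted). The fold-back idea is easier to inspect at byte scale; the following GF(2^8) reduction with the AES polynomial 0x11B shows the same shift-and-XOR technique (a didactic stand-in, not the GHASH field):

#include <stdint.h>
#include <stdio.h>

/* Reduce a 15-bit carry-less product modulo x^8 + x^4 + x^3 + x + 1.
 * Each set bit at position >= 8 is folded back by XORing a shifted
 * copy of the polynomial -- the same idea as GHASH's two-phase
 * reduction, just without the bit reflection. */
static uint8_t gf256_reduce(uint16_t prod)
{
        int i;

        for (i = 14; i >= 8; i--)
                if (prod & (1u << i))
                        prod ^= (uint16_t)(0x11B << (i - 8));
        return (uint8_t)prod;
}

int main(void)
{
        /* 0x87 * 0x02 in GF(2^8): raw product 0x10E reduces to 0x15. */
        printf("%02x\n", gf256_reduce(0x10E));
        return 0;
}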
-
-/* Encryption of a single block */
-.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1
-
- pxor (%arg1), \XMM0
- movaps 16(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 32(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 48(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 64(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 80(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 96(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 112(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 128(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 144(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 160(%arg1), \TMP1
- AESENCLAST \TMP1, \XMM0
-.endm
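[Editor's note] ENCRYPT_SINGLE_BLOCK above is hard-wired for the 11-key AES-128 schedule laid out at 16-byte strides from the context base (%arg1). With compiler intrinsics the same ladder reads as follows — a self-contained sketch (not the kernel's code) that expands the schedule itself and checks the FIPS-197 test vector; build with -maes:

#include <stdio.h>
#include <wmmintrin.h>  /* AES-NI intrinsics */

/* One AES-128 key-expansion step using AESKEYGENASSIST. */
static __m128i expand_step(__m128i key, __m128i gen)
{
        gen = _mm_shuffle_epi32(gen, 0xff);
        key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
        key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
        key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
        return _mm_xor_si128(key, gen);
}

static void aes128_expand(const unsigned char key[16], __m128i rk[11])
{
        rk[0]  = _mm_loadu_si128((const __m128i *)key);
        rk[1]  = expand_step(rk[0], _mm_aeskeygenassist_si128(rk[0], 0x01));
        rk[2]  = expand_step(rk[1], _mm_aeskeygenassist_si128(rk[1], 0x02));
        rk[3]  = expand_step(rk[2], _mm_aeskeygenassist_si128(rk[2], 0x04));
        rk[4]  = expand_step(rk[3], _mm_aeskeygenassist_si128(rk[3], 0x08));
        rk[5]  = expand_step(rk[4], _mm_aeskeygenassist_si128(rk[4], 0x10));
        rk[6]  = expand_step(rk[5], _mm_aeskeygenassist_si128(rk[5], 0x20));
        rk[7]  = expand_step(rk[6], _mm_aeskeygenassist_si128(rk[6], 0x40));
        rk[8]  = expand_step(rk[7], _mm_aeskeygenassist_si128(rk[7], 0x80));
        rk[9]  = expand_step(rk[8], _mm_aeskeygenassist_si128(rk[8], 0x1b));
        rk[10] = expand_step(rk[9], _mm_aeskeygenassist_si128(rk[9], 0x36));
}

/* Mirrors the pxor + 9x AESENC + AESENCLAST sequence in the macro. */
static __m128i aes128_encrypt_block(__m128i block, const __m128i rk[11])
{
        int i;

        block = _mm_xor_si128(block, rk[0]);        /* whitening */
        for (i = 1; i < 10; i++)
                block = _mm_aesenc_si128(block, rk[i]);
        return _mm_aesenclast_si128(block, rk[10]);
}

int main(void)
{
        const unsigned char key[16] = { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
                                        0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f };
        const unsigned char pt[16]  = { 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,
                                        0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff };
        unsigned char ct[16];
        __m128i rk[11];
        int i;

        aes128_expand(key, rk);
        _mm_storeu_si128((__m128i *)ct,
                aes128_encrypt_block(_mm_loadu_si128((const __m128i *)pt), rk));
        for (i = 0; i < 16; i++)
                printf("%02x", ct[i]);
        printf("\n");   /* expect 69c4e0d86a7b0430d8cdb78070b4c55a (FIPS-197) */
        return 0;
}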
-
-
-/*****************************************************************************
-* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
-* u8 *out, // Plaintext output. Encrypt in-place is allowed.
-* const u8 *in, // Ciphertext input
-* u64 plaintext_len, // Length of data in bytes for decryption.
-* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association)
-* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
-* // concatenated with 0x00000001. 16-byte aligned pointer.
-* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary.
-* const u8 *aad, // Additional Authentication Data (AAD)
-* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes
-* u8 *auth_tag, // Authenticated Tag output. The driver will compare this to the
-* // given authentication tag and only return the plaintext if they match.
-* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16
-* // (most likely), 12 or 8.
-*
-* Assumptions:
-*
-* keys:
-* keys are pre-expanded and aligned to 16 bytes. We are using the first
-* set of 11 keys in the data structure void *aes_ctx
-*
-* iv:
-*  0                   1                   2                   3
-*  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |                      Salt (From the SA)                       |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |                     Initialization Vector                     |
-* |        (This is the sequence number from IPSec header)        |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |                              0x1                              |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-*
-*
-*
-* AAD:
-* AAD padded to 128 bits with 0
-* for example, assume AAD is a u32 vector
-*
-* if AAD is 8 bytes:
-* AAD[3] = {A0, A1};
-* padded AAD in xmm register = {A1 A0 0 0}
-*
-*  0                   1                   2                   3
-*  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |                           SPI (A1)                            |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |                  32-bit Sequence Number (A0)                  |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |                              0x0                              |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-*
-* AAD Format with 32-bit Sequence Number
-*
-* if AAD is 12 bytes:
-* AAD[3] = {A0, A1, A2};
-* padded AAD in xmm register = {A2 A1 A0 0}
-*
-*  0                   1                   2                   3
-*  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |                           SPI (A2)                            |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |            64-bit Extended Sequence Number {A1,A0}            |
-* |                                                               |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |                              0x0                              |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-*
-* AAD Format with 64-bit Extended Sequence Number
-*
-* aadLen:
-* from the definition in the spec, aadLen can only be 8 or 12 bytes.
-* The code also supports an aadLen of 16; any other size will fail.
-*
-* TLen:
-* from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
-* For other sizes, the code will fail.
-*
-* poly = x^128 + x^127 + x^126 + x^121 + 1
-*
-*****************************************************************************/
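A hedged sketch of assembling the pre-counter block described in the comment above (the function and parameter names are illustrative, not from the kernel sources):

	#include <string.h>

	/* j0 = salt || per-packet IV || 0x00000001 (big-endian counter) */
	static void rfc4106_build_j0(unsigned char j0[16],
				     const unsigned char salt[4],
				     const unsigned char iv[8])
	{
		memcpy(j0, salt, 4);		/* 4-byte salt from the SA */
		memcpy(j0 + 4, iv, 8);		/* 8-byte IV from the ESP payload */
		j0[12] = 0;
		j0[13] = 0;
		j0[14] = 0;
		j0[15] = 1;			/* initial 32-bit block counter */
	}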
-
-ENTRY(aesni_gcm_dec)
- push %r12
- push %r13
- push %r14
- mov %rsp, %r14
-/*
-* states of %xmm registers %xmm6:%xmm15 not saved
-* all %xmm registers are clobbered
-*/
- sub $VARIABLE_OFFSET, %rsp
- and $~63, %rsp # align rsp to 64 bytes
- mov %arg6, %r12
- movdqu (%r12), %xmm13 # %xmm13 = HashKey
- movdqa SHUF_MASK(%rip), %xmm2
- PSHUFB_XMM %xmm2, %xmm13
-
-
-# Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH)
-
- movdqa %xmm13, %xmm2
- psllq $1, %xmm13
- psrlq $63, %xmm2
- movdqa %xmm2, %xmm1
- pslldq $8, %xmm2
- psrldq $8, %xmm1
- por %xmm2, %xmm13
-
- # Reduction
-
- pshufd $0x24, %xmm1, %xmm2
- pcmpeqd TWOONE(%rip), %xmm2
- pand POLY(%rip), %xmm2
- pxor %xmm2, %xmm13 # %xmm13 holds the HashKey<<1 (mod poly)
-
-
- # Decrypt first few blocks
-
- movdqa %xmm13, HashKey(%rsp) # store HashKey<<1 (mod poly)
- mov %arg4, %r13 # save the number of bytes of plaintext/ciphertext
- and $-16, %r13 # %r13 = %r13 - (%r13 mod 16)
- mov %r13, %r12
- and $(3<<4), %r12 # %r12 = 16 * (number of 16-byte blocks mod 4)
- jz _initial_num_blocks_is_0_decrypt
- cmp $(2<<4), %r12
- jb _initial_num_blocks_is_1_decrypt
- je _initial_num_blocks_is_2_decrypt
-_initial_num_blocks_is_3_decrypt:
- INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec
- sub $48, %r13
- jmp _initial_blocks_decrypted
-_initial_num_blocks_is_2_decrypt:
- INITIAL_BLOCKS_DEC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec
- sub $32, %r13
- jmp _initial_blocks_decrypted
-_initial_num_blocks_is_1_decrypt:
- INITIAL_BLOCKS_DEC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec
- sub $16, %r13
- jmp _initial_blocks_decrypted
-_initial_num_blocks_is_0_decrypt:
- INITIAL_BLOCKS_DEC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec
-_initial_blocks_decrypted:
- cmp $0, %r13
- je _zero_cipher_left_decrypt
- sub $64, %r13
- je _four_cipher_left_decrypt
-_decrypt_by_4:
- GHASH_4_ENCRYPT_4_PARALLEL_DEC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
-%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec
- add $64, %r11
- sub $64, %r13
- jne _decrypt_by_4
-_four_cipher_left_decrypt:
- GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
-%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
-_zero_cipher_left_decrypt:
- mov %arg4, %r13
- and $15, %r13 # %r13 = arg4 (mod 16)
- je _multiple_of_16_bytes_decrypt
-
- # Handle the last <16 byte block separately
-
- paddd ONE(%rip), %xmm0 # increment CNT to get Yn
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm0
-
- ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn)
- sub $16, %r11
- add %r13, %r11
- movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte block
- lea SHIFT_MASK+16(%rip), %r12
- sub %r13, %r12
-# adjust the shuffle mask pointer to be able to shift 16-%r13 bytes
-# (%r13 is the number of bytes in plaintext mod 16)
- movdqu (%r12), %xmm2 # get the appropriate shuffle mask
- PSHUFB_XMM %xmm2, %xmm1 # right shift 16-%r13 bytes
-
- movdqa %xmm1, %xmm2
- pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn)
- movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
- # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0
- pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0
- pand %xmm1, %xmm2
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10 ,%xmm2
-
- pxor %xmm2, %xmm8
- GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
- # GHASH computation for the last <16 byte block
- sub %r13, %r11
- add $16, %r11
-
- # output %r13 bytes
- MOVQ_R64_XMM %xmm0, %rax
- cmp $8, %r13
- jle _less_than_8_bytes_left_decrypt
- mov %rax, (%arg2 , %r11, 1)
- add $8, %r11
- psrldq $8, %xmm0
- MOVQ_R64_XMM %xmm0, %rax
- sub $8, %r13
-_less_than_8_bytes_left_decrypt:
- mov %al, (%arg2, %r11, 1)
- add $1, %r11
- shr $8, %rax
- sub $1, %r13
- jne _less_than_8_bytes_left_decrypt
-_multiple_of_16_bytes_decrypt:
- mov arg8, %r12 # %r12 = aadLen (number of bytes)
- shl $3, %r12 # convert into number of bits
- movd %r12d, %xmm15 # len(A) in %xmm15
- shl $3, %arg4 # len(C) in bits (*8)
- MOVQ_R64_XMM %arg4, %xmm1
- pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000
- pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C)
- pxor %xmm15, %xmm8
- GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
- # final GHASH computation
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm8
-
- mov %arg5, %rax # %rax = *Y0
- movdqu (%rax), %xmm0 # %xmm0 = Y0
- ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0)
- pxor %xmm8, %xmm0
-_return_T_decrypt:
- mov arg9, %r10 # %r10 = authTag
- mov arg10, %r11 # %r11 = auth_tag_len
- cmp $16, %r11
- je _T_16_decrypt
- cmp $12, %r11
- je _T_12_decrypt
-_T_8_decrypt:
- MOVQ_R64_XMM %xmm0, %rax
- mov %rax, (%r10)
- jmp _return_T_done_decrypt
-_T_12_decrypt:
- MOVQ_R64_XMM %xmm0, %rax
- mov %rax, (%r10)
- psrldq $8, %xmm0
- movd %xmm0, %eax
- mov %eax, 8(%r10)
- jmp _return_T_done_decrypt
-_T_16_decrypt:
- movdqu %xmm0, (%r10)
-_return_T_done_decrypt:
- mov %r14, %rsp
- pop %r14
- pop %r13
- pop %r12
- ret
-
-
-/*****************************************************************************
-* void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
-* u8 *out, // Ciphertext output. Encrypt in-place is allowed.
-* const u8 *in, // Plaintext input
-* u64 plaintext_len, // Length of data in bytes for encryption.
-* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association)
-* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
-* // concatenated with 0x00000001. 16-byte aligned pointer.
-* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary.
-* const u8 *aad, // Additional Authentication Data (AAD)
-* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes
-* u8 *auth_tag, // Authenticated Tag output.
-* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
-* // 12 or 8.
-*
-* Assumptions:
-*
-* keys:
-* keys are pre-expanded and aligned to 16 bytes. We are using the
-* first set of 11 keys in the data structure void *aes_ctx
-*
-*
-* iv:
-*  0                   1                   2                   3
-*  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |                      Salt (From the SA)                       |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |                     Initialization Vector                     |
-* |        (This is the sequence number from IPSec header)        |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |                              0x1                              |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-*
-*
-*
-* AAD:
-* AAD padded to 128 bits with 0
-* for example, assume AAD is a u32 vector
-*
-* if AAD is 8 bytes:
-* AAD[3] = {A0, A1};
-* padded AAD in xmm register = {A1 A0 0 0}
-*
-*  0                   1                   2                   3
-*  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |                           SPI (A1)                            |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |                  32-bit Sequence Number (A0)                  |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |                              0x0                              |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-*
-* AAD Format with 32-bit Sequence Number
-*
-* if AAD is 12 bytes:
-* AAD[3] = {A0, A1, A2};
-* padded AAD in xmm register = {A2 A1 A0 0}
-*
-*  0                   1                   2                   3
-*  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |                           SPI (A2)                            |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |            64-bit Extended Sequence Number {A1,A0}            |
-* |                                                               |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-* |                              0x0                              |
-* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-*
-* AAD Format with 64-bit Extended Sequence Number
-*
-* aadLen:
-* from the definition in the spec, aadLen can only be 8 or 12 bytes.
-* The code also supports an aadLen of 16; any other size will fail.
-*
-* TLen:
-* from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
-* For other sizes, the code will fail.
-*
-* poly = x^128 + x^127 + x^126 + x^121 + 1
-***************************************************************************/
-ENTRY(aesni_gcm_enc)
- push %r12
- push %r13
- push %r14
- mov %rsp, %r14
-#
-# states of %xmm registers %xmm6:%xmm15 not saved
-# all %xmm registers are clobbered
-#
- sub $VARIABLE_OFFSET, %rsp
- and $~63, %rsp
- mov %arg6, %r12
- movdqu (%r12), %xmm13
- movdqa SHUF_MASK(%rip), %xmm2
- PSHUFB_XMM %xmm2, %xmm13
-
-
-# precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
-
- movdqa %xmm13, %xmm2
- psllq $1, %xmm13
- psrlq $63, %xmm2
- movdqa %xmm2, %xmm1
- pslldq $8, %xmm2
- psrldq $8, %xmm1
- por %xmm2, %xmm13
-
- # reduce HashKey<<1
-
- pshufd $0x24, %xmm1, %xmm2
- pcmpeqd TWOONE(%rip), %xmm2
- pand POLY(%rip), %xmm2
- pxor %xmm2, %xmm13 # %xmm13 holds the HashKey<<1 (mod poly)
- movdqa %xmm13, HashKey(%rsp) # store HashKey<<1 (mod poly)
- mov %arg4, %r13 # save the number of bytes of plaintext/ciphertext
- and $-16, %r13 # %r13 = %r13 - (%r13 mod 16)
- mov %r13, %r12
-
- # Encrypt first few blocks
-
- and $(3<<4), %r12 # %r12 = 16 * (number of 16-byte blocks mod 4)
- jz _initial_num_blocks_is_0_encrypt
- cmp $(2<<4), %r12
- jb _initial_num_blocks_is_1_encrypt
- je _initial_num_blocks_is_2_encrypt
-_initial_num_blocks_is_3_encrypt:
- INITIAL_BLOCKS_ENC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc
- sub $48, %r13
- jmp _initial_blocks_encrypted
-_initial_num_blocks_is_2_encrypt:
- INITIAL_BLOCKS_ENC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc
- sub $32, %r13
- jmp _initial_blocks_encrypted
-_initial_num_blocks_is_1_encrypt:
- INITIAL_BLOCKS_ENC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc
- sub $16, %r13
- jmp _initial_blocks_encrypted
-_initial_num_blocks_is_0_encrypt:
- INITIAL_BLOCKS_ENC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc
-_initial_blocks_encrypted:
-
- # Main loop - Encrypt remaining blocks
-
- cmp $0, %r13
- je _zero_cipher_left_encrypt
- sub $64, %r13
- je _four_cipher_left_encrypt
-_encrypt_by_4_encrypt:
- GHASH_4_ENCRYPT_4_PARALLEL_ENC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
-%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc
- add $64, %r11
- sub $64, %r13
- jne _encrypt_by_4_encrypt
-_four_cipher_left_encrypt:
- GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
-%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
-_zero_cipher_left_encrypt:
- mov %arg4, %r13
- and $15, %r13 # %r13 = arg4 (mod 16)
- je _multiple_of_16_bytes_encrypt
-
- # Handle the last <16 byte block separately
- paddd ONE(%rip), %xmm0 # INCR CNT to get Yn
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm0
-
-
- ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn)
- sub $16, %r11
- add %r13, %r11
- movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte block
- lea SHIFT_MASK+16(%rip), %r12
- sub %r13, %r12
- # adjust the shuffle mask pointer to be able to shift 16-r13 bytes
- # (%r13 is the number of bytes in plaintext mod 16)
- movdqu (%r12), %xmm2 # get the appropriate shuffle mask
- PSHUFB_XMM %xmm2, %xmm1 # shift right 16-%r13 bytes
- pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn)
- movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
- # get the appropriate mask to mask out top 16-r13 bytes of xmm0
- pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10,%xmm0
-
- pxor %xmm0, %xmm8
- GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
- # GHASH computation for the last <16 byte block
- sub %r13, %r11
- add $16, %r11
-
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm0
-
- # shuffle xmm0 back to output as ciphertext
-
- # Output %r13 bytes
- MOVQ_R64_XMM %xmm0, %rax
- cmp $8, %r13
- jle _less_than_8_bytes_left_encrypt
- mov %rax, (%arg2 , %r11, 1)
- add $8, %r11
- psrldq $8, %xmm0
- MOVQ_R64_XMM %xmm0, %rax
- sub $8, %r13
-_less_than_8_bytes_left_encrypt:
- mov %al, (%arg2, %r11, 1)
- add $1, %r11
- shr $8, %rax
- sub $1, %r13
- jne _less_than_8_bytes_left_encrypt
-_multiple_of_16_bytes_encrypt:
- mov arg8, %r12 # %r12 = aadLen (number of bytes)
- shl $3, %r12
- movd %r12d, %xmm15 # len(A) in %xmm15
- shl $3, %arg4 # len(C) in bits (*8)
- MOVQ_R64_XMM %arg4, %xmm1
- pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000
- pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C)
- pxor %xmm15, %xmm8
- GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
- # final GHASH computation
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm8 # perform a 16 byte swap
-
- mov %arg5, %rax # %rax = *Y0
- movdqu (%rax), %xmm0 # %xmm0 = Y0
- ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0)
- pxor %xmm8, %xmm0
-_return_T_encrypt:
- mov arg9, %r10 # %r10 = authTag
- mov arg10, %r11 # %r11 = auth_tag_len
- cmp $16, %r11
- je _T_16_encrypt
- cmp $12, %r11
- je _T_12_encrypt
-_T_8_encrypt:
- MOVQ_R64_XMM %xmm0, %rax
- mov %rax, (%r10)
- jmp _return_T_done_encrypt
-_T_12_encrypt:
- MOVQ_R64_XMM %xmm0, %rax
- mov %rax, (%r10)
- psrldq $8, %xmm0
- movd %xmm0, %eax
- mov %eax, 8(%r10)
- jmp _return_T_done_encrypt
-_T_16_encrypt:
- movdqu %xmm0, (%r10)
-_return_T_done_encrypt:
- mov %r14, %rsp
- pop %r14
- pop %r13
- pop %r12
- ret
-
-#endif
-
-
-_key_expansion_128:
-_key_expansion_256a:
- pshufd $0b11111111, %xmm1, %xmm1
- shufps $0b00010000, %xmm0, %xmm4
- pxor %xmm4, %xmm0
- shufps $0b10001100, %xmm0, %xmm4
- pxor %xmm4, %xmm0
- pxor %xmm1, %xmm0
- movaps %xmm0, (TKEYP)
- add $0x10, TKEYP
- ret
-
-.align 4
-_key_expansion_192a:
- pshufd $0b01010101, %xmm1, %xmm1
- shufps $0b00010000, %xmm0, %xmm4
- pxor %xmm4, %xmm0
- shufps $0b10001100, %xmm0, %xmm4
- pxor %xmm4, %xmm0
- pxor %xmm1, %xmm0
-
- movaps %xmm2, %xmm5
- movaps %xmm2, %xmm6
- pslldq $4, %xmm5
- pshufd $0b11111111, %xmm0, %xmm3
- pxor %xmm3, %xmm2
- pxor %xmm5, %xmm2
-
- movaps %xmm0, %xmm1
- shufps $0b01000100, %xmm0, %xmm6
- movaps %xmm6, (TKEYP)
- shufps $0b01001110, %xmm2, %xmm1
- movaps %xmm1, 0x10(TKEYP)
- add $0x20, TKEYP
- ret
-
-.align 4
-_key_expansion_192b:
- pshufd $0b01010101, %xmm1, %xmm1
- shufps $0b00010000, %xmm0, %xmm4
- pxor %xmm4, %xmm0
- shufps $0b10001100, %xmm0, %xmm4
- pxor %xmm4, %xmm0
- pxor %xmm1, %xmm0
-
- movaps %xmm2, %xmm5
- pslldq $4, %xmm5
- pshufd $0b11111111, %xmm0, %xmm3
- pxor %xmm3, %xmm2
- pxor %xmm5, %xmm2
-
- movaps %xmm0, (TKEYP)
- add $0x10, TKEYP
- ret
-
-.align 4
-_key_expansion_256b:
- pshufd $0b10101010, %xmm1, %xmm1
- shufps $0b00010000, %xmm2, %xmm4
- pxor %xmm4, %xmm2
- shufps $0b10001100, %xmm2, %xmm4
- pxor %xmm4, %xmm2
- pxor %xmm1, %xmm2
- movaps %xmm2, (TKEYP)
- add $0x10, TKEYP
- ret
-
-/*
- * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
- * unsigned int key_len)
- */
-ENTRY(aesni_set_key)
-#ifndef __x86_64__
- pushl KEYP
- movl 8(%esp), KEYP # ctx
- movl 12(%esp), UKEYP # in_key
- movl 16(%esp), %edx # key_len
-#endif
- movups (UKEYP), %xmm0 # user key (first 16 bytes)
- movaps %xmm0, (KEYP)
- lea 0x10(KEYP), TKEYP # key addr
- movl %edx, 480(KEYP)
- pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x
- cmp $24, %dl
- jb .Lenc_key128
- je .Lenc_key192
- movups 0x10(UKEYP), %xmm2 # other user key
- movaps %xmm2, (TKEYP)
- add $0x10, TKEYP
- AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
- call _key_expansion_256a
- AESKEYGENASSIST 0x1 %xmm0 %xmm1
- call _key_expansion_256b
- AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
- call _key_expansion_256a
- AESKEYGENASSIST 0x2 %xmm0 %xmm1
- call _key_expansion_256b
- AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3
- call _key_expansion_256a
- AESKEYGENASSIST 0x4 %xmm0 %xmm1
- call _key_expansion_256b
- AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4
- call _key_expansion_256a
- AESKEYGENASSIST 0x8 %xmm0 %xmm1
- call _key_expansion_256b
- AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5
- call _key_expansion_256a
- AESKEYGENASSIST 0x10 %xmm0 %xmm1
- call _key_expansion_256b
- AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6
- call _key_expansion_256a
- AESKEYGENASSIST 0x20 %xmm0 %xmm1
- call _key_expansion_256b
- AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7
- call _key_expansion_256a
- jmp .Ldec_key
-.Lenc_key192:
- movq 0x10(UKEYP), %xmm2 # other user key
- AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
- call _key_expansion_192a
- AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
- call _key_expansion_192b
- AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3
- call _key_expansion_192a
- AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4
- call _key_expansion_192b
- AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5
- call _key_expansion_192a
- AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6
- call _key_expansion_192b
- AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7
- call _key_expansion_192a
- AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8
- call _key_expansion_192b
- jmp .Ldec_key
-.Lenc_key128:
- AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1
- call _key_expansion_128
- AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2
- call _key_expansion_128
- AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3
- call _key_expansion_128
- AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4
- call _key_expansion_128
- AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5
- call _key_expansion_128
- AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6
- call _key_expansion_128
- AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7
- call _key_expansion_128
- AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8
- call _key_expansion_128
- AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9
- call _key_expansion_128
- AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10
- call _key_expansion_128
-.Ldec_key:
- sub $0x10, TKEYP
- movaps (KEYP), %xmm0
- movaps (TKEYP), %xmm1
- movaps %xmm0, 240(TKEYP)
- movaps %xmm1, 240(KEYP)
- add $0x10, KEYP
- lea 240-16(TKEYP), UKEYP
-.align 4
-.Ldec_key_loop:
- movaps (KEYP), %xmm0
- AESIMC %xmm0 %xmm1
- movaps %xmm1, (UKEYP)
- add $0x10, KEYP
- sub $0x10, UKEYP
- cmp TKEYP, KEYP
- jb .Ldec_key_loop
- xor AREG, AREG
-#ifndef __x86_64__
- popl KEYP
-#endif
- ret
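The .Ldec_key tail above builds the decryption schedule at offset 240: the encryption round keys in reverse order, with AESIMC (InvMixColumns) applied to every key except the two outermost. A hedged intrinsics model under the AES-128 (11 round keys) assumption, with an illustrative function name:

	#include <wmmintrin.h>	/* AES-NI intrinsics; compile with -maes */

	/* dec[] mirrors what .Ldec_key stores at ctx+240 for AES-128 */
	static void make_dec_schedule(const __m128i enc[11], __m128i dec[11])
	{
		int i;

		dec[0] = enc[10];		/* last round key first, no AESIMC */
		for (i = 1; i < 10; i++)
			dec[i] = _mm_aesimc_si128(enc[10 - i]);
		dec[10] = enc[0];		/* round-0 key last, no AESIMC */
	}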
-
-/*
- * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
- */
-ENTRY(aesni_enc)
-#ifndef __x86_64__
- pushl KEYP
- pushl KLEN
- movl 12(%esp), KEYP
- movl 16(%esp), OUTP
- movl 20(%esp), INP
-#endif
- movl 480(KEYP), KLEN # key length
- movups (INP), STATE # input
- call _aesni_enc1
- movups STATE, (OUTP) # output
-#ifndef __x86_64__
- popl KLEN
- popl KEYP
-#endif
- ret
-
-/*
- * _aesni_enc1: internal ABI
- * input:
- * KEYP: key struct pointer
- * KLEN: key length
- * STATE: initial state (input)
- * output:
- * STATE: final state (output)
- * changed:
- * KEY
- * TKEYP (T1)
- */
-.align 4
-_aesni_enc1:
- movaps (KEYP), KEY # key
- mov KEYP, TKEYP
- pxor KEY, STATE # round 0
- add $0x30, TKEYP
- cmp $24, KLEN
- jb .Lenc128
- lea 0x20(TKEYP), TKEYP
- je .Lenc192
- add $0x20, TKEYP
- movaps -0x60(TKEYP), KEY
- AESENC KEY STATE
- movaps -0x50(TKEYP), KEY
- AESENC KEY STATE
-.align 4
-.Lenc192:
- movaps -0x40(TKEYP), KEY
- AESENC KEY STATE
- movaps -0x30(TKEYP), KEY
- AESENC KEY STATE
-.align 4
-.Lenc128:
- movaps -0x20(TKEYP), KEY
- AESENC KEY STATE
- movaps -0x10(TKEYP), KEY
- AESENC KEY STATE
- movaps (TKEYP), KEY
- AESENC KEY STATE
- movaps 0x10(TKEYP), KEY
- AESENC KEY STATE
- movaps 0x20(TKEYP), KEY
- AESENC KEY STATE
- movaps 0x30(TKEYP), KEY
- AESENC KEY STATE
- movaps 0x40(TKEYP), KEY
- AESENC KEY STATE
- movaps 0x50(TKEYP), KEY
- AESENC KEY STATE
- movaps 0x60(TKEYP), KEY
- AESENC KEY STATE
- movaps 0x70(TKEYP), KEY
- AESENCLAST KEY STATE
- ret
-
-/*
- * _aesni_enc4: internal ABI
- * input:
- * KEYP: key struct pointer
- * KLEN: round count
- * STATE1: initial state (input)
- * STATE2
- * STATE3
- * STATE4
- * output:
- * STATE1: final state (output)
- * STATE2
- * STATE3
- * STATE4
- * changed:
- * KEY
- * TKEYP (T1)
- */
-.align 4
-_aesni_enc4:
- movaps (KEYP), KEY # key
- mov KEYP, TKEYP
- pxor KEY, STATE1 # round 0
- pxor KEY, STATE2
- pxor KEY, STATE3
- pxor KEY, STATE4
- add $0x30, TKEYP
- cmp $24, KLEN
- jb .L4enc128
- lea 0x20(TKEYP), TKEYP
- je .L4enc192
- add $0x20, TKEYP
- movaps -0x60(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
- movaps -0x50(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
-#.align 4
-.L4enc192:
- movaps -0x40(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
- movaps -0x30(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
-#.align 4
-.L4enc128:
- movaps -0x20(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
- movaps -0x10(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
- movaps (TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
- movaps 0x10(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
- movaps 0x20(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
- movaps 0x30(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
- movaps 0x40(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
- movaps 0x50(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
- movaps 0x60(TKEYP), KEY
- AESENC KEY STATE1
- AESENC KEY STATE2
- AESENC KEY STATE3
- AESENC KEY STATE4
- movaps 0x70(TKEYP), KEY
- AESENCLAST KEY STATE1 # last round
- AESENCLAST KEY STATE2
- AESENCLAST KEY STATE3
- AESENCLAST KEY STATE4
- ret
-
-/*
- * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
- */
-ENTRY(aesni_dec)
-#ifndef __x86_64__
- pushl KEYP
- pushl KLEN
- movl 12(%esp), KEYP
- movl 16(%esp), OUTP
- movl 20(%esp), INP
-#endif
- mov 480(KEYP), KLEN # key length
- add $240, KEYP
- movups (INP), STATE # input
- call _aesni_dec1
- movups STATE, (OUTP) # output
-#ifndef __x86_64__
- popl KLEN
- popl KEYP
-#endif
- ret
-
-/*
- * _aesni_dec1: internal ABI
- * input:
- * KEYP: key struct pointer
- * KLEN: key length
- * STATE: initial state (input)
- * output:
- * STATE: final state (output)
- * changed:
- * KEY
- * TKEYP (T1)
- */
-.align 4
-_aesni_dec1:
- movaps (KEYP), KEY # key
- mov KEYP, TKEYP
- pxor KEY, STATE # round 0
- add $0x30, TKEYP
- cmp $24, KLEN
- jb .Ldec128
- lea 0x20(TKEYP), TKEYP
- je .Ldec192
- add $0x20, TKEYP
- movaps -0x60(TKEYP), KEY
- AESDEC KEY STATE
- movaps -0x50(TKEYP), KEY
- AESDEC KEY STATE
-.align 4
-.Ldec192:
- movaps -0x40(TKEYP), KEY
- AESDEC KEY STATE
- movaps -0x30(TKEYP), KEY
- AESDEC KEY STATE
-.align 4
-.Ldec128:
- movaps -0x20(TKEYP), KEY
- AESDEC KEY STATE
- movaps -0x10(TKEYP), KEY
- AESDEC KEY STATE
- movaps (TKEYP), KEY
- AESDEC KEY STATE
- movaps 0x10(TKEYP), KEY
- AESDEC KEY STATE
- movaps 0x20(TKEYP), KEY
- AESDEC KEY STATE
- movaps 0x30(TKEYP), KEY
- AESDEC KEY STATE
- movaps 0x40(TKEYP), KEY
- AESDEC KEY STATE
- movaps 0x50(TKEYP), KEY
- AESDEC KEY STATE
- movaps 0x60(TKEYP), KEY
- AESDEC KEY STATE
- movaps 0x70(TKEYP), KEY
- AESDECLAST KEY STATE
- ret
-
-/*
- * _aesni_dec4: internal ABI
- * input:
- * KEYP: key struct pointer
- * KLEN: key length
- * STATE1: initial state (input)
- * STATE2
- * STATE3
- * STATE4
- * output:
- * STATE1: final state (output)
- * STATE2
- * STATE3
- * STATE4
- * changed:
- * KEY
- * TKEYP (T1)
- */
-.align 4
-_aesni_dec4:
- movaps (KEYP), KEY # key
- mov KEYP, TKEYP
- pxor KEY, STATE1 # round 0
- pxor KEY, STATE2
- pxor KEY, STATE3
- pxor KEY, STATE4
- add $0x30, TKEYP
- cmp $24, KLEN
- jb .L4dec128
- lea 0x20(TKEYP), TKEYP
- je .L4dec192
- add $0x20, TKEYP
- movaps -0x60(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
- movaps -0x50(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
-.align 4
-.L4dec192:
- movaps -0x40(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
- movaps -0x30(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
-.align 4
-.L4dec128:
- movaps -0x20(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
- movaps -0x10(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
- movaps (TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
- movaps 0x10(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
- movaps 0x20(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
- movaps 0x30(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
- movaps 0x40(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
- movaps 0x50(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
- movaps 0x60(TKEYP), KEY
- AESDEC KEY STATE1
- AESDEC KEY STATE2
- AESDEC KEY STATE3
- AESDEC KEY STATE4
- movaps 0x70(TKEYP), KEY
- AESDECLAST KEY STATE1 # last round
- AESDECLAST KEY STATE2
- AESDECLAST KEY STATE3
- AESDECLAST KEY STATE4
- ret
-
-/*
- * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
- * size_t len)
- */
-ENTRY(aesni_ecb_enc)
-#ifndef __x86_64__
- pushl LEN
- pushl KEYP
- pushl KLEN
- movl 16(%esp), KEYP
- movl 20(%esp), OUTP
- movl 24(%esp), INP
- movl 28(%esp), LEN
-#endif
- test LEN, LEN # check length
- jz .Lecb_enc_ret
- mov 480(KEYP), KLEN
- cmp $16, LEN
- jb .Lecb_enc_ret
- cmp $64, LEN
- jb .Lecb_enc_loop1
-.align 4
-.Lecb_enc_loop4:
- movups (INP), STATE1
- movups 0x10(INP), STATE2
- movups 0x20(INP), STATE3
- movups 0x30(INP), STATE4
- call _aesni_enc4
- movups STATE1, (OUTP)
- movups STATE2, 0x10(OUTP)
- movups STATE3, 0x20(OUTP)
- movups STATE4, 0x30(OUTP)
- sub $64, LEN
- add $64, INP
- add $64, OUTP
- cmp $64, LEN
- jge .Lecb_enc_loop4
- cmp $16, LEN
- jb .Lecb_enc_ret
-.align 4
-.Lecb_enc_loop1:
- movups (INP), STATE1
- call _aesni_enc1
- movups STATE1, (OUTP)
- sub $16, LEN
- add $16, INP
- add $16, OUTP
- cmp $16, LEN
- jge .Lecb_enc_loop1
-.Lecb_enc_ret:
-#ifndef __x86_64__
- popl KLEN
- popl KEYP
- popl LEN
-#endif
- ret
-
-/*
- * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
- * size_t len);
- */
-ENTRY(aesni_ecb_dec)
-#ifndef __x86_64__
- pushl LEN
- pushl KEYP
- pushl KLEN
- movl 16(%esp), KEYP
- movl 20(%esp), OUTP
- movl 24(%esp), INP
- movl 28(%esp), LEN
-#endif
- test LEN, LEN
- jz .Lecb_dec_ret
- mov 480(KEYP), KLEN
- add $240, KEYP
- cmp $16, LEN
- jb .Lecb_dec_ret
- cmp $64, LEN
- jb .Lecb_dec_loop1
-.align 4
-.Lecb_dec_loop4:
- movups (INP), STATE1
- movups 0x10(INP), STATE2
- movups 0x20(INP), STATE3
- movups 0x30(INP), STATE4
- call _aesni_dec4
- movups STATE1, (OUTP)
- movups STATE2, 0x10(OUTP)
- movups STATE3, 0x20(OUTP)
- movups STATE4, 0x30(OUTP)
- sub $64, LEN
- add $64, INP
- add $64, OUTP
- cmp $64, LEN
- jge .Lecb_dec_loop4
- cmp $16, LEN
- jb .Lecb_dec_ret
-.align 4
-.Lecb_dec_loop1:
- movups (INP), STATE1
- call _aesni_dec1
- movups STATE1, (OUTP)
- sub $16, LEN
- add $16, INP
- add $16, OUTP
- cmp $16, LEN
- jge .Lecb_dec_loop1
-.Lecb_dec_ret:
-#ifndef __x86_64__
- popl KLEN
- popl KEYP
- popl LEN
-#endif
- ret
-
-/*
- * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
- * size_t len, u8 *iv)
- */
-ENTRY(aesni_cbc_enc)
-#ifndef __x86_64__
- pushl IVP
- pushl LEN
- pushl KEYP
- pushl KLEN
- movl 20(%esp), KEYP
- movl 24(%esp), OUTP
- movl 28(%esp), INP
- movl 32(%esp), LEN
- movl 36(%esp), IVP
-#endif
- cmp $16, LEN
- jb .Lcbc_enc_ret
- mov 480(KEYP), KLEN
- movups (IVP), STATE # load iv as initial state
-.align 4
-.Lcbc_enc_loop:
- movups (INP), IN # load input
- pxor IN, STATE
- call _aesni_enc1
- movups STATE, (OUTP) # store output
- sub $16, LEN
- add $16, INP
- add $16, OUTP
- cmp $16, LEN
- jge .Lcbc_enc_loop
- movups STATE, (IVP)
-.Lcbc_enc_ret:
-#ifndef __x86_64__
- popl KLEN
- popl KEYP
- popl LEN
- popl IVP
-#endif
- ret
-
-/*
- * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
- * size_t len, u8 *iv)
- */
-ENTRY(aesni_cbc_dec)
-#ifndef __x86_64__
- pushl IVP
- pushl LEN
- pushl KEYP
- pushl KLEN
- movl 20(%esp), KEYP
- movl 24(%esp), OUTP
- movl 28(%esp), INP
- movl 32(%esp), LEN
- movl 36(%esp), IVP
-#endif
- cmp $16, LEN
- jb .Lcbc_dec_just_ret
- mov 480(KEYP), KLEN
- add $240, KEYP
- movups (IVP), IV
- cmp $64, LEN
- jb .Lcbc_dec_loop1
-.align 4
-.Lcbc_dec_loop4:
- movups (INP), IN1
- movaps IN1, STATE1
- movups 0x10(INP), IN2
- movaps IN2, STATE2
-#ifdef __x86_64__
- movups 0x20(INP), IN3
- movaps IN3, STATE3
- movups 0x30(INP), IN4
- movaps IN4, STATE4
-#else
- movups 0x20(INP), IN1
- movaps IN1, STATE3
- movups 0x30(INP), IN2
- movaps IN2, STATE4
-#endif
- call _aesni_dec4
- pxor IV, STATE1
-#ifdef __x86_64__
- pxor IN1, STATE2
- pxor IN2, STATE3
- pxor IN3, STATE4
- movaps IN4, IV
-#else
- pxor IN1, STATE4
- movaps IN2, IV
- movups (INP), IN1
- pxor IN1, STATE2
- movups 0x10(INP), IN2
- pxor IN2, STATE3
-#endif
- movups STATE1, (OUTP)
- movups STATE2, 0x10(OUTP)
- movups STATE3, 0x20(OUTP)
- movups STATE4, 0x30(OUTP)
- sub $64, LEN
- add $64, INP
- add $64, OUTP
- cmp $64, LEN
- jge .Lcbc_dec_loop4
- cmp $16, LEN
- jb .Lcbc_dec_ret
-.align 4
-.Lcbc_dec_loop1:
- movups (INP), IN
- movaps IN, STATE
- call _aesni_dec1
- pxor IV, STATE
- movups STATE, (OUTP)
- movaps IN, IV
- sub $16, LEN
- add $16, INP
- add $16, OUTP
- cmp $16, LEN
- jge .Lcbc_dec_loop1
-.Lcbc_dec_ret:
- movups IV, (IVP)
-.Lcbc_dec_just_ret:
-#ifndef __x86_64__
- popl KLEN
- popl KEYP
- popl LEN
- popl IVP
-#endif
- ret
-
-#ifdef __x86_64__
-.align 16
-.Lbswap_mask:
- .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
-
-/*
- * _aesni_inc_init: internal ABI
- * setup registers used by _aesni_inc
- * input:
- * IV
- * output:
- * CTR: == IV, in little endian
- * TCTR_LOW: == lower qword of CTR
- * INC: == 1, in little endian
- * BSWAP_MASK == endian swapping mask
- */
-.align 4
-_aesni_inc_init:
- movaps .Lbswap_mask, BSWAP_MASK
- movaps IV, CTR
- PSHUFB_XMM BSWAP_MASK CTR
- mov $1, TCTR_LOW
- MOVQ_R64_XMM TCTR_LOW INC
- MOVQ_R64_XMM CTR TCTR_LOW
- ret
-
-/*
- * _aesni_inc: internal ABI
- * Increase IV by 1, IV is in big endian
- * input:
- * IV
- * CTR: == IV, in little endian
- * TCTR_LOW: == lower qword of CTR
- * INC: == 1, in little endian
- * BSWAP_MASK == endian swapping mask
- * output:
- * IV: increased by 1
- * changed:
- * CTR: == output IV, in little endian
- * TCTR_LOW: == lower qword of CTR
- */
-.align 4
-_aesni_inc:
- paddq INC, CTR
- add $1, TCTR_LOW
- jnc .Linc_low
- pslldq $8, INC
- paddq INC, CTR
- psrldq $8, INC
-.Linc_low:
- movaps CTR, IV
- PSHUFB_XMM BSWAP_MASK IV
- ret
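Outside this hot path the glue code advances the counter the same way with crypto_inc(). As a portable model of what _aesni_inc does to the big-endian IV (a sketch with an illustrative name):

	/* add 1 to a 16-byte big-endian counter, carry rippling upward */
	static void ctr_inc_be128(unsigned char ctr[16])
	{
		int i;

		for (i = 15; i >= 0; i--)
			if (++ctr[i])	/* stop once a byte did not wrap to 0 */
				break;
	}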
-
-/*
- * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
- * size_t len, u8 *iv)
- */
-ENTRY(aesni_ctr_enc)
- cmp $16, LEN
- jb .Lctr_enc_just_ret
- mov 480(KEYP), KLEN
- movups (IVP), IV
- call _aesni_inc_init
- cmp $64, LEN
- jb .Lctr_enc_loop1
-.align 4
-.Lctr_enc_loop4:
- movaps IV, STATE1
- call _aesni_inc
- movups (INP), IN1
- movaps IV, STATE2
- call _aesni_inc
- movups 0x10(INP), IN2
- movaps IV, STATE3
- call _aesni_inc
- movups 0x20(INP), IN3
- movaps IV, STATE4
- call _aesni_inc
- movups 0x30(INP), IN4
- call _aesni_enc4
- pxor IN1, STATE1
- movups STATE1, (OUTP)
- pxor IN2, STATE2
- movups STATE2, 0x10(OUTP)
- pxor IN3, STATE3
- movups STATE3, 0x20(OUTP)
- pxor IN4, STATE4
- movups STATE4, 0x30(OUTP)
- sub $64, LEN
- add $64, INP
- add $64, OUTP
- cmp $64, LEN
- jge .Lctr_enc_loop4
- cmp $16, LEN
- jb .Lctr_enc_ret
-.align 4
-.Lctr_enc_loop1:
- movaps IV, STATE
- call _aesni_inc
- movups (INP), IN
- call _aesni_enc1
- pxor IN, STATE
- movups STATE, (OUTP)
- sub $16, LEN
- add $16, INP
- add $16, OUTP
- cmp $16, LEN
- jge .Lctr_enc_loop1
-.Lctr_enc_ret:
- movups IV, (IVP)
-.Lctr_enc_just_ret:
- ret
-#endif
diff --git a/ANDROID_3.4.5/arch/x86/crypto/aesni-intel_glue.c b/ANDROID_3.4.5/arch/x86/crypto/aesni-intel_glue.c
deleted file mode 100644
index c799352e..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/aesni-intel_glue.c
+++ /dev/null
@@ -1,1389 +0,0 @@
-/*
- * Support for Intel AES-NI instructions. This file contains glue
- * code; the real AES implementation is in aesni-intel_asm.S.
- *
- * Copyright (C) 2008, Intel Corp.
- * Author: Huang Ying <ying.huang@intel.com>
- *
- * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
- * interface for 64-bit kernels.
- * Authors: Adrian Hoban <adrian.hoban@intel.com>
- * Gabriele Paoloni <gabriele.paoloni@intel.com>
- * Tadeusz Struk (tadeusz.struk@intel.com)
- * Aidan O'Mahony (aidan.o.mahony@intel.com)
- * Copyright (c) 2010, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/hardirq.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/module.h>
-#include <linux/err.h>
-#include <crypto/algapi.h>
-#include <crypto/aes.h>
-#include <crypto/cryptd.h>
-#include <crypto/ctr.h>
-#include <asm/cpu_device_id.h>
-#include <asm/i387.h>
-#include <asm/aes.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/internal/aead.h>
-#include <linux/workqueue.h>
-#include <linux/spinlock.h>
-
-#if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE)
-#define HAS_CTR
-#endif
-
-#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE)
-#define HAS_LRW
-#endif
-
-#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE)
-#define HAS_PCBC
-#endif
-
-#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE)
-#define HAS_XTS
-#endif
-
-struct async_aes_ctx {
- struct cryptd_ablkcipher *cryptd_tfm;
-};
-
-/* This data is stored at the end of the crypto_tfm struct.
- * It acts as per-"session" storage and needs to be 16-byte aligned.
- */
-struct aesni_rfc4106_gcm_ctx {
- u8 hash_subkey[16];
- struct crypto_aes_ctx aes_key_expanded;
- u8 nonce[4];
- struct cryptd_aead *cryptd_tfm;
-};
-
-struct aesni_gcm_set_hash_subkey_result {
- int err;
- struct completion completion;
-};
-
-struct aesni_hash_subkey_req_data {
- u8 iv[16];
- struct aesni_gcm_set_hash_subkey_result result;
- struct scatterlist sg;
-};
-
-#define AESNI_ALIGN (16)
-#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
-#define RFC4106_HASH_SUBKEY_SIZE 16
-
-asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
- unsigned int key_len);
-asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
- const u8 *in);
-asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out,
- const u8 *in);
-asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out,
- const u8 *in, unsigned int len);
-asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out,
- const u8 *in, unsigned int len);
-asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
- const u8 *in, unsigned int len, u8 *iv);
-asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
- const u8 *in, unsigned int len, u8 *iv);
-
-int crypto_fpu_init(void);
-void crypto_fpu_exit(void);
-
-#ifdef CONFIG_X86_64
-asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
- const u8 *in, unsigned int len, u8 *iv);
-
-/* asmlinkage void aesni_gcm_enc()
- * void *ctx, AES Key schedule. Starts on a 16 byte boundary.
- * u8 *out, Ciphertext output. Encrypt in-place is allowed.
- * const u8 *in, Plaintext input
- * unsigned long plaintext_len, Length of data in bytes for encryption.
- * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association)
- * concatenated with 8 byte Initialisation Vector (from IPSec ESP
- * Payload) concatenated with 0x00000001. 16-byte aligned pointer.
- * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
- * const u8 *aad, Additional Authentication Data (AAD)
- * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this
- * is going to be 8 or 12 bytes
- * u8 *auth_tag, Authenticated Tag output.
- * unsigned long auth_tag_len), Authenticated Tag Length in bytes.
- * Valid values are 16 (most likely), 12 or 8.
- */
-asmlinkage void aesni_gcm_enc(void *ctx, u8 *out,
- const u8 *in, unsigned long plaintext_len, u8 *iv,
- u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
- u8 *auth_tag, unsigned long auth_tag_len);
-
-/* asmlinkage void aesni_gcm_dec()
- * void *ctx, AES Key schedule. Starts on a 16 byte boundary.
- * u8 *out, Plaintext output. Decrypt in-place is allowed.
- * const u8 *in, Ciphertext input
- * unsigned long ciphertext_len, Length of data in bytes for decryption.
- * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association)
- * concatenated with 8 byte Initialisation Vector (from IPSec ESP
- * Payload) concatenated with 0x00000001. 16-byte aligned pointer.
- * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
- * const u8 *aad, Additional Authentication Data (AAD)
- * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going
- * to be 8 or 12 bytes
- * u8 *auth_tag, Authenticated Tag output.
- * unsigned long auth_tag_len) Authenticated Tag Length in bytes.
- * Valid values are 16 (most likely), 12 or 8.
- */
-asmlinkage void aesni_gcm_dec(void *ctx, u8 *out,
- const u8 *in, unsigned long ciphertext_len, u8 *iv,
- u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
- u8 *auth_tag, unsigned long auth_tag_len);
-
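A hedged usage sketch of the two entry points (the helper and its parameters are illustrative, not part of the driver): the caller owns the FPU across the calls and, on the RFC4106 decrypt path, must compare the computed tag against the received one itself, ideally in constant time.

	static int gcm_roundtrip(void *aes_ctx, u8 *buf, unsigned long len,
				 u8 *j0, u8 *hash_subkey,
				 const u8 *aad, unsigned long aad_len)
	{
		u8 tag[16], tag2[16];

		kernel_fpu_begin();
		aesni_gcm_enc(aes_ctx, buf, buf, len, j0, hash_subkey,
			      aad, aad_len, tag, 16);	/* encrypt in place */
		aesni_gcm_dec(aes_ctx, buf, buf, len, j0, hash_subkey,
			      aad, aad_len, tag2, 16);	/* decrypt in place */
		kernel_fpu_end();

		/* callers of the decrypt path must verify the tag themselves */
		return memcmp(tag, tag2, 16) ? -EBADMSG : 0;
	}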
-static inline struct
-aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
-{
- return
- (struct aesni_rfc4106_gcm_ctx *)
- PTR_ALIGN((u8 *)
- crypto_tfm_ctx(crypto_aead_tfm(tfm)), AESNI_ALIGN);
-}
-#endif
-
-static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
-{
- unsigned long addr = (unsigned long)raw_ctx;
- unsigned long align = AESNI_ALIGN;
-
- if (align <= crypto_tfm_ctx_alignment())
- align = 1;
- return (struct crypto_aes_ctx *)ALIGN(addr, align);
-}
-
-static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
- const u8 *in_key, unsigned int key_len)
-{
- struct crypto_aes_ctx *ctx = aes_ctx(raw_ctx);
- u32 *flags = &tfm->crt_flags;
- int err;
-
- if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
- key_len != AES_KEYSIZE_256) {
- *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
-
- if (!irq_fpu_usable())
- err = crypto_aes_expand_key(ctx, in_key, key_len);
- else {
- kernel_fpu_begin();
- err = aesni_set_key(ctx, in_key, key_len);
- kernel_fpu_end();
- }
-
- return err;
-}
-
-static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
- unsigned int key_len)
-{
- return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len);
-}
-
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
-
- if (!irq_fpu_usable())
- crypto_aes_encrypt_x86(ctx, dst, src);
- else {
- kernel_fpu_begin();
- aesni_enc(ctx, dst, src);
- kernel_fpu_end();
- }
-}
-
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
-
- if (!irq_fpu_usable())
- crypto_aes_decrypt_x86(ctx, dst, src);
- else {
- kernel_fpu_begin();
- aesni_dec(ctx, dst, src);
- kernel_fpu_end();
- }
-}
-
-static struct crypto_alg aesni_alg = {
- .cra_name = "aes",
- .cra_driver_name = "aes-aesni",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
- .cra_alignmask = 0,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(aesni_alg.cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = AES_MIN_KEY_SIZE,
- .cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = aes_set_key,
- .cia_encrypt = aes_encrypt,
- .cia_decrypt = aes_decrypt
- }
- }
-};
-
-static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
-
- aesni_enc(ctx, dst, src);
-}
-
-static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
-
- aesni_dec(ctx, dst, src);
-}
-
-static struct crypto_alg __aesni_alg = {
- .cra_name = "__aes-aesni",
- .cra_driver_name = "__driver-aes-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
- .cra_alignmask = 0,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(__aesni_alg.cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = AES_MIN_KEY_SIZE,
- .cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = aes_set_key,
- .cia_encrypt = __aes_encrypt,
- .cia_decrypt = __aes_decrypt
- }
- }
-};
-
-static int ecb_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- kernel_fpu_begin();
- while ((nbytes = walk.nbytes)) {
- aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes & AES_BLOCK_MASK);
- nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
- kernel_fpu_end();
-
- return err;
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- kernel_fpu_begin();
- while ((nbytes = walk.nbytes)) {
- aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes & AES_BLOCK_MASK);
- nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
- kernel_fpu_end();
-
- return err;
-}
-
-static struct crypto_alg blk_ecb_alg = {
- .cra_name = "__ecb-aes-aesni",
- .cra_driver_name = "__driver-ecb-aes-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .setkey = aes_set_key,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-};
-
-static int cbc_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- kernel_fpu_begin();
- while ((nbytes = walk.nbytes)) {
- aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes & AES_BLOCK_MASK, walk.iv);
- nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
- kernel_fpu_end();
-
- return err;
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- kernel_fpu_begin();
- while ((nbytes = walk.nbytes)) {
- aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes & AES_BLOCK_MASK, walk.iv);
- nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
- kernel_fpu_end();
-
- return err;
-}
-
-static struct crypto_alg blk_cbc_alg = {
- .cra_name = "__cbc-aes-aesni",
- .cra_driver_name = "__driver-cbc-aes-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .setkey = aes_set_key,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-};
-
-#ifdef CONFIG_X86_64
-static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
- struct blkcipher_walk *walk)
-{
- u8 *ctrblk = walk->iv;
- u8 keystream[AES_BLOCK_SIZE];
- u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
-
- aesni_enc(ctx, keystream, ctrblk);
- crypto_xor(keystream, src, nbytes);
- memcpy(dst, keystream, nbytes);
- crypto_inc(ctrblk, AES_BLOCK_SIZE);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- kernel_fpu_begin();
- while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
- aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes & AES_BLOCK_MASK, walk.iv);
- nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
- if (walk.nbytes) {
- ctr_crypt_final(ctx, &walk);
- err = blkcipher_walk_done(desc, &walk, 0);
- }
- kernel_fpu_end();
-
- return err;
-}
-
-static struct crypto_alg blk_ctr_alg = {
- .cra_name = "__ctr-aes-aesni",
- .cra_driver_name = "__driver-ctr-aes-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = aes_set_key,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-};
-#endif
-
-static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
- unsigned int key_len)
-{
- struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
- struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
- int err;
-
- crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
- crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
- & CRYPTO_TFM_REQ_MASK);
- err = crypto_ablkcipher_setkey(child, key, key_len);
- crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
- & CRYPTO_TFM_RES_MASK);
- return err;
-}
-
-static int ablk_encrypt(struct ablkcipher_request *req)
-{
- struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
- struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
- if (!irq_fpu_usable()) {
- struct ablkcipher_request *cryptd_req =
- ablkcipher_request_ctx(req);
- memcpy(cryptd_req, req, sizeof(*req));
- ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
- return crypto_ablkcipher_encrypt(cryptd_req);
- } else {
- struct blkcipher_desc desc;
- desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
- desc.info = req->info;
- desc.flags = 0;
- return crypto_blkcipher_crt(desc.tfm)->encrypt(
- &desc, req->dst, req->src, req->nbytes);
- }
-}
-
-static int ablk_decrypt(struct ablkcipher_request *req)
-{
- struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
- struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
- if (!irq_fpu_usable()) {
- struct ablkcipher_request *cryptd_req =
- ablkcipher_request_ctx(req);
- memcpy(cryptd_req, req, sizeof(*req));
- ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
- return crypto_ablkcipher_decrypt(cryptd_req);
- } else {
- struct blkcipher_desc desc;
- desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
- desc.info = req->info;
- desc.flags = 0;
- return crypto_blkcipher_crt(desc.tfm)->decrypt(
- &desc, req->dst, req->src, req->nbytes);
- }
-}
-
-static void ablk_exit(struct crypto_tfm *tfm)
-{
- struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- cryptd_free_ablkcipher(ctx->cryptd_tfm);
-}
-
-static void ablk_init_common(struct crypto_tfm *tfm,
- struct cryptd_ablkcipher *cryptd_tfm)
-{
- struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- ctx->cryptd_tfm = cryptd_tfm;
- tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
- crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-}
-
-static int ablk_ecb_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-aes-aesni", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
-}
-
-static struct crypto_alg ablk_ecb_alg = {
- .cra_name = "ecb(aes)",
- .cra_driver_name = "ecb-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list),
- .cra_init = ablk_ecb_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-};
-
-static int ablk_cbc_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-aes-aesni", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
-}
-
-static struct crypto_alg ablk_cbc_alg = {
- .cra_name = "cbc(aes)",
- .cra_driver_name = "cbc-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list),
- .cra_init = ablk_cbc_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-};
-
-#ifdef CONFIG_X86_64
-static int ablk_ctr_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-aes-aesni", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
-}
-
-static struct crypto_alg ablk_ctr_alg = {
- .cra_name = "ctr(aes)",
- .cra_driver_name = "ctr-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list),
- .cra_init = ablk_ctr_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_encrypt,
- .geniv = "chainiv",
- },
- },
-};
-
-#ifdef HAS_CTR
-static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher(
- "rfc3686(__driver-ctr-aes-aesni)", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
-}
-
-static struct crypto_alg ablk_rfc3686_ctr_alg = {
- .cra_name = "rfc3686(ctr(aes))",
- .cra_driver_name = "rfc3686-ctr-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_rfc3686_ctr_alg.cra_list),
- .cra_init = ablk_rfc3686_ctr_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE+CTR_RFC3686_NONCE_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE+CTR_RFC3686_NONCE_SIZE,
- .ivsize = CTR_RFC3686_IV_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- .geniv = "seqiv",
- },
- },
-};
-#endif
-#endif
-
-#ifdef HAS_LRW
-static int ablk_lrw_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("fpu(lrw(__driver-aes-aesni))",
- 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
-}
-
-static struct crypto_alg ablk_lrw_alg = {
- .cra_name = "lrw(aes)",
- .cra_driver_name = "lrw-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list),
- .cra_init = ablk_lrw_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-};
-#endif
-
-#ifdef HAS_PCBC
-static int ablk_pcbc_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("fpu(pcbc(__driver-aes-aesni))",
- 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
-}
-
-static struct crypto_alg ablk_pcbc_alg = {
- .cra_name = "pcbc(aes)",
- .cra_driver_name = "pcbc-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_pcbc_alg.cra_list),
- .cra_init = ablk_pcbc_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-};
-#endif
-
-#ifdef HAS_XTS
-static int ablk_xts_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("fpu(xts(__driver-aes-aesni))",
- 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
-}
-
-static struct crypto_alg ablk_xts_alg = {
- .cra_name = "xts(aes)",
- .cra_driver_name = "xts-aes-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list),
- .cra_init = ablk_xts_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = 2 * AES_MIN_KEY_SIZE,
- .max_keysize = 2 * AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-};
-#endif
-
-#ifdef CONFIG_X86_64
-static int rfc4106_init(struct crypto_tfm *tfm)
-{
- struct cryptd_aead *cryptd_tfm;
- struct aesni_rfc4106_gcm_ctx *ctx = (struct aesni_rfc4106_gcm_ctx *)
- PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN);
- struct crypto_aead *cryptd_child;
- struct aesni_rfc4106_gcm_ctx *child_ctx;
- cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
-
- cryptd_child = cryptd_aead_child(cryptd_tfm);
- child_ctx = aesni_rfc4106_gcm_ctx_get(cryptd_child);
- memcpy(child_ctx, ctx, sizeof(*ctx));
- ctx->cryptd_tfm = cryptd_tfm;
- tfm->crt_aead.reqsize = sizeof(struct aead_request)
- + crypto_aead_reqsize(&cryptd_tfm->base);
- return 0;
-}
-
-static void rfc4106_exit(struct crypto_tfm *tfm)
-{
- struct aesni_rfc4106_gcm_ctx *ctx =
- (struct aesni_rfc4106_gcm_ctx *)
- PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN);
- if (!IS_ERR(ctx->cryptd_tfm))
- cryptd_free_aead(ctx->cryptd_tfm);
-}
-
-static void
-rfc4106_set_hash_subkey_done(struct crypto_async_request *req, int err)
-{
- struct aesni_gcm_set_hash_subkey_result *result = req->data;
-
- if (err == -EINPROGRESS)
- return;
- result->err = err;
- complete(&result->completion);
-}
-
-static int
-rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
-{
- struct crypto_ablkcipher *ctr_tfm;
- struct ablkcipher_request *req;
- int ret = -EINVAL;
- struct aesni_hash_subkey_req_data *req_data;
-
- ctr_tfm = crypto_alloc_ablkcipher("ctr(aes)", 0, 0);
- if (IS_ERR(ctr_tfm))
- return PTR_ERR(ctr_tfm);
-
- crypto_ablkcipher_clear_flags(ctr_tfm, ~0);
-
- ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len);
- if (ret)
- goto out_free_ablkcipher;
-
- ret = -ENOMEM;
- req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL);
- if (!req)
- goto out_free_ablkcipher;
-
- req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
- if (!req_data)
- goto out_free_request;
-
- memset(req_data->iv, 0, sizeof(req_data->iv));
-
-	/* Zero the hash sub key container: the hash sub key is created
-	 * by ciphering an all-zero block. */
- memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE);
-
- init_completion(&req_data->result.completion);
- sg_init_one(&req_data->sg, hash_subkey, RFC4106_HASH_SUBKEY_SIZE);
- ablkcipher_request_set_tfm(req, ctr_tfm);
- ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
- CRYPTO_TFM_REQ_MAY_BACKLOG,
- rfc4106_set_hash_subkey_done,
- &req_data->result);
-
- ablkcipher_request_set_crypt(req, &req_data->sg,
- &req_data->sg, RFC4106_HASH_SUBKEY_SIZE, req_data->iv);
-
- ret = crypto_ablkcipher_encrypt(req);
- if (ret == -EINPROGRESS || ret == -EBUSY) {
- ret = wait_for_completion_interruptible
- (&req_data->result.completion);
- if (!ret)
- ret = req_data->result.err;
- }
- kfree(req_data);
-out_free_request:
- ablkcipher_request_free(req);
-out_free_ablkcipher:
- crypto_free_ablkcipher(ctr_tfm);
- return ret;
-}
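A note on what rfc4106_set_hash_subkey() computes: GCM's hash subkey is H = E_K(0^128), and the asynchronous ctr(aes) request above obtains exactly that by ciphering one all-zero block under a zero IV. A minimal synchronous sketch of the same derivation, assuming a sleepable context where the single-block "aes" cipher is acceptable (derive_hash_subkey_sync is a hypothetical name):

#include <crypto/aes.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/string.h>

static int derive_hash_subkey_sync(u8 *hash_subkey, const u8 *key,
				   unsigned int key_len)
{
	struct crypto_cipher *aes;
	int ret;

	aes = crypto_alloc_cipher("aes", 0, 0);
	if (IS_ERR(aes))
		return PTR_ERR(aes);

	ret = crypto_cipher_setkey(aes, key, key_len);
	if (!ret) {
		/* H = E_K(0^128): encrypt one all-zero block in place */
		memset(hash_subkey, 0, AES_BLOCK_SIZE);
		crypto_cipher_encrypt_one(aes, hash_subkey, hash_subkey);
	}
	crypto_free_cipher(aes);
	return ret;
}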
-
-static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
- unsigned int key_len)
-{
- int ret = 0;
- struct crypto_tfm *tfm = crypto_aead_tfm(parent);
- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
- struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
- struct aesni_rfc4106_gcm_ctx *child_ctx =
- aesni_rfc4106_gcm_ctx_get(cryptd_child);
- u8 *new_key_mem = NULL;
-
- if (key_len < 4) {
- crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
- return -EINVAL;
- }
- /*Account for 4 byte nonce at the end.*/
- key_len -= 4;
- if (key_len != AES_KEYSIZE_128) {
- crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
- return -EINVAL;
- }
-
- memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce));
- /*This must be on a 16 byte boundary!*/
- if ((unsigned long)(&(ctx->aes_key_expanded.key_enc[0])) % AESNI_ALIGN)
- return -EINVAL;
-
- if ((unsigned long)key % AESNI_ALIGN) {
-		/* key is not aligned: use an auxiliary aligned buffer */
- new_key_mem = kmalloc(key_len+AESNI_ALIGN, GFP_KERNEL);
- if (!new_key_mem)
- return -ENOMEM;
-
- new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
- memcpy(new_key_mem, key, key_len);
- key = new_key_mem;
- }
-
- if (!irq_fpu_usable())
- ret = crypto_aes_expand_key(&(ctx->aes_key_expanded),
- key, key_len);
- else {
- kernel_fpu_begin();
- ret = aesni_set_key(&(ctx->aes_key_expanded), key, key_len);
- kernel_fpu_end();
- }
- /*This must be on a 16 byte boundary!*/
- if ((unsigned long)(&(ctx->hash_subkey[0])) % AESNI_ALIGN) {
- ret = -EINVAL;
- goto exit;
- }
- ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
- memcpy(child_ctx, ctx, sizeof(*ctx));
-exit:
- kfree(new_key_mem);
- return ret;
-}
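As the setkey above shows (key_len -= 4, then the copy into ctx->nonce), the RFC 4106 key material handed to the API is the AES key with the 4-byte salt appended. An illustration with hypothetical values:

/* AES-128 key followed by the 4-byte salt that rfc4106_set_key() strips off */
static const u8 rfc4106_key_blob[16 + 4] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,	/* AES key ...    */
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,	/* ... 16 bytes   */
	0xca, 0xfe, 0xba, 0xbe,					/* salt -> ctx->nonce */
};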
-
-/* This is the Integrity Check Value (aka the authentication tag) length.
- * It can be 8, 12 or 16 bytes long. */
-static int rfc4106_set_authsize(struct crypto_aead *parent,
- unsigned int authsize)
-{
- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
- struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
-
- switch (authsize) {
- case 8:
- case 12:
- case 16:
- break;
- default:
- return -EINVAL;
- }
- crypto_aead_crt(parent)->authsize = authsize;
- crypto_aead_crt(cryptd_child)->authsize = authsize;
- return 0;
-}
-
-static int rfc4106_encrypt(struct aead_request *req)
-{
- int ret;
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-
- if (!irq_fpu_usable()) {
- struct aead_request *cryptd_req =
- (struct aead_request *) aead_request_ctx(req);
- memcpy(cryptd_req, req, sizeof(*req));
- aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
- return crypto_aead_encrypt(cryptd_req);
- } else {
- struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
- kernel_fpu_begin();
- ret = cryptd_child->base.crt_aead.encrypt(req);
- kernel_fpu_end();
- return ret;
- }
-}
-
-static int rfc4106_decrypt(struct aead_request *req)
-{
- int ret;
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-
- if (!irq_fpu_usable()) {
- struct aead_request *cryptd_req =
- (struct aead_request *) aead_request_ctx(req);
- memcpy(cryptd_req, req, sizeof(*req));
- aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
- return crypto_aead_decrypt(cryptd_req);
- } else {
- struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
- kernel_fpu_begin();
- ret = cryptd_child->base.crt_aead.decrypt(req);
- kernel_fpu_end();
- return ret;
- }
-}
-
-static struct crypto_alg rfc4106_alg = {
- .cra_name = "rfc4106(gcm(aes))",
- .cra_driver_name = "rfc4106-gcm-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN,
- .cra_alignmask = 0,
- .cra_type = &crypto_nivaead_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(rfc4106_alg.cra_list),
- .cra_init = rfc4106_init,
- .cra_exit = rfc4106_exit,
- .cra_u = {
- .aead = {
- .setkey = rfc4106_set_key,
- .setauthsize = rfc4106_set_authsize,
- .encrypt = rfc4106_encrypt,
- .decrypt = rfc4106_decrypt,
- .geniv = "seqiv",
- .ivsize = 8,
- .maxauthsize = 16,
- },
- },
-};
-
-static int __driver_rfc4106_encrypt(struct aead_request *req)
-{
- u8 one_entry_in_sg = 0;
- u8 *src, *dst, *assoc;
- __be32 counter = cpu_to_be32(1);
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
- void *aes_ctx = &(ctx->aes_key_expanded);
- unsigned long auth_tag_len = crypto_aead_authsize(tfm);
-	u8 iv_tab[16 + AESNI_ALIGN];
-	u8 *iv = (u8 *)PTR_ALIGN((u8 *)iv_tab, AESNI_ALIGN);
- struct scatter_walk src_sg_walk;
- struct scatter_walk assoc_sg_walk;
- struct scatter_walk dst_sg_walk;
- unsigned int i;
-
-	/* Assuming we are supporting rfc4106 64-bit extended sequence
-	 * numbers, the AAD length must be 8 or 12 bytes.
-	 */
- if (unlikely(req->assoclen != 8 && req->assoclen != 12))
- return -EINVAL;
-	/* Build the IV: 4-byte salt | 8-byte explicit IV | BE counter = 1 */
- for (i = 0; i < 4; i++)
- *(iv+i) = ctx->nonce[i];
- for (i = 0; i < 8; i++)
- *(iv+4+i) = req->iv[i];
- *((__be32 *)(iv+12)) = counter;
-
- if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) {
- one_entry_in_sg = 1;
- scatterwalk_start(&src_sg_walk, req->src);
- scatterwalk_start(&assoc_sg_walk, req->assoc);
- src = scatterwalk_map(&src_sg_walk);
- assoc = scatterwalk_map(&assoc_sg_walk);
- dst = src;
- if (unlikely(req->src != req->dst)) {
- scatterwalk_start(&dst_sg_walk, req->dst);
- dst = scatterwalk_map(&dst_sg_walk);
- }
-
- } else {
- /* Allocate memory for src, dst, assoc */
- src = kmalloc(req->cryptlen + auth_tag_len + req->assoclen,
- GFP_ATOMIC);
- if (unlikely(!src))
- return -ENOMEM;
- assoc = (src + req->cryptlen + auth_tag_len);
- scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0);
- scatterwalk_map_and_copy(assoc, req->assoc, 0,
- req->assoclen, 0);
- dst = src;
- }
-
- aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv,
- ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst
- + ((unsigned long)req->cryptlen), auth_tag_len);
-
- /* The authTag (aka the Integrity Check Value) needs to be written
- * back to the packet. */
- if (one_entry_in_sg) {
- if (unlikely(req->src != req->dst)) {
- scatterwalk_unmap(dst);
- scatterwalk_done(&dst_sg_walk, 0, 0);
- }
- scatterwalk_unmap(src);
- scatterwalk_unmap(assoc);
- scatterwalk_done(&src_sg_walk, 0, 0);
- scatterwalk_done(&assoc_sg_walk, 0, 0);
- } else {
- scatterwalk_map_and_copy(dst, req->dst, 0,
- req->cryptlen + auth_tag_len, 1);
- kfree(src);
- }
- return 0;
-}
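The IV assembly at the top of the function is worth spelling out: GCM's initial counter block J0 is the 4-byte salt from setkey, the 8-byte explicit IV carried with the request, and a big-endian 32-bit block counter starting at 1. A standalone sketch (plain C; build_j0 is a hypothetical name):

#include <stdint.h>
#include <string.h>

static void build_j0(uint8_t j0[16], const uint8_t salt[4],
		     const uint8_t explicit_iv[8])
{
	memcpy(j0, salt, 4);		/* bytes  0..3:  fixed salt     */
	memcpy(j0 + 4, explicit_iv, 8);	/* bytes  4..11: per-request IV */
	j0[12] = 0;			/* bytes 12..15: big-endian     */
	j0[13] = 0;			/* block counter, starting at 1 */
	j0[14] = 0;
	j0[15] = 1;
}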
-
-static int __driver_rfc4106_decrypt(struct aead_request *req)
-{
- u8 one_entry_in_sg = 0;
- u8 *src, *dst, *assoc;
- unsigned long tempCipherLen = 0;
- __be32 counter = cpu_to_be32(1);
- int retval = 0;
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
- void *aes_ctx = &(ctx->aes_key_expanded);
- unsigned long auth_tag_len = crypto_aead_authsize(tfm);
-	u8 iv_and_authTag[32 + AESNI_ALIGN];
-	u8 *iv = (u8 *)PTR_ALIGN((u8 *)iv_and_authTag, AESNI_ALIGN);
- u8 *authTag = iv + 16;
- struct scatter_walk src_sg_walk;
- struct scatter_walk assoc_sg_walk;
- struct scatter_walk dst_sg_walk;
- unsigned int i;
-
- if (unlikely((req->cryptlen < auth_tag_len) ||
- (req->assoclen != 8 && req->assoclen != 12)))
- return -EINVAL;
-	/* Assuming we are supporting rfc4106 64-bit extended sequence
-	 * numbers, the AAD length must be 8 or 12 bytes.
-	 */
-
- tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len);
-	/* Build the IV: 4-byte salt | 8-byte explicit IV | BE counter = 1 */
- for (i = 0; i < 4; i++)
- *(iv+i) = ctx->nonce[i];
- for (i = 0; i < 8; i++)
- *(iv+4+i) = req->iv[i];
- *((__be32 *)(iv+12)) = counter;
-
- if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) {
- one_entry_in_sg = 1;
- scatterwalk_start(&src_sg_walk, req->src);
- scatterwalk_start(&assoc_sg_walk, req->assoc);
- src = scatterwalk_map(&src_sg_walk);
- assoc = scatterwalk_map(&assoc_sg_walk);
- dst = src;
- if (unlikely(req->src != req->dst)) {
- scatterwalk_start(&dst_sg_walk, req->dst);
- dst = scatterwalk_map(&dst_sg_walk);
- }
-
- } else {
- /* Allocate memory for src, dst, assoc */
- src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC);
- if (!src)
- return -ENOMEM;
-		assoc = (src + req->cryptlen);	/* cryptlen already includes the auth tag */
- scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0);
- scatterwalk_map_and_copy(assoc, req->assoc, 0,
- req->assoclen, 0);
- dst = src;
- }
-
- aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv,
- ctx->hash_subkey, assoc, (unsigned long)req->assoclen,
- authTag, auth_tag_len);
-
- /* Compare generated tag with passed in tag. */
- retval = memcmp(src + tempCipherLen, authTag, auth_tag_len) ?
- -EBADMSG : 0;
-
- if (one_entry_in_sg) {
- if (unlikely(req->src != req->dst)) {
- scatterwalk_unmap(dst);
- scatterwalk_done(&dst_sg_walk, 0, 0);
- }
- scatterwalk_unmap(src);
- scatterwalk_unmap(assoc);
- scatterwalk_done(&src_sg_walk, 0, 0);
- scatterwalk_done(&assoc_sg_walk, 0, 0);
- } else {
- scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1);
- kfree(src);
- }
- return retval;
-}
-
-static struct crypto_alg __rfc4106_alg = {
- .cra_name = "__gcm-aes-aesni",
- .cra_driver_name = "__driver-gcm-aes-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_AEAD,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN,
- .cra_alignmask = 0,
- .cra_type = &crypto_aead_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(__rfc4106_alg.cra_list),
- .cra_u = {
- .aead = {
- .encrypt = __driver_rfc4106_encrypt,
- .decrypt = __driver_rfc4106_decrypt,
- },
- },
-};
-#endif
-
-
-static const struct x86_cpu_id aesni_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_AES),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
-
-static int __init aesni_init(void)
-{
- int err;
-
- if (!x86_match_cpu(aesni_cpu_id))
- return -ENODEV;
-
- if ((err = crypto_fpu_init()))
- goto fpu_err;
- if ((err = crypto_register_alg(&aesni_alg)))
- goto aes_err;
- if ((err = crypto_register_alg(&__aesni_alg)))
- goto __aes_err;
- if ((err = crypto_register_alg(&blk_ecb_alg)))
- goto blk_ecb_err;
- if ((err = crypto_register_alg(&blk_cbc_alg)))
- goto blk_cbc_err;
- if ((err = crypto_register_alg(&ablk_ecb_alg)))
- goto ablk_ecb_err;
- if ((err = crypto_register_alg(&ablk_cbc_alg)))
- goto ablk_cbc_err;
-#ifdef CONFIG_X86_64
- if ((err = crypto_register_alg(&blk_ctr_alg)))
- goto blk_ctr_err;
- if ((err = crypto_register_alg(&ablk_ctr_alg)))
- goto ablk_ctr_err;
- if ((err = crypto_register_alg(&__rfc4106_alg)))
- goto __aead_gcm_err;
- if ((err = crypto_register_alg(&rfc4106_alg)))
- goto aead_gcm_err;
-#ifdef HAS_CTR
- if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg)))
- goto ablk_rfc3686_ctr_err;
-#endif
-#endif
-#ifdef HAS_LRW
- if ((err = crypto_register_alg(&ablk_lrw_alg)))
- goto ablk_lrw_err;
-#endif
-#ifdef HAS_PCBC
- if ((err = crypto_register_alg(&ablk_pcbc_alg)))
- goto ablk_pcbc_err;
-#endif
-#ifdef HAS_XTS
- if ((err = crypto_register_alg(&ablk_xts_alg)))
- goto ablk_xts_err;
-#endif
- return err;
-
-#ifdef HAS_XTS
-ablk_xts_err:
-#endif
-#ifdef HAS_PCBC
- crypto_unregister_alg(&ablk_pcbc_alg);
-ablk_pcbc_err:
-#endif
-#ifdef HAS_LRW
- crypto_unregister_alg(&ablk_lrw_alg);
-ablk_lrw_err:
-#endif
-#ifdef CONFIG_X86_64
-#ifdef HAS_CTR
- crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
-ablk_rfc3686_ctr_err:
-#endif
- crypto_unregister_alg(&rfc4106_alg);
-aead_gcm_err:
- crypto_unregister_alg(&__rfc4106_alg);
-__aead_gcm_err:
- crypto_unregister_alg(&ablk_ctr_alg);
-ablk_ctr_err:
- crypto_unregister_alg(&blk_ctr_alg);
-blk_ctr_err:
-#endif
- crypto_unregister_alg(&ablk_cbc_alg);
-ablk_cbc_err:
- crypto_unregister_alg(&ablk_ecb_alg);
-ablk_ecb_err:
- crypto_unregister_alg(&blk_cbc_alg);
-blk_cbc_err:
- crypto_unregister_alg(&blk_ecb_alg);
-blk_ecb_err:
- crypto_unregister_alg(&__aesni_alg);
-__aes_err:
- crypto_unregister_alg(&aesni_alg);
-aes_err:
-fpu_err:
- return err;
-}
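The registration ladder above is correct but verbose. When every algorithm is built unconditionally it collapses to the array helper that the blowfish glue later in this diff already uses; a sketch under that assumption (aesni_algs and aesni_init_simplified are hypothetical names):

static struct crypto_alg aesni_algs[] = {
	/* copies of aesni_alg, __aesni_alg, blk_ecb_alg, blk_cbc_alg, ... */
};

static int __init aesni_init_simplified(void)
{
	int err;

	if (!x86_match_cpu(aesni_cpu_id))
		return -ENODEV;

	err = crypto_fpu_init();	/* still needed by the fpu() template */
	if (err)
		return err;

	return crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
}

With crypto_register_algs(), a failure part-way through unregisters the already-registered entries, which is what the goto ladder does by hand.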
-
-static void __exit aesni_exit(void)
-{
-#ifdef HAS_XTS
- crypto_unregister_alg(&ablk_xts_alg);
-#endif
-#ifdef HAS_PCBC
- crypto_unregister_alg(&ablk_pcbc_alg);
-#endif
-#ifdef HAS_LRW
- crypto_unregister_alg(&ablk_lrw_alg);
-#endif
-#ifdef CONFIG_X86_64
-#ifdef HAS_CTR
- crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
-#endif
- crypto_unregister_alg(&rfc4106_alg);
- crypto_unregister_alg(&__rfc4106_alg);
- crypto_unregister_alg(&ablk_ctr_alg);
- crypto_unregister_alg(&blk_ctr_alg);
-#endif
- crypto_unregister_alg(&ablk_cbc_alg);
- crypto_unregister_alg(&ablk_ecb_alg);
- crypto_unregister_alg(&blk_cbc_alg);
- crypto_unregister_alg(&blk_ecb_alg);
- crypto_unregister_alg(&__aesni_alg);
- crypto_unregister_alg(&aesni_alg);
-
- crypto_fpu_exit();
-}
-
-module_init(aesni_init);
-module_exit(aesni_exit);
-
-MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("aes");
diff --git a/ANDROID_3.4.5/arch/x86/crypto/blowfish-x86_64-asm_64.S b/ANDROID_3.4.5/arch/x86/crypto/blowfish-x86_64-asm_64.S
deleted file mode 100644
index 391d245d..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/blowfish-x86_64-asm_64.S
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- * Blowfish Cipher Algorithm (x86_64)
- *
- * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
- */
-
-.file "blowfish-x86_64-asm.S"
-.text
-
-/* structure of crypto context */
-#define p 0
-#define s0 ((16 + 2) * 4)
-#define s1 ((16 + 2 + (1 * 256)) * 4)
-#define s2 ((16 + 2 + (2 * 256)) * 4)
-#define s3 ((16 + 2 + (3 * 256)) * 4)
-
-/* register macros */
-#define CTX %rdi
-#define RIO %rsi
-
-#define RX0 %rax
-#define RX1 %rbx
-#define RX2 %rcx
-#define RX3 %rdx
-
-#define RX0d %eax
-#define RX1d %ebx
-#define RX2d %ecx
-#define RX3d %edx
-
-#define RX0bl %al
-#define RX1bl %bl
-#define RX2bl %cl
-#define RX3bl %dl
-
-#define RX0bh %ah
-#define RX1bh %bh
-#define RX2bh %ch
-#define RX3bh %dh
-
-#define RT0 %rbp
-#define RT1 %rsi
-#define RT2 %r8
-#define RT3 %r9
-
-#define RT0d %ebp
-#define RT1d %esi
-#define RT2d %r8d
-#define RT3d %r9d
-
-#define RKEY %r10
-
-/***********************************************************************
- * 1-way blowfish
- ***********************************************************************/
-#define F() \
- rorq $16, RX0; \
- movzbl RX0bh, RT0d; \
- movzbl RX0bl, RT1d; \
- rolq $16, RX0; \
- movl s0(CTX,RT0,4), RT0d; \
- addl s1(CTX,RT1,4), RT0d; \
- movzbl RX0bh, RT1d; \
- movzbl RX0bl, RT2d; \
- rolq $32, RX0; \
- xorl s2(CTX,RT1,4), RT0d; \
- addl s3(CTX,RT2,4), RT0d; \
- xorq RT0, RX0;
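Written out in C, the F() macro above is the standard Blowfish round function: split the active 32-bit half into bytes a..d from most to least significant, then combine four S-box lookups with add, xor, add modulo 2^32. The rorq/rolq pairs in the asm exist only so the high bytes can be addressed through the bh/bl subregisters.

#include <stdint.h>

static uint32_t blowfish_f(const uint32_t s[4][256], uint32_t x)
{
	uint32_t a = (x >> 24) & 0xff;	/* looked up via s0 */
	uint32_t b = (x >> 16) & 0xff;	/* s1 */
	uint32_t c = (x >>  8) & 0xff;	/* s2 */
	uint32_t d =  x        & 0xff;	/* s3 */

	return ((s[0][a] + s[1][b]) ^ s[2][c]) + s[3][d];
}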
-
-#define add_roundkey_enc(n) \
- xorq p+4*(n)(CTX), RX0;
-
-#define round_enc(n) \
- add_roundkey_enc(n); \
- \
- F(); \
- F();
-
-#define add_roundkey_dec(n) \
- movq p+4*(n-1)(CTX), RT0; \
- rorq $32, RT0; \
- xorq RT0, RX0;
-
-#define round_dec(n) \
- add_roundkey_dec(n); \
- \
- F(); \
-	F();
-
-#define read_block() \
- movq (RIO), RX0; \
- rorq $32, RX0; \
- bswapq RX0;
-
-#define write_block() \
- bswapq RX0; \
- movq RX0, (RIO);
-
-#define xor_block() \
- bswapq RX0; \
- xorq RX0, (RIO);
-
-.align 8
-.global __blowfish_enc_blk
-.type __blowfish_enc_blk,@function;
-
-__blowfish_enc_blk:
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: bool, if true: xor output
- */
- movq %rbp, %r11;
-
- movq %rsi, %r10;
- movq %rdx, RIO;
-
- read_block();
-
- round_enc(0);
- round_enc(2);
- round_enc(4);
- round_enc(6);
- round_enc(8);
- round_enc(10);
- round_enc(12);
- round_enc(14);
- add_roundkey_enc(16);
-
- movq %r11, %rbp;
-
- movq %r10, RIO;
- test %cl, %cl;
- jnz __enc_xor;
-
- write_block();
- ret;
-__enc_xor:
- xor_block();
- ret;
-
-.align 8
-.global blowfish_dec_blk
-.type blowfish_dec_blk,@function;
-
-blowfish_dec_blk:
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- */
- movq %rbp, %r11;
-
- movq %rsi, %r10;
- movq %rdx, RIO;
-
- read_block();
-
- round_dec(17);
- round_dec(15);
- round_dec(13);
- round_dec(11);
- round_dec(9);
- round_dec(7);
- round_dec(5);
- round_dec(3);
- add_roundkey_dec(1);
-
- movq %r10, RIO;
- write_block();
-
- movq %r11, %rbp;
-
- ret;
-
-/**********************************************************************
- 4-way blowfish, four blocks parallel
- **********************************************************************/
-
-/* F() for 4-way. Slower when used alone/1-way, but faster when used
- * parallel/4-way (tested on AMD Phenom II & Intel Xeon E7330).
- */
-#define F4(x) \
- movzbl x ## bh, RT1d; \
- movzbl x ## bl, RT3d; \
- rorq $16, x; \
- movzbl x ## bh, RT0d; \
- movzbl x ## bl, RT2d; \
- rorq $16, x; \
- movl s0(CTX,RT0,4), RT0d; \
- addl s1(CTX,RT2,4), RT0d; \
- xorl s2(CTX,RT1,4), RT0d; \
- addl s3(CTX,RT3,4), RT0d; \
- xorq RT0, x;
-
-#define add_preloaded_roundkey4() \
- xorq RKEY, RX0; \
- xorq RKEY, RX1; \
- xorq RKEY, RX2; \
- xorq RKEY, RX3;
-
-#define preload_roundkey_enc(n) \
- movq p+4*(n)(CTX), RKEY;
-
-#define add_roundkey_enc4(n) \
- add_preloaded_roundkey4(); \
- preload_roundkey_enc(n + 2);
-
-#define round_enc4(n) \
- add_roundkey_enc4(n); \
- \
- F4(RX0); \
- F4(RX1); \
- F4(RX2); \
- F4(RX3); \
- \
- F4(RX0); \
- F4(RX1); \
- F4(RX2); \
- F4(RX3);
-
-#define preload_roundkey_dec(n) \
- movq p+4*((n)-1)(CTX), RKEY; \
- rorq $32, RKEY;
-
-#define add_roundkey_dec4(n) \
- add_preloaded_roundkey4(); \
- preload_roundkey_dec(n - 2);
-
-#define round_dec4(n) \
- add_roundkey_dec4(n); \
- \
- F4(RX0); \
- F4(RX1); \
- F4(RX2); \
- F4(RX3); \
- \
- F4(RX0); \
- F4(RX1); \
- F4(RX2); \
- F4(RX3);
-
-#define read_block4() \
- movq (RIO), RX0; \
- rorq $32, RX0; \
- bswapq RX0; \
- \
- movq 8(RIO), RX1; \
- rorq $32, RX1; \
- bswapq RX1; \
- \
- movq 16(RIO), RX2; \
- rorq $32, RX2; \
- bswapq RX2; \
- \
- movq 24(RIO), RX3; \
- rorq $32, RX3; \
- bswapq RX3;
-
-#define write_block4() \
- bswapq RX0; \
- movq RX0, (RIO); \
- \
- bswapq RX1; \
- movq RX1, 8(RIO); \
- \
- bswapq RX2; \
- movq RX2, 16(RIO); \
- \
- bswapq RX3; \
- movq RX3, 24(RIO);
-
-#define xor_block4() \
- bswapq RX0; \
- xorq RX0, (RIO); \
- \
- bswapq RX1; \
- xorq RX1, 8(RIO); \
- \
- bswapq RX2; \
- xorq RX2, 16(RIO); \
- \
- bswapq RX3; \
- xorq RX3, 24(RIO);
-
-.align 8
-.global __blowfish_enc_blk_4way
-.type __blowfish_enc_blk_4way,@function;
-
-__blowfish_enc_blk_4way:
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: bool, if true: xor output
- */
- pushq %rbp;
- pushq %rbx;
- pushq %rcx;
-
- preload_roundkey_enc(0);
-
- movq %rsi, %r11;
- movq %rdx, RIO;
-
- read_block4();
-
- round_enc4(0);
- round_enc4(2);
- round_enc4(4);
- round_enc4(6);
- round_enc4(8);
- round_enc4(10);
- round_enc4(12);
- round_enc4(14);
- add_preloaded_roundkey4();
-
-	popq %rbp; /* pop the xor flag (pushed from %rcx above) into %rbp */
- movq %r11, RIO;
-
- test %bpl, %bpl;
- jnz __enc_xor4;
-
- write_block4();
-
- popq %rbx;
- popq %rbp;
- ret;
-
-__enc_xor4:
- xor_block4();
-
- popq %rbx;
- popq %rbp;
- ret;
-
-.align 8
-.global blowfish_dec_blk_4way
-.type blowfish_dec_blk_4way,@function;
-
-blowfish_dec_blk_4way:
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- */
- pushq %rbp;
- pushq %rbx;
- preload_roundkey_dec(17);
-
- movq %rsi, %r11;
- movq %rdx, RIO;
-
- read_block4();
-
- round_dec4(17);
- round_dec4(15);
- round_dec4(13);
- round_dec4(11);
- round_dec4(9);
- round_dec4(7);
- round_dec4(5);
- round_dec4(3);
- add_preloaded_roundkey4();
-
- movq %r11, RIO;
- write_block4();
-
- popq %rbx;
- popq %rbp;
-
- ret;
-
diff --git a/ANDROID_3.4.5/arch/x86/crypto/blowfish_glue.c b/ANDROID_3.4.5/arch/x86/crypto/blowfish_glue.c
deleted file mode 100644
index 7967474d..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/blowfish_glue.c
+++ /dev/null
@@ -1,489 +0,0 @@
-/*
- * Glue Code for assembler optimized version of Blowfish
- *
- * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
- * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- * CTR part based on code (crypto/ctr.c) by:
- * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
- */
-
-#include <asm/processor.h>
-#include <crypto/blowfish.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <crypto/algapi.h>
-
-/* regular block cipher functions */
-asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
- bool xor);
-asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
-
-/* 4-way parallel cipher functions */
-asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
- const u8 *src);
-
-static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
-{
- __blowfish_enc_blk(ctx, dst, src, false);
-}
-
-static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __blowfish_enc_blk(ctx, dst, src, true);
-}
-
-static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __blowfish_enc_blk_4way(ctx, dst, src, false);
-}
-
-static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __blowfish_enc_blk_4way(ctx, dst, src, true);
-}
-
-static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src);
-}
-
-static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
-}
-
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
- void (*fn)(struct bf_ctx *, u8 *, const u8 *),
- void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
-{
- struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = BF_BLOCK_SIZE;
- unsigned int nbytes;
- int err;
-
- err = blkcipher_walk_virt(desc, walk);
-
- while ((nbytes = walk->nbytes)) {
- u8 *wsrc = walk->src.virt.addr;
- u8 *wdst = walk->dst.virt.addr;
-
- /* Process four block batch */
- if (nbytes >= bsize * 4) {
- do {
- fn_4way(ctx, wdst, wsrc);
-
- wsrc += bsize * 4;
- wdst += bsize * 4;
- nbytes -= bsize * 4;
- } while (nbytes >= bsize * 4);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- fn(ctx, wdst, wsrc);
-
- wsrc += bsize;
- wdst += bsize;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- err = blkcipher_walk_done(desc, walk, nbytes);
- }
-
- return err;
-}
-
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
-}
-
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = BF_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u64 *src = (u64 *)walk->src.virt.addr;
- u64 *dst = (u64 *)walk->dst.virt.addr;
- u64 *iv = (u64 *)walk->iv;
-
- do {
- *dst = *src ^ *iv;
- blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
- iv = dst;
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
- *(u64 *)walk->iv = *iv;
- return nbytes;
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
-
- while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_encrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- return err;
-}
-
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = BF_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u64 *src = (u64 *)walk->src.virt.addr;
- u64 *dst = (u64 *)walk->dst.virt.addr;
- u64 ivs[4 - 1];
- u64 last_iv;
-
- /* Start of the last block. */
- src += nbytes / bsize - 1;
- dst += nbytes / bsize - 1;
-
- last_iv = *src;
-
- /* Process four block batch */
- if (nbytes >= bsize * 4) {
- do {
- nbytes -= bsize * 4 - bsize;
- src -= 4 - 1;
- dst -= 4 - 1;
-
- ivs[0] = src[0];
- ivs[1] = src[1];
- ivs[2] = src[2];
-
- blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
-
- dst[1] ^= ivs[0];
- dst[2] ^= ivs[1];
- dst[3] ^= ivs[2];
-
- nbytes -= bsize;
- if (nbytes < bsize)
- goto done;
-
- *dst ^= *(src - 1);
- src -= 1;
- dst -= 1;
- } while (nbytes >= bsize * 4);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- for (;;) {
- blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- break;
-
- *dst ^= *(src - 1);
- src -= 1;
- dst -= 1;
- }
-
-done:
- *dst ^= *(u64 *)walk->iv;
- *(u64 *)walk->iv = last_iv;
-
- return nbytes;
-}
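__cbc_decrypt() walks the buffer from the last block to the first so the in-place 4-way batches can read each ciphertext block before overwriting it; only the last block, which becomes the next chaining IV, needs a saved copy. A reference model (plain C, one contiguous 8-byte-aligned buffer with at least one block, no scatterlists; cbc_decrypt_model is a hypothetical name):

#include <stdint.h>
#include <string.h>

typedef void (*blk_dec_t)(void *ctx, uint8_t *dst, const uint8_t *src);

static void cbc_decrypt_model(void *ctx, blk_dec_t decrypt_block,
			      uint8_t iv[8], uint8_t *buf, size_t nblocks)
{
	uint64_t *blk = (uint64_t *)buf;
	uint64_t last_iv = blk[nblocks - 1];
	uint64_t prev;
	size_t i;

	for (i = nblocks; i-- > 0; ) {
		/* walking downward, C_{i-1} is still untouched ciphertext */
		if (i)
			prev = blk[i - 1];
		else
			memcpy(&prev, iv, 8);

		decrypt_block(ctx, (uint8_t *)&blk[i], (uint8_t *)&blk[i]);
		blk[i] ^= prev;		/* P_i = D_K(C_i) ^ C_{i-1} */
	}

	memcpy(iv, &last_iv, 8);	/* chain for the next call */
}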
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
-
- while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_decrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- return err;
-}
-
-static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
-{
- u8 *ctrblk = walk->iv;
- u8 keystream[BF_BLOCK_SIZE];
- u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
-
- blowfish_enc_blk(ctx, keystream, ctrblk);
- crypto_xor(keystream, src, nbytes);
- memcpy(dst, keystream, nbytes);
-
- crypto_inc(ctrblk, BF_BLOCK_SIZE);
-}
-
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = BF_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u64 *src = (u64 *)walk->src.virt.addr;
- u64 *dst = (u64 *)walk->dst.virt.addr;
- u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
- __be64 ctrblocks[4];
-
- /* Process four block batch */
- if (nbytes >= bsize * 4) {
- do {
- if (dst != src) {
- dst[0] = src[0];
- dst[1] = src[1];
- dst[2] = src[2];
- dst[3] = src[3];
- }
-
- /* create ctrblks for parallel encrypt */
- ctrblocks[0] = cpu_to_be64(ctrblk++);
- ctrblocks[1] = cpu_to_be64(ctrblk++);
- ctrblocks[2] = cpu_to_be64(ctrblk++);
- ctrblocks[3] = cpu_to_be64(ctrblk++);
-
- blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
- (u8 *)ctrblocks);
-
- src += 4;
- dst += 4;
- } while ((nbytes -= bsize * 4) >= bsize * 4);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- if (dst != src)
- *dst = *src;
-
- ctrblocks[0] = cpu_to_be64(ctrblk++);
-
- blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
-
- src += 1;
- dst += 1;
- } while ((nbytes -= bsize) >= bsize);
-
-done:
- *(__be64 *)walk->iv = cpu_to_be64(ctrblk);
- return nbytes;
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
-
- while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
- nbytes = __ctr_crypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- if (walk.nbytes) {
- ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
- err = blkcipher_walk_done(desc, &walk, 0);
- }
-
- return err;
-}
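ctr_crypt() treats the 8-byte IV as a big-endian counter: each keystream block is E_K(counter), the counter increments once per block, and ctr_crypt_final() truncates the last keystream block for a partial tail. A byte-oriented reference model (plain C; ctr_crypt_model and blk_enc_t are hypothetical names):

#include <stdint.h>
#include <string.h>

typedef void (*blk_enc_t)(void *ctx, uint8_t dst[8], const uint8_t src[8]);

static void ctr_crypt_model(void *ctx, blk_enc_t encrypt_block,
			    uint8_t iv[8], uint8_t *dst, const uint8_t *src,
			    size_t len)
{
	uint8_t ks[8];
	size_t n, i;

	while (len) {
		encrypt_block(ctx, ks, iv);	/* keystream = E_K(counter) */

		n = len < 8 ? len : 8;		/* final block may be short */
		for (i = 0; i < n; i++)
			dst[i] = src[i] ^ ks[i];

		/* big-endian increment, as crypto_inc() does */
		for (i = 8; i-- > 0 && ++iv[i] == 0; )
			;

		src += n;
		dst += n;
		len -= n;
	}
}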
-
-static struct crypto_alg bf_algs[4] = { {
- .cra_name = "blowfish",
- .cra_driver_name = "blowfish-asm",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = BF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct bf_ctx),
- .cra_alignmask = 0,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(bf_algs[0].cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = BF_MIN_KEY_SIZE,
- .cia_max_keysize = BF_MAX_KEY_SIZE,
- .cia_setkey = blowfish_setkey,
- .cia_encrypt = blowfish_encrypt,
- .cia_decrypt = blowfish_decrypt,
- }
- }
-}, {
- .cra_name = "ecb(blowfish)",
- .cra_driver_name = "ecb-blowfish-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = BF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct bf_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(bf_algs[1].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .setkey = blowfish_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(blowfish)",
- .cra_driver_name = "cbc-blowfish-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = BF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct bf_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(bf_algs[2].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .ivsize = BF_BLOCK_SIZE,
- .setkey = blowfish_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(blowfish)",
- .cra_driver_name = "ctr-blowfish-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct bf_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(bf_algs[3].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .ivsize = BF_BLOCK_SIZE,
- .setkey = blowfish_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-} };
-
-static bool is_blacklisted_cpu(void)
-{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
- return false;
-
- if (boot_cpu_data.x86 == 0x0f) {
- /*
-		 * On Pentium 4, blowfish-x86_64 is slower than the generic C
-		 * implementation because it uses 64-bit rotates, which are
-		 * really slow on P4. Therefore blacklist P4s.
- */
- return true;
- }
-
- return false;
-}
-
-static int force;
-module_param(force, int, 0);
-MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
-
-static int __init init(void)
-{
- if (!force && is_blacklisted_cpu()) {
- printk(KERN_INFO
- "blowfish-x86_64: performance on this CPU "
- "would be suboptimal: disabling "
- "blowfish-x86_64.\n");
- return -ENODEV;
- }
-
- return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
-}
-
-static void __exit fini(void)
-{
- crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized");
-MODULE_ALIAS("blowfish");
-MODULE_ALIAS("blowfish-asm");
diff --git a/ANDROID_3.4.5/arch/x86/crypto/camellia-x86_64-asm_64.S b/ANDROID_3.4.5/arch/x86/crypto/camellia-x86_64-asm_64.S
deleted file mode 100644
index 0b337433..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ /dev/null
@@ -1,520 +0,0 @@
-/*
- * Camellia Cipher Algorithm (x86_64)
- *
- * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
- */
-
-.file "camellia-x86_64-asm_64.S"
-.text
-
-.extern camellia_sp10011110;
-.extern camellia_sp22000222;
-.extern camellia_sp03303033;
-.extern camellia_sp00444404;
-.extern camellia_sp02220222;
-.extern camellia_sp30333033;
-.extern camellia_sp44044404;
-.extern camellia_sp11101110;
-
-#define sp10011110 camellia_sp10011110
-#define sp22000222 camellia_sp22000222
-#define sp03303033 camellia_sp03303033
-#define sp00444404 camellia_sp00444404
-#define sp02220222 camellia_sp02220222
-#define sp30333033 camellia_sp30333033
-#define sp44044404 camellia_sp44044404
-#define sp11101110 camellia_sp11101110
-
-#define CAMELLIA_TABLE_BYTE_LEN 272
-
-/* struct camellia_ctx: */
-#define key_table 0
-#define key_length CAMELLIA_TABLE_BYTE_LEN
-
-/* register macros */
-#define CTX %rdi
-#define RIO %rsi
-#define RIOd %esi
-
-#define RAB0 %rax
-#define RCD0 %rcx
-#define RAB1 %rbx
-#define RCD1 %rdx
-
-#define RAB0d %eax
-#define RCD0d %ecx
-#define RAB1d %ebx
-#define RCD1d %edx
-
-#define RAB0bl %al
-#define RCD0bl %cl
-#define RAB1bl %bl
-#define RCD1bl %dl
-
-#define RAB0bh %ah
-#define RCD0bh %ch
-#define RAB1bh %bh
-#define RCD1bh %dh
-
-#define RT0 %rsi
-#define RT1 %rbp
-#define RT2 %r8
-
-#define RT0d %esi
-#define RT1d %ebp
-#define RT2d %r8d
-
-#define RT2bl %r8b
-
-#define RXOR %r9
-#define RRBP %r10
-#define RDST %r11
-
-#define RXORd %r9d
-#define RXORbl %r9b
-
-#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
- movzbl ab ## bl, tmp2 ## d; \
- movzbl ab ## bh, tmp1 ## d; \
- rorq $16, ab; \
- xorq T0(, tmp2, 8), dst; \
- xorq T1(, tmp1, 8), dst;
-
-/**********************************************************************
- 1-way camellia
- **********************************************************************/
-#define roundsm(ab, subkey, cd) \
- movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
- \
- xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
- xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
- xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
- xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
- \
- xorq RT2, cd ## 0;
-
-#define fls(l, r, kl, kr) \
- movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
- andl l ## 0d, RT0d; \
- roll $1, RT0d; \
- shlq $32, RT0; \
- xorq RT0, l ## 0; \
- movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
- orq r ## 0, RT1; \
- shrq $32, RT1; \
- xorq RT1, r ## 0; \
- \
- movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \
- orq l ## 0, RT2; \
- shrq $32, RT2; \
- xorq RT2, l ## 0; \
- movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \
- andl r ## 0d, RT0d; \
- roll $1, RT0d; \
- shlq $32, RT0; \
- xorq RT0, r ## 0;
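For reference, fls() is the FL / FL^-1 pair from RFC 3713 applied to the two 64-bit halves; the shlq/shrq shuffling exists only because the asm keeps each half rotated by 32 bits. The textbook functions:

#include <stdint.h>

static inline uint32_t rol32_1(uint32_t x) { return (x << 1) | (x >> 31); }

static uint64_t camellia_fl(uint64_t x, uint64_t ke)
{
	uint32_t x1 = x >> 32, x2 = (uint32_t)x;
	uint32_t k1 = ke >> 32, k2 = (uint32_t)ke;

	x2 ^= rol32_1(x1 & k1);
	x1 ^= (x2 | k2);
	return ((uint64_t)x1 << 32) | x2;
}

static uint64_t camellia_flinv(uint64_t y, uint64_t ke)
{
	uint32_t y1 = y >> 32, y2 = (uint32_t)y;
	uint32_t k1 = ke >> 32, k2 = (uint32_t)ke;

	y1 ^= (y2 | k2);
	y2 ^= rol32_1(y1 & k1);
	return ((uint64_t)y1 << 32) | y2;
}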
-
-#define enc_rounds(i) \
- roundsm(RAB, i + 2, RCD); \
- roundsm(RCD, i + 3, RAB); \
- roundsm(RAB, i + 4, RCD); \
- roundsm(RCD, i + 5, RAB); \
- roundsm(RAB, i + 6, RCD); \
- roundsm(RCD, i + 7, RAB);
-
-#define enc_fls(i) \
- fls(RAB, RCD, i + 0, i + 1);
-
-#define enc_inpack() \
- movq (RIO), RAB0; \
- bswapq RAB0; \
- rolq $32, RAB0; \
- movq 4*2(RIO), RCD0; \
- bswapq RCD0; \
- rorq $32, RCD0; \
- xorq key_table(CTX), RAB0;
-
-#define enc_outunpack(op, max) \
- xorq key_table(CTX, max, 8), RCD0; \
- rorq $32, RCD0; \
- bswapq RCD0; \
- op ## q RCD0, (RIO); \
- rolq $32, RAB0; \
- bswapq RAB0; \
- op ## q RAB0, 4*2(RIO);
-
-#define dec_rounds(i) \
- roundsm(RAB, i + 7, RCD); \
- roundsm(RCD, i + 6, RAB); \
- roundsm(RAB, i + 5, RCD); \
- roundsm(RCD, i + 4, RAB); \
- roundsm(RAB, i + 3, RCD); \
- roundsm(RCD, i + 2, RAB);
-
-#define dec_fls(i) \
- fls(RAB, RCD, i + 1, i + 0);
-
-#define dec_inpack(max) \
- movq (RIO), RAB0; \
- bswapq RAB0; \
- rolq $32, RAB0; \
- movq 4*2(RIO), RCD0; \
- bswapq RCD0; \
- rorq $32, RCD0; \
- xorq key_table(CTX, max, 8), RAB0;
-
-#define dec_outunpack() \
- xorq key_table(CTX), RCD0; \
- rorq $32, RCD0; \
- bswapq RCD0; \
- movq RCD0, (RIO); \
- rolq $32, RAB0; \
- bswapq RAB0; \
- movq RAB0, 4*2(RIO);
-
-.global __camellia_enc_blk;
-.type __camellia_enc_blk,@function;
-
-__camellia_enc_blk:
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: bool xor
- */
- movq %rbp, RRBP;
-
- movq %rcx, RXOR;
- movq %rsi, RDST;
- movq %rdx, RIO;
-
- enc_inpack();
-
- enc_rounds(0);
- enc_fls(8);
- enc_rounds(8);
- enc_fls(16);
- enc_rounds(16);
- movl $24, RT1d; /* max */
-
- cmpb $16, key_length(CTX);
- je __enc_done;
-
- enc_fls(24);
- enc_rounds(24);
- movl $32, RT1d; /* max */
-
-__enc_done:
- testb RXORbl, RXORbl;
- movq RDST, RIO;
-
- jnz __enc_xor;
-
- enc_outunpack(mov, RT1);
-
- movq RRBP, %rbp;
- ret;
-
-__enc_xor:
- enc_outunpack(xor, RT1);
-
- movq RRBP, %rbp;
- ret;
-
-.global camellia_dec_blk;
-.type camellia_dec_blk,@function;
-
-camellia_dec_blk:
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- */
- cmpl $16, key_length(CTX);
- movl $32, RT2d;
- movl $24, RXORd;
- cmovel RXORd, RT2d; /* max */
-
- movq %rbp, RRBP;
- movq %rsi, RDST;
- movq %rdx, RIO;
-
- dec_inpack(RT2);
-
- cmpb $24, RT2bl;
- je __dec_rounds16;
-
- dec_rounds(24);
- dec_fls(24);
-
-__dec_rounds16:
- dec_rounds(16);
- dec_fls(16);
- dec_rounds(8);
- dec_fls(8);
- dec_rounds(0);
-
- movq RDST, RIO;
-
- dec_outunpack();
-
- movq RRBP, %rbp;
- ret;
-
-/**********************************************************************
- 2-way camellia
- **********************************************************************/
-#define roundsm2(ab, subkey, cd) \
- movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
- xorq RT2, cd ## 1; \
- \
- xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
- xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
- xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
- xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
- \
- xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
- xorq RT2, cd ## 0; \
- xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
- xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
- xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
-
-#define fls2(l, r, kl, kr) \
- movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
- andl l ## 0d, RT0d; \
- roll $1, RT0d; \
- shlq $32, RT0; \
- xorq RT0, l ## 0; \
- movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
- orq r ## 0, RT1; \
- shrq $32, RT1; \
- xorq RT1, r ## 0; \
- \
- movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \
- andl l ## 1d, RT2d; \
- roll $1, RT2d; \
- shlq $32, RT2; \
- xorq RT2, l ## 1; \
- movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \
- orq r ## 1, RT0; \
- shrq $32, RT0; \
- xorq RT0, r ## 1; \
- \
- movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \
- orq l ## 0, RT1; \
- shrq $32, RT1; \
- xorq RT1, l ## 0; \
- movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \
- andl r ## 0d, RT2d; \
- roll $1, RT2d; \
- shlq $32, RT2; \
- xorq RT2, r ## 0; \
- \
- movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \
- orq l ## 1, RT0; \
- shrq $32, RT0; \
- xorq RT0, l ## 1; \
- movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \
- andl r ## 1d, RT1d; \
- roll $1, RT1d; \
- shlq $32, RT1; \
- xorq RT1, r ## 1;
-
-#define enc_rounds2(i) \
- roundsm2(RAB, i + 2, RCD); \
- roundsm2(RCD, i + 3, RAB); \
- roundsm2(RAB, i + 4, RCD); \
- roundsm2(RCD, i + 5, RAB); \
- roundsm2(RAB, i + 6, RCD); \
- roundsm2(RCD, i + 7, RAB);
-
-#define enc_fls2(i) \
- fls2(RAB, RCD, i + 0, i + 1);
-
-#define enc_inpack2() \
- movq (RIO), RAB0; \
- bswapq RAB0; \
- rorq $32, RAB0; \
- movq 4*2(RIO), RCD0; \
- bswapq RCD0; \
- rolq $32, RCD0; \
- xorq key_table(CTX), RAB0; \
- \
- movq 8*2(RIO), RAB1; \
- bswapq RAB1; \
- rorq $32, RAB1; \
- movq 12*2(RIO), RCD1; \
- bswapq RCD1; \
- rolq $32, RCD1; \
- xorq key_table(CTX), RAB1;
-
-#define enc_outunpack2(op, max) \
- xorq key_table(CTX, max, 8), RCD0; \
- rolq $32, RCD0; \
- bswapq RCD0; \
- op ## q RCD0, (RIO); \
- rorq $32, RAB0; \
- bswapq RAB0; \
- op ## q RAB0, 4*2(RIO); \
- \
- xorq key_table(CTX, max, 8), RCD1; \
- rolq $32, RCD1; \
- bswapq RCD1; \
- op ## q RCD1, 8*2(RIO); \
- rorq $32, RAB1; \
- bswapq RAB1; \
- op ## q RAB1, 12*2(RIO);
-
-#define dec_rounds2(i) \
- roundsm2(RAB, i + 7, RCD); \
- roundsm2(RCD, i + 6, RAB); \
- roundsm2(RAB, i + 5, RCD); \
- roundsm2(RCD, i + 4, RAB); \
- roundsm2(RAB, i + 3, RCD); \
- roundsm2(RCD, i + 2, RAB);
-
-#define dec_fls2(i) \
- fls2(RAB, RCD, i + 1, i + 0);
-
-#define dec_inpack2(max) \
- movq (RIO), RAB0; \
- bswapq RAB0; \
- rorq $32, RAB0; \
- movq 4*2(RIO), RCD0; \
- bswapq RCD0; \
- rolq $32, RCD0; \
- xorq key_table(CTX, max, 8), RAB0; \
- \
- movq 8*2(RIO), RAB1; \
- bswapq RAB1; \
- rorq $32, RAB1; \
- movq 12*2(RIO), RCD1; \
- bswapq RCD1; \
- rolq $32, RCD1; \
- xorq key_table(CTX, max, 8), RAB1;
-
-#define dec_outunpack2() \
- xorq key_table(CTX), RCD0; \
- rolq $32, RCD0; \
- bswapq RCD0; \
- movq RCD0, (RIO); \
- rorq $32, RAB0; \
- bswapq RAB0; \
- movq RAB0, 4*2(RIO); \
- \
- xorq key_table(CTX), RCD1; \
- rolq $32, RCD1; \
- bswapq RCD1; \
- movq RCD1, 8*2(RIO); \
- rorq $32, RAB1; \
- bswapq RAB1; \
- movq RAB1, 12*2(RIO);
-
-.global __camellia_enc_blk_2way;
-.type __camellia_enc_blk_2way,@function;
-
-__camellia_enc_blk_2way:
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: bool xor
- */
- pushq %rbx;
-
- movq %rbp, RRBP;
- movq %rcx, RXOR;
- movq %rsi, RDST;
- movq %rdx, RIO;
-
- enc_inpack2();
-
- enc_rounds2(0);
- enc_fls2(8);
- enc_rounds2(8);
- enc_fls2(16);
- enc_rounds2(16);
- movl $24, RT2d; /* max */
-
- cmpb $16, key_length(CTX);
- je __enc2_done;
-
- enc_fls2(24);
- enc_rounds2(24);
- movl $32, RT2d; /* max */
-
-__enc2_done:
- test RXORbl, RXORbl;
- movq RDST, RIO;
- jnz __enc2_xor;
-
- enc_outunpack2(mov, RT2);
-
- movq RRBP, %rbp;
- popq %rbx;
- ret;
-
-__enc2_xor:
- enc_outunpack2(xor, RT2);
-
- movq RRBP, %rbp;
- popq %rbx;
- ret;
-
-.global camellia_dec_blk_2way;
-.type camellia_dec_blk_2way,@function;
-
-camellia_dec_blk_2way:
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- */
- cmpl $16, key_length(CTX);
- movl $32, RT2d;
- movl $24, RXORd;
- cmovel RXORd, RT2d; /* max */
-
- movq %rbx, RXOR;
- movq %rbp, RRBP;
- movq %rsi, RDST;
- movq %rdx, RIO;
-
- dec_inpack2(RT2);
-
- cmpb $24, RT2bl;
- je __dec2_rounds16;
-
- dec_rounds2(24);
- dec_fls2(24);
-
-__dec2_rounds16:
- dec_rounds2(16);
- dec_fls2(16);
- dec_rounds2(8);
- dec_fls2(8);
- dec_rounds2(0);
-
- movq RDST, RIO;
-
- dec_outunpack2();
-
- movq RRBP, %rbp;
- movq RXOR, %rbx;
- ret;
diff --git a/ANDROID_3.4.5/arch/x86/crypto/camellia_glue.c b/ANDROID_3.4.5/arch/x86/crypto/camellia_glue.c
deleted file mode 100644
index 3306dc0b..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/camellia_glue.c
+++ /dev/null
@@ -1,1952 +0,0 @@
-/*
- * Glue Code for assembler optimized version of Camellia
- *
- * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * Camellia parts based on code by:
- * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation)
- * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
- * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- * CTR part based on code (crypto/ctr.c) by:
- * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
- */
-
-#include <asm/processor.h>
-#include <asm/unaligned.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <crypto/algapi.h>
-#include <crypto/b128ops.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
-
-#define CAMELLIA_MIN_KEY_SIZE 16
-#define CAMELLIA_MAX_KEY_SIZE 32
-#define CAMELLIA_BLOCK_SIZE 16
-#define CAMELLIA_TABLE_BYTE_LEN 272
-
-struct camellia_ctx {
- u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
- u32 key_length;
-};
-
-/* regular block cipher functions */
-asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src);
-
-/* 2-way parallel cipher functions */
-asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src);
-
-static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __camellia_enc_blk(ctx, dst, src, false);
-}
-
-static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __camellia_enc_blk(ctx, dst, src, true);
-}
-
-static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __camellia_enc_blk_2way(ctx, dst, src, false);
-}
-
-static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __camellia_enc_blk_2way(ctx, dst, src, true);
-}
-
-static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- camellia_enc_blk(crypto_tfm_ctx(tfm), dst, src);
-}
-
-static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- camellia_dec_blk(crypto_tfm_ctx(tfm), dst, src);
-}
-
-/* camellia sboxes */
-const u64 camellia_sp10011110[256] = {
- 0x7000007070707000, 0x8200008282828200, 0x2c00002c2c2c2c00,
- 0xec0000ecececec00, 0xb30000b3b3b3b300, 0x2700002727272700,
- 0xc00000c0c0c0c000, 0xe50000e5e5e5e500, 0xe40000e4e4e4e400,
- 0x8500008585858500, 0x5700005757575700, 0x3500003535353500,
- 0xea0000eaeaeaea00, 0x0c00000c0c0c0c00, 0xae0000aeaeaeae00,
- 0x4100004141414100, 0x2300002323232300, 0xef0000efefefef00,
- 0x6b00006b6b6b6b00, 0x9300009393939300, 0x4500004545454500,
- 0x1900001919191900, 0xa50000a5a5a5a500, 0x2100002121212100,
- 0xed0000edededed00, 0x0e00000e0e0e0e00, 0x4f00004f4f4f4f00,
- 0x4e00004e4e4e4e00, 0x1d00001d1d1d1d00, 0x6500006565656500,
- 0x9200009292929200, 0xbd0000bdbdbdbd00, 0x8600008686868600,
- 0xb80000b8b8b8b800, 0xaf0000afafafaf00, 0x8f00008f8f8f8f00,
- 0x7c00007c7c7c7c00, 0xeb0000ebebebeb00, 0x1f00001f1f1f1f00,
- 0xce0000cececece00, 0x3e00003e3e3e3e00, 0x3000003030303000,
- 0xdc0000dcdcdcdc00, 0x5f00005f5f5f5f00, 0x5e00005e5e5e5e00,
- 0xc50000c5c5c5c500, 0x0b00000b0b0b0b00, 0x1a00001a1a1a1a00,
- 0xa60000a6a6a6a600, 0xe10000e1e1e1e100, 0x3900003939393900,
- 0xca0000cacacaca00, 0xd50000d5d5d5d500, 0x4700004747474700,
- 0x5d00005d5d5d5d00, 0x3d00003d3d3d3d00, 0xd90000d9d9d9d900,
- 0x0100000101010100, 0x5a00005a5a5a5a00, 0xd60000d6d6d6d600,
- 0x5100005151515100, 0x5600005656565600, 0x6c00006c6c6c6c00,
- 0x4d00004d4d4d4d00, 0x8b00008b8b8b8b00, 0x0d00000d0d0d0d00,
- 0x9a00009a9a9a9a00, 0x6600006666666600, 0xfb0000fbfbfbfb00,
- 0xcc0000cccccccc00, 0xb00000b0b0b0b000, 0x2d00002d2d2d2d00,
- 0x7400007474747400, 0x1200001212121200, 0x2b00002b2b2b2b00,
- 0x2000002020202000, 0xf00000f0f0f0f000, 0xb10000b1b1b1b100,
- 0x8400008484848400, 0x9900009999999900, 0xdf0000dfdfdfdf00,
- 0x4c00004c4c4c4c00, 0xcb0000cbcbcbcb00, 0xc20000c2c2c2c200,
- 0x3400003434343400, 0x7e00007e7e7e7e00, 0x7600007676767600,
- 0x0500000505050500, 0x6d00006d6d6d6d00, 0xb70000b7b7b7b700,
- 0xa90000a9a9a9a900, 0x3100003131313100, 0xd10000d1d1d1d100,
- 0x1700001717171700, 0x0400000404040400, 0xd70000d7d7d7d700,
- 0x1400001414141400, 0x5800005858585800, 0x3a00003a3a3a3a00,
- 0x6100006161616100, 0xde0000dededede00, 0x1b00001b1b1b1b00,
- 0x1100001111111100, 0x1c00001c1c1c1c00, 0x3200003232323200,
- 0x0f00000f0f0f0f00, 0x9c00009c9c9c9c00, 0x1600001616161600,
- 0x5300005353535300, 0x1800001818181800, 0xf20000f2f2f2f200,
- 0x2200002222222200, 0xfe0000fefefefe00, 0x4400004444444400,
- 0xcf0000cfcfcfcf00, 0xb20000b2b2b2b200, 0xc30000c3c3c3c300,
- 0xb50000b5b5b5b500, 0x7a00007a7a7a7a00, 0x9100009191919100,
- 0x2400002424242400, 0x0800000808080800, 0xe80000e8e8e8e800,
- 0xa80000a8a8a8a800, 0x6000006060606000, 0xfc0000fcfcfcfc00,
- 0x6900006969696900, 0x5000005050505000, 0xaa0000aaaaaaaa00,
- 0xd00000d0d0d0d000, 0xa00000a0a0a0a000, 0x7d00007d7d7d7d00,
- 0xa10000a1a1a1a100, 0x8900008989898900, 0x6200006262626200,
- 0x9700009797979700, 0x5400005454545400, 0x5b00005b5b5b5b00,
- 0x1e00001e1e1e1e00, 0x9500009595959500, 0xe00000e0e0e0e000,
- 0xff0000ffffffff00, 0x6400006464646400, 0xd20000d2d2d2d200,
- 0x1000001010101000, 0xc40000c4c4c4c400, 0x0000000000000000,
- 0x4800004848484800, 0xa30000a3a3a3a300, 0xf70000f7f7f7f700,
- 0x7500007575757500, 0xdb0000dbdbdbdb00, 0x8a00008a8a8a8a00,
- 0x0300000303030300, 0xe60000e6e6e6e600, 0xda0000dadadada00,
- 0x0900000909090900, 0x3f00003f3f3f3f00, 0xdd0000dddddddd00,
- 0x9400009494949400, 0x8700008787878700, 0x5c00005c5c5c5c00,
- 0x8300008383838300, 0x0200000202020200, 0xcd0000cdcdcdcd00,
- 0x4a00004a4a4a4a00, 0x9000009090909000, 0x3300003333333300,
- 0x7300007373737300, 0x6700006767676700, 0xf60000f6f6f6f600,
- 0xf30000f3f3f3f300, 0x9d00009d9d9d9d00, 0x7f00007f7f7f7f00,
- 0xbf0000bfbfbfbf00, 0xe20000e2e2e2e200, 0x5200005252525200,
- 0x9b00009b9b9b9b00, 0xd80000d8d8d8d800, 0x2600002626262600,
- 0xc80000c8c8c8c800, 0x3700003737373700, 0xc60000c6c6c6c600,
- 0x3b00003b3b3b3b00, 0x8100008181818100, 0x9600009696969600,
- 0x6f00006f6f6f6f00, 0x4b00004b4b4b4b00, 0x1300001313131300,
- 0xbe0000bebebebe00, 0x6300006363636300, 0x2e00002e2e2e2e00,
- 0xe90000e9e9e9e900, 0x7900007979797900, 0xa70000a7a7a7a700,
- 0x8c00008c8c8c8c00, 0x9f00009f9f9f9f00, 0x6e00006e6e6e6e00,
- 0xbc0000bcbcbcbc00, 0x8e00008e8e8e8e00, 0x2900002929292900,
- 0xf50000f5f5f5f500, 0xf90000f9f9f9f900, 0xb60000b6b6b6b600,
- 0x2f00002f2f2f2f00, 0xfd0000fdfdfdfd00, 0xb40000b4b4b4b400,
- 0x5900005959595900, 0x7800007878787800, 0x9800009898989800,
- 0x0600000606060600, 0x6a00006a6a6a6a00, 0xe70000e7e7e7e700,
- 0x4600004646464600, 0x7100007171717100, 0xba0000babababa00,
- 0xd40000d4d4d4d400, 0x2500002525252500, 0xab0000abababab00,
- 0x4200004242424200, 0x8800008888888800, 0xa20000a2a2a2a200,
- 0x8d00008d8d8d8d00, 0xfa0000fafafafa00, 0x7200007272727200,
- 0x0700000707070700, 0xb90000b9b9b9b900, 0x5500005555555500,
- 0xf80000f8f8f8f800, 0xee0000eeeeeeee00, 0xac0000acacacac00,
- 0x0a00000a0a0a0a00, 0x3600003636363600, 0x4900004949494900,
- 0x2a00002a2a2a2a00, 0x6800006868686800, 0x3c00003c3c3c3c00,
- 0x3800003838383800, 0xf10000f1f1f1f100, 0xa40000a4a4a4a400,
- 0x4000004040404000, 0x2800002828282800, 0xd30000d3d3d3d300,
- 0x7b00007b7b7b7b00, 0xbb0000bbbbbbbb00, 0xc90000c9c9c9c900,
- 0x4300004343434300, 0xc10000c1c1c1c100, 0x1500001515151500,
- 0xe30000e3e3e3e300, 0xad0000adadadad00, 0xf40000f4f4f4f400,
- 0x7700007777777700, 0xc70000c7c7c7c700, 0x8000008080808000,
- 0x9e00009e9e9e9e00,
-};
-
-const u64 camellia_sp22000222[256] = {
- 0xe0e0000000e0e0e0, 0x0505000000050505, 0x5858000000585858,
- 0xd9d9000000d9d9d9, 0x6767000000676767, 0x4e4e0000004e4e4e,
- 0x8181000000818181, 0xcbcb000000cbcbcb, 0xc9c9000000c9c9c9,
- 0x0b0b0000000b0b0b, 0xaeae000000aeaeae, 0x6a6a0000006a6a6a,
- 0xd5d5000000d5d5d5, 0x1818000000181818, 0x5d5d0000005d5d5d,
- 0x8282000000828282, 0x4646000000464646, 0xdfdf000000dfdfdf,
- 0xd6d6000000d6d6d6, 0x2727000000272727, 0x8a8a0000008a8a8a,
- 0x3232000000323232, 0x4b4b0000004b4b4b, 0x4242000000424242,
- 0xdbdb000000dbdbdb, 0x1c1c0000001c1c1c, 0x9e9e0000009e9e9e,
- 0x9c9c0000009c9c9c, 0x3a3a0000003a3a3a, 0xcaca000000cacaca,
- 0x2525000000252525, 0x7b7b0000007b7b7b, 0x0d0d0000000d0d0d,
- 0x7171000000717171, 0x5f5f0000005f5f5f, 0x1f1f0000001f1f1f,
- 0xf8f8000000f8f8f8, 0xd7d7000000d7d7d7, 0x3e3e0000003e3e3e,
- 0x9d9d0000009d9d9d, 0x7c7c0000007c7c7c, 0x6060000000606060,
- 0xb9b9000000b9b9b9, 0xbebe000000bebebe, 0xbcbc000000bcbcbc,
- 0x8b8b0000008b8b8b, 0x1616000000161616, 0x3434000000343434,
- 0x4d4d0000004d4d4d, 0xc3c3000000c3c3c3, 0x7272000000727272,
- 0x9595000000959595, 0xabab000000ababab, 0x8e8e0000008e8e8e,
- 0xbaba000000bababa, 0x7a7a0000007a7a7a, 0xb3b3000000b3b3b3,
- 0x0202000000020202, 0xb4b4000000b4b4b4, 0xadad000000adadad,
- 0xa2a2000000a2a2a2, 0xacac000000acacac, 0xd8d8000000d8d8d8,
- 0x9a9a0000009a9a9a, 0x1717000000171717, 0x1a1a0000001a1a1a,
- 0x3535000000353535, 0xcccc000000cccccc, 0xf7f7000000f7f7f7,
- 0x9999000000999999, 0x6161000000616161, 0x5a5a0000005a5a5a,
- 0xe8e8000000e8e8e8, 0x2424000000242424, 0x5656000000565656,
- 0x4040000000404040, 0xe1e1000000e1e1e1, 0x6363000000636363,
- 0x0909000000090909, 0x3333000000333333, 0xbfbf000000bfbfbf,
- 0x9898000000989898, 0x9797000000979797, 0x8585000000858585,
- 0x6868000000686868, 0xfcfc000000fcfcfc, 0xecec000000ececec,
- 0x0a0a0000000a0a0a, 0xdada000000dadada, 0x6f6f0000006f6f6f,
- 0x5353000000535353, 0x6262000000626262, 0xa3a3000000a3a3a3,
- 0x2e2e0000002e2e2e, 0x0808000000080808, 0xafaf000000afafaf,
- 0x2828000000282828, 0xb0b0000000b0b0b0, 0x7474000000747474,
- 0xc2c2000000c2c2c2, 0xbdbd000000bdbdbd, 0x3636000000363636,
- 0x2222000000222222, 0x3838000000383838, 0x6464000000646464,
- 0x1e1e0000001e1e1e, 0x3939000000393939, 0x2c2c0000002c2c2c,
- 0xa6a6000000a6a6a6, 0x3030000000303030, 0xe5e5000000e5e5e5,
- 0x4444000000444444, 0xfdfd000000fdfdfd, 0x8888000000888888,
- 0x9f9f0000009f9f9f, 0x6565000000656565, 0x8787000000878787,
- 0x6b6b0000006b6b6b, 0xf4f4000000f4f4f4, 0x2323000000232323,
- 0x4848000000484848, 0x1010000000101010, 0xd1d1000000d1d1d1,
- 0x5151000000515151, 0xc0c0000000c0c0c0, 0xf9f9000000f9f9f9,
- 0xd2d2000000d2d2d2, 0xa0a0000000a0a0a0, 0x5555000000555555,
- 0xa1a1000000a1a1a1, 0x4141000000414141, 0xfafa000000fafafa,
- 0x4343000000434343, 0x1313000000131313, 0xc4c4000000c4c4c4,
- 0x2f2f0000002f2f2f, 0xa8a8000000a8a8a8, 0xb6b6000000b6b6b6,
- 0x3c3c0000003c3c3c, 0x2b2b0000002b2b2b, 0xc1c1000000c1c1c1,
- 0xffff000000ffffff, 0xc8c8000000c8c8c8, 0xa5a5000000a5a5a5,
- 0x2020000000202020, 0x8989000000898989, 0x0000000000000000,
- 0x9090000000909090, 0x4747000000474747, 0xefef000000efefef,
- 0xeaea000000eaeaea, 0xb7b7000000b7b7b7, 0x1515000000151515,
- 0x0606000000060606, 0xcdcd000000cdcdcd, 0xb5b5000000b5b5b5,
- 0x1212000000121212, 0x7e7e0000007e7e7e, 0xbbbb000000bbbbbb,
- 0x2929000000292929, 0x0f0f0000000f0f0f, 0xb8b8000000b8b8b8,
- 0x0707000000070707, 0x0404000000040404, 0x9b9b0000009b9b9b,
- 0x9494000000949494, 0x2121000000212121, 0x6666000000666666,
- 0xe6e6000000e6e6e6, 0xcece000000cecece, 0xeded000000ededed,
- 0xe7e7000000e7e7e7, 0x3b3b0000003b3b3b, 0xfefe000000fefefe,
- 0x7f7f0000007f7f7f, 0xc5c5000000c5c5c5, 0xa4a4000000a4a4a4,
- 0x3737000000373737, 0xb1b1000000b1b1b1, 0x4c4c0000004c4c4c,
- 0x9191000000919191, 0x6e6e0000006e6e6e, 0x8d8d0000008d8d8d,
- 0x7676000000767676, 0x0303000000030303, 0x2d2d0000002d2d2d,
- 0xdede000000dedede, 0x9696000000969696, 0x2626000000262626,
- 0x7d7d0000007d7d7d, 0xc6c6000000c6c6c6, 0x5c5c0000005c5c5c,
- 0xd3d3000000d3d3d3, 0xf2f2000000f2f2f2, 0x4f4f0000004f4f4f,
- 0x1919000000191919, 0x3f3f0000003f3f3f, 0xdcdc000000dcdcdc,
- 0x7979000000797979, 0x1d1d0000001d1d1d, 0x5252000000525252,
- 0xebeb000000ebebeb, 0xf3f3000000f3f3f3, 0x6d6d0000006d6d6d,
- 0x5e5e0000005e5e5e, 0xfbfb000000fbfbfb, 0x6969000000696969,
- 0xb2b2000000b2b2b2, 0xf0f0000000f0f0f0, 0x3131000000313131,
- 0x0c0c0000000c0c0c, 0xd4d4000000d4d4d4, 0xcfcf000000cfcfcf,
- 0x8c8c0000008c8c8c, 0xe2e2000000e2e2e2, 0x7575000000757575,
- 0xa9a9000000a9a9a9, 0x4a4a0000004a4a4a, 0x5757000000575757,
- 0x8484000000848484, 0x1111000000111111, 0x4545000000454545,
- 0x1b1b0000001b1b1b, 0xf5f5000000f5f5f5, 0xe4e4000000e4e4e4,
- 0x0e0e0000000e0e0e, 0x7373000000737373, 0xaaaa000000aaaaaa,
- 0xf1f1000000f1f1f1, 0xdddd000000dddddd, 0x5959000000595959,
- 0x1414000000141414, 0x6c6c0000006c6c6c, 0x9292000000929292,
- 0x5454000000545454, 0xd0d0000000d0d0d0, 0x7878000000787878,
- 0x7070000000707070, 0xe3e3000000e3e3e3, 0x4949000000494949,
- 0x8080000000808080, 0x5050000000505050, 0xa7a7000000a7a7a7,
- 0xf6f6000000f6f6f6, 0x7777000000777777, 0x9393000000939393,
- 0x8686000000868686, 0x8383000000838383, 0x2a2a0000002a2a2a,
- 0xc7c7000000c7c7c7, 0x5b5b0000005b5b5b, 0xe9e9000000e9e9e9,
- 0xeeee000000eeeeee, 0x8f8f0000008f8f8f, 0x0101000000010101,
- 0x3d3d0000003d3d3d,
-};
-
-const u64 camellia_sp03303033[256] = {
- 0x0038380038003838, 0x0041410041004141, 0x0016160016001616,
- 0x0076760076007676, 0x00d9d900d900d9d9, 0x0093930093009393,
- 0x0060600060006060, 0x00f2f200f200f2f2, 0x0072720072007272,
- 0x00c2c200c200c2c2, 0x00abab00ab00abab, 0x009a9a009a009a9a,
- 0x0075750075007575, 0x0006060006000606, 0x0057570057005757,
- 0x00a0a000a000a0a0, 0x0091910091009191, 0x00f7f700f700f7f7,
- 0x00b5b500b500b5b5, 0x00c9c900c900c9c9, 0x00a2a200a200a2a2,
- 0x008c8c008c008c8c, 0x00d2d200d200d2d2, 0x0090900090009090,
- 0x00f6f600f600f6f6, 0x0007070007000707, 0x00a7a700a700a7a7,
- 0x0027270027002727, 0x008e8e008e008e8e, 0x00b2b200b200b2b2,
- 0x0049490049004949, 0x00dede00de00dede, 0x0043430043004343,
- 0x005c5c005c005c5c, 0x00d7d700d700d7d7, 0x00c7c700c700c7c7,
- 0x003e3e003e003e3e, 0x00f5f500f500f5f5, 0x008f8f008f008f8f,
- 0x0067670067006767, 0x001f1f001f001f1f, 0x0018180018001818,
- 0x006e6e006e006e6e, 0x00afaf00af00afaf, 0x002f2f002f002f2f,
- 0x00e2e200e200e2e2, 0x0085850085008585, 0x000d0d000d000d0d,
- 0x0053530053005353, 0x00f0f000f000f0f0, 0x009c9c009c009c9c,
- 0x0065650065006565, 0x00eaea00ea00eaea, 0x00a3a300a300a3a3,
- 0x00aeae00ae00aeae, 0x009e9e009e009e9e, 0x00ecec00ec00ecec,
- 0x0080800080008080, 0x002d2d002d002d2d, 0x006b6b006b006b6b,
- 0x00a8a800a800a8a8, 0x002b2b002b002b2b, 0x0036360036003636,
- 0x00a6a600a600a6a6, 0x00c5c500c500c5c5, 0x0086860086008686,
- 0x004d4d004d004d4d, 0x0033330033003333, 0x00fdfd00fd00fdfd,
- 0x0066660066006666, 0x0058580058005858, 0x0096960096009696,
- 0x003a3a003a003a3a, 0x0009090009000909, 0x0095950095009595,
- 0x0010100010001010, 0x0078780078007878, 0x00d8d800d800d8d8,
- 0x0042420042004242, 0x00cccc00cc00cccc, 0x00efef00ef00efef,
- 0x0026260026002626, 0x00e5e500e500e5e5, 0x0061610061006161,
- 0x001a1a001a001a1a, 0x003f3f003f003f3f, 0x003b3b003b003b3b,
- 0x0082820082008282, 0x00b6b600b600b6b6, 0x00dbdb00db00dbdb,
- 0x00d4d400d400d4d4, 0x0098980098009898, 0x00e8e800e800e8e8,
- 0x008b8b008b008b8b, 0x0002020002000202, 0x00ebeb00eb00ebeb,
- 0x000a0a000a000a0a, 0x002c2c002c002c2c, 0x001d1d001d001d1d,
- 0x00b0b000b000b0b0, 0x006f6f006f006f6f, 0x008d8d008d008d8d,
- 0x0088880088008888, 0x000e0e000e000e0e, 0x0019190019001919,
- 0x0087870087008787, 0x004e4e004e004e4e, 0x000b0b000b000b0b,
- 0x00a9a900a900a9a9, 0x000c0c000c000c0c, 0x0079790079007979,
- 0x0011110011001111, 0x007f7f007f007f7f, 0x0022220022002222,
- 0x00e7e700e700e7e7, 0x0059590059005959, 0x00e1e100e100e1e1,
- 0x00dada00da00dada, 0x003d3d003d003d3d, 0x00c8c800c800c8c8,
- 0x0012120012001212, 0x0004040004000404, 0x0074740074007474,
- 0x0054540054005454, 0x0030300030003030, 0x007e7e007e007e7e,
- 0x00b4b400b400b4b4, 0x0028280028002828, 0x0055550055005555,
- 0x0068680068006868, 0x0050500050005050, 0x00bebe00be00bebe,
- 0x00d0d000d000d0d0, 0x00c4c400c400c4c4, 0x0031310031003131,
- 0x00cbcb00cb00cbcb, 0x002a2a002a002a2a, 0x00adad00ad00adad,
- 0x000f0f000f000f0f, 0x00caca00ca00caca, 0x0070700070007070,
- 0x00ffff00ff00ffff, 0x0032320032003232, 0x0069690069006969,
- 0x0008080008000808, 0x0062620062006262, 0x0000000000000000,
- 0x0024240024002424, 0x00d1d100d100d1d1, 0x00fbfb00fb00fbfb,
- 0x00baba00ba00baba, 0x00eded00ed00eded, 0x0045450045004545,
- 0x0081810081008181, 0x0073730073007373, 0x006d6d006d006d6d,
- 0x0084840084008484, 0x009f9f009f009f9f, 0x00eeee00ee00eeee,
- 0x004a4a004a004a4a, 0x00c3c300c300c3c3, 0x002e2e002e002e2e,
- 0x00c1c100c100c1c1, 0x0001010001000101, 0x00e6e600e600e6e6,
- 0x0025250025002525, 0x0048480048004848, 0x0099990099009999,
- 0x00b9b900b900b9b9, 0x00b3b300b300b3b3, 0x007b7b007b007b7b,
- 0x00f9f900f900f9f9, 0x00cece00ce00cece, 0x00bfbf00bf00bfbf,
- 0x00dfdf00df00dfdf, 0x0071710071007171, 0x0029290029002929,
- 0x00cdcd00cd00cdcd, 0x006c6c006c006c6c, 0x0013130013001313,
- 0x0064640064006464, 0x009b9b009b009b9b, 0x0063630063006363,
- 0x009d9d009d009d9d, 0x00c0c000c000c0c0, 0x004b4b004b004b4b,
- 0x00b7b700b700b7b7, 0x00a5a500a500a5a5, 0x0089890089008989,
- 0x005f5f005f005f5f, 0x00b1b100b100b1b1, 0x0017170017001717,
- 0x00f4f400f400f4f4, 0x00bcbc00bc00bcbc, 0x00d3d300d300d3d3,
- 0x0046460046004646, 0x00cfcf00cf00cfcf, 0x0037370037003737,
- 0x005e5e005e005e5e, 0x0047470047004747, 0x0094940094009494,
- 0x00fafa00fa00fafa, 0x00fcfc00fc00fcfc, 0x005b5b005b005b5b,
- 0x0097970097009797, 0x00fefe00fe00fefe, 0x005a5a005a005a5a,
- 0x00acac00ac00acac, 0x003c3c003c003c3c, 0x004c4c004c004c4c,
- 0x0003030003000303, 0x0035350035003535, 0x00f3f300f300f3f3,
- 0x0023230023002323, 0x00b8b800b800b8b8, 0x005d5d005d005d5d,
- 0x006a6a006a006a6a, 0x0092920092009292, 0x00d5d500d500d5d5,
- 0x0021210021002121, 0x0044440044004444, 0x0051510051005151,
- 0x00c6c600c600c6c6, 0x007d7d007d007d7d, 0x0039390039003939,
- 0x0083830083008383, 0x00dcdc00dc00dcdc, 0x00aaaa00aa00aaaa,
- 0x007c7c007c007c7c, 0x0077770077007777, 0x0056560056005656,
- 0x0005050005000505, 0x001b1b001b001b1b, 0x00a4a400a400a4a4,
- 0x0015150015001515, 0x0034340034003434, 0x001e1e001e001e1e,
- 0x001c1c001c001c1c, 0x00f8f800f800f8f8, 0x0052520052005252,
- 0x0020200020002020, 0x0014140014001414, 0x00e9e900e900e9e9,
- 0x00bdbd00bd00bdbd, 0x00dddd00dd00dddd, 0x00e4e400e400e4e4,
- 0x00a1a100a100a1a1, 0x00e0e000e000e0e0, 0x008a8a008a008a8a,
- 0x00f1f100f100f1f1, 0x00d6d600d600d6d6, 0x007a7a007a007a7a,
- 0x00bbbb00bb00bbbb, 0x00e3e300e300e3e3, 0x0040400040004040,
- 0x004f4f004f004f4f,
-};
-
-const u64 camellia_sp00444404[256] = {
- 0x0000707070700070, 0x00002c2c2c2c002c, 0x0000b3b3b3b300b3,
- 0x0000c0c0c0c000c0, 0x0000e4e4e4e400e4, 0x0000575757570057,
- 0x0000eaeaeaea00ea, 0x0000aeaeaeae00ae, 0x0000232323230023,
- 0x00006b6b6b6b006b, 0x0000454545450045, 0x0000a5a5a5a500a5,
- 0x0000edededed00ed, 0x00004f4f4f4f004f, 0x00001d1d1d1d001d,
- 0x0000929292920092, 0x0000868686860086, 0x0000afafafaf00af,
- 0x00007c7c7c7c007c, 0x00001f1f1f1f001f, 0x00003e3e3e3e003e,
- 0x0000dcdcdcdc00dc, 0x00005e5e5e5e005e, 0x00000b0b0b0b000b,
- 0x0000a6a6a6a600a6, 0x0000393939390039, 0x0000d5d5d5d500d5,
- 0x00005d5d5d5d005d, 0x0000d9d9d9d900d9, 0x00005a5a5a5a005a,
- 0x0000515151510051, 0x00006c6c6c6c006c, 0x00008b8b8b8b008b,
- 0x00009a9a9a9a009a, 0x0000fbfbfbfb00fb, 0x0000b0b0b0b000b0,
- 0x0000747474740074, 0x00002b2b2b2b002b, 0x0000f0f0f0f000f0,
- 0x0000848484840084, 0x0000dfdfdfdf00df, 0x0000cbcbcbcb00cb,
- 0x0000343434340034, 0x0000767676760076, 0x00006d6d6d6d006d,
- 0x0000a9a9a9a900a9, 0x0000d1d1d1d100d1, 0x0000040404040004,
- 0x0000141414140014, 0x00003a3a3a3a003a, 0x0000dededede00de,
- 0x0000111111110011, 0x0000323232320032, 0x00009c9c9c9c009c,
- 0x0000535353530053, 0x0000f2f2f2f200f2, 0x0000fefefefe00fe,
- 0x0000cfcfcfcf00cf, 0x0000c3c3c3c300c3, 0x00007a7a7a7a007a,
- 0x0000242424240024, 0x0000e8e8e8e800e8, 0x0000606060600060,
- 0x0000696969690069, 0x0000aaaaaaaa00aa, 0x0000a0a0a0a000a0,
- 0x0000a1a1a1a100a1, 0x0000626262620062, 0x0000545454540054,
- 0x00001e1e1e1e001e, 0x0000e0e0e0e000e0, 0x0000646464640064,
- 0x0000101010100010, 0x0000000000000000, 0x0000a3a3a3a300a3,
- 0x0000757575750075, 0x00008a8a8a8a008a, 0x0000e6e6e6e600e6,
- 0x0000090909090009, 0x0000dddddddd00dd, 0x0000878787870087,
- 0x0000838383830083, 0x0000cdcdcdcd00cd, 0x0000909090900090,
- 0x0000737373730073, 0x0000f6f6f6f600f6, 0x00009d9d9d9d009d,
- 0x0000bfbfbfbf00bf, 0x0000525252520052, 0x0000d8d8d8d800d8,
- 0x0000c8c8c8c800c8, 0x0000c6c6c6c600c6, 0x0000818181810081,
- 0x00006f6f6f6f006f, 0x0000131313130013, 0x0000636363630063,
- 0x0000e9e9e9e900e9, 0x0000a7a7a7a700a7, 0x00009f9f9f9f009f,
- 0x0000bcbcbcbc00bc, 0x0000292929290029, 0x0000f9f9f9f900f9,
- 0x00002f2f2f2f002f, 0x0000b4b4b4b400b4, 0x0000787878780078,
- 0x0000060606060006, 0x0000e7e7e7e700e7, 0x0000717171710071,
- 0x0000d4d4d4d400d4, 0x0000abababab00ab, 0x0000888888880088,
- 0x00008d8d8d8d008d, 0x0000727272720072, 0x0000b9b9b9b900b9,
- 0x0000f8f8f8f800f8, 0x0000acacacac00ac, 0x0000363636360036,
- 0x00002a2a2a2a002a, 0x00003c3c3c3c003c, 0x0000f1f1f1f100f1,
- 0x0000404040400040, 0x0000d3d3d3d300d3, 0x0000bbbbbbbb00bb,
- 0x0000434343430043, 0x0000151515150015, 0x0000adadadad00ad,
- 0x0000777777770077, 0x0000808080800080, 0x0000828282820082,
- 0x0000ecececec00ec, 0x0000272727270027, 0x0000e5e5e5e500e5,
- 0x0000858585850085, 0x0000353535350035, 0x00000c0c0c0c000c,
- 0x0000414141410041, 0x0000efefefef00ef, 0x0000939393930093,
- 0x0000191919190019, 0x0000212121210021, 0x00000e0e0e0e000e,
- 0x00004e4e4e4e004e, 0x0000656565650065, 0x0000bdbdbdbd00bd,
- 0x0000b8b8b8b800b8, 0x00008f8f8f8f008f, 0x0000ebebebeb00eb,
- 0x0000cececece00ce, 0x0000303030300030, 0x00005f5f5f5f005f,
- 0x0000c5c5c5c500c5, 0x00001a1a1a1a001a, 0x0000e1e1e1e100e1,
- 0x0000cacacaca00ca, 0x0000474747470047, 0x00003d3d3d3d003d,
- 0x0000010101010001, 0x0000d6d6d6d600d6, 0x0000565656560056,
- 0x00004d4d4d4d004d, 0x00000d0d0d0d000d, 0x0000666666660066,
- 0x0000cccccccc00cc, 0x00002d2d2d2d002d, 0x0000121212120012,
- 0x0000202020200020, 0x0000b1b1b1b100b1, 0x0000999999990099,
- 0x00004c4c4c4c004c, 0x0000c2c2c2c200c2, 0x00007e7e7e7e007e,
- 0x0000050505050005, 0x0000b7b7b7b700b7, 0x0000313131310031,
- 0x0000171717170017, 0x0000d7d7d7d700d7, 0x0000585858580058,
- 0x0000616161610061, 0x00001b1b1b1b001b, 0x00001c1c1c1c001c,
- 0x00000f0f0f0f000f, 0x0000161616160016, 0x0000181818180018,
- 0x0000222222220022, 0x0000444444440044, 0x0000b2b2b2b200b2,
- 0x0000b5b5b5b500b5, 0x0000919191910091, 0x0000080808080008,
- 0x0000a8a8a8a800a8, 0x0000fcfcfcfc00fc, 0x0000505050500050,
- 0x0000d0d0d0d000d0, 0x00007d7d7d7d007d, 0x0000898989890089,
- 0x0000979797970097, 0x00005b5b5b5b005b, 0x0000959595950095,
- 0x0000ffffffff00ff, 0x0000d2d2d2d200d2, 0x0000c4c4c4c400c4,
- 0x0000484848480048, 0x0000f7f7f7f700f7, 0x0000dbdbdbdb00db,
- 0x0000030303030003, 0x0000dadadada00da, 0x00003f3f3f3f003f,
- 0x0000949494940094, 0x00005c5c5c5c005c, 0x0000020202020002,
- 0x00004a4a4a4a004a, 0x0000333333330033, 0x0000676767670067,
- 0x0000f3f3f3f300f3, 0x00007f7f7f7f007f, 0x0000e2e2e2e200e2,
- 0x00009b9b9b9b009b, 0x0000262626260026, 0x0000373737370037,
- 0x00003b3b3b3b003b, 0x0000969696960096, 0x00004b4b4b4b004b,
- 0x0000bebebebe00be, 0x00002e2e2e2e002e, 0x0000797979790079,
- 0x00008c8c8c8c008c, 0x00006e6e6e6e006e, 0x00008e8e8e8e008e,
- 0x0000f5f5f5f500f5, 0x0000b6b6b6b600b6, 0x0000fdfdfdfd00fd,
- 0x0000595959590059, 0x0000989898980098, 0x00006a6a6a6a006a,
- 0x0000464646460046, 0x0000babababa00ba, 0x0000252525250025,
- 0x0000424242420042, 0x0000a2a2a2a200a2, 0x0000fafafafa00fa,
- 0x0000070707070007, 0x0000555555550055, 0x0000eeeeeeee00ee,
- 0x00000a0a0a0a000a, 0x0000494949490049, 0x0000686868680068,
- 0x0000383838380038, 0x0000a4a4a4a400a4, 0x0000282828280028,
- 0x00007b7b7b7b007b, 0x0000c9c9c9c900c9, 0x0000c1c1c1c100c1,
- 0x0000e3e3e3e300e3, 0x0000f4f4f4f400f4, 0x0000c7c7c7c700c7,
- 0x00009e9e9e9e009e,
-};
-
-const u64 camellia_sp02220222[256] = {
- 0x00e0e0e000e0e0e0, 0x0005050500050505, 0x0058585800585858,
- 0x00d9d9d900d9d9d9, 0x0067676700676767, 0x004e4e4e004e4e4e,
- 0x0081818100818181, 0x00cbcbcb00cbcbcb, 0x00c9c9c900c9c9c9,
- 0x000b0b0b000b0b0b, 0x00aeaeae00aeaeae, 0x006a6a6a006a6a6a,
- 0x00d5d5d500d5d5d5, 0x0018181800181818, 0x005d5d5d005d5d5d,
- 0x0082828200828282, 0x0046464600464646, 0x00dfdfdf00dfdfdf,
- 0x00d6d6d600d6d6d6, 0x0027272700272727, 0x008a8a8a008a8a8a,
- 0x0032323200323232, 0x004b4b4b004b4b4b, 0x0042424200424242,
- 0x00dbdbdb00dbdbdb, 0x001c1c1c001c1c1c, 0x009e9e9e009e9e9e,
- 0x009c9c9c009c9c9c, 0x003a3a3a003a3a3a, 0x00cacaca00cacaca,
- 0x0025252500252525, 0x007b7b7b007b7b7b, 0x000d0d0d000d0d0d,
- 0x0071717100717171, 0x005f5f5f005f5f5f, 0x001f1f1f001f1f1f,
- 0x00f8f8f800f8f8f8, 0x00d7d7d700d7d7d7, 0x003e3e3e003e3e3e,
- 0x009d9d9d009d9d9d, 0x007c7c7c007c7c7c, 0x0060606000606060,
- 0x00b9b9b900b9b9b9, 0x00bebebe00bebebe, 0x00bcbcbc00bcbcbc,
- 0x008b8b8b008b8b8b, 0x0016161600161616, 0x0034343400343434,
- 0x004d4d4d004d4d4d, 0x00c3c3c300c3c3c3, 0x0072727200727272,
- 0x0095959500959595, 0x00ababab00ababab, 0x008e8e8e008e8e8e,
- 0x00bababa00bababa, 0x007a7a7a007a7a7a, 0x00b3b3b300b3b3b3,
- 0x0002020200020202, 0x00b4b4b400b4b4b4, 0x00adadad00adadad,
- 0x00a2a2a200a2a2a2, 0x00acacac00acacac, 0x00d8d8d800d8d8d8,
- 0x009a9a9a009a9a9a, 0x0017171700171717, 0x001a1a1a001a1a1a,
- 0x0035353500353535, 0x00cccccc00cccccc, 0x00f7f7f700f7f7f7,
- 0x0099999900999999, 0x0061616100616161, 0x005a5a5a005a5a5a,
- 0x00e8e8e800e8e8e8, 0x0024242400242424, 0x0056565600565656,
- 0x0040404000404040, 0x00e1e1e100e1e1e1, 0x0063636300636363,
- 0x0009090900090909, 0x0033333300333333, 0x00bfbfbf00bfbfbf,
- 0x0098989800989898, 0x0097979700979797, 0x0085858500858585,
- 0x0068686800686868, 0x00fcfcfc00fcfcfc, 0x00ececec00ececec,
- 0x000a0a0a000a0a0a, 0x00dadada00dadada, 0x006f6f6f006f6f6f,
- 0x0053535300535353, 0x0062626200626262, 0x00a3a3a300a3a3a3,
- 0x002e2e2e002e2e2e, 0x0008080800080808, 0x00afafaf00afafaf,
- 0x0028282800282828, 0x00b0b0b000b0b0b0, 0x0074747400747474,
- 0x00c2c2c200c2c2c2, 0x00bdbdbd00bdbdbd, 0x0036363600363636,
- 0x0022222200222222, 0x0038383800383838, 0x0064646400646464,
- 0x001e1e1e001e1e1e, 0x0039393900393939, 0x002c2c2c002c2c2c,
- 0x00a6a6a600a6a6a6, 0x0030303000303030, 0x00e5e5e500e5e5e5,
- 0x0044444400444444, 0x00fdfdfd00fdfdfd, 0x0088888800888888,
- 0x009f9f9f009f9f9f, 0x0065656500656565, 0x0087878700878787,
- 0x006b6b6b006b6b6b, 0x00f4f4f400f4f4f4, 0x0023232300232323,
- 0x0048484800484848, 0x0010101000101010, 0x00d1d1d100d1d1d1,
- 0x0051515100515151, 0x00c0c0c000c0c0c0, 0x00f9f9f900f9f9f9,
- 0x00d2d2d200d2d2d2, 0x00a0a0a000a0a0a0, 0x0055555500555555,
- 0x00a1a1a100a1a1a1, 0x0041414100414141, 0x00fafafa00fafafa,
- 0x0043434300434343, 0x0013131300131313, 0x00c4c4c400c4c4c4,
- 0x002f2f2f002f2f2f, 0x00a8a8a800a8a8a8, 0x00b6b6b600b6b6b6,
- 0x003c3c3c003c3c3c, 0x002b2b2b002b2b2b, 0x00c1c1c100c1c1c1,
- 0x00ffffff00ffffff, 0x00c8c8c800c8c8c8, 0x00a5a5a500a5a5a5,
- 0x0020202000202020, 0x0089898900898989, 0x0000000000000000,
- 0x0090909000909090, 0x0047474700474747, 0x00efefef00efefef,
- 0x00eaeaea00eaeaea, 0x00b7b7b700b7b7b7, 0x0015151500151515,
- 0x0006060600060606, 0x00cdcdcd00cdcdcd, 0x00b5b5b500b5b5b5,
- 0x0012121200121212, 0x007e7e7e007e7e7e, 0x00bbbbbb00bbbbbb,
- 0x0029292900292929, 0x000f0f0f000f0f0f, 0x00b8b8b800b8b8b8,
- 0x0007070700070707, 0x0004040400040404, 0x009b9b9b009b9b9b,
- 0x0094949400949494, 0x0021212100212121, 0x0066666600666666,
- 0x00e6e6e600e6e6e6, 0x00cecece00cecece, 0x00ededed00ededed,
- 0x00e7e7e700e7e7e7, 0x003b3b3b003b3b3b, 0x00fefefe00fefefe,
- 0x007f7f7f007f7f7f, 0x00c5c5c500c5c5c5, 0x00a4a4a400a4a4a4,
- 0x0037373700373737, 0x00b1b1b100b1b1b1, 0x004c4c4c004c4c4c,
- 0x0091919100919191, 0x006e6e6e006e6e6e, 0x008d8d8d008d8d8d,
- 0x0076767600767676, 0x0003030300030303, 0x002d2d2d002d2d2d,
- 0x00dedede00dedede, 0x0096969600969696, 0x0026262600262626,
- 0x007d7d7d007d7d7d, 0x00c6c6c600c6c6c6, 0x005c5c5c005c5c5c,
- 0x00d3d3d300d3d3d3, 0x00f2f2f200f2f2f2, 0x004f4f4f004f4f4f,
- 0x0019191900191919, 0x003f3f3f003f3f3f, 0x00dcdcdc00dcdcdc,
- 0x0079797900797979, 0x001d1d1d001d1d1d, 0x0052525200525252,
- 0x00ebebeb00ebebeb, 0x00f3f3f300f3f3f3, 0x006d6d6d006d6d6d,
- 0x005e5e5e005e5e5e, 0x00fbfbfb00fbfbfb, 0x0069696900696969,
- 0x00b2b2b200b2b2b2, 0x00f0f0f000f0f0f0, 0x0031313100313131,
- 0x000c0c0c000c0c0c, 0x00d4d4d400d4d4d4, 0x00cfcfcf00cfcfcf,
- 0x008c8c8c008c8c8c, 0x00e2e2e200e2e2e2, 0x0075757500757575,
- 0x00a9a9a900a9a9a9, 0x004a4a4a004a4a4a, 0x0057575700575757,
- 0x0084848400848484, 0x0011111100111111, 0x0045454500454545,
- 0x001b1b1b001b1b1b, 0x00f5f5f500f5f5f5, 0x00e4e4e400e4e4e4,
- 0x000e0e0e000e0e0e, 0x0073737300737373, 0x00aaaaaa00aaaaaa,
- 0x00f1f1f100f1f1f1, 0x00dddddd00dddddd, 0x0059595900595959,
- 0x0014141400141414, 0x006c6c6c006c6c6c, 0x0092929200929292,
- 0x0054545400545454, 0x00d0d0d000d0d0d0, 0x0078787800787878,
- 0x0070707000707070, 0x00e3e3e300e3e3e3, 0x0049494900494949,
- 0x0080808000808080, 0x0050505000505050, 0x00a7a7a700a7a7a7,
- 0x00f6f6f600f6f6f6, 0x0077777700777777, 0x0093939300939393,
- 0x0086868600868686, 0x0083838300838383, 0x002a2a2a002a2a2a,
- 0x00c7c7c700c7c7c7, 0x005b5b5b005b5b5b, 0x00e9e9e900e9e9e9,
- 0x00eeeeee00eeeeee, 0x008f8f8f008f8f8f, 0x0001010100010101,
- 0x003d3d3d003d3d3d,
-};
-
-const u64 camellia_sp30333033[256] = {
- 0x3800383838003838, 0x4100414141004141, 0x1600161616001616,
- 0x7600767676007676, 0xd900d9d9d900d9d9, 0x9300939393009393,
- 0x6000606060006060, 0xf200f2f2f200f2f2, 0x7200727272007272,
- 0xc200c2c2c200c2c2, 0xab00ababab00abab, 0x9a009a9a9a009a9a,
- 0x7500757575007575, 0x0600060606000606, 0x5700575757005757,
- 0xa000a0a0a000a0a0, 0x9100919191009191, 0xf700f7f7f700f7f7,
- 0xb500b5b5b500b5b5, 0xc900c9c9c900c9c9, 0xa200a2a2a200a2a2,
- 0x8c008c8c8c008c8c, 0xd200d2d2d200d2d2, 0x9000909090009090,
- 0xf600f6f6f600f6f6, 0x0700070707000707, 0xa700a7a7a700a7a7,
- 0x2700272727002727, 0x8e008e8e8e008e8e, 0xb200b2b2b200b2b2,
- 0x4900494949004949, 0xde00dedede00dede, 0x4300434343004343,
- 0x5c005c5c5c005c5c, 0xd700d7d7d700d7d7, 0xc700c7c7c700c7c7,
- 0x3e003e3e3e003e3e, 0xf500f5f5f500f5f5, 0x8f008f8f8f008f8f,
- 0x6700676767006767, 0x1f001f1f1f001f1f, 0x1800181818001818,
- 0x6e006e6e6e006e6e, 0xaf00afafaf00afaf, 0x2f002f2f2f002f2f,
- 0xe200e2e2e200e2e2, 0x8500858585008585, 0x0d000d0d0d000d0d,
- 0x5300535353005353, 0xf000f0f0f000f0f0, 0x9c009c9c9c009c9c,
- 0x6500656565006565, 0xea00eaeaea00eaea, 0xa300a3a3a300a3a3,
- 0xae00aeaeae00aeae, 0x9e009e9e9e009e9e, 0xec00ececec00ecec,
- 0x8000808080008080, 0x2d002d2d2d002d2d, 0x6b006b6b6b006b6b,
- 0xa800a8a8a800a8a8, 0x2b002b2b2b002b2b, 0x3600363636003636,
- 0xa600a6a6a600a6a6, 0xc500c5c5c500c5c5, 0x8600868686008686,
- 0x4d004d4d4d004d4d, 0x3300333333003333, 0xfd00fdfdfd00fdfd,
- 0x6600666666006666, 0x5800585858005858, 0x9600969696009696,
- 0x3a003a3a3a003a3a, 0x0900090909000909, 0x9500959595009595,
- 0x1000101010001010, 0x7800787878007878, 0xd800d8d8d800d8d8,
- 0x4200424242004242, 0xcc00cccccc00cccc, 0xef00efefef00efef,
- 0x2600262626002626, 0xe500e5e5e500e5e5, 0x6100616161006161,
- 0x1a001a1a1a001a1a, 0x3f003f3f3f003f3f, 0x3b003b3b3b003b3b,
- 0x8200828282008282, 0xb600b6b6b600b6b6, 0xdb00dbdbdb00dbdb,
- 0xd400d4d4d400d4d4, 0x9800989898009898, 0xe800e8e8e800e8e8,
- 0x8b008b8b8b008b8b, 0x0200020202000202, 0xeb00ebebeb00ebeb,
- 0x0a000a0a0a000a0a, 0x2c002c2c2c002c2c, 0x1d001d1d1d001d1d,
- 0xb000b0b0b000b0b0, 0x6f006f6f6f006f6f, 0x8d008d8d8d008d8d,
- 0x8800888888008888, 0x0e000e0e0e000e0e, 0x1900191919001919,
- 0x8700878787008787, 0x4e004e4e4e004e4e, 0x0b000b0b0b000b0b,
- 0xa900a9a9a900a9a9, 0x0c000c0c0c000c0c, 0x7900797979007979,
- 0x1100111111001111, 0x7f007f7f7f007f7f, 0x2200222222002222,
- 0xe700e7e7e700e7e7, 0x5900595959005959, 0xe100e1e1e100e1e1,
- 0xda00dadada00dada, 0x3d003d3d3d003d3d, 0xc800c8c8c800c8c8,
- 0x1200121212001212, 0x0400040404000404, 0x7400747474007474,
- 0x5400545454005454, 0x3000303030003030, 0x7e007e7e7e007e7e,
- 0xb400b4b4b400b4b4, 0x2800282828002828, 0x5500555555005555,
- 0x6800686868006868, 0x5000505050005050, 0xbe00bebebe00bebe,
- 0xd000d0d0d000d0d0, 0xc400c4c4c400c4c4, 0x3100313131003131,
- 0xcb00cbcbcb00cbcb, 0x2a002a2a2a002a2a, 0xad00adadad00adad,
- 0x0f000f0f0f000f0f, 0xca00cacaca00caca, 0x7000707070007070,
- 0xff00ffffff00ffff, 0x3200323232003232, 0x6900696969006969,
- 0x0800080808000808, 0x6200626262006262, 0x0000000000000000,
- 0x2400242424002424, 0xd100d1d1d100d1d1, 0xfb00fbfbfb00fbfb,
- 0xba00bababa00baba, 0xed00ededed00eded, 0x4500454545004545,
- 0x8100818181008181, 0x7300737373007373, 0x6d006d6d6d006d6d,
- 0x8400848484008484, 0x9f009f9f9f009f9f, 0xee00eeeeee00eeee,
- 0x4a004a4a4a004a4a, 0xc300c3c3c300c3c3, 0x2e002e2e2e002e2e,
- 0xc100c1c1c100c1c1, 0x0100010101000101, 0xe600e6e6e600e6e6,
- 0x2500252525002525, 0x4800484848004848, 0x9900999999009999,
- 0xb900b9b9b900b9b9, 0xb300b3b3b300b3b3, 0x7b007b7b7b007b7b,
- 0xf900f9f9f900f9f9, 0xce00cecece00cece, 0xbf00bfbfbf00bfbf,
- 0xdf00dfdfdf00dfdf, 0x7100717171007171, 0x2900292929002929,
- 0xcd00cdcdcd00cdcd, 0x6c006c6c6c006c6c, 0x1300131313001313,
- 0x6400646464006464, 0x9b009b9b9b009b9b, 0x6300636363006363,
- 0x9d009d9d9d009d9d, 0xc000c0c0c000c0c0, 0x4b004b4b4b004b4b,
- 0xb700b7b7b700b7b7, 0xa500a5a5a500a5a5, 0x8900898989008989,
- 0x5f005f5f5f005f5f, 0xb100b1b1b100b1b1, 0x1700171717001717,
- 0xf400f4f4f400f4f4, 0xbc00bcbcbc00bcbc, 0xd300d3d3d300d3d3,
- 0x4600464646004646, 0xcf00cfcfcf00cfcf, 0x3700373737003737,
- 0x5e005e5e5e005e5e, 0x4700474747004747, 0x9400949494009494,
- 0xfa00fafafa00fafa, 0xfc00fcfcfc00fcfc, 0x5b005b5b5b005b5b,
- 0x9700979797009797, 0xfe00fefefe00fefe, 0x5a005a5a5a005a5a,
- 0xac00acacac00acac, 0x3c003c3c3c003c3c, 0x4c004c4c4c004c4c,
- 0x0300030303000303, 0x3500353535003535, 0xf300f3f3f300f3f3,
- 0x2300232323002323, 0xb800b8b8b800b8b8, 0x5d005d5d5d005d5d,
- 0x6a006a6a6a006a6a, 0x9200929292009292, 0xd500d5d5d500d5d5,
- 0x2100212121002121, 0x4400444444004444, 0x5100515151005151,
- 0xc600c6c6c600c6c6, 0x7d007d7d7d007d7d, 0x3900393939003939,
- 0x8300838383008383, 0xdc00dcdcdc00dcdc, 0xaa00aaaaaa00aaaa,
- 0x7c007c7c7c007c7c, 0x7700777777007777, 0x5600565656005656,
- 0x0500050505000505, 0x1b001b1b1b001b1b, 0xa400a4a4a400a4a4,
- 0x1500151515001515, 0x3400343434003434, 0x1e001e1e1e001e1e,
- 0x1c001c1c1c001c1c, 0xf800f8f8f800f8f8, 0x5200525252005252,
- 0x2000202020002020, 0x1400141414001414, 0xe900e9e9e900e9e9,
- 0xbd00bdbdbd00bdbd, 0xdd00dddddd00dddd, 0xe400e4e4e400e4e4,
- 0xa100a1a1a100a1a1, 0xe000e0e0e000e0e0, 0x8a008a8a8a008a8a,
- 0xf100f1f1f100f1f1, 0xd600d6d6d600d6d6, 0x7a007a7a7a007a7a,
- 0xbb00bbbbbb00bbbb, 0xe300e3e3e300e3e3, 0x4000404040004040,
- 0x4f004f4f4f004f4f,
-};
-
-const u64 camellia_sp44044404[256] = {
- 0x7070007070700070, 0x2c2c002c2c2c002c, 0xb3b300b3b3b300b3,
- 0xc0c000c0c0c000c0, 0xe4e400e4e4e400e4, 0x5757005757570057,
- 0xeaea00eaeaea00ea, 0xaeae00aeaeae00ae, 0x2323002323230023,
- 0x6b6b006b6b6b006b, 0x4545004545450045, 0xa5a500a5a5a500a5,
- 0xeded00ededed00ed, 0x4f4f004f4f4f004f, 0x1d1d001d1d1d001d,
- 0x9292009292920092, 0x8686008686860086, 0xafaf00afafaf00af,
- 0x7c7c007c7c7c007c, 0x1f1f001f1f1f001f, 0x3e3e003e3e3e003e,
- 0xdcdc00dcdcdc00dc, 0x5e5e005e5e5e005e, 0x0b0b000b0b0b000b,
- 0xa6a600a6a6a600a6, 0x3939003939390039, 0xd5d500d5d5d500d5,
- 0x5d5d005d5d5d005d, 0xd9d900d9d9d900d9, 0x5a5a005a5a5a005a,
- 0x5151005151510051, 0x6c6c006c6c6c006c, 0x8b8b008b8b8b008b,
- 0x9a9a009a9a9a009a, 0xfbfb00fbfbfb00fb, 0xb0b000b0b0b000b0,
- 0x7474007474740074, 0x2b2b002b2b2b002b, 0xf0f000f0f0f000f0,
- 0x8484008484840084, 0xdfdf00dfdfdf00df, 0xcbcb00cbcbcb00cb,
- 0x3434003434340034, 0x7676007676760076, 0x6d6d006d6d6d006d,
- 0xa9a900a9a9a900a9, 0xd1d100d1d1d100d1, 0x0404000404040004,
- 0x1414001414140014, 0x3a3a003a3a3a003a, 0xdede00dedede00de,
- 0x1111001111110011, 0x3232003232320032, 0x9c9c009c9c9c009c,
- 0x5353005353530053, 0xf2f200f2f2f200f2, 0xfefe00fefefe00fe,
- 0xcfcf00cfcfcf00cf, 0xc3c300c3c3c300c3, 0x7a7a007a7a7a007a,
- 0x2424002424240024, 0xe8e800e8e8e800e8, 0x6060006060600060,
- 0x6969006969690069, 0xaaaa00aaaaaa00aa, 0xa0a000a0a0a000a0,
- 0xa1a100a1a1a100a1, 0x6262006262620062, 0x5454005454540054,
- 0x1e1e001e1e1e001e, 0xe0e000e0e0e000e0, 0x6464006464640064,
- 0x1010001010100010, 0x0000000000000000, 0xa3a300a3a3a300a3,
- 0x7575007575750075, 0x8a8a008a8a8a008a, 0xe6e600e6e6e600e6,
- 0x0909000909090009, 0xdddd00dddddd00dd, 0x8787008787870087,
- 0x8383008383830083, 0xcdcd00cdcdcd00cd, 0x9090009090900090,
- 0x7373007373730073, 0xf6f600f6f6f600f6, 0x9d9d009d9d9d009d,
- 0xbfbf00bfbfbf00bf, 0x5252005252520052, 0xd8d800d8d8d800d8,
- 0xc8c800c8c8c800c8, 0xc6c600c6c6c600c6, 0x8181008181810081,
- 0x6f6f006f6f6f006f, 0x1313001313130013, 0x6363006363630063,
- 0xe9e900e9e9e900e9, 0xa7a700a7a7a700a7, 0x9f9f009f9f9f009f,
- 0xbcbc00bcbcbc00bc, 0x2929002929290029, 0xf9f900f9f9f900f9,
- 0x2f2f002f2f2f002f, 0xb4b400b4b4b400b4, 0x7878007878780078,
- 0x0606000606060006, 0xe7e700e7e7e700e7, 0x7171007171710071,
- 0xd4d400d4d4d400d4, 0xabab00ababab00ab, 0x8888008888880088,
- 0x8d8d008d8d8d008d, 0x7272007272720072, 0xb9b900b9b9b900b9,
- 0xf8f800f8f8f800f8, 0xacac00acacac00ac, 0x3636003636360036,
- 0x2a2a002a2a2a002a, 0x3c3c003c3c3c003c, 0xf1f100f1f1f100f1,
- 0x4040004040400040, 0xd3d300d3d3d300d3, 0xbbbb00bbbbbb00bb,
- 0x4343004343430043, 0x1515001515150015, 0xadad00adadad00ad,
- 0x7777007777770077, 0x8080008080800080, 0x8282008282820082,
- 0xecec00ececec00ec, 0x2727002727270027, 0xe5e500e5e5e500e5,
- 0x8585008585850085, 0x3535003535350035, 0x0c0c000c0c0c000c,
- 0x4141004141410041, 0xefef00efefef00ef, 0x9393009393930093,
- 0x1919001919190019, 0x2121002121210021, 0x0e0e000e0e0e000e,
- 0x4e4e004e4e4e004e, 0x6565006565650065, 0xbdbd00bdbdbd00bd,
- 0xb8b800b8b8b800b8, 0x8f8f008f8f8f008f, 0xebeb00ebebeb00eb,
- 0xcece00cecece00ce, 0x3030003030300030, 0x5f5f005f5f5f005f,
- 0xc5c500c5c5c500c5, 0x1a1a001a1a1a001a, 0xe1e100e1e1e100e1,
- 0xcaca00cacaca00ca, 0x4747004747470047, 0x3d3d003d3d3d003d,
- 0x0101000101010001, 0xd6d600d6d6d600d6, 0x5656005656560056,
- 0x4d4d004d4d4d004d, 0x0d0d000d0d0d000d, 0x6666006666660066,
- 0xcccc00cccccc00cc, 0x2d2d002d2d2d002d, 0x1212001212120012,
- 0x2020002020200020, 0xb1b100b1b1b100b1, 0x9999009999990099,
- 0x4c4c004c4c4c004c, 0xc2c200c2c2c200c2, 0x7e7e007e7e7e007e,
- 0x0505000505050005, 0xb7b700b7b7b700b7, 0x3131003131310031,
- 0x1717001717170017, 0xd7d700d7d7d700d7, 0x5858005858580058,
- 0x6161006161610061, 0x1b1b001b1b1b001b, 0x1c1c001c1c1c001c,
- 0x0f0f000f0f0f000f, 0x1616001616160016, 0x1818001818180018,
- 0x2222002222220022, 0x4444004444440044, 0xb2b200b2b2b200b2,
- 0xb5b500b5b5b500b5, 0x9191009191910091, 0x0808000808080008,
- 0xa8a800a8a8a800a8, 0xfcfc00fcfcfc00fc, 0x5050005050500050,
- 0xd0d000d0d0d000d0, 0x7d7d007d7d7d007d, 0x8989008989890089,
- 0x9797009797970097, 0x5b5b005b5b5b005b, 0x9595009595950095,
- 0xffff00ffffff00ff, 0xd2d200d2d2d200d2, 0xc4c400c4c4c400c4,
- 0x4848004848480048, 0xf7f700f7f7f700f7, 0xdbdb00dbdbdb00db,
- 0x0303000303030003, 0xdada00dadada00da, 0x3f3f003f3f3f003f,
- 0x9494009494940094, 0x5c5c005c5c5c005c, 0x0202000202020002,
- 0x4a4a004a4a4a004a, 0x3333003333330033, 0x6767006767670067,
- 0xf3f300f3f3f300f3, 0x7f7f007f7f7f007f, 0xe2e200e2e2e200e2,
- 0x9b9b009b9b9b009b, 0x2626002626260026, 0x3737003737370037,
- 0x3b3b003b3b3b003b, 0x9696009696960096, 0x4b4b004b4b4b004b,
- 0xbebe00bebebe00be, 0x2e2e002e2e2e002e, 0x7979007979790079,
- 0x8c8c008c8c8c008c, 0x6e6e006e6e6e006e, 0x8e8e008e8e8e008e,
- 0xf5f500f5f5f500f5, 0xb6b600b6b6b600b6, 0xfdfd00fdfdfd00fd,
- 0x5959005959590059, 0x9898009898980098, 0x6a6a006a6a6a006a,
- 0x4646004646460046, 0xbaba00bababa00ba, 0x2525002525250025,
- 0x4242004242420042, 0xa2a200a2a2a200a2, 0xfafa00fafafa00fa,
- 0x0707000707070007, 0x5555005555550055, 0xeeee00eeeeee00ee,
- 0x0a0a000a0a0a000a, 0x4949004949490049, 0x6868006868680068,
- 0x3838003838380038, 0xa4a400a4a4a400a4, 0x2828002828280028,
- 0x7b7b007b7b7b007b, 0xc9c900c9c9c900c9, 0xc1c100c1c1c100c1,
- 0xe3e300e3e3e300e3, 0xf4f400f4f4f400f4, 0xc7c700c7c7c700c7,
- 0x9e9e009e9e9e009e,
-};
-
-const u64 camellia_sp11101110[256] = {
- 0x7070700070707000, 0x8282820082828200, 0x2c2c2c002c2c2c00,
- 0xececec00ececec00, 0xb3b3b300b3b3b300, 0x2727270027272700,
- 0xc0c0c000c0c0c000, 0xe5e5e500e5e5e500, 0xe4e4e400e4e4e400,
- 0x8585850085858500, 0x5757570057575700, 0x3535350035353500,
- 0xeaeaea00eaeaea00, 0x0c0c0c000c0c0c00, 0xaeaeae00aeaeae00,
- 0x4141410041414100, 0x2323230023232300, 0xefefef00efefef00,
- 0x6b6b6b006b6b6b00, 0x9393930093939300, 0x4545450045454500,
- 0x1919190019191900, 0xa5a5a500a5a5a500, 0x2121210021212100,
- 0xededed00ededed00, 0x0e0e0e000e0e0e00, 0x4f4f4f004f4f4f00,
- 0x4e4e4e004e4e4e00, 0x1d1d1d001d1d1d00, 0x6565650065656500,
- 0x9292920092929200, 0xbdbdbd00bdbdbd00, 0x8686860086868600,
- 0xb8b8b800b8b8b800, 0xafafaf00afafaf00, 0x8f8f8f008f8f8f00,
- 0x7c7c7c007c7c7c00, 0xebebeb00ebebeb00, 0x1f1f1f001f1f1f00,
- 0xcecece00cecece00, 0x3e3e3e003e3e3e00, 0x3030300030303000,
- 0xdcdcdc00dcdcdc00, 0x5f5f5f005f5f5f00, 0x5e5e5e005e5e5e00,
- 0xc5c5c500c5c5c500, 0x0b0b0b000b0b0b00, 0x1a1a1a001a1a1a00,
- 0xa6a6a600a6a6a600, 0xe1e1e100e1e1e100, 0x3939390039393900,
- 0xcacaca00cacaca00, 0xd5d5d500d5d5d500, 0x4747470047474700,
- 0x5d5d5d005d5d5d00, 0x3d3d3d003d3d3d00, 0xd9d9d900d9d9d900,
- 0x0101010001010100, 0x5a5a5a005a5a5a00, 0xd6d6d600d6d6d600,
- 0x5151510051515100, 0x5656560056565600, 0x6c6c6c006c6c6c00,
- 0x4d4d4d004d4d4d00, 0x8b8b8b008b8b8b00, 0x0d0d0d000d0d0d00,
- 0x9a9a9a009a9a9a00, 0x6666660066666600, 0xfbfbfb00fbfbfb00,
- 0xcccccc00cccccc00, 0xb0b0b000b0b0b000, 0x2d2d2d002d2d2d00,
- 0x7474740074747400, 0x1212120012121200, 0x2b2b2b002b2b2b00,
- 0x2020200020202000, 0xf0f0f000f0f0f000, 0xb1b1b100b1b1b100,
- 0x8484840084848400, 0x9999990099999900, 0xdfdfdf00dfdfdf00,
- 0x4c4c4c004c4c4c00, 0xcbcbcb00cbcbcb00, 0xc2c2c200c2c2c200,
- 0x3434340034343400, 0x7e7e7e007e7e7e00, 0x7676760076767600,
- 0x0505050005050500, 0x6d6d6d006d6d6d00, 0xb7b7b700b7b7b700,
- 0xa9a9a900a9a9a900, 0x3131310031313100, 0xd1d1d100d1d1d100,
- 0x1717170017171700, 0x0404040004040400, 0xd7d7d700d7d7d700,
- 0x1414140014141400, 0x5858580058585800, 0x3a3a3a003a3a3a00,
- 0x6161610061616100, 0xdedede00dedede00, 0x1b1b1b001b1b1b00,
- 0x1111110011111100, 0x1c1c1c001c1c1c00, 0x3232320032323200,
- 0x0f0f0f000f0f0f00, 0x9c9c9c009c9c9c00, 0x1616160016161600,
- 0x5353530053535300, 0x1818180018181800, 0xf2f2f200f2f2f200,
- 0x2222220022222200, 0xfefefe00fefefe00, 0x4444440044444400,
- 0xcfcfcf00cfcfcf00, 0xb2b2b200b2b2b200, 0xc3c3c300c3c3c300,
- 0xb5b5b500b5b5b500, 0x7a7a7a007a7a7a00, 0x9191910091919100,
- 0x2424240024242400, 0x0808080008080800, 0xe8e8e800e8e8e800,
- 0xa8a8a800a8a8a800, 0x6060600060606000, 0xfcfcfc00fcfcfc00,
- 0x6969690069696900, 0x5050500050505000, 0xaaaaaa00aaaaaa00,
- 0xd0d0d000d0d0d000, 0xa0a0a000a0a0a000, 0x7d7d7d007d7d7d00,
- 0xa1a1a100a1a1a100, 0x8989890089898900, 0x6262620062626200,
- 0x9797970097979700, 0x5454540054545400, 0x5b5b5b005b5b5b00,
- 0x1e1e1e001e1e1e00, 0x9595950095959500, 0xe0e0e000e0e0e000,
- 0xffffff00ffffff00, 0x6464640064646400, 0xd2d2d200d2d2d200,
- 0x1010100010101000, 0xc4c4c400c4c4c400, 0x0000000000000000,
- 0x4848480048484800, 0xa3a3a300a3a3a300, 0xf7f7f700f7f7f700,
- 0x7575750075757500, 0xdbdbdb00dbdbdb00, 0x8a8a8a008a8a8a00,
- 0x0303030003030300, 0xe6e6e600e6e6e600, 0xdadada00dadada00,
- 0x0909090009090900, 0x3f3f3f003f3f3f00, 0xdddddd00dddddd00,
- 0x9494940094949400, 0x8787870087878700, 0x5c5c5c005c5c5c00,
- 0x8383830083838300, 0x0202020002020200, 0xcdcdcd00cdcdcd00,
- 0x4a4a4a004a4a4a00, 0x9090900090909000, 0x3333330033333300,
- 0x7373730073737300, 0x6767670067676700, 0xf6f6f600f6f6f600,
- 0xf3f3f300f3f3f300, 0x9d9d9d009d9d9d00, 0x7f7f7f007f7f7f00,
- 0xbfbfbf00bfbfbf00, 0xe2e2e200e2e2e200, 0x5252520052525200,
- 0x9b9b9b009b9b9b00, 0xd8d8d800d8d8d800, 0x2626260026262600,
- 0xc8c8c800c8c8c800, 0x3737370037373700, 0xc6c6c600c6c6c600,
- 0x3b3b3b003b3b3b00, 0x8181810081818100, 0x9696960096969600,
- 0x6f6f6f006f6f6f00, 0x4b4b4b004b4b4b00, 0x1313130013131300,
- 0xbebebe00bebebe00, 0x6363630063636300, 0x2e2e2e002e2e2e00,
- 0xe9e9e900e9e9e900, 0x7979790079797900, 0xa7a7a700a7a7a700,
- 0x8c8c8c008c8c8c00, 0x9f9f9f009f9f9f00, 0x6e6e6e006e6e6e00,
- 0xbcbcbc00bcbcbc00, 0x8e8e8e008e8e8e00, 0x2929290029292900,
- 0xf5f5f500f5f5f500, 0xf9f9f900f9f9f900, 0xb6b6b600b6b6b600,
- 0x2f2f2f002f2f2f00, 0xfdfdfd00fdfdfd00, 0xb4b4b400b4b4b400,
- 0x5959590059595900, 0x7878780078787800, 0x9898980098989800,
- 0x0606060006060600, 0x6a6a6a006a6a6a00, 0xe7e7e700e7e7e700,
- 0x4646460046464600, 0x7171710071717100, 0xbababa00bababa00,
- 0xd4d4d400d4d4d400, 0x2525250025252500, 0xababab00ababab00,
- 0x4242420042424200, 0x8888880088888800, 0xa2a2a200a2a2a200,
- 0x8d8d8d008d8d8d00, 0xfafafa00fafafa00, 0x7272720072727200,
- 0x0707070007070700, 0xb9b9b900b9b9b900, 0x5555550055555500,
- 0xf8f8f800f8f8f800, 0xeeeeee00eeeeee00, 0xacacac00acacac00,
- 0x0a0a0a000a0a0a00, 0x3636360036363600, 0x4949490049494900,
- 0x2a2a2a002a2a2a00, 0x6868680068686800, 0x3c3c3c003c3c3c00,
- 0x3838380038383800, 0xf1f1f100f1f1f100, 0xa4a4a400a4a4a400,
- 0x4040400040404000, 0x2828280028282800, 0xd3d3d300d3d3d300,
- 0x7b7b7b007b7b7b00, 0xbbbbbb00bbbbbb00, 0xc9c9c900c9c9c900,
- 0x4343430043434300, 0xc1c1c100c1c1c100, 0x1515150015151500,
- 0xe3e3e300e3e3e300, 0xadadad00adadad00, 0xf4f4f400f4f4f400,
- 0x7777770077777700, 0xc7c7c700c7c7c700, 0x8080800080808000,
- 0x9e9e9e009e9e9e00,
-};
-
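Each camellia_sp* table above folds one Camellia s-box together with the byte-diffusion layer P: entry x holds the s-box output replicated into the byte lanes named by the table, so one 64-bit lookup per input byte performs S and P at once. Read the eight digits of a table name MSB-lane first: a digit 1-4 selects which s-box variant feeds that lane, 0 leaves the lane zero. A minimal generator sketch (not part of the original file), assuming the plain 8-bit s-box camellia_s1[256] from the Camellia spec is available; s2-s4 are the spec's rotation variants of s1:

	#include <stdint.h>

	extern const uint8_t camellia_s1[256];	/* assumed: base s-box from the spec */

	static uint8_t rol8(uint8_t x, int n)
	{
		return (uint8_t)((x << n) | (x >> (8 - n)));
	}

	static uint8_t camellia_s(int which, uint8_t x)
	{
		switch (which) {
		case 1: return camellia_s1[x];
		case 2: return rol8(camellia_s1[x], 1);	 /* s2(x) = s1(x) <<< 1 */
		case 3: return rol8(camellia_s1[x], 7);	 /* s3(x) = s1(x) >>> 1 */
		default: return camellia_s1[rol8(x, 1)]; /* s4(x) = s1(x <<< 1) */
		}
	}

	/* pattern is the table name's digit string, e.g. "30333033" */
	static void build_sp_table(uint64_t out[256], const char *pattern)
	{
		for (int x = 0; x < 256; x++) {
			uint64_t v = 0;

			for (int lane = 0; lane < 8; lane++) {
				v <<= 8;
				if (pattern[lane] != '0')
					v |= camellia_s(pattern[lane] - '0', (uint8_t)x);
			}
			out[x] = v;
		}
	}

For example, build_sp_table(t, "11101110") reproduces camellia_sp11101110 above: s1(0) = 0x70, so t[0] = 0x7070700070707000.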
-/* key constants */
-#define CAMELLIA_SIGMA1L (0xA09E667FL)
-#define CAMELLIA_SIGMA1R (0x3BCC908BL)
-#define CAMELLIA_SIGMA2L (0xB67AE858L)
-#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
-#define CAMELLIA_SIGMA3L (0xC6EF372FL)
-#define CAMELLIA_SIGMA3R (0xE94F82BEL)
-#define CAMELLIA_SIGMA4L (0x54FF53A5L)
-#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
-#define CAMELLIA_SIGMA5L (0x10E527FAL)
-#define CAMELLIA_SIGMA5R (0xDE682D1DL)
-#define CAMELLIA_SIGMA6L (0xB05688C2L)
-#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)
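These Σ constants are nothing-up-my-sleeve numbers: per the Camellia specification they are hexadecimal digits 2 through 17 of the fractional parts of the square roots of the first six primes 2, 3, 5, 7, 11 and 13. A standalone check -- a sketch, not kernel code -- using a restoring bit-by-bit integer square root:

	#include <stdint.h>
	#include <stdio.h>

	/* floor(sqrt(p) * 2^fracbits) for p < 16; state fits easily in __int128 */
	static unsigned __int128 sqrt_fixed(unsigned p, int fracbits)
	{
		unsigned __int128 rem = 0, res = 0;

		/* p < 16, so the result has 2 integer bits plus fracbits */
		for (int j = 0; j < 2 + fracbits; j++) {
			unsigned pair = (j < 2) ? (p >> (2 * (1 - j))) & 3 : 0;
			unsigned __int128 b;

			rem = (rem << 2) | pair;
			b = (res << 2) | 1;
			if (rem >= b) {
				rem -= b;
				res = (res << 1) | 1;
			} else {
				res <<= 1;
			}
		}
		return res;
	}

	int main(void)
	{
		static const unsigned primes[6] = { 2, 3, 5, 7, 11, 13 };
		static const uint64_t sigma[6] = {
			0xA09E667F3BCC908BULL, 0xB67AE8584CAA73B2ULL,
			0xC6EF372FE94F82BEULL, 0x54FF53A5F1D36F1CULL,
			0x10E527FADE682D1DULL, 0xB05688C2B3E6C1FDULL,
		};

		for (int i = 0; i < 6; i++) {
			/* 68 fraction bits = 17 hex digits; low 64 bits are digits 2..17 */
			uint64_t got = (uint64_t)sqrt_fixed(primes[i], 68);

			printf("sigma%d %016llx %s\n", i + 1,
			       (unsigned long long)got,
			       got == sigma[i] ? "ok" : "MISMATCH");
		}
		return 0;
	}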
-
-/* macros */
-#define ROLDQ(l, r, bits) ({ \
- u64 t = l; \
- l = (l << bits) | (r >> (64 - bits)); \
- r = (r << bits) | (t >> (64 - bits)); \
-})
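ROLDQ treats the (l, r) pair as a single 128-bit word and rotates it left by `bits`; it is only valid for 0 < bits < 64, which every call site below respects (the largest single rotate used is 51). An equivalent formulation via unsigned __int128, handy for testing the macro in isolation (a sketch):

	#include <stdint.h>

	static void roldq_u128(uint64_t *l, uint64_t *r, unsigned bits)
	{
		unsigned __int128 v = ((unsigned __int128)*l << 64) | *r;

		v = (v << bits) | (v >> (128 - bits));	/* 128-bit rotate left */
		*l = (uint64_t)(v >> 64);
		*r = (uint64_t)v;
	}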
-
-#define CAMELLIA_F(x, kl, kr, y) ({ \
- u64 ii = x ^ (((u64)kl << 32) | kr); \
- y = camellia_sp11101110[(uint8_t)ii]; \
- y ^= camellia_sp44044404[(uint8_t)(ii >> 8)]; \
- ii >>= 16; \
- y ^= camellia_sp30333033[(uint8_t)ii]; \
- y ^= camellia_sp02220222[(uint8_t)(ii >> 8)]; \
- ii >>= 16; \
- y ^= camellia_sp00444404[(uint8_t)ii]; \
- y ^= camellia_sp03303033[(uint8_t)(ii >> 8)]; \
- ii >>= 16; \
- y ^= camellia_sp22000222[(uint8_t)ii]; \
- y ^= camellia_sp10011110[(uint8_t)(ii >> 8)]; \
- y = ror64(y, 32); \
-})
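CAMELLIA_F is the round function: XOR in the 64-bit subkey, push each of the eight bytes through one combined SP table, and XOR the eight 64-bit results together -- that single pass applies both the s-box layer S and the diffusion P. The closing ror64 swaps the halves to match the subkey layout this implementation uses. The same computation written out as a plain function, purely for readability (a sketch, relying on the tables declared above):

	static u64 camellia_f(u64 x, u32 kl, u32 kr)
	{
		u64 ii = x ^ (((u64)kl << 32) | kr);
		u64 y;

		y  = camellia_sp11101110[(uint8_t)ii];		/* byte 0 (LSB) */
		y ^= camellia_sp44044404[(uint8_t)(ii >> 8)];	/* byte 1 */
		y ^= camellia_sp30333033[(uint8_t)(ii >> 16)];	/* byte 2 */
		y ^= camellia_sp02220222[(uint8_t)(ii >> 24)];	/* byte 3 */
		y ^= camellia_sp00444404[(uint8_t)(ii >> 32)];	/* byte 4 */
		y ^= camellia_sp03303033[(uint8_t)(ii >> 40)];	/* byte 5 */
		y ^= camellia_sp22000222[(uint8_t)(ii >> 48)];	/* byte 6 */
		y ^= camellia_sp10011110[(uint8_t)(ii >> 56)];	/* byte 7 (MSB) */

		return ror64(y, 32);
	}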
-
-#define SET_SUBKEY_LR(INDEX, sRL) (subkey[(INDEX)] = ror64((sRL), 32))
-
-static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
-{
- u64 kw4, tt;
- u32 dw, tl, tr;
-
- /* absorb kw2 into the other subkeys */
- /* round 2 */
- subRL[3] ^= subRL[1];
- /* round 4 */
- subRL[5] ^= subRL[1];
- /* round 6 */
- subRL[7] ^= subRL[1];
-
- subRL[1] ^= (subRL[1] & ~subRL[9]) << 32;
- /* modified for FLinv(kl2) */
- dw = (subRL[1] & subRL[9]) >> 32,
- subRL[1] ^= rol32(dw, 1);
-
- /* round 8 */
- subRL[11] ^= subRL[1];
- /* round 10 */
- subRL[13] ^= subRL[1];
- /* round 12 */
- subRL[15] ^= subRL[1];
-
- subRL[1] ^= (subRL[1] & ~subRL[17]) << 32;
- /* modified for FLinv(kl4) */
- dw = (subRL[1] & subRL[17]) >> 32,
- subRL[1] ^= rol32(dw, 1);
-
- /* round 14 */
- subRL[19] ^= subRL[1];
- /* round 16 */
- subRL[21] ^= subRL[1];
- /* round 18 */
- subRL[23] ^= subRL[1];
-
- if (max == 24) {
- /* kw3 */
- subRL[24] ^= subRL[1];
-
- /* absorb kw4 into the other subkeys */
- kw4 = subRL[25];
- } else {
- subRL[1] ^= (subRL[1] & ~subRL[25]) << 32;
- /* modified for FLinv(kl6) */
- dw = (subRL[1] & subRL[25]) >> 32,
- subRL[1] ^= rol32(dw, 1);
-
- /* round 20 */
- subRL[27] ^= subRL[1];
- /* round 22 */
- subRL[29] ^= subRL[1];
- /* round 24 */
- subRL[31] ^= subRL[1];
- /* kw3 */
- subRL[32] ^= subRL[1];
-
- /* absorb kw4 into the other subkeys */
- kw4 = subRL[33];
- /* round 23 */
- subRL[30] ^= kw4;
- /* round 21 */
- subRL[28] ^= kw4;
- /* round 19 */
- subRL[26] ^= kw4;
-
- kw4 ^= (kw4 & ~subRL[24]) << 32;
- /* modified for FL(kl5) */
- dw = (kw4 & subRL[24]) >> 32,
- kw4 ^= rol32(dw, 1);
- }
-
- /* round 17 */
- subRL[22] ^= kw4;
- /* round 15 */
- subRL[20] ^= kw4;
- /* round 13 */
- subRL[18] ^= kw4;
-
- kw4 ^= (kw4 & ~subRL[16]) << 32;
- /* modified for FL(kl3) */
- dw = (kw4 & subRL[16]) >> 32,
- kw4 ^= rol32(dw, 1);
-
- /* round 11 */
- subRL[14] ^= kw4;
- /* round 9 */
- subRL[12] ^= kw4;
- /* round 7 */
- subRL[10] ^= kw4;
-
- kw4 ^= (kw4 & ~subRL[8]) << 32;
- /* modified for FL(kl1) */
- dw = (kw4 & subRL[8]) >> 32,
- kw4 ^= rol32(dw, 1);
-
- /* round 5 */
- subRL[6] ^= kw4;
- /* round 3 */
- subRL[4] ^= kw4;
- /* round 1 */
- subRL[2] ^= kw4;
- /* kw1 */
- subRL[0] ^= kw4;
-
- /* the key XOR is folded into the end of the F-function, hence the pairwise XORs below */
- SET_SUBKEY_LR(0, subRL[0] ^ subRL[2]); /* kw1 */
- SET_SUBKEY_LR(2, subRL[3]); /* round 1 */
- SET_SUBKEY_LR(3, subRL[2] ^ subRL[4]); /* round 2 */
- SET_SUBKEY_LR(4, subRL[3] ^ subRL[5]); /* round 3 */
- SET_SUBKEY_LR(5, subRL[4] ^ subRL[6]); /* round 4 */
- SET_SUBKEY_LR(6, subRL[5] ^ subRL[7]); /* round 5 */
-
- tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]);
- dw = tl & (subRL[8] >> 32), /* FL(kl1) */
- tr = subRL[10] ^ rol32(dw, 1);
- tt = (tr | ((u64)tl << 32));
-
- SET_SUBKEY_LR(7, subRL[6] ^ tt); /* round 6 */
- SET_SUBKEY_LR(8, subRL[8]); /* FL(kl1) */
- SET_SUBKEY_LR(9, subRL[9]); /* FLinv(kl2) */
-
- tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]);
- dw = tl & (subRL[9] >> 32), /* FLinv(kl2) */
- tr = subRL[7] ^ rol32(dw, 1);
- tt = (tr | ((u64)tl << 32));
-
- SET_SUBKEY_LR(10, subRL[11] ^ tt); /* round 7 */
- SET_SUBKEY_LR(11, subRL[10] ^ subRL[12]); /* round 8 */
- SET_SUBKEY_LR(12, subRL[11] ^ subRL[13]); /* round 9 */
- SET_SUBKEY_LR(13, subRL[12] ^ subRL[14]); /* round 10 */
- SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]); /* round 11 */
-
- tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]);
- dw = tl & (subRL[16] >> 32), /* FL(kl3) */
- tr = subRL[18] ^ rol32(dw, 1);
- tt = (tr | ((u64)tl << 32));
-
- SET_SUBKEY_LR(15, subRL[14] ^ tt); /* round 12 */
- SET_SUBKEY_LR(16, subRL[16]); /* FL(kl3) */
- SET_SUBKEY_LR(17, subRL[17]); /* FLinv(kl4) */
-
- tl = (subRL[15] >> 32) ^ (subRL[15] & ~subRL[17]);
- dw = tl & (subRL[17] >> 32), /* FLinv(kl4) */
- tr = subRL[15] ^ rol32(dw, 1);
- tt = (tr | ((u64)tl << 32));
-
- SET_SUBKEY_LR(18, subRL[19] ^ tt); /* round 13 */
- SET_SUBKEY_LR(19, subRL[18] ^ subRL[20]); /* round 14 */
- SET_SUBKEY_LR(20, subRL[19] ^ subRL[21]); /* round 15 */
- SET_SUBKEY_LR(21, subRL[20] ^ subRL[22]); /* round 16 */
- SET_SUBKEY_LR(22, subRL[21] ^ subRL[23]); /* round 17 */
-
- if (max == 24) {
- SET_SUBKEY_LR(23, subRL[22]); /* round 18 */
- SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]); /* kw3 */
- } else {
- tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]);
- dw = tl & (subRL[24] >> 32), /* FL(kl5) */
- tr = subRL[26] ^ rol32(dw, 1);
- tt = (tr | ((u64)tl << 32));
-
- SET_SUBKEY_LR(23, subRL[22] ^ tt); /* round 18 */
- SET_SUBKEY_LR(24, subRL[24]); /* FL(kl5) */
- SET_SUBKEY_LR(25, subRL[25]); /* FLinv(kl6) */
-
- tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]);
- dw = tl & (subRL[25] >> 32), /* FLinv(kl6) */
- tr = subRL[23] ^ rol32(dw, 1);
- tt = (tr | ((u64)tl << 32));
-
- SET_SUBKEY_LR(26, subRL[27] ^ tt); /* round 19 */
- SET_SUBKEY_LR(27, subRL[26] ^ subRL[28]); /* round 20 */
- SET_SUBKEY_LR(28, subRL[27] ^ subRL[29]); /* round 21 */
- SET_SUBKEY_LR(29, subRL[28] ^ subRL[30]); /* round 22 */
- SET_SUBKEY_LR(30, subRL[29] ^ subRL[31]); /* round 23 */
- SET_SUBKEY_LR(31, subRL[30]); /* round 24 */
- SET_SUBKEY_LR(32, subRL[32] ^ subRL[31]); /* kw3 */
- }
-}
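In words, camellia_setup_tail absorbs the whitening keys so the per-block code never applies them separately: kw2 is XORed forward into every other round subkey and kw4 backward likewise. For plain rounds this is just XOR associativity -- F(x ^ kw, k) = F(x, k ^ kw), because F XORs its key in first -- while carrying a constant across an FL/FLinv layer needs the mask-and-rotate corrections ((x & ~m) << 32 and rol32(dw, 1)) seen above. The final SET_SUBKEY_LR block then emits the subkeys in the pairwise-XORed form the assembler routines expect.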
-
-static void camellia_setup128(const unsigned char *key, u64 *subkey)
-{
- u64 kl, kr, ww;
- u64 subRL[26];
-
- /*
- * k == kl || kr (|| is concatenation)
- */
- kl = get_unaligned_be64(key);
- kr = get_unaligned_be64(key + 8);
-
- /* generate KL dependent subkeys */
- /* kw1 */
- subRL[0] = kl;
- /* kw2 */
- subRL[1] = kr;
-
- /* rotate left by 15 bits */
- ROLDQ(kl, kr, 15);
-
- /* k3 */
- subRL[4] = kl;
- /* k4 */
- subRL[5] = kr;
-
- /* rotate left by 15+30 bits */
- ROLDQ(kl, kr, 30);
-
- /* k7 */
- subRL[10] = kl;
- /* k8 */
- subRL[11] = kr;
-
- /* rotate left by 15+30+15 bits */
- ROLDQ(kl, kr, 15);
-
- /* k10 */
- subRL[13] = kr;
- /* rotate left by 15+30+15+17 bits */
- ROLDQ(kl, kr, 17);
-
- /* kl3 */
- subRL[16] = kl;
- /* kl4 */
- subRL[17] = kr;
-
- /* rotate left by 15+30+15+17+17 bits */
- ROLDQ(kl, kr, 17);
-
- /* k13 */
- subRL[18] = kl;
- /* k14 */
- subRL[19] = kr;
-
- /* rotate left by 15+30+15+17+17+17 bits */
- ROLDQ(kl, kr, 17);
-
- /* k17 */
- subRL[22] = kl;
- /* k18 */
- subRL[23] = kr;
-
- /* generate KA */
- kl = subRL[0];
- kr = subRL[1];
- CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
- kr ^= ww;
- CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);
-
- /* at this point (kl, kr) holds the KA intermediate (the reference code's (kll, klr, w0, w1)) */
- CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
- kr ^= ww;
- CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
- kl ^= ww;
-
- /* generate KA dependent subkeys */
- /* k1, k2 */
- subRL[2] = kl;
- subRL[3] = kr;
- ROLDQ(kl, kr, 15);
- /* k5,k6 */
- subRL[6] = kl;
- subRL[7] = kr;
- ROLDQ(kl, kr, 15);
- /* kl1, kl2 */
- subRL[8] = kl;
- subRL[9] = kr;
- ROLDQ(kl, kr, 15);
- /* k9 */
- subRL[12] = kl;
- ROLDQ(kl, kr, 15);
- /* k11, k12 */
- subRL[14] = kl;
- subRL[15] = kr;
- ROLDQ(kl, kr, 34);
- /* k15, k16 */
- subRL[20] = kl;
- subRL[21] = kr;
- ROLDQ(kl, kr, 17);
- /* kw3, kw4 */
- subRL[24] = kl;
- subRL[25] = kr;
-
- camellia_setup_tail(subkey, subRL, 24);
-}
-
-static void camellia_setup256(const unsigned char *key, u64 *subkey)
-{
- u64 kl, kr; /* left half of key */
- u64 krl, krr; /* right half of key */
- u64 ww; /* temporary variable */
- u64 subRL[34];
-
- /*
- * key = (kl || kr || krl || krr) (|| is concatenation)
- */
- kl = get_unaligned_be64(key);
- kr = get_unaligned_be64(key + 8);
- krl = get_unaligned_be64(key + 16);
- krr = get_unaligned_be64(key + 24);
-
- /* generate KL dependent subkeys */
- /* kw1 */
- subRL[0] = kl;
- /* kw2 */
- subRL[1] = kr;
- ROLDQ(kl, kr, 45);
- /* k9 */
- subRL[12] = kl;
- /* k10 */
- subRL[13] = kr;
- ROLDQ(kl, kr, 15);
- /* kl3 */
- subRL[16] = kl;
- /* kl4 */
- subRL[17] = kr;
- ROLDQ(kl, kr, 17);
- /* k17 */
- subRL[22] = kl;
- /* k18 */
- subRL[23] = kr;
- ROLDQ(kl, kr, 34);
- /* k23 */
- subRL[30] = kl;
- /* k24 */
- subRL[31] = kr;
-
- /* generate KR dependent subkeys */
- ROLDQ(krl, krr, 15);
- /* k3 */
- subRL[4] = krl;
- /* k4 */
- subRL[5] = krr;
- ROLDQ(krl, krr, 15);
- /* kl1 */
- subRL[8] = krl;
- /* kl2 */
- subRL[9] = krr;
- ROLDQ(krl, krr, 30);
- /* k13 */
- subRL[18] = krl;
- /* k14 */
- subRL[19] = krr;
- ROLDQ(krl, krr, 34);
- /* k19 */
- subRL[26] = krl;
- /* k20 */
- subRL[27] = krr;
- ROLDQ(krl, krr, 34);
-
- /* generate KA */
- kl = subRL[0] ^ krl;
- kr = subRL[1] ^ krr;
-
- CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
- kr ^= ww;
- CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);
- kl ^= krl;
- CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
- kr ^= ww ^ krr;
- CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
- kl ^= ww;
-
- /* generate KB */
- krl ^= kl;
- krr ^= kr;
- CAMELLIA_F(krl, CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, ww);
- krr ^= ww;
- CAMELLIA_F(krr, CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, ww);
- krl ^= ww;
-
- /* generate KA dependent subkeys */
- ROLDQ(kl, kr, 15);
- /* k5 */
- subRL[6] = kl;
- /* k6 */
- subRL[7] = kr;
- ROLDQ(kl, kr, 30);
- /* k11 */
- subRL[14] = kl;
- /* k12 */
- subRL[15] = kr;
- /* rotate left by 32 bits */
- ROLDQ(kl, kr, 32);
- /* kl5 */
- subRL[24] = kl;
- /* kl6 */
- subRL[25] = kr;
- /* rotate left by 17 more bits (49 in total from k11,k12) -> k21,k22 */
- ROLDQ(kl, kr, 17);
- /* k21 */
- subRL[28] = kl;
- /* k22 */
- subRL[29] = kr;
-
- /* generate KB dependent subkeys */
- /* k1 */
- subRL[2] = krl;
- /* k2 */
- subRL[3] = krr;
- ROLDQ(krl, krr, 30);
- /* k7 */
- subRL[10] = krl;
- /* k8 */
- subRL[11] = krr;
- ROLDQ(krl, krr, 30);
- /* k15 */
- subRL[20] = krl;
- /* k16 */
- subRL[21] = krr;
- ROLDQ(krl, krr, 51);
- /* kw3 */
- subRL[32] = krl;
- /* kw4 */
- subRL[33] = krr;
-
- camellia_setup_tail(subkey, subRL, 32);
-}
-
-static void camellia_setup192(const unsigned char *key, u64 *subkey)
-{
- unsigned char kk[32];
- u64 krl, krr;
-
- memcpy(kk, key, 24);
- memcpy((unsigned char *)&krl, key+16, 8);
- krr = ~krl;
- memcpy(kk+24, (unsigned char *)&krr, 8);
- camellia_setup256(kk, subkey);
-}
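Per the Camellia spec, a 192-bit key is promoted to a 256-bit one whose last 64 bits are the bitwise complement of bits 128..191, which is what the u64 round-trip above implements (complement is bytewise, so host endianness does not matter). A byte-wise equivalent, as a sketch:

	#include <stdint.h>
	#include <string.h>

	static void camellia_pad192(const uint8_t key[24], uint8_t out[32])
	{
		memcpy(out, key, 24);			/* KL || KRL unchanged */
		for (int i = 0; i < 8; i++)
			out[24 + i] = (uint8_t)~key[16 + i];	/* KRR = ~KRL */
	}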
-
-static int __camellia_setkey(struct camellia_ctx *cctx,
- const unsigned char *key,
- unsigned int key_len, u32 *flags)
-{
- if (key_len != 16 && key_len != 24 && key_len != 32) {
- *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
-
- cctx->key_length = key_len;
-
- switch (key_len) {
- case 16:
- camellia_setup128(key, cctx->key_table);
- break;
- case 24:
- camellia_setup192(key, cctx->key_table);
- break;
- case 32:
- camellia_setup256(key, cctx->key_table);
- break;
- }
-
- return 0;
-}
-
-static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
- unsigned int key_len)
-{
- return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
- &tfm->crt_flags);
-}
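This setkey sits behind the generic crypto API; a 3.4-era usage sketch from outside this file (hypothetical helper, error handling mostly elided) encrypting the RFC 3713 Camellia-128 test vector, whose key and plaintext are both 0123456789abcdeffedcba9876543210:

	#include <linux/crypto.h>
	#include <linux/err.h>
	#include <linux/scatterlist.h>
	#include <linux/string.h>

	static void camellia_demo(void)
	{
		static const u8 key[16] = {
			0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
			0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
		};
		u8 buf[16];
		struct crypto_blkcipher *tfm;
		struct blkcipher_desc desc;
		struct scatterlist sg;

		tfm = crypto_alloc_blkcipher("ecb(camellia)", 0, 0);
		if (IS_ERR(tfm))
			return;
		desc.tfm = tfm;
		desc.flags = 0;

		memcpy(buf, key, 16);	/* plaintext equals the key in this vector */
		crypto_blkcipher_setkey(tfm, key, 16);
		sg_init_one(&sg, buf, 16);
		crypto_blkcipher_encrypt(&desc, &sg, &sg, 16);
		/* buf should now hold 67 67 31 38 54 96 69 73 08 57 06 56 48 ea be 43 */

		crypto_free_blkcipher(tfm);
	}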
-
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
- void (*fn)(struct camellia_ctx *, u8 *, const u8 *),
- void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *))
-{
- struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = CAMELLIA_BLOCK_SIZE;
- unsigned int nbytes;
- int err;
-
- err = blkcipher_walk_virt(desc, walk);
-
- while ((nbytes = walk->nbytes)) {
- u8 *wsrc = walk->src.virt.addr;
- u8 *wdst = walk->dst.virt.addr;
-
- /* Process a two-block batch */
- if (nbytes >= bsize * 2) {
- do {
- fn_2way(ctx, wdst, wsrc);
-
- wsrc += bsize * 2;
- wdst += bsize * 2;
- nbytes -= bsize * 2;
- } while (nbytes >= bsize * 2);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- fn(ctx, wdst, wsrc);
-
- wsrc += bsize;
- wdst += bsize;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- err = blkcipher_walk_done(desc, walk, nbytes);
- }
-
- return err;
-}
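A worked pass through ecb_crypt: with walk->nbytes = 48 and bsize = 16, the first loop makes one fn_2way call (the assembler's two-block routine lets the two encryptions overlap in the pipeline), leaving 16 bytes; 16 is not below bsize, so the leftover loop runs fn once and nbytes reaches 0 before blkcipher_walk_done. The `goto done` only fires when the two-block loop has already consumed everything.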
-
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way);
-}
-
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = CAMELLIA_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 *iv = (u128 *)walk->iv;
-
- do {
- u128_xor(dst, src, iv);
- camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
- iv = dst;
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
- u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
- return nbytes;
-}
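There is no two-way variant for CBC encryption because the mode is inherently serial: C_i = E_K(P_i xor C_{i-1}) with the chain seeded from the IV, so block i cannot start until block i-1 is finished. Only CBC decryption (below) parallelizes.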
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
-
- while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_encrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- return err;
-}
-
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = CAMELLIA_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 ivs[2 - 1];
- u128 last_iv;
-
- /* Start of the last block. */
- src += nbytes / bsize - 1;
- dst += nbytes / bsize - 1;
-
- last_iv = *src;
-
- /* Process a two-block batch */
- if (nbytes >= bsize * 2) {
- do {
- nbytes -= bsize * (2 - 1);
- src -= 2 - 1;
- dst -= 2 - 1;
-
- ivs[0] = src[0];
-
- camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
-
- u128_xor(dst + 1, dst + 1, ivs + 0);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- goto done;
-
- u128_xor(dst, dst, src - 1);
- src -= 1;
- dst -= 1;
- } while (nbytes >= bsize * 2);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- for (;;) {
- camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- break;
-
- u128_xor(dst, dst, src - 1);
- src -= 1;
- dst -= 1;
- }
-
-done:
- u128_xor(dst, dst, (u128 *)walk->iv);
- *(u128 *)walk->iv = last_iv;
-
- return nbytes;
-}
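CBC decryption, by contrast, is parallel-friendly: P_i = D_K(C_i) xor C_{i-1} depends only on ciphertext, which the two-way batch exploits. The walk runs from the last block backwards so the in-place case (dst == src) stays correct: a plaintext write only lands on a ciphertext block after every higher block that needed it as an xor mask has been handled, with ivs[] staging the one ciphertext word a two-way call would otherwise clobber, and last_iv saving the final ciphertext block that must become the chaining value for the next walk chunk.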
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
-
- while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_decrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- return err;
-}
-
-static inline void u128_to_be128(be128 *dst, const u128 *src)
-{
- dst->a = cpu_to_be64(src->a);
- dst->b = cpu_to_be64(src->b);
-}
-
-static inline void be128_to_u128(u128 *dst, const be128 *src)
-{
- dst->a = be64_to_cpu(src->a);
- dst->b = be64_to_cpu(src->b);
-}
-
-static inline void u128_inc(u128 *i)
-{
- i->b++;
- if (!i->b)
- i->a++;
-}
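u128_inc is a 128-bit increment with carry: bump the low word b and propagate into a only when b wraps to zero (so {a = 0, b = 0xffffffffffffffff} becomes {a = 1, b = 0}). The CTR counter is kept CPU-endian in this u128 form and converted to the big-endian counter-block wire format with u128_to_be128 just before each encryption.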
-
-static void ctr_crypt_final(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- u8 keystream[CAMELLIA_BLOCK_SIZE];
- u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
- u128 ctrblk;
-
- memcpy(keystream, src, nbytes);
- camellia_enc_blk_xor(ctx, keystream, walk->iv);
- memcpy(dst, keystream, nbytes);
-
- be128_to_u128(&ctrblk, (be128 *)walk->iv);
- u128_inc(&ctrblk);
- u128_to_be128((be128 *)walk->iv, &ctrblk);
-}
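For a trailing partial block, CTR needs only a keystream prefix: the plaintext tail is copied into a full-size buffer, camellia_enc_blk_xor encrypts the counter block in walk->iv and XORs the result into that buffer in one call, and only the nbytes of real data are copied back out, so the buffer's stale tail never leaves. The counter is still incremented afterwards to keep the IV consistent.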
-
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = CAMELLIA_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 ctrblk;
- be128 ctrblocks[2];
-
- be128_to_u128(&ctrblk, (be128 *)walk->iv);
-
- /* Process a two-block batch */
- if (nbytes >= bsize * 2) {
- do {
- if (dst != src) {
- dst[0] = src[0];
- dst[1] = src[1];
- }
-
- /* create ctrblks for parallel encrypt */
- u128_to_be128(&ctrblocks[0], &ctrblk);
- u128_inc(&ctrblk);
- u128_to_be128(&ctrblocks[1], &ctrblk);
- u128_inc(&ctrblk);
-
- camellia_enc_blk_xor_2way(ctx, (u8 *)dst,
- (u8 *)ctrblocks);
-
- src += 2;
- dst += 2;
- nbytes -= bsize * 2;
- } while (nbytes >= bsize * 2);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- if (dst != src)
- *dst = *src;
-
- u128_to_be128(&ctrblocks[0], &ctrblk);
- u128_inc(&ctrblk);
-
- camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- u128_to_be128((be128 *)walk->iv, &ctrblk);
- return nbytes;
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE);
-
- while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) {
- nbytes = __ctr_crypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- if (walk.nbytes) {
- ctr_crypt_final(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, 0);
- }
-
- return err;
-}
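CTR mode never runs the block cipher's decrypt direction, which is why ctr_crypt is registered as both .encrypt and .decrypt below: the keystream is E(counter) and both directions are a plain XOR against it. Here is a byte-oriented C sketch of the whole construction, including the partial final block that ctr_crypt_final handles; camellia_encrypt_block() is again a hypothetical stand-in.

#include <stddef.h>
#include <stdint.h>

#define BLOCK_SIZE 16

/* hypothetical stand-in for camellia_enc_blk() */
void camellia_encrypt_block(const void *ctx, uint8_t *dst, const uint8_t *src);

static void ctr_crypt_sketch(const void *ctx, uint8_t *dst, const uint8_t *src,
                             size_t len, uint8_t iv[BLOCK_SIZE])
{
    uint8_t keystream[BLOCK_SIZE];
    size_t i;

    while (len) {
        size_t n = len < BLOCK_SIZE ? len : BLOCK_SIZE;

        camellia_encrypt_block(ctx, keystream, iv);   /* keystream = E(ctr) */
        for (i = 0; i < n; i++)
            dst[i] = src[i] ^ keystream[i];

        /* increment the counter as a big-endian 128-bit integer, the
         * byte-level equivalent of be128_to_u128 / u128_inc / u128_to_be128 */
        for (i = BLOCK_SIZE; i-- > 0; )
            if (++iv[i])
                break;

        src += n;
        dst += n;
        len -= n;
    }
}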
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
- const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
- struct camellia_ctx *ctx = priv;
- int i;
-
- while (nbytes >= 2 * bsize) {
- camellia_enc_blk_2way(ctx, srcdst, srcdst);
- srcdst += bsize * 2;
- nbytes -= bsize * 2;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- camellia_enc_blk(ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
- const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
- struct camellia_ctx *ctx = priv;
- int i;
-
- while (nbytes >= 2 * bsize) {
- camellia_dec_blk_2way(ctx, srcdst, srcdst);
- srcdst += bsize * 2;
- nbytes -= bsize * 2;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- camellia_dec_blk(ctx, srcdst, srcdst);
-}
-
-struct camellia_lrw_ctx {
- struct lrw_table_ctx lrw_table;
- struct camellia_ctx camellia_ctx;
-};
-
-static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
- int err;
-
- err = __camellia_setkey(&ctx->camellia_ctx, key,
- keylen - CAMELLIA_BLOCK_SIZE,
- &tfm->crt_flags);
- if (err)
- return err;
-
- return lrw_init_table(&ctx->lrw_table,
- key + keylen - CAMELLIA_BLOCK_SIZE);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[2 * 4];
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &ctx->camellia_ctx,
- .crypt_fn = encrypt_callback,
- };
-
- return lrw_crypt(desc, dst, src, nbytes, &req);
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[2 * 4];
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &ctx->camellia_ctx,
- .crypt_fn = decrypt_callback,
- };
-
- return lrw_crypt(desc, dst, src, nbytes, &req);
-}
-
-static void lrw_exit_tfm(struct crypto_tfm *tfm)
-{
- struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
- lrw_free_table(&ctx->lrw_table);
-}
-
-struct camellia_xts_ctx {
- struct camellia_ctx tweak_ctx;
- struct camellia_ctx crypt_ctx;
-};
-
-static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm);
- u32 *flags = &tfm->crt_flags;
- int err;
-
-	/* The key consists of two keys of equal size concatenated,
-	 * therefore the length must be even.
-	 */
- if (keylen % 2) {
- *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
-
- /* first half of xts-key is for crypt */
- err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
- if (err)
- return err;
-
- /* second half of xts-key is for tweak */
- return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
- flags);
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[2 * 4];
- struct xts_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .tweak_ctx = &ctx->tweak_ctx,
- .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
- .crypt_ctx = &ctx->crypt_ctx,
- .crypt_fn = encrypt_callback,
- };
-
- return xts_crypt(desc, dst, src, nbytes, &req);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[2 * 4];
- struct xts_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .tweak_ctx = &ctx->tweak_ctx,
- .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
- .crypt_ctx = &ctx->crypt_ctx,
- .crypt_fn = decrypt_callback,
- };
-
- return xts_crypt(desc, dst, src, nbytes, &req);
-}
-
-static struct crypto_alg camellia_algs[6] = { {
- .cra_name = "camellia",
- .cra_driver_name = "camellia-asm",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_ctx),
- .cra_alignmask = 0,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(camellia_algs[0].cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .cia_max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .cia_setkey = camellia_setkey,
- .cia_encrypt = camellia_encrypt,
- .cia_decrypt = camellia_decrypt
- }
- }
-}, {
- .cra_name = "ecb(camellia)",
- .cra_driver_name = "ecb-camellia-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(camellia_algs[1].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .setkey = camellia_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(camellia)",
- .cra_driver_name = "cbc-camellia-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(camellia_algs[2].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = camellia_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(camellia)",
- .cra_driver_name = "ctr-camellia-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct camellia_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(camellia_algs[3].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = camellia_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-}, {
- .cra_name = "lrw(camellia)",
- .cra_driver_name = "lrw-camellia-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(camellia_algs[4].cra_list),
- .cra_exit = lrw_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE +
- CAMELLIA_BLOCK_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE +
- CAMELLIA_BLOCK_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = lrw_camellia_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-}, {
- .cra_name = "xts(camellia)",
- .cra_driver_name = "xts-camellia-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(camellia_algs[5].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = xts_camellia_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
- },
-} };
-
-static bool is_blacklisted_cpu(void)
-{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
- return false;
-
- if (boot_cpu_data.x86 == 0x0f) {
- /*
-		 * On Pentium 4, camellia-asm is slower than the original assembler
-		 * implementation because of the excessive use of 64-bit rotates and
-		 * left-shifts (which are really slow on P4) needed to store and
-		 * handle a 128-bit block in two 64-bit registers.
- */
- return true;
- }
-
- return false;
-}
-
-static int force;
-module_param(force, int, 0);
-MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
-
-static int __init init(void)
-{
- if (!force && is_blacklisted_cpu()) {
- printk(KERN_INFO
- "camellia-x86_64: performance on this CPU "
- "would be suboptimal: disabling "
- "camellia-x86_64.\n");
- return -ENODEV;
- }
-
- return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
-}
-
-static void __exit fini(void)
-{
- crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized");
-MODULE_ALIAS("camellia");
-MODULE_ALIAS("camellia-asm");
diff --git a/ANDROID_3.4.5/arch/x86/crypto/crc32c-intel.c b/ANDROID_3.4.5/arch/x86/crypto/crc32c-intel.c
deleted file mode 100644
index 493f9592..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/crc32c-intel.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Use the hardware-provided CRC32 instruction to accelerate the CRC32C
- * computation. CRC32C polynomial: 0x1EDC6F41 (BE) / 0x82F63B78 (LE).
- * CRC32 is a new instruction in Intel SSE4.2; the reference can be found at:
- * http://www.intel.com/products/processor/manuals/
- * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
- * Volume 2A: Instruction Set Reference, A-M
- *
- * Copyright (C) 2008 Intel Corporation
- * Authors: Austin Zhang <austin_zhang@linux.intel.com>
- * Kent Liu <kent.liu@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <crypto/internal/hash.h>
-
-#include <asm/cpufeature.h>
-#include <asm/cpu_device_id.h>
-
-#define CHKSUM_BLOCK_SIZE 1
-#define CHKSUM_DIGEST_SIZE 4
-
-#define SCALE_F sizeof(unsigned long)
-
-#ifdef CONFIG_X86_64
-#define REX_PRE "0x48, "
-#else
-#define REX_PRE
-#endif
-
-static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
-{
- while (length--) {
- __asm__ __volatile__(
- ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
- :"=S"(crc)
- :"0"(crc), "c"(*data)
- );
- data++;
- }
-
- return crc;
-}
-
-static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len)
-{
- unsigned int iquotient = len / SCALE_F;
- unsigned int iremainder = len % SCALE_F;
- unsigned long *ptmp = (unsigned long *)p;
-
- while (iquotient--) {
- __asm__ __volatile__(
- ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
- :"=S"(crc)
- :"0"(crc), "c"(*ptmp)
- );
- ptmp++;
- }
-
- if (iremainder)
- crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
- iremainder);
-
- return crc;
-}
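The .byte sequences above encode the SSE4.2 crc32 instruction directly, presumably because assemblers of the era did not all know the mnemonic. For reference, a user-space sketch of the same word-then-byte-tail loop using the compiler intrinsics (x86-64, compile with -msse4.2; crc32c_hw() and crc32c() are names local to this sketch), together with the seed/invert convention that crc32c_intel_cra_init (*key = ~0) and the ~ in the final/finup paths implement:

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <nmmintrin.h>   /* SSE4.2 crc32 intrinsics */

static uint32_t crc32c_hw(uint32_t crc, const void *buf, size_t len)
{
    const uint8_t *p = buf;

    while (len >= sizeof(uint64_t)) {        /* 8 bytes per crc32q */
        uint64_t v;

        memcpy(&v, p, sizeof(v));
        crc = (uint32_t)_mm_crc32_u64(crc, v);
        p += sizeof(v);
        len -= sizeof(v);
    }
    while (len--)                            /* byte tail, like the _byte helper */
        crc = _mm_crc32_u8(crc, *p++);

    return crc;
}

/* standard CRC32C convention: seed with ~0, invert the result */
static uint32_t crc32c(const void *buf, size_t len)
{
    return ~crc32c_hw(~0u, buf, len);
}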
-
-/*
- * Setting the seed allows arbitrary accumulators and a flexible XOR
- * policy. If your algorithm starts with ~0, then XOR with ~0 before you
- * set the seed.
- */
-static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key,
- unsigned int keylen)
-{
- u32 *mctx = crypto_shash_ctx(hash);
-
- if (keylen != sizeof(u32)) {
- crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
- return -EINVAL;
- }
- *mctx = le32_to_cpup((__le32 *)key);
- return 0;
-}
-
-static int crc32c_intel_init(struct shash_desc *desc)
-{
- u32 *mctx = crypto_shash_ctx(desc->tfm);
- u32 *crcp = shash_desc_ctx(desc);
-
- *crcp = *mctx;
-
- return 0;
-}
-
-static int crc32c_intel_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- u32 *crcp = shash_desc_ctx(desc);
-
- *crcp = crc32c_intel_le_hw(*crcp, data, len);
- return 0;
-}
-
-static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
- u8 *out)
-{
- *(__le32 *)out = ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
- return 0;
-}
-
-static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int crc32c_intel_final(struct shash_desc *desc, u8 *out)
-{
- u32 *crcp = shash_desc_ctx(desc);
-
- *(__le32 *)out = ~cpu_to_le32p(crcp);
- return 0;
-}
-
-static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
- out);
-}
-
-static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
-{
- u32 *key = crypto_tfm_ctx(tfm);
-
- *key = ~0;
-
- return 0;
-}
-
-static struct shash_alg alg = {
- .setkey = crc32c_intel_setkey,
- .init = crc32c_intel_init,
- .update = crc32c_intel_update,
- .final = crc32c_intel_final,
- .finup = crc32c_intel_finup,
- .digest = crc32c_intel_digest,
- .descsize = sizeof(u32),
- .digestsize = CHKSUM_DIGEST_SIZE,
- .base = {
- .cra_name = "crc32c",
- .cra_driver_name = "crc32c-intel",
- .cra_priority = 200,
- .cra_blocksize = CHKSUM_BLOCK_SIZE,
- .cra_ctxsize = sizeof(u32),
- .cra_module = THIS_MODULE,
- .cra_init = crc32c_intel_cra_init,
- }
-};
-
-static const struct x86_cpu_id crc32c_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_XMM4_2),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
-
-static int __init crc32c_intel_mod_init(void)
-{
- if (!x86_match_cpu(crc32c_cpu_id))
- return -ENODEV;
- return crypto_register_shash(&alg);
-}
-
-static void __exit crc32c_intel_mod_fini(void)
-{
- crypto_unregister_shash(&alg);
-}
-
-module_init(crc32c_intel_mod_init);
-module_exit(crc32c_intel_mod_fini);
-
-MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.com>");
-MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware.");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS("crc32c");
-MODULE_ALIAS("crc32c-intel");
diff --git a/ANDROID_3.4.5/arch/x86/crypto/fpu.c b/ANDROID_3.4.5/arch/x86/crypto/fpu.c
deleted file mode 100644
index 98d7a188..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/fpu.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * FPU: wrapper for block ciphers that touch the FPU
- *
- * Copyright (c) Intel Corp.
- * Author: Huang Ying <ying.huang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/algapi.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <asm/i387.h>
-
-struct crypto_fpu_ctx {
- struct crypto_blkcipher *child;
-};
-
-static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key,
- unsigned int keylen)
-{
- struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent);
- struct crypto_blkcipher *child = ctx->child;
- int err;
-
- crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
- crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) &
- CRYPTO_TFM_REQ_MASK);
- err = crypto_blkcipher_setkey(child, key, keylen);
- crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) &
- CRYPTO_TFM_RES_MASK);
- return err;
-}
-
-static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- int err;
- struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
- struct crypto_blkcipher *child = ctx->child;
- struct blkcipher_desc desc = {
- .tfm = child,
- .info = desc_in->info,
- .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
- };
-
- kernel_fpu_begin();
- err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes);
- kernel_fpu_end();
- return err;
-}
-
-static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- int err;
- struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
- struct crypto_blkcipher *child = ctx->child;
- struct blkcipher_desc desc = {
- .tfm = child,
- .info = desc_in->info,
- .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
- };
-
- kernel_fpu_begin();
- err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes);
- kernel_fpu_end();
- return err;
-}
-
-static int crypto_fpu_init_tfm(struct crypto_tfm *tfm)
-{
- struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
- struct crypto_spawn *spawn = crypto_instance_ctx(inst);
- struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
- struct crypto_blkcipher *cipher;
-
- cipher = crypto_spawn_blkcipher(spawn);
- if (IS_ERR(cipher))
- return PTR_ERR(cipher);
-
- ctx->child = cipher;
- return 0;
-}
-
-static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm)
-{
- struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
- crypto_free_blkcipher(ctx->child);
-}
-
-static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb)
-{
- struct crypto_instance *inst;
- struct crypto_alg *alg;
- int err;
-
- err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
- if (err)
- return ERR_PTR(err);
-
- alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER,
- CRYPTO_ALG_TYPE_MASK);
- if (IS_ERR(alg))
- return ERR_CAST(alg);
-
- inst = crypto_alloc_instance("fpu", alg);
- if (IS_ERR(inst))
- goto out_put_alg;
-
- inst->alg.cra_flags = alg->cra_flags;
- inst->alg.cra_priority = alg->cra_priority;
- inst->alg.cra_blocksize = alg->cra_blocksize;
- inst->alg.cra_alignmask = alg->cra_alignmask;
- inst->alg.cra_type = alg->cra_type;
- inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize;
- inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
- inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize;
- inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
- inst->alg.cra_init = crypto_fpu_init_tfm;
- inst->alg.cra_exit = crypto_fpu_exit_tfm;
- inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey;
- inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt;
- inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt;
-
-out_put_alg:
- crypto_mod_put(alg);
- return inst;
-}
-
-static void crypto_fpu_free(struct crypto_instance *inst)
-{
- crypto_drop_spawn(crypto_instance_ctx(inst));
- kfree(inst);
-}
-
-static struct crypto_template crypto_fpu_tmpl = {
- .name = "fpu",
- .alloc = crypto_fpu_alloc,
- .free = crypto_fpu_free,
- .module = THIS_MODULE,
-};
-
-int __init crypto_fpu_init(void)
-{
- return crypto_register_template(&crypto_fpu_tmpl);
-}
-
-void __exit crypto_fpu_exit(void)
-{
- crypto_unregister_template(&crypto_fpu_tmpl);
-}
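How an instance of this template gets used is not visible in this file; like any crypto template, instances are created by name composition ("fpu(...)", per the crypto_alloc_instance("fpu", alg) call above), and in this kernel the template is pulled in by the AES-NI code. A hedged kernel-side sketch, with "ecb(aes)" purely as an illustrative inner algorithm, not something this file mandates:

#include <linux/crypto.h>
#include <linux/err.h>

static int fpu_wrapped_alloc_demo(void)
{
	struct crypto_blkcipher *tfm;

	/* "ecb(aes)" is an illustrative inner algorithm; whether a given
	 * driver needs the wrapper depends on whether it touches SIMD state */
	tfm = crypto_alloc_blkcipher("fpu(ecb(aes))", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* ... use via crypto_blkcipher_setkey()/encrypt() as usual ... */

	crypto_free_blkcipher(tfm);
	return 0;
}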
diff --git a/ANDROID_3.4.5/arch/x86/crypto/ghash-clmulni-intel_asm.S b/ANDROID_3.4.5/arch/x86/crypto/ghash-clmulni-intel_asm.S
deleted file mode 100644
index 1eb7f90c..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
- * instructions. This file contains the accelerated part of the GHASH
- * implementation. More information about PCLMULQDQ can be found at:
- *
- * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
- *
- * Copyright (c) 2009 Intel Corp.
- * Author: Huang Ying <ying.huang@intel.com>
- * Vinodh Gopal
- * Erdinc Ozturk
- * Deniz Karakoyunlu
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#include <linux/linkage.h>
-#include <asm/inst.h>
-
-.data
-
-.align 16
-.Lbswap_mask:
- .octa 0x000102030405060708090a0b0c0d0e0f
-.Lpoly:
- .octa 0xc2000000000000000000000000000001
-.Ltwo_one:
- .octa 0x00000001000000000000000000000001
-
-#define DATA %xmm0
-#define SHASH %xmm1
-#define T1 %xmm2
-#define T2 %xmm3
-#define T3 %xmm4
-#define BSWAP %xmm5
-#define IN1 %xmm6
-
-.text
-
-/*
- * __clmul_gf128mul_ble: internal ABI
- * input:
- * DATA: operand1
- * SHASH: operand2, hash_key << 1 mod poly
- * output:
- * DATA: operand1 * operand2 mod poly
- * changed:
- * T1
- * T2
- * T3
- */
-__clmul_gf128mul_ble:
- movaps DATA, T1
- pshufd $0b01001110, DATA, T2
- pshufd $0b01001110, SHASH, T3
- pxor DATA, T2
- pxor SHASH, T3
-
- PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0
- PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1
- PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0)
- pxor DATA, T2
- pxor T1, T2 # T2 = a0 * b1 + a1 * b0
-
- movaps T2, T3
- pslldq $8, T3
- psrldq $8, T2
- pxor T3, DATA
- pxor T2, T1 # <T1:DATA> is result of
- # carry-less multiplication
-
- # first phase of the reduction
- movaps DATA, T3
- psllq $1, T3
- pxor DATA, T3
- psllq $5, T3
- pxor DATA, T3
- psllq $57, T3
- movaps T3, T2
- pslldq $8, T2
- psrldq $8, T3
- pxor T2, DATA
- pxor T3, T1
-
- # second phase of the reduction
- movaps DATA, T2
- psrlq $5, T2
- pxor DATA, T2
- psrlq $1, T2
- pxor DATA, T2
- psrlq $1, T2
- pxor T2, T1
- pxor T1, DATA
- ret
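__clmul_gf128mul_ble is a fast path for multiplication in GF(2^128) with the GHASH reduction polynomial x^128 + x^7 + x^2 + x + 1: the three PCLMULQDQs form a Karatsuba-style carry-less product, and the two shift/XOR phases fold the 256-bit result back to 128 bits. A spec-level bitwise reference (standalone C, following the NIST SP 800-38D definition; the struct is local to this sketch, not the kernel's be128):

#include <stdint.h>

struct be128_sketch { uint64_t hi, lo; };   /* hi holds GHASH bits 0..63 */

static struct be128_sketch gf128_mul(struct be128_sketch x,
                                     struct be128_sketch y)
{
    struct be128_sketch z = { 0, 0 };
    struct be128_sketch v = y;
    int i;

    for (i = 0; i < 128; i++) {
        /* bit i of x, most-significant bit first (GHASH bit order) */
        uint64_t bit = (i < 64) ? (x.hi >> (63 - i)) & 1
                                : (x.lo >> (127 - i)) & 1;
        uint64_t lsb;

        if (bit) {
            z.hi ^= v.hi;
            z.lo ^= v.lo;
        }
        /* v >>= 1, reducing by x^128 + x^7 + x^2 + x + 1 on carry-out */
        lsb = v.lo & 1;
        v.lo = (v.lo >> 1) | (v.hi << 63);
        v.hi >>= 1;
        if (lsb)
            v.hi ^= 0xe100000000000000ULL;
    }
    return z;
}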
-
-/* void clmul_ghash_mul(char *dst, const be128 *shash) */
-ENTRY(clmul_ghash_mul)
- movups (%rdi), DATA
- movups (%rsi), SHASH
- movaps .Lbswap_mask, BSWAP
- PSHUFB_XMM BSWAP DATA
- call __clmul_gf128mul_ble
- PSHUFB_XMM BSWAP DATA
- movups DATA, (%rdi)
- ret
-
-/*
- * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
- * const be128 *shash);
- */
-ENTRY(clmul_ghash_update)
- cmp $16, %rdx
- jb .Lupdate_just_ret # check length
- movaps .Lbswap_mask, BSWAP
- movups (%rdi), DATA
- movups (%rcx), SHASH
- PSHUFB_XMM BSWAP DATA
-.align 4
-.Lupdate_loop:
- movups (%rsi), IN1
- PSHUFB_XMM BSWAP IN1
- pxor IN1, DATA
- call __clmul_gf128mul_ble
- sub $16, %rdx
- add $16, %rsi
- cmp $16, %rdx
- jge .Lupdate_loop
- PSHUFB_XMM BSWAP DATA
- movups DATA, (%rdi)
-.Lupdate_just_ret:
- ret
-
-/*
- * void clmul_ghash_setkey(be128 *shash, const u8 *key);
- *
- * Calculate hash_key << 1 mod poly
- */
-ENTRY(clmul_ghash_setkey)
- movaps .Lbswap_mask, BSWAP
- movups (%rsi), %xmm0
- PSHUFB_XMM BSWAP %xmm0
- movaps %xmm0, %xmm1
- psllq $1, %xmm0
- psrlq $63, %xmm1
- movaps %xmm1, %xmm2
- pslldq $8, %xmm1
- psrldq $8, %xmm2
- por %xmm1, %xmm0
- # reduction
- pshufd $0b00100100, %xmm2, %xmm1
- pcmpeqd .Ltwo_one, %xmm1
- pand .Lpoly, %xmm1
- pxor %xmm1, %xmm0
- movups %xmm0, (%rdi)
- ret
diff --git a/ANDROID_3.4.5/arch/x86/crypto/ghash-clmulni-intel_glue.c b/ANDROID_3.4.5/arch/x86/crypto/ghash-clmulni-intel_glue.c
deleted file mode 100644
index b4bf0a63..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ /dev/null
@@ -1,338 +0,0 @@
-/*
- * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
- * instructions. This file contains glue code.
- *
- * Copyright (c) 2009 Intel Corp.
- * Author: Huang Ying <ying.huang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/crypto.h>
-#include <crypto/algapi.h>
-#include <crypto/cryptd.h>
-#include <crypto/gf128mul.h>
-#include <crypto/internal/hash.h>
-#include <asm/i387.h>
-#include <asm/cpu_device_id.h>
-
-#define GHASH_BLOCK_SIZE 16
-#define GHASH_DIGEST_SIZE 16
-
-void clmul_ghash_mul(char *dst, const be128 *shash);
-
-void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
- const be128 *shash);
-
-void clmul_ghash_setkey(be128 *shash, const u8 *key);
-
-struct ghash_async_ctx {
- struct cryptd_ahash *cryptd_tfm;
-};
-
-struct ghash_ctx {
- be128 shash;
-};
-
-struct ghash_desc_ctx {
- u8 buffer[GHASH_BLOCK_SIZE];
- u32 bytes;
-};
-
-static int ghash_init(struct shash_desc *desc)
-{
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
-
- memset(dctx, 0, sizeof(*dctx));
-
- return 0;
-}
-
-static int ghash_setkey(struct crypto_shash *tfm,
- const u8 *key, unsigned int keylen)
-{
- struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
-
- if (keylen != GHASH_BLOCK_SIZE) {
- crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
- return -EINVAL;
- }
-
- clmul_ghash_setkey(&ctx->shash, key);
-
- return 0;
-}
-
-static int ghash_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
-{
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
- u8 *dst = dctx->buffer;
-
- kernel_fpu_begin();
- if (dctx->bytes) {
- int n = min(srclen, dctx->bytes);
- u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
-
- dctx->bytes -= n;
- srclen -= n;
-
- while (n--)
- *pos++ ^= *src++;
-
- if (!dctx->bytes)
- clmul_ghash_mul(dst, &ctx->shash);
- }
-
- clmul_ghash_update(dst, src, srclen, &ctx->shash);
- kernel_fpu_end();
-
- if (srclen & 0xf) {
- src += srclen - (srclen & 0xf);
- srclen &= 0xf;
- dctx->bytes = GHASH_BLOCK_SIZE - srclen;
- while (srclen--)
- *dst++ ^= *src++;
- }
-
- return 0;
-}
-
-static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
-{
- u8 *dst = dctx->buffer;
-
- if (dctx->bytes) {
- u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
-
- while (dctx->bytes--)
- *tmp++ ^= 0;
-
- kernel_fpu_begin();
- clmul_ghash_mul(dst, &ctx->shash);
- kernel_fpu_end();
- }
-
- dctx->bytes = 0;
-}
-
-static int ghash_final(struct shash_desc *desc, u8 *dst)
-{
- struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
- struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
- u8 *buf = dctx->buffer;
-
- ghash_flush(ctx, dctx);
- memcpy(dst, buf, GHASH_BLOCK_SIZE);
-
- return 0;
-}
-
-static struct shash_alg ghash_alg = {
- .digestsize = GHASH_DIGEST_SIZE,
- .init = ghash_init,
- .update = ghash_update,
- .final = ghash_final,
- .setkey = ghash_setkey,
- .descsize = sizeof(struct ghash_desc_ctx),
- .base = {
- .cra_name = "__ghash",
- .cra_driver_name = "__ghash-pclmulqdqni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_SHASH,
- .cra_blocksize = GHASH_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct ghash_ctx),
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list),
- },
-};
-
-static int ghash_async_init(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *cryptd_req = ahash_request_ctx(req);
- struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
-
- if (!irq_fpu_usable()) {
- memcpy(cryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
- return crypto_ahash_init(cryptd_req);
- } else {
- struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
- struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
-
- desc->tfm = child;
- desc->flags = req->base.flags;
- return crypto_shash_init(desc);
- }
-}
-
-static int ghash_async_update(struct ahash_request *req)
-{
- struct ahash_request *cryptd_req = ahash_request_ctx(req);
-
- if (!irq_fpu_usable()) {
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
- struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
-
- memcpy(cryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
- return crypto_ahash_update(cryptd_req);
- } else {
- struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
- return shash_ahash_update(req, desc);
- }
-}
-
-static int ghash_async_final(struct ahash_request *req)
-{
- struct ahash_request *cryptd_req = ahash_request_ctx(req);
-
- if (!irq_fpu_usable()) {
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
- struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
-
- memcpy(cryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
- return crypto_ahash_final(cryptd_req);
- } else {
- struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
- return crypto_shash_final(desc, req->result);
- }
-}
-
-static int ghash_async_digest(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *cryptd_req = ahash_request_ctx(req);
- struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
-
- if (!irq_fpu_usable()) {
- memcpy(cryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
- return crypto_ahash_digest(cryptd_req);
- } else {
- struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
- struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
-
- desc->tfm = child;
- desc->flags = req->base.flags;
- return shash_ahash_digest(req, desc);
- }
-}
-
-static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
- struct crypto_ahash *child = &ctx->cryptd_tfm->base;
- int err;
-
- crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
- crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
- & CRYPTO_TFM_REQ_MASK);
- err = crypto_ahash_setkey(child, key, keylen);
- crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child)
- & CRYPTO_TFM_RES_MASK);
-
- return err;
-}
-
-static int ghash_async_init_tfm(struct crypto_tfm *tfm)
-{
- struct cryptd_ahash *cryptd_tfm;
- struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
-
- cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ctx->cryptd_tfm = cryptd_tfm;
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- crypto_ahash_reqsize(&cryptd_tfm->base));
-
- return 0;
-}
-
-static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
-{
- struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
-
- cryptd_free_ahash(ctx->cryptd_tfm);
-}
-
-static struct ahash_alg ghash_async_alg = {
- .init = ghash_async_init,
- .update = ghash_async_update,
- .final = ghash_async_final,
- .setkey = ghash_async_setkey,
- .digest = ghash_async_digest,
- .halg = {
- .digestsize = GHASH_DIGEST_SIZE,
- .base = {
- .cra_name = "ghash",
- .cra_driver_name = "ghash-clmulni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
- .cra_blocksize = GHASH_BLOCK_SIZE,
- .cra_type = &crypto_ahash_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list),
- .cra_init = ghash_async_init_tfm,
- .cra_exit = ghash_async_exit_tfm,
- },
- },
-};
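A hedged usage sketch of the user-facing async alg defined above: a one-shot GHASH through the ahash API. Completion handling is simplified; a real caller must cope with -EINPROGRESS/-EBUSY from crypto_ahash_digest() when the request is deferred to cryptd, and data must live in linear kernel memory for sg_init_one().

#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <crypto/hash.h>

static int ghash_digest_demo(const u8 *key, const u8 *data,
			     unsigned int len, u8 *out)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;
	struct scatterlist sg;
	int err;

	tfm = crypto_alloc_ahash("ghash", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		crypto_free_ahash(tfm);
		return -ENOMEM;
	}

	sg_init_one(&sg, data, len);
	ahash_request_set_callback(req, 0, NULL, NULL);
	ahash_request_set_crypt(req, &sg, out, len);

	err = crypto_ahash_setkey(tfm, key, GHASH_BLOCK_SIZE);
	if (!err)
		err = crypto_ahash_digest(req);   /* may return -EINPROGRESS */

	ahash_request_free(req);
	crypto_free_ahash(tfm);
	return err;
}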
-
-static const struct x86_cpu_id pcmul_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
-
-static int __init ghash_pclmulqdqni_mod_init(void)
-{
- int err;
-
- if (!x86_match_cpu(pcmul_cpu_id))
- return -ENODEV;
-
- err = crypto_register_shash(&ghash_alg);
- if (err)
- goto err_out;
- err = crypto_register_ahash(&ghash_async_alg);
- if (err)
- goto err_shash;
-
- return 0;
-
-err_shash:
- crypto_unregister_shash(&ghash_alg);
-err_out:
- return err;
-}
-
-static void __exit ghash_pclmulqdqni_mod_exit(void)
-{
- crypto_unregister_ahash(&ghash_async_alg);
- crypto_unregister_shash(&ghash_alg);
-}
-
-module_init(ghash_pclmulqdqni_mod_init);
-module_exit(ghash_pclmulqdqni_mod_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("GHASH Message Digest Algorithm, "
-		   "accelerated by PCLMULQDQ-NI");
-MODULE_ALIAS("ghash");
diff --git a/ANDROID_3.4.5/arch/x86/crypto/salsa20-i586-asm_32.S b/ANDROID_3.4.5/arch/x86/crypto/salsa20-i586-asm_32.S
deleted file mode 100644
index 72eb3066..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/salsa20-i586-asm_32.S
+++ /dev/null
@@ -1,1114 +0,0 @@
-# salsa20_pm.s version 20051229
-# D. J. Bernstein
-# Public domain.
-
-# enter ECRYPT_encrypt_bytes
-.text
-.p2align 5
-.globl ECRYPT_encrypt_bytes
-ECRYPT_encrypt_bytes:
- mov %esp,%eax
- and $31,%eax
- add $256,%eax
- sub %eax,%esp
- # eax_stack = eax
- movl %eax,80(%esp)
- # ebx_stack = ebx
- movl %ebx,84(%esp)
- # esi_stack = esi
- movl %esi,88(%esp)
- # edi_stack = edi
- movl %edi,92(%esp)
- # ebp_stack = ebp
- movl %ebp,96(%esp)
- # x = arg1
- movl 4(%esp,%eax),%edx
- # m = arg2
- movl 8(%esp,%eax),%esi
- # out = arg3
- movl 12(%esp,%eax),%edi
- # bytes = arg4
- movl 16(%esp,%eax),%ebx
- # bytes -= 0
- sub $0,%ebx
- # goto done if unsigned<=
- jbe ._done
-._start:
- # in0 = *(uint32 *) (x + 0)
- movl 0(%edx),%eax
- # in1 = *(uint32 *) (x + 4)
- movl 4(%edx),%ecx
- # in2 = *(uint32 *) (x + 8)
- movl 8(%edx),%ebp
- # j0 = in0
- movl %eax,164(%esp)
- # in3 = *(uint32 *) (x + 12)
- movl 12(%edx),%eax
- # j1 = in1
- movl %ecx,168(%esp)
- # in4 = *(uint32 *) (x + 16)
- movl 16(%edx),%ecx
- # j2 = in2
- movl %ebp,172(%esp)
- # in5 = *(uint32 *) (x + 20)
- movl 20(%edx),%ebp
- # j3 = in3
- movl %eax,176(%esp)
- # in6 = *(uint32 *) (x + 24)
- movl 24(%edx),%eax
- # j4 = in4
- movl %ecx,180(%esp)
- # in7 = *(uint32 *) (x + 28)
- movl 28(%edx),%ecx
- # j5 = in5
- movl %ebp,184(%esp)
- # in8 = *(uint32 *) (x + 32)
- movl 32(%edx),%ebp
- # j6 = in6
- movl %eax,188(%esp)
- # in9 = *(uint32 *) (x + 36)
- movl 36(%edx),%eax
- # j7 = in7
- movl %ecx,192(%esp)
- # in10 = *(uint32 *) (x + 40)
- movl 40(%edx),%ecx
- # j8 = in8
- movl %ebp,196(%esp)
- # in11 = *(uint32 *) (x + 44)
- movl 44(%edx),%ebp
- # j9 = in9
- movl %eax,200(%esp)
- # in12 = *(uint32 *) (x + 48)
- movl 48(%edx),%eax
- # j10 = in10
- movl %ecx,204(%esp)
- # in13 = *(uint32 *) (x + 52)
- movl 52(%edx),%ecx
- # j11 = in11
- movl %ebp,208(%esp)
- # in14 = *(uint32 *) (x + 56)
- movl 56(%edx),%ebp
- # j12 = in12
- movl %eax,212(%esp)
- # in15 = *(uint32 *) (x + 60)
- movl 60(%edx),%eax
- # j13 = in13
- movl %ecx,216(%esp)
- # j14 = in14
- movl %ebp,220(%esp)
- # j15 = in15
- movl %eax,224(%esp)
- # x_backup = x
- movl %edx,64(%esp)
-._bytesatleast1:
- # bytes - 64
- cmp $64,%ebx
- # goto nocopy if unsigned>=
- jae ._nocopy
- # ctarget = out
- movl %edi,228(%esp)
- # out = &tmp
- leal 0(%esp),%edi
- # i = bytes
- mov %ebx,%ecx
- # while (i) { *out++ = *m++; --i }
- rep movsb
- # out = &tmp
- leal 0(%esp),%edi
- # m = &tmp
- leal 0(%esp),%esi
-._nocopy:
- # out_backup = out
- movl %edi,72(%esp)
- # m_backup = m
- movl %esi,68(%esp)
- # bytes_backup = bytes
- movl %ebx,76(%esp)
- # in0 = j0
- movl 164(%esp),%eax
- # in1 = j1
- movl 168(%esp),%ecx
- # in2 = j2
- movl 172(%esp),%edx
- # in3 = j3
- movl 176(%esp),%ebx
- # x0 = in0
- movl %eax,100(%esp)
- # x1 = in1
- movl %ecx,104(%esp)
- # x2 = in2
- movl %edx,108(%esp)
- # x3 = in3
- movl %ebx,112(%esp)
- # in4 = j4
- movl 180(%esp),%eax
- # in5 = j5
- movl 184(%esp),%ecx
- # in6 = j6
- movl 188(%esp),%edx
- # in7 = j7
- movl 192(%esp),%ebx
- # x4 = in4
- movl %eax,116(%esp)
- # x5 = in5
- movl %ecx,120(%esp)
- # x6 = in6
- movl %edx,124(%esp)
- # x7 = in7
- movl %ebx,128(%esp)
- # in8 = j8
- movl 196(%esp),%eax
- # in9 = j9
- movl 200(%esp),%ecx
- # in10 = j10
- movl 204(%esp),%edx
- # in11 = j11
- movl 208(%esp),%ebx
- # x8 = in8
- movl %eax,132(%esp)
- # x9 = in9
- movl %ecx,136(%esp)
- # x10 = in10
- movl %edx,140(%esp)
- # x11 = in11
- movl %ebx,144(%esp)
- # in12 = j12
- movl 212(%esp),%eax
- # in13 = j13
- movl 216(%esp),%ecx
- # in14 = j14
- movl 220(%esp),%edx
- # in15 = j15
- movl 224(%esp),%ebx
- # x12 = in12
- movl %eax,148(%esp)
- # x13 = in13
- movl %ecx,152(%esp)
- # x14 = in14
- movl %edx,156(%esp)
- # x15 = in15
- movl %ebx,160(%esp)
- # i = 20
- mov $20,%ebp
- # p = x0
- movl 100(%esp),%eax
- # s = x5
- movl 120(%esp),%ecx
- # t = x10
- movl 140(%esp),%edx
- # w = x15
- movl 160(%esp),%ebx
-._mainloop:
- # x0 = p
- movl %eax,100(%esp)
- # x10 = t
- movl %edx,140(%esp)
- # p += x12
- addl 148(%esp),%eax
- # x5 = s
- movl %ecx,120(%esp)
- # t += x6
- addl 124(%esp),%edx
- # x15 = w
- movl %ebx,160(%esp)
- # r = x1
- movl 104(%esp),%esi
- # r += s
- add %ecx,%esi
- # v = x11
- movl 144(%esp),%edi
- # v += w
- add %ebx,%edi
- # p <<<= 7
- rol $7,%eax
- # p ^= x4
- xorl 116(%esp),%eax
- # t <<<= 7
- rol $7,%edx
- # t ^= x14
- xorl 156(%esp),%edx
- # r <<<= 7
- rol $7,%esi
- # r ^= x9
- xorl 136(%esp),%esi
- # v <<<= 7
- rol $7,%edi
- # v ^= x3
- xorl 112(%esp),%edi
- # x4 = p
- movl %eax,116(%esp)
- # x14 = t
- movl %edx,156(%esp)
- # p += x0
- addl 100(%esp),%eax
- # x9 = r
- movl %esi,136(%esp)
- # t += x10
- addl 140(%esp),%edx
- # x3 = v
- movl %edi,112(%esp)
- # p <<<= 9
- rol $9,%eax
- # p ^= x8
- xorl 132(%esp),%eax
- # t <<<= 9
- rol $9,%edx
- # t ^= x2
- xorl 108(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 9
- rol $9,%ecx
- # s ^= x13
- xorl 152(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 9
- rol $9,%ebx
- # w ^= x7
- xorl 128(%esp),%ebx
- # x8 = p
- movl %eax,132(%esp)
- # x2 = t
- movl %edx,108(%esp)
- # p += x4
- addl 116(%esp),%eax
- # x13 = s
- movl %ecx,152(%esp)
- # t += x14
- addl 156(%esp),%edx
- # x7 = w
- movl %ebx,128(%esp)
- # p <<<= 13
- rol $13,%eax
- # p ^= x12
- xorl 148(%esp),%eax
- # t <<<= 13
- rol $13,%edx
- # t ^= x6
- xorl 124(%esp),%edx
- # r += s
- add %ecx,%esi
- # r <<<= 13
- rol $13,%esi
- # r ^= x1
- xorl 104(%esp),%esi
- # v += w
- add %ebx,%edi
- # v <<<= 13
- rol $13,%edi
- # v ^= x11
- xorl 144(%esp),%edi
- # x12 = p
- movl %eax,148(%esp)
- # x6 = t
- movl %edx,124(%esp)
- # p += x8
- addl 132(%esp),%eax
- # x1 = r
- movl %esi,104(%esp)
- # t += x2
- addl 108(%esp),%edx
- # x11 = v
- movl %edi,144(%esp)
- # p <<<= 18
- rol $18,%eax
- # p ^= x0
- xorl 100(%esp),%eax
- # t <<<= 18
- rol $18,%edx
- # t ^= x10
- xorl 140(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 18
- rol $18,%ecx
- # s ^= x5
- xorl 120(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 18
- rol $18,%ebx
- # w ^= x15
- xorl 160(%esp),%ebx
- # x0 = p
- movl %eax,100(%esp)
- # x10 = t
- movl %edx,140(%esp)
- # p += x3
- addl 112(%esp),%eax
- # p <<<= 7
- rol $7,%eax
- # x5 = s
- movl %ecx,120(%esp)
- # t += x9
- addl 136(%esp),%edx
- # x15 = w
- movl %ebx,160(%esp)
- # r = x4
- movl 116(%esp),%esi
- # r += s
- add %ecx,%esi
- # v = x14
- movl 156(%esp),%edi
- # v += w
- add %ebx,%edi
- # p ^= x1
- xorl 104(%esp),%eax
- # t <<<= 7
- rol $7,%edx
- # t ^= x11
- xorl 144(%esp),%edx
- # r <<<= 7
- rol $7,%esi
- # r ^= x6
- xorl 124(%esp),%esi
- # v <<<= 7
- rol $7,%edi
- # v ^= x12
- xorl 148(%esp),%edi
- # x1 = p
- movl %eax,104(%esp)
- # x11 = t
- movl %edx,144(%esp)
- # p += x0
- addl 100(%esp),%eax
- # x6 = r
- movl %esi,124(%esp)
- # t += x10
- addl 140(%esp),%edx
- # x12 = v
- movl %edi,148(%esp)
- # p <<<= 9
- rol $9,%eax
- # p ^= x2
- xorl 108(%esp),%eax
- # t <<<= 9
- rol $9,%edx
- # t ^= x8
- xorl 132(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 9
- rol $9,%ecx
- # s ^= x7
- xorl 128(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 9
- rol $9,%ebx
- # w ^= x13
- xorl 152(%esp),%ebx
- # x2 = p
- movl %eax,108(%esp)
- # x8 = t
- movl %edx,132(%esp)
- # p += x1
- addl 104(%esp),%eax
- # x7 = s
- movl %ecx,128(%esp)
- # t += x11
- addl 144(%esp),%edx
- # x13 = w
- movl %ebx,152(%esp)
- # p <<<= 13
- rol $13,%eax
- # p ^= x3
- xorl 112(%esp),%eax
- # t <<<= 13
- rol $13,%edx
- # t ^= x9
- xorl 136(%esp),%edx
- # r += s
- add %ecx,%esi
- # r <<<= 13
- rol $13,%esi
- # r ^= x4
- xorl 116(%esp),%esi
- # v += w
- add %ebx,%edi
- # v <<<= 13
- rol $13,%edi
- # v ^= x14
- xorl 156(%esp),%edi
- # x3 = p
- movl %eax,112(%esp)
- # x9 = t
- movl %edx,136(%esp)
- # p += x2
- addl 108(%esp),%eax
- # x4 = r
- movl %esi,116(%esp)
- # t += x8
- addl 132(%esp),%edx
- # x14 = v
- movl %edi,156(%esp)
- # p <<<= 18
- rol $18,%eax
- # p ^= x0
- xorl 100(%esp),%eax
- # t <<<= 18
- rol $18,%edx
- # t ^= x10
- xorl 140(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 18
- rol $18,%ecx
- # s ^= x5
- xorl 120(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 18
- rol $18,%ebx
- # w ^= x15
- xorl 160(%esp),%ebx
- # x0 = p
- movl %eax,100(%esp)
- # x10 = t
- movl %edx,140(%esp)
- # p += x12
- addl 148(%esp),%eax
- # x5 = s
- movl %ecx,120(%esp)
- # t += x6
- addl 124(%esp),%edx
- # x15 = w
- movl %ebx,160(%esp)
- # r = x1
- movl 104(%esp),%esi
- # r += s
- add %ecx,%esi
- # v = x11
- movl 144(%esp),%edi
- # v += w
- add %ebx,%edi
- # p <<<= 7
- rol $7,%eax
- # p ^= x4
- xorl 116(%esp),%eax
- # t <<<= 7
- rol $7,%edx
- # t ^= x14
- xorl 156(%esp),%edx
- # r <<<= 7
- rol $7,%esi
- # r ^= x9
- xorl 136(%esp),%esi
- # v <<<= 7
- rol $7,%edi
- # v ^= x3
- xorl 112(%esp),%edi
- # x4 = p
- movl %eax,116(%esp)
- # x14 = t
- movl %edx,156(%esp)
- # p += x0
- addl 100(%esp),%eax
- # x9 = r
- movl %esi,136(%esp)
- # t += x10
- addl 140(%esp),%edx
- # x3 = v
- movl %edi,112(%esp)
- # p <<<= 9
- rol $9,%eax
- # p ^= x8
- xorl 132(%esp),%eax
- # t <<<= 9
- rol $9,%edx
- # t ^= x2
- xorl 108(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 9
- rol $9,%ecx
- # s ^= x13
- xorl 152(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 9
- rol $9,%ebx
- # w ^= x7
- xorl 128(%esp),%ebx
- # x8 = p
- movl %eax,132(%esp)
- # x2 = t
- movl %edx,108(%esp)
- # p += x4
- addl 116(%esp),%eax
- # x13 = s
- movl %ecx,152(%esp)
- # t += x14
- addl 156(%esp),%edx
- # x7 = w
- movl %ebx,128(%esp)
- # p <<<= 13
- rol $13,%eax
- # p ^= x12
- xorl 148(%esp),%eax
- # t <<<= 13
- rol $13,%edx
- # t ^= x6
- xorl 124(%esp),%edx
- # r += s
- add %ecx,%esi
- # r <<<= 13
- rol $13,%esi
- # r ^= x1
- xorl 104(%esp),%esi
- # v += w
- add %ebx,%edi
- # v <<<= 13
- rol $13,%edi
- # v ^= x11
- xorl 144(%esp),%edi
- # x12 = p
- movl %eax,148(%esp)
- # x6 = t
- movl %edx,124(%esp)
- # p += x8
- addl 132(%esp),%eax
- # x1 = r
- movl %esi,104(%esp)
- # t += x2
- addl 108(%esp),%edx
- # x11 = v
- movl %edi,144(%esp)
- # p <<<= 18
- rol $18,%eax
- # p ^= x0
- xorl 100(%esp),%eax
- # t <<<= 18
- rol $18,%edx
- # t ^= x10
- xorl 140(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 18
- rol $18,%ecx
- # s ^= x5
- xorl 120(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 18
- rol $18,%ebx
- # w ^= x15
- xorl 160(%esp),%ebx
- # x0 = p
- movl %eax,100(%esp)
- # x10 = t
- movl %edx,140(%esp)
- # p += x3
- addl 112(%esp),%eax
- # p <<<= 7
- rol $7,%eax
- # x5 = s
- movl %ecx,120(%esp)
- # t += x9
- addl 136(%esp),%edx
- # x15 = w
- movl %ebx,160(%esp)
- # r = x4
- movl 116(%esp),%esi
- # r += s
- add %ecx,%esi
- # v = x14
- movl 156(%esp),%edi
- # v += w
- add %ebx,%edi
- # p ^= x1
- xorl 104(%esp),%eax
- # t <<<= 7
- rol $7,%edx
- # t ^= x11
- xorl 144(%esp),%edx
- # r <<<= 7
- rol $7,%esi
- # r ^= x6
- xorl 124(%esp),%esi
- # v <<<= 7
- rol $7,%edi
- # v ^= x12
- xorl 148(%esp),%edi
- # x1 = p
- movl %eax,104(%esp)
- # x11 = t
- movl %edx,144(%esp)
- # p += x0
- addl 100(%esp),%eax
- # x6 = r
- movl %esi,124(%esp)
- # t += x10
- addl 140(%esp),%edx
- # x12 = v
- movl %edi,148(%esp)
- # p <<<= 9
- rol $9,%eax
- # p ^= x2
- xorl 108(%esp),%eax
- # t <<<= 9
- rol $9,%edx
- # t ^= x8
- xorl 132(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 9
- rol $9,%ecx
- # s ^= x7
- xorl 128(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 9
- rol $9,%ebx
- # w ^= x13
- xorl 152(%esp),%ebx
- # x2 = p
- movl %eax,108(%esp)
- # x8 = t
- movl %edx,132(%esp)
- # p += x1
- addl 104(%esp),%eax
- # x7 = s
- movl %ecx,128(%esp)
- # t += x11
- addl 144(%esp),%edx
- # x13 = w
- movl %ebx,152(%esp)
- # p <<<= 13
- rol $13,%eax
- # p ^= x3
- xorl 112(%esp),%eax
- # t <<<= 13
- rol $13,%edx
- # t ^= x9
- xorl 136(%esp),%edx
- # r += s
- add %ecx,%esi
- # r <<<= 13
- rol $13,%esi
- # r ^= x4
- xorl 116(%esp),%esi
- # v += w
- add %ebx,%edi
- # v <<<= 13
- rol $13,%edi
- # v ^= x14
- xorl 156(%esp),%edi
- # x3 = p
- movl %eax,112(%esp)
- # x9 = t
- movl %edx,136(%esp)
- # p += x2
- addl 108(%esp),%eax
- # x4 = r
- movl %esi,116(%esp)
- # t += x8
- addl 132(%esp),%edx
- # x14 = v
- movl %edi,156(%esp)
- # p <<<= 18
- rol $18,%eax
- # p ^= x0
- xorl 100(%esp),%eax
- # t <<<= 18
- rol $18,%edx
- # t ^= x10
- xorl 140(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 18
- rol $18,%ecx
- # s ^= x5
- xorl 120(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 18
- rol $18,%ebx
- # w ^= x15
- xorl 160(%esp),%ebx
- # i -= 4
- sub $4,%ebp
- # goto mainloop if unsigned >
- ja ._mainloop
- # x0 = p
- movl %eax,100(%esp)
- # x5 = s
- movl %ecx,120(%esp)
- # x10 = t
- movl %edx,140(%esp)
- # x15 = w
- movl %ebx,160(%esp)
- # out = out_backup
- movl 72(%esp),%edi
- # m = m_backup
- movl 68(%esp),%esi
- # in0 = x0
- movl 100(%esp),%eax
- # in1 = x1
- movl 104(%esp),%ecx
- # in0 += j0
- addl 164(%esp),%eax
- # in1 += j1
- addl 168(%esp),%ecx
- # in0 ^= *(uint32 *) (m + 0)
- xorl 0(%esi),%eax
- # in1 ^= *(uint32 *) (m + 4)
- xorl 4(%esi),%ecx
- # *(uint32 *) (out + 0) = in0
- movl %eax,0(%edi)
- # *(uint32 *) (out + 4) = in1
- movl %ecx,4(%edi)
- # in2 = x2
- movl 108(%esp),%eax
- # in3 = x3
- movl 112(%esp),%ecx
- # in2 += j2
- addl 172(%esp),%eax
- # in3 += j3
- addl 176(%esp),%ecx
- # in2 ^= *(uint32 *) (m + 8)
- xorl 8(%esi),%eax
- # in3 ^= *(uint32 *) (m + 12)
- xorl 12(%esi),%ecx
- # *(uint32 *) (out + 8) = in2
- movl %eax,8(%edi)
- # *(uint32 *) (out + 12) = in3
- movl %ecx,12(%edi)
- # in4 = x4
- movl 116(%esp),%eax
- # in5 = x5
- movl 120(%esp),%ecx
- # in4 += j4
- addl 180(%esp),%eax
- # in5 += j5
- addl 184(%esp),%ecx
- # in4 ^= *(uint32 *) (m + 16)
- xorl 16(%esi),%eax
- # in5 ^= *(uint32 *) (m + 20)
- xorl 20(%esi),%ecx
- # *(uint32 *) (out + 16) = in4
- movl %eax,16(%edi)
- # *(uint32 *) (out + 20) = in5
- movl %ecx,20(%edi)
- # in6 = x6
- movl 124(%esp),%eax
- # in7 = x7
- movl 128(%esp),%ecx
- # in6 += j6
- addl 188(%esp),%eax
- # in7 += j7
- addl 192(%esp),%ecx
- # in6 ^= *(uint32 *) (m + 24)
- xorl 24(%esi),%eax
- # in7 ^= *(uint32 *) (m + 28)
- xorl 28(%esi),%ecx
- # *(uint32 *) (out + 24) = in6
- movl %eax,24(%edi)
- # *(uint32 *) (out + 28) = in7
- movl %ecx,28(%edi)
- # in8 = x8
- movl 132(%esp),%eax
- # in9 = x9
- movl 136(%esp),%ecx
- # in8 += j8
- addl 196(%esp),%eax
- # in9 += j9
- addl 200(%esp),%ecx
- # in8 ^= *(uint32 *) (m + 32)
- xorl 32(%esi),%eax
- # in9 ^= *(uint32 *) (m + 36)
- xorl 36(%esi),%ecx
- # *(uint32 *) (out + 32) = in8
- movl %eax,32(%edi)
- # *(uint32 *) (out + 36) = in9
- movl %ecx,36(%edi)
- # in10 = x10
- movl 140(%esp),%eax
- # in11 = x11
- movl 144(%esp),%ecx
- # in10 += j10
- addl 204(%esp),%eax
- # in11 += j11
- addl 208(%esp),%ecx
- # in10 ^= *(uint32 *) (m + 40)
- xorl 40(%esi),%eax
- # in11 ^= *(uint32 *) (m + 44)
- xorl 44(%esi),%ecx
- # *(uint32 *) (out + 40) = in10
- movl %eax,40(%edi)
- # *(uint32 *) (out + 44) = in11
- movl %ecx,44(%edi)
- # in12 = x12
- movl 148(%esp),%eax
- # in13 = x13
- movl 152(%esp),%ecx
- # in12 += j12
- addl 212(%esp),%eax
- # in13 += j13
- addl 216(%esp),%ecx
- # in12 ^= *(uint32 *) (m + 48)
- xorl 48(%esi),%eax
- # in13 ^= *(uint32 *) (m + 52)
- xorl 52(%esi),%ecx
- # *(uint32 *) (out + 48) = in12
- movl %eax,48(%edi)
- # *(uint32 *) (out + 52) = in13
- movl %ecx,52(%edi)
- # in14 = x14
- movl 156(%esp),%eax
- # in15 = x15
- movl 160(%esp),%ecx
- # in14 += j14
- addl 220(%esp),%eax
- # in15 += j15
- addl 224(%esp),%ecx
- # in14 ^= *(uint32 *) (m + 56)
- xorl 56(%esi),%eax
- # in15 ^= *(uint32 *) (m + 60)
- xorl 60(%esi),%ecx
- # *(uint32 *) (out + 56) = in14
- movl %eax,56(%edi)
- # *(uint32 *) (out + 60) = in15
- movl %ecx,60(%edi)
- # bytes = bytes_backup
- movl 76(%esp),%ebx
- # in8 = j8
- movl 196(%esp),%eax
- # in9 = j9
- movl 200(%esp),%ecx
- # in8 += 1
- add $1,%eax
- # in9 += 0 + carry
- adc $0,%ecx
- # j8 = in8
- movl %eax,196(%esp)
- # j9 = in9
- movl %ecx,200(%esp)
- # bytes - 64
- cmp $64,%ebx
- # goto bytesatleast65 if unsigned>
- ja ._bytesatleast65
- # goto bytesatleast64 if unsigned>=
- jae ._bytesatleast64
- # m = out
- mov %edi,%esi
- # out = ctarget
- movl 228(%esp),%edi
- # i = bytes
- mov %ebx,%ecx
- # while (i) { *out++ = *m++; --i }
- rep movsb
-._bytesatleast64:
- # x = x_backup
- movl 64(%esp),%eax
- # in8 = j8
- movl 196(%esp),%ecx
- # in9 = j9
- movl 200(%esp),%edx
- # *(uint32 *) (x + 32) = in8
- movl %ecx,32(%eax)
- # *(uint32 *) (x + 36) = in9
- movl %edx,36(%eax)
-._done:
- # eax = eax_stack
- movl 80(%esp),%eax
- # ebx = ebx_stack
- movl 84(%esp),%ebx
- # esi = esi_stack
- movl 88(%esp),%esi
- # edi = edi_stack
- movl 92(%esp),%edi
- # ebp = ebp_stack
- movl 96(%esp),%ebp
- # leave
- add %eax,%esp
- ret
-._bytesatleast65:
- # bytes -= 64
- sub $64,%ebx
- # out += 64
- add $64,%edi
- # m += 64
- add $64,%esi
- # goto bytesatleast1
- jmp ._bytesatleast1
-# enter ECRYPT_keysetup
-.text
-.p2align 5
-.globl ECRYPT_keysetup
-ECRYPT_keysetup:
- mov %esp,%eax
- and $31,%eax
- add $256,%eax
- sub %eax,%esp
- # eax_stack = eax
- movl %eax,64(%esp)
- # ebx_stack = ebx
- movl %ebx,68(%esp)
- # esi_stack = esi
- movl %esi,72(%esp)
- # edi_stack = edi
- movl %edi,76(%esp)
- # ebp_stack = ebp
- movl %ebp,80(%esp)
- # k = arg2
- movl 8(%esp,%eax),%ecx
- # kbits = arg3
- movl 12(%esp,%eax),%edx
- # x = arg1
- movl 4(%esp,%eax),%eax
- # in1 = *(uint32 *) (k + 0)
- movl 0(%ecx),%ebx
- # in2 = *(uint32 *) (k + 4)
- movl 4(%ecx),%esi
- # in3 = *(uint32 *) (k + 8)
- movl 8(%ecx),%edi
- # in4 = *(uint32 *) (k + 12)
- movl 12(%ecx),%ebp
- # *(uint32 *) (x + 4) = in1
- movl %ebx,4(%eax)
- # *(uint32 *) (x + 8) = in2
- movl %esi,8(%eax)
- # *(uint32 *) (x + 12) = in3
- movl %edi,12(%eax)
- # *(uint32 *) (x + 16) = in4
- movl %ebp,16(%eax)
- # kbits - 256
- cmp $256,%edx
- # goto kbits128 if unsigned<
- jb ._kbits128
-._kbits256:
- # in11 = *(uint32 *) (k + 16)
- movl 16(%ecx),%edx
- # in12 = *(uint32 *) (k + 20)
- movl 20(%ecx),%ebx
- # in13 = *(uint32 *) (k + 24)
- movl 24(%ecx),%esi
- # in14 = *(uint32 *) (k + 28)
- movl 28(%ecx),%ecx
- # *(uint32 *) (x + 44) = in11
- movl %edx,44(%eax)
- # *(uint32 *) (x + 48) = in12
- movl %ebx,48(%eax)
- # *(uint32 *) (x + 52) = in13
- movl %esi,52(%eax)
- # *(uint32 *) (x + 56) = in14
- movl %ecx,56(%eax)
- # in0 = 1634760805
- mov $1634760805,%ecx
- # in5 = 857760878
- mov $857760878,%edx
- # in10 = 2036477234
- mov $2036477234,%ebx
- # in15 = 1797285236
- mov $1797285236,%esi
- # *(uint32 *) (x + 0) = in0
- movl %ecx,0(%eax)
- # *(uint32 *) (x + 20) = in5
- movl %edx,20(%eax)
- # *(uint32 *) (x + 40) = in10
- movl %ebx,40(%eax)
- # *(uint32 *) (x + 60) = in15
- movl %esi,60(%eax)
- # goto keysetupdone
- jmp ._keysetupdone
-._kbits128:
- # in11 = *(uint32 *) (k + 0)
- movl 0(%ecx),%edx
- # in12 = *(uint32 *) (k + 4)
- movl 4(%ecx),%ebx
- # in13 = *(uint32 *) (k + 8)
- movl 8(%ecx),%esi
- # in14 = *(uint32 *) (k + 12)
- movl 12(%ecx),%ecx
- # *(uint32 *) (x + 44) = in11
- movl %edx,44(%eax)
- # *(uint32 *) (x + 48) = in12
- movl %ebx,48(%eax)
- # *(uint32 *) (x + 52) = in13
- movl %esi,52(%eax)
- # *(uint32 *) (x + 56) = in14
- movl %ecx,56(%eax)
- # in0 = 1634760805
- mov $1634760805,%ecx
- # in5 = 824206446
- mov $824206446,%edx
- # in10 = 2036477238
- mov $2036477238,%ebx
- # in15 = 1797285236
- mov $1797285236,%esi
- # *(uint32 *) (x + 0) = in0
- movl %ecx,0(%eax)
- # *(uint32 *) (x + 20) = in5
- movl %edx,20(%eax)
- # *(uint32 *) (x + 40) = in10
- movl %ebx,40(%eax)
- # *(uint32 *) (x + 60) = in15
- movl %esi,60(%eax)
-._keysetupdone:
- # eax = eax_stack
- movl 64(%esp),%eax
- # ebx = ebx_stack
- movl 68(%esp),%ebx
- # esi = esi_stack
- movl 72(%esp),%esi
- # edi = edi_stack
- movl 76(%esp),%edi
- # ebp = ebp_stack
- movl 80(%esp),%ebp
- # leave
- add %eax,%esp
- ret
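The decimal immediates loaded above are the standard Salsa20 input constants: read as little-endian 32-bit words they spell the ASCII strings "expand 32-byte k" (sigma, the 256-bit-key path) and "expand 16-byte k" (tau, the 128-bit-key path):

#include <stdint.h>

/* 1634760805 = 0x61707865 "expa"    857760878 = 0x3320646e "nd 3"
 * 2036477234 = 0x79622d32 "2-by"   1797285236 = 0x6b206574 "te k" */
static const uint32_t sigma[4] = { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 };

/*  824206446 = 0x3120646e "nd 1"   2036477238 = 0x79622d36 "6-by" */
static const uint32_t tau[4]   = { 0x61707865, 0x3120646e, 0x79622d36, 0x6b206574 };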
-# enter ECRYPT_ivsetup
-.text
-.p2align 5
-.globl ECRYPT_ivsetup
-ECRYPT_ivsetup:
- mov %esp,%eax
- and $31,%eax
- add $256,%eax
- sub %eax,%esp
- # eax_stack = eax
- movl %eax,64(%esp)
- # ebx_stack = ebx
- movl %ebx,68(%esp)
- # esi_stack = esi
- movl %esi,72(%esp)
- # edi_stack = edi
- movl %edi,76(%esp)
- # ebp_stack = ebp
- movl %ebp,80(%esp)
- # iv = arg2
- movl 8(%esp,%eax),%ecx
- # x = arg1
- movl 4(%esp,%eax),%eax
- # in6 = *(uint32 *) (iv + 0)
- movl 0(%ecx),%edx
- # in7 = *(uint32 *) (iv + 4)
- movl 4(%ecx),%ecx
- # in8 = 0
- mov $0,%ebx
- # in9 = 0
- mov $0,%esi
- # *(uint32 *) (x + 24) = in6
- movl %edx,24(%eax)
- # *(uint32 *) (x + 28) = in7
- movl %ecx,28(%eax)
- # *(uint32 *) (x + 32) = in8
- movl %ebx,32(%eax)
- # *(uint32 *) (x + 36) = in9
- movl %esi,36(%eax)
- # eax = eax_stack
- movl 64(%esp),%eax
- # ebx = ebx_stack
- movl 68(%esp),%ebx
- # esi = esi_stack
- movl 72(%esp),%esi
- # edi = edi_stack
- movl 76(%esp),%edi
- # ebp = ebp_stack
- movl 80(%esp),%ebp
- # leave
- add %eax,%esp
- ret
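Both this file and the x86_64 version below implement the same Salsa20/20 core: the rol $7/$9/$13/$18 sequences are the quarter-round, the main loop retires four rounds (two double-rounds) per iteration until the round counter, started at 20, reaches zero, and the j0..j15 additions at the end are the final feed-forward of the input state. A compact C reference of that core:

#include <stdint.h>

#define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))

/* one Salsa20 quarter-round: the same 7/9/13/18 rotations as the asm */
#define QR(a, b, c, d)                    \
    do {                                  \
        (b) ^= ROTL32((a) + (d), 7);      \
        (c) ^= ROTL32((b) + (a), 9);      \
        (d) ^= ROTL32((c) + (b), 13);     \
        (a) ^= ROTL32((d) + (c), 18);     \
    } while (0)

/* Salsa20/20 core: 10 double-rounds (column round then row round),
 * then add the input state back in (the j0..j15 feed-forward) */
static void salsa20_core(uint32_t out[16], const uint32_t in[16])
{
    uint32_t x[16];
    int i;

    for (i = 0; i < 16; i++)
        x[i] = in[i];

    for (i = 0; i < 20; i += 2) {
        /* column round */
        QR(x[0],  x[4],  x[8],  x[12]);
        QR(x[5],  x[9],  x[13], x[1]);
        QR(x[10], x[14], x[2],  x[6]);
        QR(x[15], x[3],  x[7],  x[11]);
        /* row round */
        QR(x[0],  x[1],  x[2],  x[3]);
        QR(x[5],  x[6],  x[7],  x[4]);
        QR(x[10], x[11], x[8],  x[9]);
        QR(x[15], x[12], x[13], x[14]);
    }

    for (i = 0; i < 16; i++)
        out[i] = x[i] + in[i];
}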
diff --git a/ANDROID_3.4.5/arch/x86/crypto/salsa20-x86_64-asm_64.S b/ANDROID_3.4.5/arch/x86/crypto/salsa20-x86_64-asm_64.S
deleted file mode 100644
index 6214a9b0..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/salsa20-x86_64-asm_64.S
+++ /dev/null
@@ -1,920 +0,0 @@
-# enter ECRYPT_encrypt_bytes
-.text
-.p2align 5
-.globl ECRYPT_encrypt_bytes
-ECRYPT_encrypt_bytes:
- mov %rsp,%r11
- and $31,%r11
- add $256,%r11
- sub %r11,%rsp
- # x = arg1
- mov %rdi,%r8
- # m = arg2
- mov %rsi,%rsi
- # out = arg3
- mov %rdx,%rdi
- # bytes = arg4
- mov %rcx,%rdx
- # unsigned>? bytes - 0
- cmp $0,%rdx
- # comment:fp stack unchanged by jump
- # goto done if !unsigned>
- jbe ._done
- # comment:fp stack unchanged by fallthrough
-# start:
-._start:
- # r11_stack = r11
- movq %r11,0(%rsp)
- # r12_stack = r12
- movq %r12,8(%rsp)
- # r13_stack = r13
- movq %r13,16(%rsp)
- # r14_stack = r14
- movq %r14,24(%rsp)
- # r15_stack = r15
- movq %r15,32(%rsp)
- # rbx_stack = rbx
- movq %rbx,40(%rsp)
- # rbp_stack = rbp
- movq %rbp,48(%rsp)
- # in0 = *(uint64 *) (x + 0)
- movq 0(%r8),%rcx
- # in2 = *(uint64 *) (x + 8)
- movq 8(%r8),%r9
- # in4 = *(uint64 *) (x + 16)
- movq 16(%r8),%rax
- # in6 = *(uint64 *) (x + 24)
- movq 24(%r8),%r10
- # in8 = *(uint64 *) (x + 32)
- movq 32(%r8),%r11
- # in10 = *(uint64 *) (x + 40)
- movq 40(%r8),%r12
- # in12 = *(uint64 *) (x + 48)
- movq 48(%r8),%r13
- # in14 = *(uint64 *) (x + 56)
- movq 56(%r8),%r14
- # j0 = in0
- movq %rcx,56(%rsp)
- # j2 = in2
- movq %r9,64(%rsp)
- # j4 = in4
- movq %rax,72(%rsp)
- # j6 = in6
- movq %r10,80(%rsp)
- # j8 = in8
- movq %r11,88(%rsp)
- # j10 = in10
- movq %r12,96(%rsp)
- # j12 = in12
- movq %r13,104(%rsp)
- # j14 = in14
- movq %r14,112(%rsp)
- # x_backup = x
- movq %r8,120(%rsp)
-# bytesatleast1:
-._bytesatleast1:
- # unsigned<? bytes - 64
- cmp $64,%rdx
- # comment:fp stack unchanged by jump
- # goto nocopy if !unsigned<
- jae ._nocopy
- # ctarget = out
- movq %rdi,128(%rsp)
- # out = &tmp
- leaq 192(%rsp),%rdi
- # i = bytes
- mov %rdx,%rcx
- # while (i) { *out++ = *m++; --i }
- rep movsb
- # out = &tmp
- leaq 192(%rsp),%rdi
- # m = &tmp
- leaq 192(%rsp),%rsi
- # comment:fp stack unchanged by fallthrough
-# nocopy:
-._nocopy:
- # out_backup = out
- movq %rdi,136(%rsp)
- # m_backup = m
- movq %rsi,144(%rsp)
- # bytes_backup = bytes
- movq %rdx,152(%rsp)
- # x1 = j0
- movq 56(%rsp),%rdi
- # x0 = x1
- mov %rdi,%rdx
- # (uint64) x1 >>= 32
- shr $32,%rdi
- # x3 = j2
- movq 64(%rsp),%rsi
- # x2 = x3
- mov %rsi,%rcx
- # (uint64) x3 >>= 32
- shr $32,%rsi
- # x5 = j4
- movq 72(%rsp),%r8
- # x4 = x5
- mov %r8,%r9
- # (uint64) x5 >>= 32
- shr $32,%r8
- # x5_stack = x5
- movq %r8,160(%rsp)
- # x7 = j6
- movq 80(%rsp),%r8
- # x6 = x7
- mov %r8,%rax
- # (uint64) x7 >>= 32
- shr $32,%r8
- # x9 = j8
- movq 88(%rsp),%r10
- # x8 = x9
- mov %r10,%r11
- # (uint64) x9 >>= 32
- shr $32,%r10
- # x11 = j10
- movq 96(%rsp),%r12
- # x10 = x11
- mov %r12,%r13
- # x10_stack = x10
- movq %r13,168(%rsp)
- # (uint64) x11 >>= 32
- shr $32,%r12
- # x13 = j12
- movq 104(%rsp),%r13
- # x12 = x13
- mov %r13,%r14
- # (uint64) x13 >>= 32
- shr $32,%r13
- # x15 = j14
- movq 112(%rsp),%r15
- # x14 = x15
- mov %r15,%rbx
- # (uint64) x15 >>= 32
- shr $32,%r15
- # x15_stack = x15
- movq %r15,176(%rsp)
- # i = 20
- mov $20,%r15
-# mainloop:
-._mainloop:
- # i_backup = i
- movq %r15,184(%rsp)
- # x5 = x5_stack
- movq 160(%rsp),%r15
- # a = x12 + x0
- lea (%r14,%rdx),%rbp
- # (uint32) a <<<= 7
- rol $7,%ebp
- # x4 ^= a
- xor %rbp,%r9
- # b = x1 + x5
- lea (%rdi,%r15),%rbp
- # (uint32) b <<<= 7
- rol $7,%ebp
- # x9 ^= b
- xor %rbp,%r10
- # a = x0 + x4
- lea (%rdx,%r9),%rbp
- # (uint32) a <<<= 9
- rol $9,%ebp
- # x8 ^= a
- xor %rbp,%r11
- # b = x5 + x9
- lea (%r15,%r10),%rbp
- # (uint32) b <<<= 9
- rol $9,%ebp
- # x13 ^= b
- xor %rbp,%r13
- # a = x4 + x8
- lea (%r9,%r11),%rbp
- # (uint32) a <<<= 13
- rol $13,%ebp
- # x12 ^= a
- xor %rbp,%r14
- # b = x9 + x13
- lea (%r10,%r13),%rbp
- # (uint32) b <<<= 13
- rol $13,%ebp
- # x1 ^= b
- xor %rbp,%rdi
- # a = x8 + x12
- lea (%r11,%r14),%rbp
- # (uint32) a <<<= 18
- rol $18,%ebp
- # x0 ^= a
- xor %rbp,%rdx
- # b = x13 + x1
- lea (%r13,%rdi),%rbp
- # (uint32) b <<<= 18
- rol $18,%ebp
- # x5 ^= b
- xor %rbp,%r15
- # x10 = x10_stack
- movq 168(%rsp),%rbp
- # x5_stack = x5
- movq %r15,160(%rsp)
- # c = x6 + x10
- lea (%rax,%rbp),%r15
- # (uint32) c <<<= 7
- rol $7,%r15d
- # x14 ^= c
- xor %r15,%rbx
- # c = x10 + x14
- lea (%rbp,%rbx),%r15
- # (uint32) c <<<= 9
- rol $9,%r15d
- # x2 ^= c
- xor %r15,%rcx
- # c = x14 + x2
- lea (%rbx,%rcx),%r15
- # (uint32) c <<<= 13
- rol $13,%r15d
- # x6 ^= c
- xor %r15,%rax
- # c = x2 + x6
- lea (%rcx,%rax),%r15
- # (uint32) c <<<= 18
- rol $18,%r15d
- # x10 ^= c
- xor %r15,%rbp
- # x15 = x15_stack
- movq 176(%rsp),%r15
- # x10_stack = x10
- movq %rbp,168(%rsp)
- # d = x11 + x15
- lea (%r12,%r15),%rbp
- # (uint32) d <<<= 7
- rol $7,%ebp
- # x3 ^= d
- xor %rbp,%rsi
- # d = x15 + x3
- lea (%r15,%rsi),%rbp
- # (uint32) d <<<= 9
- rol $9,%ebp
- # x7 ^= d
- xor %rbp,%r8
- # d = x3 + x7
- lea (%rsi,%r8),%rbp
- # (uint32) d <<<= 13
- rol $13,%ebp
- # x11 ^= d
- xor %rbp,%r12
- # d = x7 + x11
- lea (%r8,%r12),%rbp
- # (uint32) d <<<= 18
- rol $18,%ebp
- # x15 ^= d
- xor %rbp,%r15
- # x15_stack = x15
- movq %r15,176(%rsp)
- # x5 = x5_stack
- movq 160(%rsp),%r15
- # a = x3 + x0
- lea (%rsi,%rdx),%rbp
- # (uint32) a <<<= 7
- rol $7,%ebp
- # x1 ^= a
- xor %rbp,%rdi
- # b = x4 + x5
- lea (%r9,%r15),%rbp
- # (uint32) b <<<= 7
- rol $7,%ebp
- # x6 ^= b
- xor %rbp,%rax
- # a = x0 + x1
- lea (%rdx,%rdi),%rbp
- # (uint32) a <<<= 9
- rol $9,%ebp
- # x2 ^= a
- xor %rbp,%rcx
- # b = x5 + x6
- lea (%r15,%rax),%rbp
- # (uint32) b <<<= 9
- rol $9,%ebp
- # x7 ^= b
- xor %rbp,%r8
- # a = x1 + x2
- lea (%rdi,%rcx),%rbp
- # (uint32) a <<<= 13
- rol $13,%ebp
- # x3 ^= a
- xor %rbp,%rsi
- # b = x6 + x7
- lea (%rax,%r8),%rbp
- # (uint32) b <<<= 13
- rol $13,%ebp
- # x4 ^= b
- xor %rbp,%r9
- # a = x2 + x3
- lea (%rcx,%rsi),%rbp
- # (uint32) a <<<= 18
- rol $18,%ebp
- # x0 ^= a
- xor %rbp,%rdx
- # b = x7 + x4
- lea (%r8,%r9),%rbp
- # (uint32) b <<<= 18
- rol $18,%ebp
- # x5 ^= b
- xor %rbp,%r15
- # x10 = x10_stack
- movq 168(%rsp),%rbp
- # x5_stack = x5
- movq %r15,160(%rsp)
- # c = x9 + x10
- lea (%r10,%rbp),%r15
- # (uint32) c <<<= 7
- rol $7,%r15d
- # x11 ^= c
- xor %r15,%r12
- # c = x10 + x11
- lea (%rbp,%r12),%r15
- # (uint32) c <<<= 9
- rol $9,%r15d
- # x8 ^= c
- xor %r15,%r11
- # c = x11 + x8
- lea (%r12,%r11),%r15
- # (uint32) c <<<= 13
- rol $13,%r15d
- # x9 ^= c
- xor %r15,%r10
- # c = x8 + x9
- lea (%r11,%r10),%r15
- # (uint32) c <<<= 18
- rol $18,%r15d
- # x10 ^= c
- xor %r15,%rbp
- # x15 = x15_stack
- movq 176(%rsp),%r15
- # x10_stack = x10
- movq %rbp,168(%rsp)
- # d = x14 + x15
- lea (%rbx,%r15),%rbp
- # (uint32) d <<<= 7
- rol $7,%ebp
- # x12 ^= d
- xor %rbp,%r14
- # d = x15 + x12
- lea (%r15,%r14),%rbp
- # (uint32) d <<<= 9
- rol $9,%ebp
- # x13 ^= d
- xor %rbp,%r13
- # d = x12 + x13
- lea (%r14,%r13),%rbp
- # (uint32) d <<<= 13
- rol $13,%ebp
- # x14 ^= d
- xor %rbp,%rbx
- # d = x13 + x14
- lea (%r13,%rbx),%rbp
- # (uint32) d <<<= 18
- rol $18,%ebp
- # x15 ^= d
- xor %rbp,%r15
- # x15_stack = x15
- movq %r15,176(%rsp)
- # x5 = x5_stack
- movq 160(%rsp),%r15
- # a = x12 + x0
- lea (%r14,%rdx),%rbp
- # (uint32) a <<<= 7
- rol $7,%ebp
- # x4 ^= a
- xor %rbp,%r9
- # b = x1 + x5
- lea (%rdi,%r15),%rbp
- # (uint32) b <<<= 7
- rol $7,%ebp
- # x9 ^= b
- xor %rbp,%r10
- # a = x0 + x4
- lea (%rdx,%r9),%rbp
- # (uint32) a <<<= 9
- rol $9,%ebp
- # x8 ^= a
- xor %rbp,%r11
- # b = x5 + x9
- lea (%r15,%r10),%rbp
- # (uint32) b <<<= 9
- rol $9,%ebp
- # x13 ^= b
- xor %rbp,%r13
- # a = x4 + x8
- lea (%r9,%r11),%rbp
- # (uint32) a <<<= 13
- rol $13,%ebp
- # x12 ^= a
- xor %rbp,%r14
- # b = x9 + x13
- lea (%r10,%r13),%rbp
- # (uint32) b <<<= 13
- rol $13,%ebp
- # x1 ^= b
- xor %rbp,%rdi
- # a = x8 + x12
- lea (%r11,%r14),%rbp
- # (uint32) a <<<= 18
- rol $18,%ebp
- # x0 ^= a
- xor %rbp,%rdx
- # b = x13 + x1
- lea (%r13,%rdi),%rbp
- # (uint32) b <<<= 18
- rol $18,%ebp
- # x5 ^= b
- xor %rbp,%r15
- # x10 = x10_stack
- movq 168(%rsp),%rbp
- # x5_stack = x5
- movq %r15,160(%rsp)
- # c = x6 + x10
- lea (%rax,%rbp),%r15
- # (uint32) c <<<= 7
- rol $7,%r15d
- # x14 ^= c
- xor %r15,%rbx
- # c = x10 + x14
- lea (%rbp,%rbx),%r15
- # (uint32) c <<<= 9
- rol $9,%r15d
- # x2 ^= c
- xor %r15,%rcx
- # c = x14 + x2
- lea (%rbx,%rcx),%r15
- # (uint32) c <<<= 13
- rol $13,%r15d
- # x6 ^= c
- xor %r15,%rax
- # c = x2 + x6
- lea (%rcx,%rax),%r15
- # (uint32) c <<<= 18
- rol $18,%r15d
- # x10 ^= c
- xor %r15,%rbp
- # x15 = x15_stack
- movq 176(%rsp),%r15
- # x10_stack = x10
- movq %rbp,168(%rsp)
- # d = x11 + x15
- lea (%r12,%r15),%rbp
- # (uint32) d <<<= 7
- rol $7,%ebp
- # x3 ^= d
- xor %rbp,%rsi
- # d = x15 + x3
- lea (%r15,%rsi),%rbp
- # (uint32) d <<<= 9
- rol $9,%ebp
- # x7 ^= d
- xor %rbp,%r8
- # d = x3 + x7
- lea (%rsi,%r8),%rbp
- # (uint32) d <<<= 13
- rol $13,%ebp
- # x11 ^= d
- xor %rbp,%r12
- # d = x7 + x11
- lea (%r8,%r12),%rbp
- # (uint32) d <<<= 18
- rol $18,%ebp
- # x15 ^= d
- xor %rbp,%r15
- # x15_stack = x15
- movq %r15,176(%rsp)
- # x5 = x5_stack
- movq 160(%rsp),%r15
- # a = x3 + x0
- lea (%rsi,%rdx),%rbp
- # (uint32) a <<<= 7
- rol $7,%ebp
- # x1 ^= a
- xor %rbp,%rdi
- # b = x4 + x5
- lea (%r9,%r15),%rbp
- # (uint32) b <<<= 7
- rol $7,%ebp
- # x6 ^= b
- xor %rbp,%rax
- # a = x0 + x1
- lea (%rdx,%rdi),%rbp
- # (uint32) a <<<= 9
- rol $9,%ebp
- # x2 ^= a
- xor %rbp,%rcx
- # b = x5 + x6
- lea (%r15,%rax),%rbp
- # (uint32) b <<<= 9
- rol $9,%ebp
- # x7 ^= b
- xor %rbp,%r8
- # a = x1 + x2
- lea (%rdi,%rcx),%rbp
- # (uint32) a <<<= 13
- rol $13,%ebp
- # x3 ^= a
- xor %rbp,%rsi
- # b = x6 + x7
- lea (%rax,%r8),%rbp
- # (uint32) b <<<= 13
- rol $13,%ebp
- # x4 ^= b
- xor %rbp,%r9
- # a = x2 + x3
- lea (%rcx,%rsi),%rbp
- # (uint32) a <<<= 18
- rol $18,%ebp
- # x0 ^= a
- xor %rbp,%rdx
- # b = x7 + x4
- lea (%r8,%r9),%rbp
- # (uint32) b <<<= 18
- rol $18,%ebp
- # x5 ^= b
- xor %rbp,%r15
- # x10 = x10_stack
- movq 168(%rsp),%rbp
- # x5_stack = x5
- movq %r15,160(%rsp)
- # c = x9 + x10
- lea (%r10,%rbp),%r15
- # (uint32) c <<<= 7
- rol $7,%r15d
- # x11 ^= c
- xor %r15,%r12
- # c = x10 + x11
- lea (%rbp,%r12),%r15
- # (uint32) c <<<= 9
- rol $9,%r15d
- # x8 ^= c
- xor %r15,%r11
- # c = x11 + x8
- lea (%r12,%r11),%r15
- # (uint32) c <<<= 13
- rol $13,%r15d
- # x9 ^= c
- xor %r15,%r10
- # c = x8 + x9
- lea (%r11,%r10),%r15
- # (uint32) c <<<= 18
- rol $18,%r15d
- # x10 ^= c
- xor %r15,%rbp
- # x15 = x15_stack
- movq 176(%rsp),%r15
- # x10_stack = x10
- movq %rbp,168(%rsp)
- # d = x14 + x15
- lea (%rbx,%r15),%rbp
- # (uint32) d <<<= 7
- rol $7,%ebp
- # x12 ^= d
- xor %rbp,%r14
- # d = x15 + x12
- lea (%r15,%r14),%rbp
- # (uint32) d <<<= 9
- rol $9,%ebp
- # x13 ^= d
- xor %rbp,%r13
- # d = x12 + x13
- lea (%r14,%r13),%rbp
- # (uint32) d <<<= 13
- rol $13,%ebp
- # x14 ^= d
- xor %rbp,%rbx
- # d = x13 + x14
- lea (%r13,%rbx),%rbp
- # (uint32) d <<<= 18
- rol $18,%ebp
- # x15 ^= d
- xor %rbp,%r15
- # x15_stack = x15
- movq %r15,176(%rsp)
- # i = i_backup
- movq 184(%rsp),%r15
- # unsigned>? i -= 4
- sub $4,%r15
- # comment:fp stack unchanged by jump
- # goto mainloop if unsigned>
- ja ._mainloop
- # (uint32) x2 += j2
- addl 64(%rsp),%ecx
- # x3 <<= 32
- shl $32,%rsi
- # x3 += j2
- addq 64(%rsp),%rsi
- # (uint64) x3 >>= 32
- shr $32,%rsi
- # x3 <<= 32
- shl $32,%rsi
- # x2 += x3
- add %rsi,%rcx
- # (uint32) x6 += j6
- addl 80(%rsp),%eax
- # x7 <<= 32
- shl $32,%r8
- # x7 += j6
- addq 80(%rsp),%r8
- # (uint64) x7 >>= 32
- shr $32,%r8
- # x7 <<= 32
- shl $32,%r8
- # x6 += x7
- add %r8,%rax
- # (uint32) x8 += j8
- addl 88(%rsp),%r11d
- # x9 <<= 32
- shl $32,%r10
- # x9 += j8
- addq 88(%rsp),%r10
- # (uint64) x9 >>= 32
- shr $32,%r10
- # x9 <<= 32
- shl $32,%r10
- # x8 += x9
- add %r10,%r11
- # (uint32) x12 += j12
- addl 104(%rsp),%r14d
- # x13 <<= 32
- shl $32,%r13
- # x13 += j12
- addq 104(%rsp),%r13
- # (uint64) x13 >>= 32
- shr $32,%r13
- # x13 <<= 32
- shl $32,%r13
- # x12 += x13
- add %r13,%r14
- # (uint32) x0 += j0
- addl 56(%rsp),%edx
- # x1 <<= 32
- shl $32,%rdi
- # x1 += j0
- addq 56(%rsp),%rdi
- # (uint64) x1 >>= 32
- shr $32,%rdi
- # x1 <<= 32
- shl $32,%rdi
- # x0 += x1
- add %rdi,%rdx
- # x5 = x5_stack
- movq 160(%rsp),%rdi
- # (uint32) x4 += j4
- addl 72(%rsp),%r9d
- # x5 <<= 32
- shl $32,%rdi
- # x5 += j4
- addq 72(%rsp),%rdi
- # (uint64) x5 >>= 32
- shr $32,%rdi
- # x5 <<= 32
- shl $32,%rdi
- # x4 += x5
- add %rdi,%r9
- # x10 = x10_stack
- movq 168(%rsp),%r8
- # (uint32) x10 += j10
- addl 96(%rsp),%r8d
- # x11 <<= 32
- shl $32,%r12
- # x11 += j10
- addq 96(%rsp),%r12
- # (uint64) x11 >>= 32
- shr $32,%r12
- # x11 <<= 32
- shl $32,%r12
- # x10 += x11
- add %r12,%r8
- # x15 = x15_stack
- movq 176(%rsp),%rdi
- # (uint32) x14 += j14
- addl 112(%rsp),%ebx
- # x15 <<= 32
- shl $32,%rdi
- # x15 += j14
- addq 112(%rsp),%rdi
- # (uint64) x15 >>= 32
- shr $32,%rdi
- # x15 <<= 32
- shl $32,%rdi
- # x14 += x15
- add %rdi,%rbx
- # out = out_backup
- movq 136(%rsp),%rdi
- # m = m_backup
- movq 144(%rsp),%rsi
- # x0 ^= *(uint64 *) (m + 0)
- xorq 0(%rsi),%rdx
- # *(uint64 *) (out + 0) = x0
- movq %rdx,0(%rdi)
- # x2 ^= *(uint64 *) (m + 8)
- xorq 8(%rsi),%rcx
- # *(uint64 *) (out + 8) = x2
- movq %rcx,8(%rdi)
- # x4 ^= *(uint64 *) (m + 16)
- xorq 16(%rsi),%r9
- # *(uint64 *) (out + 16) = x4
- movq %r9,16(%rdi)
- # x6 ^= *(uint64 *) (m + 24)
- xorq 24(%rsi),%rax
- # *(uint64 *) (out + 24) = x6
- movq %rax,24(%rdi)
- # x8 ^= *(uint64 *) (m + 32)
- xorq 32(%rsi),%r11
- # *(uint64 *) (out + 32) = x8
- movq %r11,32(%rdi)
- # x10 ^= *(uint64 *) (m + 40)
- xorq 40(%rsi),%r8
- # *(uint64 *) (out + 40) = x10
- movq %r8,40(%rdi)
- # x12 ^= *(uint64 *) (m + 48)
- xorq 48(%rsi),%r14
- # *(uint64 *) (out + 48) = x12
- movq %r14,48(%rdi)
- # x14 ^= *(uint64 *) (m + 56)
- xorq 56(%rsi),%rbx
- # *(uint64 *) (out + 56) = x14
- movq %rbx,56(%rdi)
- # bytes = bytes_backup
- movq 152(%rsp),%rdx
- # in8 = j8
- movq 88(%rsp),%rcx
- # in8 += 1
- add $1,%rcx
- # j8 = in8
- movq %rcx,88(%rsp)
- # unsigned>? unsigned<? bytes - 64
- cmp $64,%rdx
- # comment:fp stack unchanged by jump
- # goto bytesatleast65 if unsigned>
- ja ._bytesatleast65
- # comment:fp stack unchanged by jump
- # goto bytesatleast64 if !unsigned<
- jae ._bytesatleast64
- # m = out
- mov %rdi,%rsi
- # out = ctarget
- movq 128(%rsp),%rdi
- # i = bytes
- mov %rdx,%rcx
- # while (i) { *out++ = *m++; --i }
- rep movsb
- # comment:fp stack unchanged by fallthrough
-# bytesatleast64:
-._bytesatleast64:
- # x = x_backup
- movq 120(%rsp),%rdi
- # in8 = j8
- movq 88(%rsp),%rsi
- # *(uint64 *) (x + 32) = in8
- movq %rsi,32(%rdi)
- # r11 = r11_stack
- movq 0(%rsp),%r11
- # r12 = r12_stack
- movq 8(%rsp),%r12
- # r13 = r13_stack
- movq 16(%rsp),%r13
- # r14 = r14_stack
- movq 24(%rsp),%r14
- # r15 = r15_stack
- movq 32(%rsp),%r15
- # rbx = rbx_stack
- movq 40(%rsp),%rbx
- # rbp = rbp_stack
- movq 48(%rsp),%rbp
- # comment:fp stack unchanged by fallthrough
-# done:
-._done:
- # leave
- add %r11,%rsp
- mov %rdi,%rax
- mov %rsi,%rdx
- ret
-# bytesatleast65:
-._bytesatleast65:
- # bytes -= 64
- sub $64,%rdx
- # out += 64
- add $64,%rdi
- # m += 64
- add $64,%rsi
- # comment:fp stack unchanged by jump
- # goto bytesatleast1
- jmp ._bytesatleast1
-# enter ECRYPT_keysetup
-.text
-.p2align 5
-.globl ECRYPT_keysetup
-ECRYPT_keysetup:
- mov %rsp,%r11
- and $31,%r11
- add $256,%r11
- sub %r11,%rsp
- # k = arg2
- mov %rsi,%rsi
- # kbits = arg3
- mov %rdx,%rdx
- # x = arg1
- mov %rdi,%rdi
- # in0 = *(uint64 *) (k + 0)
- movq 0(%rsi),%r8
- # in2 = *(uint64 *) (k + 8)
- movq 8(%rsi),%r9
- # *(uint64 *) (x + 4) = in0
- movq %r8,4(%rdi)
- # *(uint64 *) (x + 12) = in2
- movq %r9,12(%rdi)
- # unsigned<? kbits - 256
- cmp $256,%rdx
- # comment:fp stack unchanged by jump
- # goto kbits128 if unsigned<
- jb ._kbits128
-# kbits256:
-._kbits256:
- # in10 = *(uint64 *) (k + 16)
- movq 16(%rsi),%rdx
- # in12 = *(uint64 *) (k + 24)
- movq 24(%rsi),%rsi
- # *(uint64 *) (x + 44) = in10
- movq %rdx,44(%rdi)
- # *(uint64 *) (x + 52) = in12
- movq %rsi,52(%rdi)
- # in0 = 1634760805
- mov $1634760805,%rsi
- # in4 = 857760878
- mov $857760878,%rdx
- # in10 = 2036477234
- mov $2036477234,%rcx
- # in14 = 1797285236
- mov $1797285236,%r8
- # *(uint32 *) (x + 0) = in0
- movl %esi,0(%rdi)
- # *(uint32 *) (x + 20) = in4
- movl %edx,20(%rdi)
- # *(uint32 *) (x + 40) = in10
- movl %ecx,40(%rdi)
- # *(uint32 *) (x + 60) = in14
- movl %r8d,60(%rdi)
- # comment:fp stack unchanged by jump
- # goto keysetupdone
- jmp ._keysetupdone
-# kbits128:
-._kbits128:
- # in10 = *(uint64 *) (k + 0)
- movq 0(%rsi),%rdx
- # in12 = *(uint64 *) (k + 8)
- movq 8(%rsi),%rsi
- # *(uint64 *) (x + 44) = in10
- movq %rdx,44(%rdi)
- # *(uint64 *) (x + 52) = in12
- movq %rsi,52(%rdi)
- # in0 = 1634760805
- mov $1634760805,%rsi
- # in4 = 824206446
- mov $824206446,%rdx
- # in10 = 2036477238
- mov $2036477238,%rcx
- # in14 = 1797285236
- mov $1797285236,%r8
- # *(uint32 *) (x + 0) = in0
- movl %esi,0(%rdi)
- # *(uint32 *) (x + 20) = in4
- movl %edx,20(%rdi)
- # *(uint32 *) (x + 40) = in10
- movl %ecx,40(%rdi)
- # *(uint32 *) (x + 60) = in14
- movl %r8d,60(%rdi)
-# keysetupdone:
-._keysetupdone:
- # leave
- add %r11,%rsp
- mov %rdi,%rax
- mov %rsi,%rdx
- ret
-# enter ECRYPT_ivsetup
-.text
-.p2align 5
-.globl ECRYPT_ivsetup
-ECRYPT_ivsetup:
- mov %rsp,%r11
- and $31,%r11
- add $256,%r11
- sub %r11,%rsp
- # iv = arg2
- mov %rsi,%rsi
- # x = arg1
- mov %rdi,%rdi
- # in6 = *(uint64 *) (iv + 0)
- movq 0(%rsi),%rsi
- # in8 = 0
- mov $0,%r8
- # *(uint64 *) (x + 24) = in6
- movq %rsi,24(%rdi)
- # *(uint64 *) (x + 32) = in8
- movq %r8,32(%rdi)
- # leave
- add %r11,%rsp
- mov %rdi,%rax
- mov %rsi,%rdx
- ret
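
The twenty rounds unrolled above reduce to a small add-rotate-xor core. For reference, here is a standalone C sketch (not part of the deleted file) of one Salsa20 double round over the 4x4 uint32 state; each ._mainloop iteration performs four such rounds, with the counter starting at 20 and decremented by 4 per iteration, i.e. ten double rounds in total:

#include <stdint.h>

#define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))

/* One quarter-round: each lea/rol/xor triplet above computes one of
 * these add-rotate-xor steps on 32-bit subregisters. */
static void quarterround(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{
	*b ^= ROTL32(*a + *d, 7);
	*c ^= ROTL32(*b + *a, 9);
	*d ^= ROTL32(*c + *b, 13);
	*a ^= ROTL32(*d + *c, 18);
}

/* One double round: a column round followed by a row round. */
static void doubleround(uint32_t x[16])
{
	quarterround(&x[0],  &x[4],  &x[8],  &x[12]);	/* columns */
	quarterround(&x[5],  &x[9],  &x[13], &x[1]);
	quarterround(&x[10], &x[14], &x[2],  &x[6]);
	quarterround(&x[15], &x[3],  &x[7],  &x[11]);
	quarterround(&x[0],  &x[1],  &x[2],  &x[3]);	/* rows */
	quarterround(&x[5],  &x[6],  &x[7],  &x[4]);
	quarterround(&x[10], &x[11], &x[8],  &x[9]);
	quarterround(&x[15], &x[12], &x[13], &x[14]);
}

The immediates stored by ECRYPT_keysetup are the usual Salsa20 constants: 1634760805, 857760878, 2036477234 and 1797285236 are the little-endian words of "expand 32-byte k", while the 128-bit key path substitutes 824206446 and 2036477238 from "expand 16-byte k".
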
diff --git a/ANDROID_3.4.5/arch/x86/crypto/salsa20_glue.c b/ANDROID_3.4.5/arch/x86/crypto/salsa20_glue.c
deleted file mode 100644
index bccb76d8..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/salsa20_glue.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Glue code for optimized assembly version of Salsa20.
- *
- * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com>
- *
- * The assembly code is public-domain code written by Daniel J. Bernstein
- * <djb@cr.yp.to>. It has been modified to add indentation and to remove
- * extraneous comments and functions that are not needed.
- * - i586 version, renamed as salsa20-i586-asm_32.S
- * available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>
- * - x86-64 version, renamed as salsa20-x86_64-asm_64.S
- * available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/algapi.h>
-#include <linux/module.h>
-#include <linux/crypto.h>
-
-#define SALSA20_IV_SIZE 8U
-#define SALSA20_MIN_KEY_SIZE 16U
-#define SALSA20_MAX_KEY_SIZE 32U
-
-/* use the ECRYPT_* function names */
-#define salsa20_keysetup ECRYPT_keysetup
-#define salsa20_ivsetup ECRYPT_ivsetup
-#define salsa20_encrypt_bytes ECRYPT_encrypt_bytes
-
-struct salsa20_ctx {
- u32 input[16];
-};
-
-asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k,
- u32 keysize, u32 ivsize);
-asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv);
-asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx,
- const u8 *src, u8 *dst, u32 bytes);
-
-static int setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keysize)
-{
- struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm);
- salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8);
- return 0;
-}
-
-static int encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct blkcipher_walk walk;
- struct crypto_blkcipher *tfm = desc->tfm;
- struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm);
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, 64);
-
- salsa20_ivsetup(ctx, walk.iv);
-
-	if (likely(walk.nbytes == nbytes)) {
- salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
- walk.dst.virt.addr, nbytes);
- return blkcipher_walk_done(desc, &walk, 0);
- }
-
- while (walk.nbytes >= 64) {
- salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
- walk.dst.virt.addr,
- walk.nbytes - (walk.nbytes % 64));
- err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64);
- }
-
- if (walk.nbytes) {
- salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
- walk.dst.virt.addr, walk.nbytes);
- err = blkcipher_walk_done(desc, &walk, 0);
- }
-
- return err;
-}
-
-static struct crypto_alg alg = {
- .cra_name = "salsa20",
- .cra_driver_name = "salsa20-asm",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_type = &crypto_blkcipher_type,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct salsa20_ctx),
- .cra_alignmask = 3,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .setkey = setkey,
- .encrypt = encrypt,
- .decrypt = encrypt,
- .min_keysize = SALSA20_MIN_KEY_SIZE,
- .max_keysize = SALSA20_MAX_KEY_SIZE,
- .ivsize = SALSA20_IV_SIZE,
- }
- }
-};
-
-static int __init init(void)
-{
- return crypto_register_alg(&alg);
-}
-
-static void __exit fini(void)
-{
- crypto_unregister_alg(&alg);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Salsa20 stream cipher algorithm (optimized assembly version)");
-MODULE_ALIAS("salsa20");
-MODULE_ALIAS("salsa20-asm");
diff --git a/ANDROID_3.4.5/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/ANDROID_3.4.5/arch/x86/crypto/serpent-sse2-i586-asm_32.S
deleted file mode 100644
index c00053d4..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+++ /dev/null
@@ -1,635 +0,0 @@
-/*
- * Serpent Cipher 4-way parallel algorithm (i586/SSE2)
- *
- * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * Based on crypto/serpent.c by
- * Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no>
- * 2003 Herbert Valerio Riedel <hvr@gnu.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
- */
-
-.file "serpent-sse2-i586-asm_32.S"
-.text
-
-#define arg_ctx 4
-#define arg_dst 8
-#define arg_src 12
-#define arg_xor 16
-
-/**********************************************************************
- 4-way SSE2 serpent
- **********************************************************************/
-#define CTX %edx
-
-#define RA %xmm0
-#define RB %xmm1
-#define RC %xmm2
-#define RD %xmm3
-#define RE %xmm4
-
-#define RT0 %xmm5
-#define RT1 %xmm6
-
-#define RNOT %xmm7
-
-#define get_key(i, j, t) \
- movd (4*(i)+(j))*4(CTX), t; \
- pshufd $0, t, t;
-
-#define K(x0, x1, x2, x3, x4, i) \
- get_key(i, 0, x4); \
- get_key(i, 1, RT0); \
- get_key(i, 2, RT1); \
- pxor x4, x0; \
- pxor RT0, x1; \
- pxor RT1, x2; \
- get_key(i, 3, x4); \
- pxor x4, x3;
-
-#define LK(x0, x1, x2, x3, x4, i) \
- movdqa x0, x4; \
- pslld $13, x0; \
- psrld $(32 - 13), x4; \
- por x4, x0; \
- pxor x0, x1; \
- movdqa x2, x4; \
- pslld $3, x2; \
- psrld $(32 - 3), x4; \
- por x4, x2; \
- pxor x2, x1; \
- movdqa x1, x4; \
- pslld $1, x1; \
- psrld $(32 - 1), x4; \
- por x4, x1; \
- movdqa x0, x4; \
- pslld $3, x4; \
- pxor x2, x3; \
- pxor x4, x3; \
- movdqa x3, x4; \
- pslld $7, x3; \
- psrld $(32 - 7), x4; \
- por x4, x3; \
- movdqa x1, x4; \
- pslld $7, x4; \
- pxor x1, x0; \
- pxor x3, x0; \
- pxor x3, x2; \
- pxor x4, x2; \
- movdqa x0, x4; \
- get_key(i, 1, RT0); \
- pxor RT0, x1; \
- get_key(i, 3, RT0); \
- pxor RT0, x3; \
- pslld $5, x0; \
- psrld $(32 - 5), x4; \
- por x4, x0; \
- movdqa x2, x4; \
- pslld $22, x2; \
- psrld $(32 - 22), x4; \
- por x4, x2; \
- get_key(i, 0, RT0); \
- pxor RT0, x0; \
- get_key(i, 2, RT0); \
- pxor RT0, x2;
-
-#define KL(x0, x1, x2, x3, x4, i) \
- K(x0, x1, x2, x3, x4, i); \
- movdqa x0, x4; \
- psrld $5, x0; \
- pslld $(32 - 5), x4; \
- por x4, x0; \
- movdqa x2, x4; \
- psrld $22, x2; \
- pslld $(32 - 22), x4; \
- por x4, x2; \
- pxor x3, x2; \
- pxor x3, x0; \
- movdqa x1, x4; \
- pslld $7, x4; \
- pxor x1, x0; \
- pxor x4, x2; \
- movdqa x1, x4; \
- psrld $1, x1; \
- pslld $(32 - 1), x4; \
- por x4, x1; \
- movdqa x3, x4; \
- psrld $7, x3; \
- pslld $(32 - 7), x4; \
- por x4, x3; \
- pxor x0, x1; \
- movdqa x0, x4; \
- pslld $3, x4; \
- pxor x4, x3; \
- movdqa x0, x4; \
- psrld $13, x0; \
- pslld $(32 - 13), x4; \
- por x4, x0; \
- pxor x2, x1; \
- pxor x2, x3; \
- movdqa x2, x4; \
- psrld $3, x2; \
- pslld $(32 - 3), x4; \
- por x4, x2;
-
-#define S0(x0, x1, x2, x3, x4) \
- movdqa x3, x4; \
- por x0, x3; \
- pxor x4, x0; \
- pxor x2, x4; \
- pxor RNOT, x4; \
- pxor x1, x3; \
- pand x0, x1; \
- pxor x4, x1; \
- pxor x0, x2; \
- pxor x3, x0; \
- por x0, x4; \
- pxor x2, x0; \
- pand x1, x2; \
- pxor x2, x3; \
- pxor RNOT, x1; \
- pxor x4, x2; \
- pxor x2, x1;
-
-#define S1(x0, x1, x2, x3, x4) \
- movdqa x1, x4; \
- pxor x0, x1; \
- pxor x3, x0; \
- pxor RNOT, x3; \
- pand x1, x4; \
- por x1, x0; \
- pxor x2, x3; \
- pxor x3, x0; \
- pxor x3, x1; \
- pxor x4, x3; \
- por x4, x1; \
- pxor x2, x4; \
- pand x0, x2; \
- pxor x1, x2; \
- por x0, x1; \
- pxor RNOT, x0; \
- pxor x2, x0; \
- pxor x1, x4;
-
-#define S2(x0, x1, x2, x3, x4) \
- pxor RNOT, x3; \
- pxor x0, x1; \
- movdqa x0, x4; \
- pand x2, x0; \
- pxor x3, x0; \
- por x4, x3; \
- pxor x1, x2; \
- pxor x1, x3; \
- pand x0, x1; \
- pxor x2, x0; \
- pand x3, x2; \
- por x1, x3; \
- pxor RNOT, x0; \
- pxor x0, x3; \
- pxor x0, x4; \
- pxor x2, x0; \
- por x2, x1;
-
-#define S3(x0, x1, x2, x3, x4) \
- movdqa x1, x4; \
- pxor x3, x1; \
- por x0, x3; \
- pand x0, x4; \
- pxor x2, x0; \
- pxor x1, x2; \
- pand x3, x1; \
- pxor x3, x2; \
- por x4, x0; \
- pxor x3, x4; \
- pxor x0, x1; \
- pand x3, x0; \
- pand x4, x3; \
- pxor x2, x3; \
- por x1, x4; \
- pand x1, x2; \
- pxor x3, x4; \
- pxor x3, x0; \
- pxor x2, x3;
-
-#define S4(x0, x1, x2, x3, x4) \
- movdqa x3, x4; \
- pand x0, x3; \
- pxor x4, x0; \
- pxor x2, x3; \
- por x4, x2; \
- pxor x1, x0; \
- pxor x3, x4; \
- por x0, x2; \
- pxor x1, x2; \
- pand x0, x1; \
- pxor x4, x1; \
- pand x2, x4; \
- pxor x3, x2; \
- pxor x0, x4; \
- por x1, x3; \
- pxor RNOT, x1; \
- pxor x0, x3;
-
-#define S5(x0, x1, x2, x3, x4) \
- movdqa x1, x4; \
- por x0, x1; \
- pxor x1, x2; \
- pxor RNOT, x3; \
- pxor x0, x4; \
- pxor x2, x0; \
- pand x4, x1; \
- por x3, x4; \
- pxor x0, x4; \
- pand x3, x0; \
- pxor x3, x1; \
- pxor x2, x3; \
- pxor x1, x0; \
- pand x4, x2; \
- pxor x2, x1; \
- pand x0, x2; \
- pxor x2, x3;
-
-#define S6(x0, x1, x2, x3, x4) \
- movdqa x1, x4; \
- pxor x0, x3; \
- pxor x2, x1; \
- pxor x0, x2; \
- pand x3, x0; \
- por x3, x1; \
- pxor RNOT, x4; \
- pxor x1, x0; \
- pxor x2, x1; \
- pxor x4, x3; \
- pxor x0, x4; \
- pand x0, x2; \
- pxor x1, x4; \
- pxor x3, x2; \
- pand x1, x3; \
- pxor x0, x3; \
- pxor x2, x1;
-
-#define S7(x0, x1, x2, x3, x4) \
- pxor RNOT, x1; \
- movdqa x1, x4; \
- pxor RNOT, x0; \
- pand x2, x1; \
- pxor x3, x1; \
- por x4, x3; \
- pxor x2, x4; \
- pxor x3, x2; \
- pxor x0, x3; \
- por x1, x0; \
- pand x0, x2; \
- pxor x4, x0; \
- pxor x3, x4; \
- pand x0, x3; \
- pxor x1, x4; \
- pxor x4, x2; \
- pxor x1, x3; \
- por x0, x4; \
- pxor x1, x4;
-
-#define SI0(x0, x1, x2, x3, x4) \
- movdqa x3, x4; \
- pxor x0, x1; \
- por x1, x3; \
- pxor x1, x4; \
- pxor RNOT, x0; \
- pxor x3, x2; \
- pxor x0, x3; \
- pand x1, x0; \
- pxor x2, x0; \
- pand x3, x2; \
- pxor x4, x3; \
- pxor x3, x2; \
- pxor x3, x1; \
- pand x0, x3; \
- pxor x0, x1; \
- pxor x2, x0; \
- pxor x3, x4;
-
-#define SI1(x0, x1, x2, x3, x4) \
- pxor x3, x1; \
- movdqa x0, x4; \
- pxor x2, x0; \
- pxor RNOT, x2; \
- por x1, x4; \
- pxor x3, x4; \
- pand x1, x3; \
- pxor x2, x1; \
- pand x4, x2; \
- pxor x1, x4; \
- por x3, x1; \
- pxor x0, x3; \
- pxor x0, x2; \
- por x4, x0; \
- pxor x4, x2; \
- pxor x0, x1; \
- pxor x1, x4;
-
-#define SI2(x0, x1, x2, x3, x4) \
- pxor x1, x2; \
- movdqa x3, x4; \
- pxor RNOT, x3; \
- por x2, x3; \
- pxor x4, x2; \
- pxor x0, x4; \
- pxor x1, x3; \
- por x2, x1; \
- pxor x0, x2; \
- pxor x4, x1; \
- por x3, x4; \
- pxor x3, x2; \
- pxor x2, x4; \
- pand x1, x2; \
- pxor x3, x2; \
- pxor x4, x3; \
- pxor x0, x4;
-
-#define SI3(x0, x1, x2, x3, x4) \
- pxor x1, x2; \
- movdqa x1, x4; \
- pand x2, x1; \
- pxor x0, x1; \
- por x4, x0; \
- pxor x3, x4; \
- pxor x3, x0; \
- por x1, x3; \
- pxor x2, x1; \
- pxor x3, x1; \
- pxor x2, x0; \
- pxor x3, x2; \
- pand x1, x3; \
- pxor x0, x1; \
- pand x2, x0; \
- pxor x3, x4; \
- pxor x0, x3; \
- pxor x1, x0;
-
-#define SI4(x0, x1, x2, x3, x4) \
- pxor x3, x2; \
- movdqa x0, x4; \
- pand x1, x0; \
- pxor x2, x0; \
- por x3, x2; \
- pxor RNOT, x4; \
- pxor x0, x1; \
- pxor x2, x0; \
- pand x4, x2; \
- pxor x0, x2; \
- por x4, x0; \
- pxor x3, x0; \
- pand x2, x3; \
- pxor x3, x4; \
- pxor x1, x3; \
- pand x0, x1; \
- pxor x1, x4; \
- pxor x3, x0;
-
-#define SI5(x0, x1, x2, x3, x4) \
- movdqa x1, x4; \
- por x2, x1; \
- pxor x4, x2; \
- pxor x3, x1; \
- pand x4, x3; \
- pxor x3, x2; \
- por x0, x3; \
- pxor RNOT, x0; \
- pxor x2, x3; \
- por x0, x2; \
- pxor x1, x4; \
- pxor x4, x2; \
- pand x0, x4; \
- pxor x1, x0; \
- pxor x3, x1; \
- pand x2, x0; \
- pxor x3, x2; \
- pxor x2, x0; \
- pxor x4, x2; \
- pxor x3, x4;
-
-#define SI6(x0, x1, x2, x3, x4) \
- pxor x2, x0; \
- movdqa x0, x4; \
- pand x3, x0; \
- pxor x3, x2; \
- pxor x2, x0; \
- pxor x1, x3; \
- por x4, x2; \
- pxor x3, x2; \
- pand x0, x3; \
- pxor RNOT, x0; \
- pxor x1, x3; \
- pand x2, x1; \
- pxor x0, x4; \
- pxor x4, x3; \
- pxor x2, x4; \
- pxor x1, x0; \
- pxor x0, x2;
-
-#define SI7(x0, x1, x2, x3, x4) \
- movdqa x3, x4; \
- pand x0, x3; \
- pxor x2, x0; \
- por x4, x2; \
- pxor x1, x4; \
- pxor RNOT, x0; \
- por x3, x1; \
- pxor x0, x4; \
- pand x2, x0; \
- pxor x1, x0; \
- pand x2, x1; \
- pxor x2, x3; \
- pxor x3, x4; \
- pand x3, x2; \
- por x0, x3; \
- pxor x4, x1; \
- pxor x4, x3; \
- pand x0, x4; \
- pxor x2, x4;
-
-#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
- movdqa x0, t2; \
- punpckldq x1, x0; \
- punpckhdq x1, t2; \
- movdqa x2, t1; \
- punpckhdq x3, x2; \
- punpckldq x3, t1; \
- movdqa x0, x1; \
- punpcklqdq t1, x0; \
- punpckhqdq t1, x1; \
- movdqa t2, x3; \
- punpcklqdq x2, t2; \
- punpckhqdq x2, x3; \
- movdqa t2, x2;
-
-#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
- movdqu (0*4*4)(in), x0; \
- movdqu (1*4*4)(in), x1; \
- movdqu (2*4*4)(in), x2; \
- movdqu (3*4*4)(in), x3; \
- \
- transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
-
-#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
- transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
- \
- movdqu x0, (0*4*4)(out); \
- movdqu x1, (1*4*4)(out); \
- movdqu x2, (2*4*4)(out); \
- movdqu x3, (3*4*4)(out);
-
-#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
- transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
- \
- movdqu (0*4*4)(out), t0; \
- pxor t0, x0; \
- movdqu x0, (0*4*4)(out); \
- movdqu (1*4*4)(out), t0; \
- pxor t0, x1; \
- movdqu x1, (1*4*4)(out); \
- movdqu (2*4*4)(out), t0; \
- pxor t0, x2; \
- movdqu x2, (2*4*4)(out); \
- movdqu (3*4*4)(out), t0; \
- pxor t0, x3; \
- movdqu x3, (3*4*4)(out);
-
-.align 8
-.global __serpent_enc_blk_4way
-.type __serpent_enc_blk_4way,@function;
-
-__serpent_enc_blk_4way:
- /* input:
- * arg_ctx(%esp): ctx, CTX
- * arg_dst(%esp): dst
- * arg_src(%esp): src
- * arg_xor(%esp): bool, if true: xor output
- */
-
- pcmpeqd RNOT, RNOT;
-
- movl arg_ctx(%esp), CTX;
-
- movl arg_src(%esp), %eax;
- read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
-
- K(RA, RB, RC, RD, RE, 0);
- S0(RA, RB, RC, RD, RE); LK(RC, RB, RD, RA, RE, 1);
- S1(RC, RB, RD, RA, RE); LK(RE, RD, RA, RC, RB, 2);
- S2(RE, RD, RA, RC, RB); LK(RB, RD, RE, RC, RA, 3);
- S3(RB, RD, RE, RC, RA); LK(RC, RA, RD, RB, RE, 4);
- S4(RC, RA, RD, RB, RE); LK(RA, RD, RB, RE, RC, 5);
- S5(RA, RD, RB, RE, RC); LK(RC, RA, RD, RE, RB, 6);
- S6(RC, RA, RD, RE, RB); LK(RD, RB, RA, RE, RC, 7);
- S7(RD, RB, RA, RE, RC); LK(RC, RA, RE, RD, RB, 8);
- S0(RC, RA, RE, RD, RB); LK(RE, RA, RD, RC, RB, 9);
- S1(RE, RA, RD, RC, RB); LK(RB, RD, RC, RE, RA, 10);
- S2(RB, RD, RC, RE, RA); LK(RA, RD, RB, RE, RC, 11);
- S3(RA, RD, RB, RE, RC); LK(RE, RC, RD, RA, RB, 12);
- S4(RE, RC, RD, RA, RB); LK(RC, RD, RA, RB, RE, 13);
- S5(RC, RD, RA, RB, RE); LK(RE, RC, RD, RB, RA, 14);
- S6(RE, RC, RD, RB, RA); LK(RD, RA, RC, RB, RE, 15);
- S7(RD, RA, RC, RB, RE); LK(RE, RC, RB, RD, RA, 16);
- S0(RE, RC, RB, RD, RA); LK(RB, RC, RD, RE, RA, 17);
- S1(RB, RC, RD, RE, RA); LK(RA, RD, RE, RB, RC, 18);
- S2(RA, RD, RE, RB, RC); LK(RC, RD, RA, RB, RE, 19);
- S3(RC, RD, RA, RB, RE); LK(RB, RE, RD, RC, RA, 20);
- S4(RB, RE, RD, RC, RA); LK(RE, RD, RC, RA, RB, 21);
- S5(RE, RD, RC, RA, RB); LK(RB, RE, RD, RA, RC, 22);
- S6(RB, RE, RD, RA, RC); LK(RD, RC, RE, RA, RB, 23);
- S7(RD, RC, RE, RA, RB); LK(RB, RE, RA, RD, RC, 24);
- S0(RB, RE, RA, RD, RC); LK(RA, RE, RD, RB, RC, 25);
- S1(RA, RE, RD, RB, RC); LK(RC, RD, RB, RA, RE, 26);
- S2(RC, RD, RB, RA, RE); LK(RE, RD, RC, RA, RB, 27);
- S3(RE, RD, RC, RA, RB); LK(RA, RB, RD, RE, RC, 28);
- S4(RA, RB, RD, RE, RC); LK(RB, RD, RE, RC, RA, 29);
- S5(RB, RD, RE, RC, RA); LK(RA, RB, RD, RC, RE, 30);
- S6(RA, RB, RD, RC, RE); LK(RD, RE, RB, RC, RA, 31);
- S7(RD, RE, RB, RC, RA); K(RA, RB, RC, RD, RE, 32);
-
- movl arg_dst(%esp), %eax;
-
- cmpb $0, arg_xor(%esp);
- jnz __enc_xor4;
-
- write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
-
- ret;
-
-__enc_xor4:
- xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
-
- ret;
-
-.align 8
-.global serpent_dec_blk_4way
-.type serpent_dec_blk_4way,@function;
-
-serpent_dec_blk_4way:
- /* input:
- * arg_ctx(%esp): ctx, CTX
- * arg_dst(%esp): dst
- * arg_src(%esp): src
- */
-
- pcmpeqd RNOT, RNOT;
-
- movl arg_ctx(%esp), CTX;
-
- movl arg_src(%esp), %eax;
- read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
-
- K(RA, RB, RC, RD, RE, 32);
- SI7(RA, RB, RC, RD, RE); KL(RB, RD, RA, RE, RC, 31);
- SI6(RB, RD, RA, RE, RC); KL(RA, RC, RE, RB, RD, 30);
- SI5(RA, RC, RE, RB, RD); KL(RC, RD, RA, RE, RB, 29);
- SI4(RC, RD, RA, RE, RB); KL(RC, RA, RB, RE, RD, 28);
- SI3(RC, RA, RB, RE, RD); KL(RB, RC, RD, RE, RA, 27);
- SI2(RB, RC, RD, RE, RA); KL(RC, RA, RE, RD, RB, 26);
- SI1(RC, RA, RE, RD, RB); KL(RB, RA, RE, RD, RC, 25);
- SI0(RB, RA, RE, RD, RC); KL(RE, RC, RA, RB, RD, 24);
- SI7(RE, RC, RA, RB, RD); KL(RC, RB, RE, RD, RA, 23);
- SI6(RC, RB, RE, RD, RA); KL(RE, RA, RD, RC, RB, 22);
- SI5(RE, RA, RD, RC, RB); KL(RA, RB, RE, RD, RC, 21);
- SI4(RA, RB, RE, RD, RC); KL(RA, RE, RC, RD, RB, 20);
- SI3(RA, RE, RC, RD, RB); KL(RC, RA, RB, RD, RE, 19);
- SI2(RC, RA, RB, RD, RE); KL(RA, RE, RD, RB, RC, 18);
- SI1(RA, RE, RD, RB, RC); KL(RC, RE, RD, RB, RA, 17);
- SI0(RC, RE, RD, RB, RA); KL(RD, RA, RE, RC, RB, 16);
- SI7(RD, RA, RE, RC, RB); KL(RA, RC, RD, RB, RE, 15);
- SI6(RA, RC, RD, RB, RE); KL(RD, RE, RB, RA, RC, 14);
- SI5(RD, RE, RB, RA, RC); KL(RE, RC, RD, RB, RA, 13);
- SI4(RE, RC, RD, RB, RA); KL(RE, RD, RA, RB, RC, 12);
- SI3(RE, RD, RA, RB, RC); KL(RA, RE, RC, RB, RD, 11);
- SI2(RA, RE, RC, RB, RD); KL(RE, RD, RB, RC, RA, 10);
- SI1(RE, RD, RB, RC, RA); KL(RA, RD, RB, RC, RE, 9);
- SI0(RA, RD, RB, RC, RE); KL(RB, RE, RD, RA, RC, 8);
- SI7(RB, RE, RD, RA, RC); KL(RE, RA, RB, RC, RD, 7);
- SI6(RE, RA, RB, RC, RD); KL(RB, RD, RC, RE, RA, 6);
- SI5(RB, RD, RC, RE, RA); KL(RD, RA, RB, RC, RE, 5);
- SI4(RD, RA, RB, RC, RE); KL(RD, RB, RE, RC, RA, 4);
- SI3(RD, RB, RE, RC, RA); KL(RE, RD, RA, RC, RB, 3);
- SI2(RE, RD, RA, RC, RB); KL(RD, RB, RC, RA, RE, 2);
- SI1(RD, RB, RC, RA, RE); KL(RE, RB, RC, RA, RD, 1);
- SI0(RE, RB, RC, RA, RD); K(RC, RD, RB, RE, RA, 0);
-
- movl arg_dst(%esp), %eax;
- write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);
-
- ret;
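
Each LK (and, inverted, KL) invocation above interleaves the round-key XOR with Serpent's linear transform, applied to four blocks at once with one 32-bit lane per block in each XMM register. Stripped of the SIMD scheduling, the forward transform uses the same shift and rotate constants as the macro; a scalar C sketch for one lane:

#include <stdint.h>

#define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))

/* Serpent forward linear transform; the KL macro computes its inverse. */
static void serpent_lt(uint32_t *x0, uint32_t *x1, uint32_t *x2, uint32_t *x3)
{
	*x0 = ROTL32(*x0, 13);
	*x2 = ROTL32(*x2, 3);
	*x1 ^= *x0 ^ *x2;
	*x3 ^= *x2 ^ (*x0 << 3);
	*x1 = ROTL32(*x1, 1);
	*x3 = ROTL32(*x3, 7);
	*x0 ^= *x1 ^ *x3;
	*x2 ^= *x3 ^ (*x1 << 7);
	*x0 = ROTL32(*x0, 5);
	*x2 = ROTL32(*x2, 22);
}
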
diff --git a/ANDROID_3.4.5/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/ANDROID_3.4.5/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
deleted file mode 100644
index 3ee1ff04..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+++ /dev/null
@@ -1,758 +0,0 @@
-/*
- * Serpent Cipher 8-way parallel algorithm (x86_64/SSE2)
- *
- * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * Based on crypto/serpent.c by
- * Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no>
- * 2003 Herbert Valerio Riedel <hvr@gnu.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
- */
-
-.file "serpent-sse2-x86_64-asm_64.S"
-.text
-
-#define CTX %rdi
-
-/**********************************************************************
- 8-way SSE2 serpent
- **********************************************************************/
-#define RA1 %xmm0
-#define RB1 %xmm1
-#define RC1 %xmm2
-#define RD1 %xmm3
-#define RE1 %xmm4
-
-#define RA2 %xmm5
-#define RB2 %xmm6
-#define RC2 %xmm7
-#define RD2 %xmm8
-#define RE2 %xmm9
-
-#define RNOT %xmm10
-
-#define RK0 %xmm11
-#define RK1 %xmm12
-#define RK2 %xmm13
-#define RK3 %xmm14
-
-#define S0_1(x0, x1, x2, x3, x4) \
- movdqa x3, x4; \
- por x0, x3; \
- pxor x4, x0; \
- pxor x2, x4; \
- pxor RNOT, x4; \
- pxor x1, x3; \
- pand x0, x1; \
- pxor x4, x1; \
- pxor x0, x2;
-#define S0_2(x0, x1, x2, x3, x4) \
- pxor x3, x0; \
- por x0, x4; \
- pxor x2, x0; \
- pand x1, x2; \
- pxor x2, x3; \
- pxor RNOT, x1; \
- pxor x4, x2; \
- pxor x2, x1;
-
-#define S1_1(x0, x1, x2, x3, x4) \
- movdqa x1, x4; \
- pxor x0, x1; \
- pxor x3, x0; \
- pxor RNOT, x3; \
- pand x1, x4; \
- por x1, x0; \
- pxor x2, x3; \
- pxor x3, x0; \
- pxor x3, x1;
-#define S1_2(x0, x1, x2, x3, x4) \
- pxor x4, x3; \
- por x4, x1; \
- pxor x2, x4; \
- pand x0, x2; \
- pxor x1, x2; \
- por x0, x1; \
- pxor RNOT, x0; \
- pxor x2, x0; \
- pxor x1, x4;
-
-#define S2_1(x0, x1, x2, x3, x4) \
- pxor RNOT, x3; \
- pxor x0, x1; \
- movdqa x0, x4; \
- pand x2, x0; \
- pxor x3, x0; \
- por x4, x3; \
- pxor x1, x2; \
- pxor x1, x3; \
- pand x0, x1;
-#define S2_2(x0, x1, x2, x3, x4) \
- pxor x2, x0; \
- pand x3, x2; \
- por x1, x3; \
- pxor RNOT, x0; \
- pxor x0, x3; \
- pxor x0, x4; \
- pxor x2, x0; \
- por x2, x1;
-
-#define S3_1(x0, x1, x2, x3, x4) \
- movdqa x1, x4; \
- pxor x3, x1; \
- por x0, x3; \
- pand x0, x4; \
- pxor x2, x0; \
- pxor x1, x2; \
- pand x3, x1; \
- pxor x3, x2; \
- por x4, x0; \
- pxor x3, x4;
-#define S3_2(x0, x1, x2, x3, x4) \
- pxor x0, x1; \
- pand x3, x0; \
- pand x4, x3; \
- pxor x2, x3; \
- por x1, x4; \
- pand x1, x2; \
- pxor x3, x4; \
- pxor x3, x0; \
- pxor x2, x3;
-
-#define S4_1(x0, x1, x2, x3, x4) \
- movdqa x3, x4; \
- pand x0, x3; \
- pxor x4, x0; \
- pxor x2, x3; \
- por x4, x2; \
- pxor x1, x0; \
- pxor x3, x4; \
- por x0, x2; \
- pxor x1, x2;
-#define S4_2(x0, x1, x2, x3, x4) \
- pand x0, x1; \
- pxor x4, x1; \
- pand x2, x4; \
- pxor x3, x2; \
- pxor x0, x4; \
- por x1, x3; \
- pxor RNOT, x1; \
- pxor x0, x3;
-
-#define S5_1(x0, x1, x2, x3, x4) \
- movdqa x1, x4; \
- por x0, x1; \
- pxor x1, x2; \
- pxor RNOT, x3; \
- pxor x0, x4; \
- pxor x2, x0; \
- pand x4, x1; \
- por x3, x4; \
- pxor x0, x4;
-#define S5_2(x0, x1, x2, x3, x4) \
- pand x3, x0; \
- pxor x3, x1; \
- pxor x2, x3; \
- pxor x1, x0; \
- pand x4, x2; \
- pxor x2, x1; \
- pand x0, x2; \
- pxor x2, x3;
-
-#define S6_1(x0, x1, x2, x3, x4) \
- movdqa x1, x4; \
- pxor x0, x3; \
- pxor x2, x1; \
- pxor x0, x2; \
- pand x3, x0; \
- por x3, x1; \
- pxor RNOT, x4; \
- pxor x1, x0; \
- pxor x2, x1;
-#define S6_2(x0, x1, x2, x3, x4) \
- pxor x4, x3; \
- pxor x0, x4; \
- pand x0, x2; \
- pxor x1, x4; \
- pxor x3, x2; \
- pand x1, x3; \
- pxor x0, x3; \
- pxor x2, x1;
-
-#define S7_1(x0, x1, x2, x3, x4) \
- pxor RNOT, x1; \
- movdqa x1, x4; \
- pxor RNOT, x0; \
- pand x2, x1; \
- pxor x3, x1; \
- por x4, x3; \
- pxor x2, x4; \
- pxor x3, x2; \
- pxor x0, x3; \
- por x1, x0;
-#define S7_2(x0, x1, x2, x3, x4) \
- pand x0, x2; \
- pxor x4, x0; \
- pxor x3, x4; \
- pand x0, x3; \
- pxor x1, x4; \
- pxor x4, x2; \
- pxor x1, x3; \
- por x0, x4; \
- pxor x1, x4;
-
-#define SI0_1(x0, x1, x2, x3, x4) \
- movdqa x3, x4; \
- pxor x0, x1; \
- por x1, x3; \
- pxor x1, x4; \
- pxor RNOT, x0; \
- pxor x3, x2; \
- pxor x0, x3; \
- pand x1, x0; \
- pxor x2, x0;
-#define SI0_2(x0, x1, x2, x3, x4) \
- pand x3, x2; \
- pxor x4, x3; \
- pxor x3, x2; \
- pxor x3, x1; \
- pand x0, x3; \
- pxor x0, x1; \
- pxor x2, x0; \
- pxor x3, x4;
-
-#define SI1_1(x0, x1, x2, x3, x4) \
- pxor x3, x1; \
- movdqa x0, x4; \
- pxor x2, x0; \
- pxor RNOT, x2; \
- por x1, x4; \
- pxor x3, x4; \
- pand x1, x3; \
- pxor x2, x1; \
- pand x4, x2;
-#define SI1_2(x0, x1, x2, x3, x4) \
- pxor x1, x4; \
- por x3, x1; \
- pxor x0, x3; \
- pxor x0, x2; \
- por x4, x0; \
- pxor x4, x2; \
- pxor x0, x1; \
- pxor x1, x4;
-
-#define SI2_1(x0, x1, x2, x3, x4) \
- pxor x1, x2; \
- movdqa x3, x4; \
- pxor RNOT, x3; \
- por x2, x3; \
- pxor x4, x2; \
- pxor x0, x4; \
- pxor x1, x3; \
- por x2, x1; \
- pxor x0, x2;
-#define SI2_2(x0, x1, x2, x3, x4) \
- pxor x4, x1; \
- por x3, x4; \
- pxor x3, x2; \
- pxor x2, x4; \
- pand x1, x2; \
- pxor x3, x2; \
- pxor x4, x3; \
- pxor x0, x4;
-
-#define SI3_1(x0, x1, x2, x3, x4) \
- pxor x1, x2; \
- movdqa x1, x4; \
- pand x2, x1; \
- pxor x0, x1; \
- por x4, x0; \
- pxor x3, x4; \
- pxor x3, x0; \
- por x1, x3; \
- pxor x2, x1;
-#define SI3_2(x0, x1, x2, x3, x4) \
- pxor x3, x1; \
- pxor x2, x0; \
- pxor x3, x2; \
- pand x1, x3; \
- pxor x0, x1; \
- pand x2, x0; \
- pxor x3, x4; \
- pxor x0, x3; \
- pxor x1, x0;
-
-#define SI4_1(x0, x1, x2, x3, x4) \
- pxor x3, x2; \
- movdqa x0, x4; \
- pand x1, x0; \
- pxor x2, x0; \
- por x3, x2; \
- pxor RNOT, x4; \
- pxor x0, x1; \
- pxor x2, x0; \
- pand x4, x2;
-#define SI4_2(x0, x1, x2, x3, x4) \
- pxor x0, x2; \
- por x4, x0; \
- pxor x3, x0; \
- pand x2, x3; \
- pxor x3, x4; \
- pxor x1, x3; \
- pand x0, x1; \
- pxor x1, x4; \
- pxor x3, x0;
-
-#define SI5_1(x0, x1, x2, x3, x4) \
- movdqa x1, x4; \
- por x2, x1; \
- pxor x4, x2; \
- pxor x3, x1; \
- pand x4, x3; \
- pxor x3, x2; \
- por x0, x3; \
- pxor RNOT, x0; \
- pxor x2, x3; \
- por x0, x2;
-#define SI5_2(x0, x1, x2, x3, x4) \
- pxor x1, x4; \
- pxor x4, x2; \
- pand x0, x4; \
- pxor x1, x0; \
- pxor x3, x1; \
- pand x2, x0; \
- pxor x3, x2; \
- pxor x2, x0; \
- pxor x4, x2; \
- pxor x3, x4;
-
-#define SI6_1(x0, x1, x2, x3, x4) \
- pxor x2, x0; \
- movdqa x0, x4; \
- pand x3, x0; \
- pxor x3, x2; \
- pxor x2, x0; \
- pxor x1, x3; \
- por x4, x2; \
- pxor x3, x2; \
- pand x0, x3;
-#define SI6_2(x0, x1, x2, x3, x4) \
- pxor RNOT, x0; \
- pxor x1, x3; \
- pand x2, x1; \
- pxor x0, x4; \
- pxor x4, x3; \
- pxor x2, x4; \
- pxor x1, x0; \
- pxor x0, x2;
-
-#define SI7_1(x0, x1, x2, x3, x4) \
- movdqa x3, x4; \
- pand x0, x3; \
- pxor x2, x0; \
- por x4, x2; \
- pxor x1, x4; \
- pxor RNOT, x0; \
- por x3, x1; \
- pxor x0, x4; \
- pand x2, x0; \
- pxor x1, x0;
-#define SI7_2(x0, x1, x2, x3, x4) \
- pand x2, x1; \
- pxor x2, x3; \
- pxor x3, x4; \
- pand x3, x2; \
- por x0, x3; \
- pxor x4, x1; \
- pxor x4, x3; \
- pand x0, x4; \
- pxor x2, x4;
-
-#define get_key(i, j, t) \
- movd (4*(i)+(j))*4(CTX), t; \
- pshufd $0, t, t;
-
-#define K2(x0, x1, x2, x3, x4, i) \
- get_key(i, 0, RK0); \
- get_key(i, 1, RK1); \
- get_key(i, 2, RK2); \
- get_key(i, 3, RK3); \
- pxor RK0, x0 ## 1; \
- pxor RK1, x1 ## 1; \
- pxor RK2, x2 ## 1; \
- pxor RK3, x3 ## 1; \
- pxor RK0, x0 ## 2; \
- pxor RK1, x1 ## 2; \
- pxor RK2, x2 ## 2; \
- pxor RK3, x3 ## 2;
-
-#define LK2(x0, x1, x2, x3, x4, i) \
- movdqa x0 ## 1, x4 ## 1; \
- pslld $13, x0 ## 1; \
- psrld $(32 - 13), x4 ## 1; \
- por x4 ## 1, x0 ## 1; \
- pxor x0 ## 1, x1 ## 1; \
- movdqa x2 ## 1, x4 ## 1; \
- pslld $3, x2 ## 1; \
- psrld $(32 - 3), x4 ## 1; \
- por x4 ## 1, x2 ## 1; \
- pxor x2 ## 1, x1 ## 1; \
- movdqa x0 ## 2, x4 ## 2; \
- pslld $13, x0 ## 2; \
- psrld $(32 - 13), x4 ## 2; \
- por x4 ## 2, x0 ## 2; \
- pxor x0 ## 2, x1 ## 2; \
- movdqa x2 ## 2, x4 ## 2; \
- pslld $3, x2 ## 2; \
- psrld $(32 - 3), x4 ## 2; \
- por x4 ## 2, x2 ## 2; \
- pxor x2 ## 2, x1 ## 2; \
- movdqa x1 ## 1, x4 ## 1; \
- pslld $1, x1 ## 1; \
- psrld $(32 - 1), x4 ## 1; \
- por x4 ## 1, x1 ## 1; \
- movdqa x0 ## 1, x4 ## 1; \
- pslld $3, x4 ## 1; \
- pxor x2 ## 1, x3 ## 1; \
- pxor x4 ## 1, x3 ## 1; \
- movdqa x3 ## 1, x4 ## 1; \
- get_key(i, 1, RK1); \
- movdqa x1 ## 2, x4 ## 2; \
- pslld $1, x1 ## 2; \
- psrld $(32 - 1), x4 ## 2; \
- por x4 ## 2, x1 ## 2; \
- movdqa x0 ## 2, x4 ## 2; \
- pslld $3, x4 ## 2; \
- pxor x2 ## 2, x3 ## 2; \
- pxor x4 ## 2, x3 ## 2; \
- movdqa x3 ## 2, x4 ## 2; \
- get_key(i, 3, RK3); \
- pslld $7, x3 ## 1; \
- psrld $(32 - 7), x4 ## 1; \
- por x4 ## 1, x3 ## 1; \
- movdqa x1 ## 1, x4 ## 1; \
- pslld $7, x4 ## 1; \
- pxor x1 ## 1, x0 ## 1; \
- pxor x3 ## 1, x0 ## 1; \
- pxor x3 ## 1, x2 ## 1; \
- pxor x4 ## 1, x2 ## 1; \
- get_key(i, 0, RK0); \
- pslld $7, x3 ## 2; \
- psrld $(32 - 7), x4 ## 2; \
- por x4 ## 2, x3 ## 2; \
- movdqa x1 ## 2, x4 ## 2; \
- pslld $7, x4 ## 2; \
- pxor x1 ## 2, x0 ## 2; \
- pxor x3 ## 2, x0 ## 2; \
- pxor x3 ## 2, x2 ## 2; \
- pxor x4 ## 2, x2 ## 2; \
- get_key(i, 2, RK2); \
- pxor RK1, x1 ## 1; \
- pxor RK3, x3 ## 1; \
- movdqa x0 ## 1, x4 ## 1; \
- pslld $5, x0 ## 1; \
- psrld $(32 - 5), x4 ## 1; \
- por x4 ## 1, x0 ## 1; \
- movdqa x2 ## 1, x4 ## 1; \
- pslld $22, x2 ## 1; \
- psrld $(32 - 22), x4 ## 1; \
- por x4 ## 1, x2 ## 1; \
- pxor RK0, x0 ## 1; \
- pxor RK2, x2 ## 1; \
- pxor RK1, x1 ## 2; \
- pxor RK3, x3 ## 2; \
- movdqa x0 ## 2, x4 ## 2; \
- pslld $5, x0 ## 2; \
- psrld $(32 - 5), x4 ## 2; \
- por x4 ## 2, x0 ## 2; \
- movdqa x2 ## 2, x4 ## 2; \
- pslld $22, x2 ## 2; \
- psrld $(32 - 22), x4 ## 2; \
- por x4 ## 2, x2 ## 2; \
- pxor RK0, x0 ## 2; \
- pxor RK2, x2 ## 2;
-
-#define KL2(x0, x1, x2, x3, x4, i) \
- pxor RK0, x0 ## 1; \
- pxor RK2, x2 ## 1; \
- movdqa x0 ## 1, x4 ## 1; \
- psrld $5, x0 ## 1; \
- pslld $(32 - 5), x4 ## 1; \
- por x4 ## 1, x0 ## 1; \
- pxor RK3, x3 ## 1; \
- pxor RK1, x1 ## 1; \
- movdqa x2 ## 1, x4 ## 1; \
- psrld $22, x2 ## 1; \
- pslld $(32 - 22), x4 ## 1; \
- por x4 ## 1, x2 ## 1; \
- pxor x3 ## 1, x2 ## 1; \
- pxor RK0, x0 ## 2; \
- pxor RK2, x2 ## 2; \
- movdqa x0 ## 2, x4 ## 2; \
- psrld $5, x0 ## 2; \
- pslld $(32 - 5), x4 ## 2; \
- por x4 ## 2, x0 ## 2; \
- pxor RK3, x3 ## 2; \
- pxor RK1, x1 ## 2; \
- movdqa x2 ## 2, x4 ## 2; \
- psrld $22, x2 ## 2; \
- pslld $(32 - 22), x4 ## 2; \
- por x4 ## 2, x2 ## 2; \
- pxor x3 ## 2, x2 ## 2; \
- pxor x3 ## 1, x0 ## 1; \
- movdqa x1 ## 1, x4 ## 1; \
- pslld $7, x4 ## 1; \
- pxor x1 ## 1, x0 ## 1; \
- pxor x4 ## 1, x2 ## 1; \
- movdqa x1 ## 1, x4 ## 1; \
- psrld $1, x1 ## 1; \
- pslld $(32 - 1), x4 ## 1; \
- por x4 ## 1, x1 ## 1; \
- pxor x3 ## 2, x0 ## 2; \
- movdqa x1 ## 2, x4 ## 2; \
- pslld $7, x4 ## 2; \
- pxor x1 ## 2, x0 ## 2; \
- pxor x4 ## 2, x2 ## 2; \
- movdqa x1 ## 2, x4 ## 2; \
- psrld $1, x1 ## 2; \
- pslld $(32 - 1), x4 ## 2; \
- por x4 ## 2, x1 ## 2; \
- movdqa x3 ## 1, x4 ## 1; \
- psrld $7, x3 ## 1; \
- pslld $(32 - 7), x4 ## 1; \
- por x4 ## 1, x3 ## 1; \
- pxor x0 ## 1, x1 ## 1; \
- movdqa x0 ## 1, x4 ## 1; \
- pslld $3, x4 ## 1; \
- pxor x4 ## 1, x3 ## 1; \
- movdqa x0 ## 1, x4 ## 1; \
- movdqa x3 ## 2, x4 ## 2; \
- psrld $7, x3 ## 2; \
- pslld $(32 - 7), x4 ## 2; \
- por x4 ## 2, x3 ## 2; \
- pxor x0 ## 2, x1 ## 2; \
- movdqa x0 ## 2, x4 ## 2; \
- pslld $3, x4 ## 2; \
- pxor x4 ## 2, x3 ## 2; \
- movdqa x0 ## 2, x4 ## 2; \
- psrld $13, x0 ## 1; \
- pslld $(32 - 13), x4 ## 1; \
- por x4 ## 1, x0 ## 1; \
- pxor x2 ## 1, x1 ## 1; \
- pxor x2 ## 1, x3 ## 1; \
- movdqa x2 ## 1, x4 ## 1; \
- psrld $3, x2 ## 1; \
- pslld $(32 - 3), x4 ## 1; \
- por x4 ## 1, x2 ## 1; \
- psrld $13, x0 ## 2; \
- pslld $(32 - 13), x4 ## 2; \
- por x4 ## 2, x0 ## 2; \
- pxor x2 ## 2, x1 ## 2; \
- pxor x2 ## 2, x3 ## 2; \
- movdqa x2 ## 2, x4 ## 2; \
- psrld $3, x2 ## 2; \
- pslld $(32 - 3), x4 ## 2; \
- por x4 ## 2, x2 ## 2;
-
-#define S(SBOX, x0, x1, x2, x3, x4) \
- SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
- SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
- SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
- SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);
-
-#define SP(SBOX, x0, x1, x2, x3, x4, i) \
- get_key(i, 0, RK0); \
- SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
- get_key(i, 2, RK2); \
- SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
- get_key(i, 3, RK3); \
- SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
- get_key(i, 1, RK1); \
- SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
-
-#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
- movdqa x0, t2; \
- punpckldq x1, x0; \
- punpckhdq x1, t2; \
- movdqa x2, t1; \
- punpckhdq x3, x2; \
- punpckldq x3, t1; \
- movdqa x0, x1; \
- punpcklqdq t1, x0; \
- punpckhqdq t1, x1; \
- movdqa t2, x3; \
- punpcklqdq x2, t2; \
- punpckhqdq x2, x3; \
- movdqa t2, x2;
-
-#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
- movdqu (0*4*4)(in), x0; \
- movdqu (1*4*4)(in), x1; \
- movdqu (2*4*4)(in), x2; \
- movdqu (3*4*4)(in), x3; \
- \
- transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
-
-#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
- transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
- \
- movdqu x0, (0*4*4)(out); \
- movdqu x1, (1*4*4)(out); \
- movdqu x2, (2*4*4)(out); \
- movdqu x3, (3*4*4)(out);
-
-#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
- transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
- \
- movdqu (0*4*4)(out), t0; \
- pxor t0, x0; \
- movdqu x0, (0*4*4)(out); \
- movdqu (1*4*4)(out), t0; \
- pxor t0, x1; \
- movdqu x1, (1*4*4)(out); \
- movdqu (2*4*4)(out), t0; \
- pxor t0, x2; \
- movdqu x2, (2*4*4)(out); \
- movdqu (3*4*4)(out), t0; \
- pxor t0, x3; \
- movdqu x3, (3*4*4)(out);
-
-.align 8
-.global __serpent_enc_blk_8way
-.type __serpent_enc_blk_8way,@function;
-
-__serpent_enc_blk_8way:
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: bool, if true: xor output
- */
-
- pcmpeqd RNOT, RNOT;
-
- leaq (4*4*4)(%rdx), %rax;
- read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
- read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
-
- K2(RA, RB, RC, RD, RE, 0);
- S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1);
- S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2);
- S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3);
- S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4);
- S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5);
- S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6);
- S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7);
- S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8);
- S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9);
- S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10);
- S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11);
- S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12);
- S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13);
- S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14);
- S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15);
- S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16);
- S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17);
- S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18);
- S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19);
- S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20);
- S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21);
- S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22);
- S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23);
- S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24);
- S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25);
- S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26);
- S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27);
- S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28);
- S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29);
- S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30);
- S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31);
- S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32);
-
- leaq (4*4*4)(%rsi), %rax;
-
- testb %cl, %cl;
- jnz __enc_xor8;
-
- write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
- write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
-
- ret;
-
-__enc_xor8:
- xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
- xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
-
- ret;
-
-.align 8
-.global serpent_dec_blk_8way
-.type serpent_dec_blk_8way,@function;
-
-serpent_dec_blk_8way:
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- */
-
- pcmpeqd RNOT, RNOT;
-
- leaq (4*4*4)(%rdx), %rax;
- read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
- read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
-
- K2(RA, RB, RC, RD, RE, 32);
- SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31);
- SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30);
- SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29);
- SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28);
- SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27);
- SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26);
- SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25);
- SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24);
- SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23);
- SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22);
- SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21);
- SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20);
- SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19);
- SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18);
- SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17);
- SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16);
- SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15);
- SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14);
- SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13);
- SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12);
- SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11);
- SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10);
- SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9);
- SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8);
- SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7);
- SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6);
- SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5);
- SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4);
- SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3);
- SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2);
- SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1);
- S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0);
-
- leaq (4*4*4)(%rsi), %rax;
- write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2);
- write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
-
- ret;
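
The 8-way variant above is two of the 4-way groups driven in lockstep, and transpose_4x4 is what makes the lane layout work: loaded naively, each XMM register would hold all four words of one block, but the S-box and linear-transform macros need register j to hold word j of four different blocks. In scalar terms the punpck sequence is a plain 4x4 word transpose (a sketch; the names here are ours):

#include <stdint.h>

/* Swap b[i][j] with b[j][i]: afterwards row j holds word j of each of
 * the four blocks, matching the SIMD lane layout used above. */
static void transpose_4x4_words(uint32_t b[4][4])
{
	int i, j;

	for (i = 0; i < 4; i++) {
		for (j = i + 1; j < 4; j++) {
			uint32_t t = b[i][j];

			b[i][j] = b[j][i];
			b[j][i] = t;
		}
	}
}
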
diff --git a/ANDROID_3.4.5/arch/x86/crypto/serpent_sse2_glue.c b/ANDROID_3.4.5/arch/x86/crypto/serpent_sse2_glue.c
deleted file mode 100644
index 4b21be85..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/serpent_sse2_glue.c
+++ /dev/null
@@ -1,944 +0,0 @@
-/*
- * Glue Code for SSE2 assembler versions of Serpent Cipher
- *
- * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * Glue code based on aesni-intel_glue.c by:
- * Copyright (C) 2008, Intel Corp.
- * Author: Huang Ying <ying.huang@intel.com>
- *
- * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
- * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- * CTR part based on code (crypto/ctr.c) by:
- * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
- */
-
-#include <linux/module.h>
-#include <linux/hardirq.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/err.h>
-#include <crypto/algapi.h>
-#include <crypto/serpent.h>
-#include <crypto/cryptd.h>
-#include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
-#include <asm/i387.h>
-#include <asm/serpent.h>
-#include <crypto/scatterwalk.h>
-#include <linux/workqueue.h>
-#include <linux/spinlock.h>
-
-struct async_serpent_ctx {
- struct cryptd_ablkcipher *cryptd_tfm;
-};
-
-static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
-{
- if (fpu_enabled)
- return true;
-
-	/* SSE2 is only used when the chunk to be processed is large
-	 * enough, so do not enable the FPU until it is necessary.
-	 */
- if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS)
- return false;
-
- kernel_fpu_begin();
- return true;
-}
-
-static inline void serpent_fpu_end(bool fpu_enabled)
-{
- if (fpu_enabled)
- kernel_fpu_end();
-}
-
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
- bool enc)
-{
- bool fpu_enabled = false;
- struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- const unsigned int bsize = SERPENT_BLOCK_SIZE;
- unsigned int nbytes;
- int err;
-
- err = blkcipher_walk_virt(desc, walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- while ((nbytes = walk->nbytes)) {
- u8 *wsrc = walk->src.virt.addr;
- u8 *wdst = walk->dst.virt.addr;
-
- fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
-
- /* Process multi-block batch */
- if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
- do {
- if (enc)
- serpent_enc_blk_xway(ctx, wdst, wsrc);
- else
- serpent_dec_blk_xway(ctx, wdst, wsrc);
-
- wsrc += bsize * SERPENT_PARALLEL_BLOCKS;
- wdst += bsize * SERPENT_PARALLEL_BLOCKS;
- nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
- } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- if (enc)
- __serpent_encrypt(ctx, wdst, wsrc);
- else
- __serpent_decrypt(ctx, wdst, wsrc);
-
- wsrc += bsize;
- wdst += bsize;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- err = blkcipher_walk_done(desc, walk, nbytes);
- }
-
- serpent_fpu_end(fpu_enabled);
- return err;
-}
-
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, true);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, false);
-}
-
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- const unsigned int bsize = SERPENT_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 *iv = (u128 *)walk->iv;
-
- do {
- u128_xor(dst, src, iv);
- __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst);
- iv = dst;
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
- u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
- return nbytes;
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
-
- while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_encrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- return err;
-}
-
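-/* CBC decryption is parallelized by walking from the last block backwards,
- * so that in-place operation (dst == src) never overwrites a ciphertext
- * block before it has been used as the chaining value of its successor.
- */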
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- const unsigned int bsize = SERPENT_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
- u128 last_iv;
- int i;
-
- /* Start of the last block. */
- src += nbytes / bsize - 1;
- dst += nbytes / bsize - 1;
-
- last_iv = *src;
-
- /* Process multi-block batch */
- if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
- do {
- nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1);
- src -= SERPENT_PARALLEL_BLOCKS - 1;
- dst -= SERPENT_PARALLEL_BLOCKS - 1;
-
- for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
- ivs[i] = src[i];
-
- serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
-
- for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
- u128_xor(dst + (i + 1), dst + (i + 1), ivs + i);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- goto done;
-
- u128_xor(dst, dst, src - 1);
- src -= 1;
- dst -= 1;
- } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- for (;;) {
- __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- break;
-
- u128_xor(dst, dst, src - 1);
- src -= 1;
- dst -= 1;
- }
-
-done:
- u128_xor(dst, dst, (u128 *)walk->iv);
- *(u128 *)walk->iv = last_iv;
-
- return nbytes;
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- bool fpu_enabled = false;
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- while ((nbytes = walk.nbytes)) {
- fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
- nbytes = __cbc_decrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- serpent_fpu_end(fpu_enabled);
- return err;
-}
-
-static inline void u128_to_be128(be128 *dst, const u128 *src)
-{
- dst->a = cpu_to_be64(src->a);
- dst->b = cpu_to_be64(src->b);
-}
-
-static inline void be128_to_u128(u128 *dst, const be128 *src)
-{
- dst->a = be64_to_cpu(src->a);
- dst->b = be64_to_cpu(src->b);
-}
-
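-/* 128-bit increment: carry from the low qword (b) into the high qword (a),
- * e.g. {a = 0, b = 0xffffffffffffffff} increments to {a = 1, b = 0}.
- */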
-static inline void u128_inc(u128 *i)
-{
- i->b++;
- if (!i->b)
- i->a++;
-}
-
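-/* Final sub-block: encrypt the counter into a keystream block and xor only
- * the remaining nbytes into dst, so CTR needs no padding.
- */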
-static void ctr_crypt_final(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- u8 *ctrblk = walk->iv;
- u8 keystream[SERPENT_BLOCK_SIZE];
- u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
-
- __serpent_encrypt(ctx, keystream, ctrblk);
- crypto_xor(keystream, src, nbytes);
- memcpy(dst, keystream, nbytes);
-
- crypto_inc(ctrblk, SERPENT_BLOCK_SIZE);
-}
-
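-/* The counter is kept as a native-endian u128 while counting, so that
- * u128_inc is plain 64-bit arithmetic; it is converted back to big-endian
- * for the cipher input and for walk->iv.
- */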
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- const unsigned int bsize = SERPENT_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 ctrblk;
- be128 ctrblocks[SERPENT_PARALLEL_BLOCKS];
- int i;
-
- be128_to_u128(&ctrblk, (be128 *)walk->iv);
-
- /* Process multi-block batch */
- if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
- do {
- /* create ctrblks for parallel encrypt */
- for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
- if (dst != src)
- dst[i] = src[i];
-
- u128_to_be128(&ctrblocks[i], &ctrblk);
- u128_inc(&ctrblk);
- }
-
- serpent_enc_blk_xway_xor(ctx, (u8 *)dst,
- (u8 *)ctrblocks);
-
- src += SERPENT_PARALLEL_BLOCKS;
- dst += SERPENT_PARALLEL_BLOCKS;
- nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
- } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- if (dst != src)
- *dst = *src;
-
- u128_to_be128(&ctrblocks[0], &ctrblk);
- u128_inc(&ctrblk);
-
- __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
- u128_xor(dst, dst, (u128 *)ctrblocks);
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- u128_to_be128((be128 *)walk->iv, &ctrblk);
- return nbytes;
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- bool fpu_enabled = false;
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) {
- fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
- nbytes = __ctr_crypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- serpent_fpu_end(fpu_enabled);
-
- if (walk.nbytes) {
- ctr_crypt_final(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, 0);
- }
-
- return err;
-}
-
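-/* State shared with the LRW/XTS crypt callbacks below; fpu_enabled lets a
- * batch of callback invocations keep the FPU claimed across calls.
- */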
-struct crypt_priv {
- struct serpent_ctx *ctx;
- bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
- const unsigned int bsize = SERPENT_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
- if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
- serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst);
- return;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- __serpent_encrypt(ctx->ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
- const unsigned int bsize = SERPENT_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
- if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
- serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst);
- return;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- __serpent_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-struct serpent_lrw_ctx {
- struct lrw_table_ctx lrw_table;
- struct serpent_ctx serpent_ctx;
-};
-
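-/* LRW key layout: the Serpent key comes first, the trailing
- * SERPENT_BLOCK_SIZE bytes seed the tweak multiplication table.
- */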
-static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
- int err;
-
- err = __serpent_setkey(&ctx->serpent_ctx, key, keylen -
- SERPENT_BLOCK_SIZE);
- if (err)
- return err;
-
- return lrw_init_table(&ctx->lrw_table, key + keylen -
- SERPENT_BLOCK_SIZE);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[SERPENT_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->serpent_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = encrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- serpent_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[SERPENT_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->serpent_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = decrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- serpent_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static void lrw_exit_tfm(struct crypto_tfm *tfm)
-{
- struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
- lrw_free_table(&ctx->lrw_table);
-}
-
-struct serpent_xts_ctx {
- struct serpent_ctx tweak_ctx;
- struct serpent_ctx crypt_ctx;
-};
-
-static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
- u32 *flags = &tfm->crt_flags;
- int err;
-
-	/* the key consists of two keys of equal size concatenated,
-	 * therefore the total length must be even
-	 */
- if (keylen % 2) {
- *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
-
- /* first half of xts-key is for crypt */
- err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
- if (err)
- return err;
-
- /* second half of xts-key is for tweak */
- return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[SERPENT_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->crypt_ctx,
- .fpu_enabled = false,
- };
- struct xts_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .tweak_ctx = &ctx->tweak_ctx,
- .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = encrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = xts_crypt(desc, dst, src, nbytes, &req);
- serpent_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[SERPENT_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->crypt_ctx,
- .fpu_enabled = false,
- };
- struct xts_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .tweak_ctx = &ctx->tweak_ctx,
- .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = decrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = xts_crypt(desc, dst, src, nbytes, &req);
- serpent_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
- unsigned int key_len)
-{
- struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
- struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
- int err;
-
- crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
- crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
- & CRYPTO_TFM_REQ_MASK);
- err = crypto_ablkcipher_setkey(child, key, key_len);
- crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
- & CRYPTO_TFM_RES_MASK);
- return err;
-}
-
-static int __ablk_encrypt(struct ablkcipher_request *req)
-{
- struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
- struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
- struct blkcipher_desc desc;
-
- desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
- desc.info = req->info;
- desc.flags = 0;
-
- return crypto_blkcipher_crt(desc.tfm)->encrypt(
- &desc, req->dst, req->src, req->nbytes);
-}
-
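-/* If the FPU cannot be used in this context (irq_fpu_usable() is false),
- * defer the request to the cryptd workqueue; otherwise run it synchronously
- * on the internal blkcipher.
- */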
-static int ablk_encrypt(struct ablkcipher_request *req)
-{
- struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
- struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
- if (!irq_fpu_usable()) {
- struct ablkcipher_request *cryptd_req =
- ablkcipher_request_ctx(req);
-
- memcpy(cryptd_req, req, sizeof(*req));
- ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-
- return crypto_ablkcipher_encrypt(cryptd_req);
- } else {
- return __ablk_encrypt(req);
- }
-}
-
-static int ablk_decrypt(struct ablkcipher_request *req)
-{
- struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
- struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
- if (!irq_fpu_usable()) {
- struct ablkcipher_request *cryptd_req =
- ablkcipher_request_ctx(req);
-
- memcpy(cryptd_req, req, sizeof(*req));
- ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-
- return crypto_ablkcipher_decrypt(cryptd_req);
- } else {
- struct blkcipher_desc desc;
-
- desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
- desc.info = req->info;
- desc.flags = 0;
-
- return crypto_blkcipher_crt(desc.tfm)->decrypt(
- &desc, req->dst, req->src, req->nbytes);
- }
-}
-
-static void ablk_exit(struct crypto_tfm *tfm)
-{
- struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
-
- cryptd_free_ablkcipher(ctx->cryptd_tfm);
-}
-
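-/* Bind the async wrapper to its internal "__driver-" implementation
- * through cryptd.
- */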
-static int ablk_init(struct crypto_tfm *tfm)
-{
- struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
- struct cryptd_ablkcipher *cryptd_tfm;
- char drv_name[CRYPTO_MAX_ALG_NAME];
-
- snprintf(drv_name, sizeof(drv_name), "__driver-%s",
- crypto_tfm_alg_driver_name(tfm));
-
- cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
-
- ctx->cryptd_tfm = cryptd_tfm;
- tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
- crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-
- return 0;
-}
-
-static struct crypto_alg serpent_algs[10] = { {
- .cra_name = "__ecb-serpent-sse2",
- .cra_driver_name = "__driver-ecb-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .setkey = serpent_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "__cbc-serpent-sse2",
- .cra_driver_name = "__driver-cbc-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .setkey = serpent_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "__ctr-serpent-sse2",
- .cra_driver_name = "__driver-ctr-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = serpent_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-}, {
- .cra_name = "__lrw-serpent-sse2",
- .cra_driver_name = "__driver-lrw-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list),
- .cra_exit = lrw_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = lrw_serpent_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-}, {
- .cra_name = "__xts-serpent-sse2",
- .cra_driver_name = "__driver-xts-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
- .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = xts_serpent_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
- },
-}, {
- .cra_name = "ecb(serpent)",
- .cra_driver_name = "ecb-serpent-sse2",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list),
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(serpent)",
- .cra_driver_name = "cbc-serpent-sse2",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list),
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = ablk_set_key,
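-			/* CBC encryption is sequential and FPU-free, so run it synchronously */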
- .encrypt = __ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(serpent)",
- .cra_driver_name = "ctr-serpent-sse2",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list),
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
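-			/* CTR mode is symmetric: decryption reuses the encryption path */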
- .decrypt = ablk_encrypt,
- .geniv = "chainiv",
- },
- },
-}, {
- .cra_name = "lrw(serpent)",
- .cra_driver_name = "lrw-serpent-sse2",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list),
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "xts(serpent)",
- .cra_driver_name = "xts-serpent-sse2",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list),
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
- .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-} };
-
-static int __init serpent_sse2_init(void)
-{
- if (!cpu_has_xmm2) {
- printk(KERN_INFO "SSE2 instructions are not detected.\n");
- return -ENODEV;
- }
-
- return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
-}
-
-static void __exit serpent_sse2_exit(void)
-{
- crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
-}
-
-module_init(serpent_sse2_init);
-module_exit(serpent_sse2_exit);
-
-MODULE_DESCRIPTION("Serpent Cipher Algorithm, SSE2 optimized");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("serpent");
diff --git a/ANDROID_3.4.5/arch/x86/crypto/sha1_ssse3_asm.S b/ANDROID_3.4.5/arch/x86/crypto/sha1_ssse3_asm.S
deleted file mode 100644
index b2c2f57d..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/sha1_ssse3_asm.S
+++ /dev/null
@@ -1,558 +0,0 @@
-/*
- * This is a SIMD SHA-1 implementation. It requires the Intel(R) Supplemental
- * SSE3 instruction set extensions introduced in Intel Core Microarchitecture
- * processors. CPUs supporting Intel(R) AVX extensions will get an additional
- * boost.
- *
- * This work was inspired by the vectorized implementation of Dean Gaudet.
- * Additional information on it can be found at:
- * http://www.arctic.org/~dean/crypto/sha1.html
- *
- * It was improved upon with more efficient vectorization of the message
- * scheduling. This implementation has also been optimized for all current and
- * several future generations of Intel CPUs.
- *
- * See this article for more information about the implementation details:
- * http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1/
- *
- * Copyright (C) 2010, Intel Corp.
- * Authors: Maxim Locktyukhin <maxim.locktyukhin@intel.com>
- * Ronen Zohar <ronen.zohar@intel.com>
- *
- * Converted to AT&T syntax and adapted for inclusion in the Linux kernel:
- * Author: Mathias Krause <minipli@googlemail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#define CTX %rdi // arg1
-#define BUF %rsi // arg2
-#define CNT %rdx // arg3
-
-#define REG_A %ecx
-#define REG_B %esi
-#define REG_C %edi
-#define REG_D %ebp
-#define REG_E %edx
-
-#define REG_T1 %eax
-#define REG_T2 %ebx
-
-#define K_BASE %r8
-#define HASH_PTR %r9
-#define BUFFER_PTR %r10
-#define BUFFER_END %r11
-
-#define W_TMP1 %xmm0
-#define W_TMP2 %xmm9
-
-#define W0 %xmm1
-#define W4 %xmm2
-#define W8 %xmm3
-#define W12 %xmm4
-#define W16 %xmm5
-#define W20 %xmm6
-#define W24 %xmm7
-#define W28 %xmm8
-
-#define XMM_SHUFB_BSWAP %xmm10
-
-/* we keep a window of 64 pre-calculated w[i]+K values in a circular buffer */
-#define WK(t) (((t) & 15) * 4)(%rsp)
-#define W_PRECALC_AHEAD 16
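-/* W[] is pre-computed W_PRECALC_AHEAD rounds ahead of the scalar rounds
- * (see the W_PRECALC calls in RR), hiding vector latency behind the ALU
- * round function
- */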
-
-/*
- * This macro implements the SHA-1 function body for a single 64-byte block
- * param: function's name
- */
-.macro SHA1_VECTOR_ASM name
- .global \name
- .type \name, @function
- .align 32
-\name:
- push %rbx
- push %rbp
- push %r12
-
- mov %rsp, %r12
- sub $64, %rsp # allocate workspace
- and $~15, %rsp # align stack
-
- mov CTX, HASH_PTR
- mov BUF, BUFFER_PTR
-
- shl $6, CNT # multiply by 64
- add BUF, CNT
- mov CNT, BUFFER_END
-
- lea K_XMM_AR(%rip), K_BASE
- xmm_mov BSWAP_SHUFB_CTL(%rip), XMM_SHUFB_BSWAP
-
- SHA1_PIPELINED_MAIN_BODY
-
- # cleanup workspace
- mov $8, %ecx
- mov %rsp, %rdi
- xor %rax, %rax
- rep stosq
-
- mov %r12, %rsp # deallocate workspace
-
- pop %r12
- pop %rbp
- pop %rbx
- ret
-
- .size \name, .-\name
-.endm
-
-/*
- * This macro implements 80 rounds of SHA-1 for one 64-byte block
- */
-.macro SHA1_PIPELINED_MAIN_BODY
- INIT_REGALLOC
-
- mov (HASH_PTR), A
- mov 4(HASH_PTR), B
- mov 8(HASH_PTR), C
- mov 12(HASH_PTR), D
- mov 16(HASH_PTR), E
-
- .set i, 0
- .rept W_PRECALC_AHEAD
- W_PRECALC i
- .set i, (i+1)
- .endr
-
-.align 4
-1:
- RR F1,A,B,C,D,E,0
- RR F1,D,E,A,B,C,2
- RR F1,B,C,D,E,A,4
- RR F1,E,A,B,C,D,6
- RR F1,C,D,E,A,B,8
-
- RR F1,A,B,C,D,E,10
- RR F1,D,E,A,B,C,12
- RR F1,B,C,D,E,A,14
- RR F1,E,A,B,C,D,16
- RR F1,C,D,E,A,B,18
-
- RR F2,A,B,C,D,E,20
- RR F2,D,E,A,B,C,22
- RR F2,B,C,D,E,A,24
- RR F2,E,A,B,C,D,26
- RR F2,C,D,E,A,B,28
-
- RR F2,A,B,C,D,E,30
- RR F2,D,E,A,B,C,32
- RR F2,B,C,D,E,A,34
- RR F2,E,A,B,C,D,36
- RR F2,C,D,E,A,B,38
-
- RR F3,A,B,C,D,E,40
- RR F3,D,E,A,B,C,42
- RR F3,B,C,D,E,A,44
- RR F3,E,A,B,C,D,46
- RR F3,C,D,E,A,B,48
-
- RR F3,A,B,C,D,E,50
- RR F3,D,E,A,B,C,52
- RR F3,B,C,D,E,A,54
- RR F3,E,A,B,C,D,56
- RR F3,C,D,E,A,B,58
-
- add $64, BUFFER_PTR # move to the next 64-byte block
- cmp BUFFER_END, BUFFER_PTR # if the current is the last one use
- cmovae K_BASE, BUFFER_PTR # dummy source to avoid buffer overrun
-
- RR F4,A,B,C,D,E,60
- RR F4,D,E,A,B,C,62
- RR F4,B,C,D,E,A,64
- RR F4,E,A,B,C,D,66
- RR F4,C,D,E,A,B,68
-
- RR F4,A,B,C,D,E,70
- RR F4,D,E,A,B,C,72
- RR F4,B,C,D,E,A,74
- RR F4,E,A,B,C,D,76
- RR F4,C,D,E,A,B,78
-
- UPDATE_HASH (HASH_PTR), A
- UPDATE_HASH 4(HASH_PTR), B
- UPDATE_HASH 8(HASH_PTR), C
- UPDATE_HASH 12(HASH_PTR), D
- UPDATE_HASH 16(HASH_PTR), E
-
- RESTORE_RENAMED_REGS
-	cmp K_BASE, BUFFER_PTR	# BUFFER_PTR == K_BASE means we reached the end
- jne 1b
-.endm
-
-.macro INIT_REGALLOC
- .set A, REG_A
- .set B, REG_B
- .set C, REG_C
- .set D, REG_D
- .set E, REG_E
- .set T1, REG_T1
- .set T2, REG_T2
-.endm
-
-.macro RESTORE_RENAMED_REGS
- # order is important (REG_C is where it should be)
- mov B, REG_B
- mov D, REG_D
- mov A, REG_A
- mov E, REG_E
-.endm
-
-.macro SWAP_REG_NAMES a, b
- .set _T, \a
- .set \a, \b
- .set \b, _T
-.endm
-
-.macro F1 b, c, d
- mov \c, T1
- SWAP_REG_NAMES \c, T1
- xor \d, T1
- and \b, T1
- xor \d, T1
-.endm
-
-.macro F2 b, c, d
- mov \d, T1
- SWAP_REG_NAMES \d, T1
- xor \c, T1
- xor \b, T1
-.endm
-
-.macro F3 b, c, d
- mov \c, T1
- SWAP_REG_NAMES \c, T1
- mov \b, T2
- or \b, T1
- and \c, T2
- and \d, T1
- or T2, T1
-.endm
-
-.macro F4 b, c, d
- F2 \b, \c, \d
-.endm
-
-.macro UPDATE_HASH hash, val
- add \hash, \val
- mov \val, \hash
-.endm
-
-/*
- * RR does two rounds of SHA-1 back to back with W[] pre-calc
- *   t1 = F(b, c, d);   e += w(i)
- *   e += t1;           b <<= 30;   d += w(i+1);
- *   t1 = F(a, b, c);
- *   d += t1;           a <<= 5;
- *   e += a;
- *   t1 = e;            a >>= 7;
- *   t1 <<= 5;
- *   d += t1;
- */
-.macro RR F, a, b, c, d, e, round
- add WK(\round), \e
- \F \b, \c, \d # t1 = F(b, c, d);
- W_PRECALC (\round + W_PRECALC_AHEAD)
- rol $30, \b
- add T1, \e
- add WK(\round + 1), \d
-
- \F \a, \b, \c
- W_PRECALC (\round + W_PRECALC_AHEAD + 1)
- rol $5, \a
- add \a, \e
- add T1, \d
-	ror $7, \a		# (a <<r 5) >>r 7 => a <<r 30
-
- mov \e, T1
- SWAP_REG_NAMES \e, T1
-
- rol $5, T1
- add T1, \d
-
- # write: \a, \b
- # rotate: \a<=\d, \b<=\e, \c<=\a, \d<=\b, \e<=\c
-.endm
-
-.macro W_PRECALC r
- .set i, \r
-
- .if (i < 20)
- .set K_XMM, 0
- .elseif (i < 40)
- .set K_XMM, 16
- .elseif (i < 60)
- .set K_XMM, 32
- .elseif (i < 80)
- .set K_XMM, 48
- .endif
-
- .if ((i < 16) || ((i >= 80) && (i < (80 + W_PRECALC_AHEAD))))
- .set i, ((\r) % 80) # pre-compute for the next iteration
- .if (i == 0)
- W_PRECALC_RESET
- .endif
- W_PRECALC_00_15
- .elseif (i<32)
- W_PRECALC_16_31
- .elseif (i < 80) // rounds 32-79
- W_PRECALC_32_79
- .endif
-.endm
-
-.macro W_PRECALC_RESET
- .set W, W0
- .set W_minus_04, W4
- .set W_minus_08, W8
- .set W_minus_12, W12
- .set W_minus_16, W16
- .set W_minus_20, W20
- .set W_minus_24, W24
- .set W_minus_28, W28
- .set W_minus_32, W
-.endm
-
-.macro W_PRECALC_ROTATE
- .set W_minus_32, W_minus_28
- .set W_minus_28, W_minus_24
- .set W_minus_24, W_minus_20
- .set W_minus_20, W_minus_16
- .set W_minus_16, W_minus_12
- .set W_minus_12, W_minus_08
- .set W_minus_08, W_minus_04
- .set W_minus_04, W
- .set W, W_minus_32
-.endm
-
-.macro W_PRECALC_SSSE3
-
-.macro W_PRECALC_00_15
- W_PRECALC_00_15_SSSE3
-.endm
-.macro W_PRECALC_16_31
- W_PRECALC_16_31_SSSE3
-.endm
-.macro W_PRECALC_32_79
- W_PRECALC_32_79_SSSE3
-.endm
-
-/* message scheduling pre-compute for rounds 0-15 */
-.macro W_PRECALC_00_15_SSSE3
- .if ((i & 3) == 0)
- movdqu (i*4)(BUFFER_PTR), W_TMP1
- .elseif ((i & 3) == 1)
- pshufb XMM_SHUFB_BSWAP, W_TMP1
- movdqa W_TMP1, W
- .elseif ((i & 3) == 2)
- paddd (K_BASE), W_TMP1
- .elseif ((i & 3) == 3)
- movdqa W_TMP1, WK(i&~3)
- W_PRECALC_ROTATE
- .endif
-.endm
-
-/* message scheduling pre-compute for rounds 16-31
- *
- * - calculates the last 32 w[i] values in 8 XMM registers
- * - pre-calculates the K+w[i] values and stores them to memory, for a later
- *   load by the ALU add instruction
- *
- * the vectorization for rounds 16-31 is "heavy lifting" due to the
- * w[i]->w[i-3] dependency, but it improves for rounds 32-79
- */
-.macro W_PRECALC_16_31_SSSE3
-	# blended scheduling of the vector and scalar instruction streams:
-	# one 4-wide vector iteration per four scalar rounds
- .if ((i & 3) == 0)
- movdqa W_minus_12, W
- palignr $8, W_minus_16, W # w[i-14]
- movdqa W_minus_04, W_TMP1
- psrldq $4, W_TMP1 # w[i-3]
- pxor W_minus_08, W
- .elseif ((i & 3) == 1)
- pxor W_minus_16, W_TMP1
- pxor W_TMP1, W
- movdqa W, W_TMP2
- movdqa W, W_TMP1
- pslldq $12, W_TMP2
- .elseif ((i & 3) == 2)
- psrld $31, W
- pslld $1, W_TMP1
- por W, W_TMP1
- movdqa W_TMP2, W
- psrld $30, W_TMP2
- pslld $2, W
- .elseif ((i & 3) == 3)
- pxor W, W_TMP1
- pxor W_TMP2, W_TMP1
- movdqa W_TMP1, W
- paddd K_XMM(K_BASE), W_TMP1
- movdqa W_TMP1, WK(i&~3)
- W_PRECALC_ROTATE
- .endif
-.endm
-
-/* message scheduling pre-compute for rounds 32-79
- *
- * the SHA-1 specification defines: w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1
- * we compute the equivalent:       w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2
- * which allows more efficient vectorization, since the w[i]=>w[i-3] dependency is broken
- */
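-/* derivation sketch: expanding w[i-3], w[i-8], w[i-14] and w[i-16] with the
- * original recurrence makes the cross terms (w[i-11], w[i-17], w[i-19],
- * w[i-22], w[i-24], w[i-30]) each appear twice, so they cancel under xor,
- * leaving w[i] = ((w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 1) rol 1 for
- * i >= 32
- */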
-.macro W_PRECALC_32_79_SSSE3
- .if ((i & 3) == 0)
- movdqa W_minus_04, W_TMP1
- pxor W_minus_28, W # W is W_minus_32 before xor
- palignr $8, W_minus_08, W_TMP1
- .elseif ((i & 3) == 1)
- pxor W_minus_16, W
- pxor W_TMP1, W
- movdqa W, W_TMP1
- .elseif ((i & 3) == 2)
- psrld $30, W
- pslld $2, W_TMP1
- por W, W_TMP1
- .elseif ((i & 3) == 3)
- movdqa W_TMP1, W
- paddd K_XMM(K_BASE), W_TMP1
- movdqa W_TMP1, WK(i&~3)
- W_PRECALC_ROTATE
- .endif
-.endm
-
-.endm // W_PRECALC_SSSE3
-
-
-#define K1 0x5a827999
-#define K2 0x6ed9eba1
-#define K3 0x8f1bbcdc
-#define K4 0xca62c1d6
-
-.section .rodata
-.align 16
-
-K_XMM_AR:
- .long K1, K1, K1, K1
- .long K2, K2, K2, K2
- .long K3, K3, K3, K3
- .long K4, K4, K4, K4
-
-BSWAP_SHUFB_CTL:
- .long 0x00010203
- .long 0x04050607
- .long 0x08090a0b
- .long 0x0c0d0e0f
-
-
-.section .text
-
-W_PRECALC_SSSE3
-.macro xmm_mov a, b
- movdqu \a,\b
-.endm
-
-/* SSSE3 optimized implementation:
- * extern "C" void sha1_transform_ssse3(u32 *digest, const char *data, u32 *ws,
- * unsigned int rounds);
- */
-SHA1_VECTOR_ASM sha1_transform_ssse3
-
-#ifdef SHA1_ENABLE_AVX_SUPPORT
-
-.macro W_PRECALC_AVX
-
-.purgem W_PRECALC_00_15
-.macro W_PRECALC_00_15
- W_PRECALC_00_15_AVX
-.endm
-.purgem W_PRECALC_16_31
-.macro W_PRECALC_16_31
- W_PRECALC_16_31_AVX
-.endm
-.purgem W_PRECALC_32_79
-.macro W_PRECALC_32_79
- W_PRECALC_32_79_AVX
-.endm
-
-.macro W_PRECALC_00_15_AVX
- .if ((i & 3) == 0)
- vmovdqu (i*4)(BUFFER_PTR), W_TMP1
- .elseif ((i & 3) == 1)
- vpshufb XMM_SHUFB_BSWAP, W_TMP1, W
- .elseif ((i & 3) == 2)
- vpaddd (K_BASE), W, W_TMP1
- .elseif ((i & 3) == 3)
- vmovdqa W_TMP1, WK(i&~3)
- W_PRECALC_ROTATE
- .endif
-.endm
-
-.macro W_PRECALC_16_31_AVX
- .if ((i & 3) == 0)
- vpalignr $8, W_minus_16, W_minus_12, W # w[i-14]
- vpsrldq $4, W_minus_04, W_TMP1 # w[i-3]
- vpxor W_minus_08, W, W
- vpxor W_minus_16, W_TMP1, W_TMP1
- .elseif ((i & 3) == 1)
- vpxor W_TMP1, W, W
- vpslldq $12, W, W_TMP2
- vpslld $1, W, W_TMP1
- .elseif ((i & 3) == 2)
- vpsrld $31, W, W
- vpor W, W_TMP1, W_TMP1
- vpslld $2, W_TMP2, W
- vpsrld $30, W_TMP2, W_TMP2
- .elseif ((i & 3) == 3)
- vpxor W, W_TMP1, W_TMP1
- vpxor W_TMP2, W_TMP1, W
- vpaddd K_XMM(K_BASE), W, W_TMP1
- vmovdqu W_TMP1, WK(i&~3)
- W_PRECALC_ROTATE
- .endif
-.endm
-
-.macro W_PRECALC_32_79_AVX
- .if ((i & 3) == 0)
- vpalignr $8, W_minus_08, W_minus_04, W_TMP1
- vpxor W_minus_28, W, W # W is W_minus_32 before xor
- .elseif ((i & 3) == 1)
- vpxor W_minus_16, W_TMP1, W_TMP1
- vpxor W_TMP1, W, W
- .elseif ((i & 3) == 2)
- vpslld $2, W, W_TMP1
- vpsrld $30, W, W
- vpor W, W_TMP1, W
- .elseif ((i & 3) == 3)
- vpaddd K_XMM(K_BASE), W, W_TMP1
- vmovdqu W_TMP1, WK(i&~3)
- W_PRECALC_ROTATE
- .endif
-.endm
-
-.endm // W_PRECALC_AVX
-
-W_PRECALC_AVX
-.purgem xmm_mov
-.macro xmm_mov a, b
- vmovdqu \a,\b
-.endm
-
-
-/* AVX optimized implementation:
- * extern "C" void sha1_transform_avx(u32 *digest, const char *data, u32 *ws,
- * unsigned int rounds);
- */
-SHA1_VECTOR_ASM sha1_transform_avx
-
-#endif
diff --git a/ANDROID_3.4.5/arch/x86/crypto/sha1_ssse3_glue.c b/ANDROID_3.4.5/arch/x86/crypto/sha1_ssse3_glue.c
deleted file mode 100644
index f916499d..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/sha1_ssse3_glue.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Cryptographic API.
- *
- * Glue code for the SHA1 Secure Hash Algorithm assembler implementation using
- * Supplemental SSE3 instructions.
- *
- * This file is based on sha1_generic.c
- *
- * Copyright (c) Alan Smithee.
- * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
- * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
- * Copyright (c) Mathias Krause <minipli@googlemail.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <crypto/sha.h>
-#include <asm/byteorder.h>
-#include <asm/i387.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
-
-
-asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
- unsigned int rounds);
-#ifdef SHA1_ENABLE_AVX_SUPPORT
-asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
- unsigned int rounds);
-#endif
-
-static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
-
-
-static int sha1_ssse3_init(struct shash_desc *desc)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- *sctx = (struct sha1_state){
- .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
- };
-
- return 0;
-}
-
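-/* Assumes the caller holds the FPU (kernel_fpu_begin): consume the buffered
- * partial block first, then whole blocks straight from 'data'; the tail is
- * buffered for the next update.
- */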
-static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
- unsigned int len, unsigned int partial)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int done = 0;
-
- sctx->count += len;
-
- if (partial) {
- done = SHA1_BLOCK_SIZE - partial;
- memcpy(sctx->buffer + partial, data, done);
- sha1_transform_asm(sctx->state, sctx->buffer, 1);
- }
-
- if (len - done >= SHA1_BLOCK_SIZE) {
- const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
-
- sha1_transform_asm(sctx->state, data + done, rounds);
- done += rounds * SHA1_BLOCK_SIZE;
- }
-
- memcpy(sctx->buffer, data + done, len - done);
-
- return 0;
-}
-
-static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
- int res;
-
- /* Handle the fast case right here */
- if (partial + len < SHA1_BLOCK_SIZE) {
- sctx->count += len;
- memcpy(sctx->buffer + partial, data, len);
-
- return 0;
- }
-
- if (!irq_fpu_usable()) {
- res = crypto_sha1_update(desc, data, len);
- } else {
- kernel_fpu_begin();
- res = __sha1_ssse3_update(desc, data, len, partial);
- kernel_fpu_end();
- }
-
- return res;
-}
-
-
-/* Add padding and return the message digest. */
-static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int i, index, padlen;
- __be32 *dst = (__be32 *)out;
- __be64 bits;
- static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
- bits = cpu_to_be64(sctx->count << 3);
-
- /* Pad out to 56 mod 64 and append length */
- index = sctx->count % SHA1_BLOCK_SIZE;
- padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
- if (!irq_fpu_usable()) {
- crypto_sha1_update(desc, padding, padlen);
- crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
- } else {
- kernel_fpu_begin();
- /* We need to fill a whole block for __sha1_ssse3_update() */
- if (padlen <= 56) {
- sctx->count += padlen;
- memcpy(sctx->buffer + index, padding, padlen);
- } else {
- __sha1_ssse3_update(desc, padding, padlen, index);
- }
- __sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56);
- kernel_fpu_end();
- }
-
- /* Store state in digest */
- for (i = 0; i < 5; i++)
- dst[i] = cpu_to_be32(sctx->state[i]);
-
- /* Wipe context */
- memset(sctx, 0, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha1_ssse3_export(struct shash_desc *desc, void *out)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, sctx, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha1_ssse3_import(struct shash_desc *desc, const void *in)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
-
- return 0;
-}
-
-static struct shash_alg alg = {
- .digestsize = SHA1_DIGEST_SIZE,
- .init = sha1_ssse3_init,
- .update = sha1_ssse3_update,
- .final = sha1_ssse3_final,
- .export = sha1_ssse3_export,
- .import = sha1_ssse3_import,
- .descsize = sizeof(struct sha1_state),
- .statesize = sizeof(struct sha1_state),
- .base = {
- .cra_name = "sha1",
- .cra_driver_name= "sha1-ssse3",
- .cra_priority = 150,
- .cra_flags = CRYPTO_ALG_TYPE_SHASH,
- .cra_blocksize = SHA1_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-#ifdef SHA1_ENABLE_AVX_SUPPORT
-static bool __init avx_usable(void)
-{
- u64 xcr0;
-
- if (!cpu_has_avx || !cpu_has_osxsave)
- return false;
-
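-	/* the OS must have enabled both SSE and YMM state saving in XCR0,
-	 * otherwise the upper AVX registers are not preserved across
-	 * context switches
-	 */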
- xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
- if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
- pr_info("AVX detected but unusable.\n");
-
- return false;
- }
-
- return true;
-}
-#endif
-
-static int __init sha1_ssse3_mod_init(void)
-{
- /* test for SSSE3 first */
- if (cpu_has_ssse3)
- sha1_transform_asm = sha1_transform_ssse3;
-
-#ifdef SHA1_ENABLE_AVX_SUPPORT
-	/* allow AVX to override SSSE3; it's a little faster */
- if (avx_usable())
- sha1_transform_asm = sha1_transform_avx;
-#endif
-
- if (sha1_transform_asm) {
- pr_info("Using %s optimized SHA-1 implementation\n",
- sha1_transform_asm == sha1_transform_ssse3 ? "SSSE3"
- : "AVX");
- return crypto_register_shash(&alg);
- }
- pr_info("Neither AVX nor SSSE3 is available/usable.\n");
-
- return -ENODEV;
-}
-
-static void __exit sha1_ssse3_mod_fini(void)
-{
- crypto_unregister_shash(&alg);
-}
-
-module_init(sha1_ssse3_mod_init);
-module_exit(sha1_ssse3_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated");
-
-MODULE_ALIAS("sha1");
diff --git a/ANDROID_3.4.5/arch/x86/crypto/twofish-i586-asm_32.S b/ANDROID_3.4.5/arch/x86/crypto/twofish-i586-asm_32.S
deleted file mode 100644
index 658af4bb..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/twofish-i586-asm_32.S
+++ /dev/null
@@ -1,335 +0,0 @@
-/***************************************************************************
-* Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> *
-* *
-* This program is free software; you can redistribute it and/or modify *
-* it under the terms of the GNU General Public License as published by *
-* the Free Software Foundation; either version 2 of the License, or *
-* (at your option) any later version. *
-* *
-* This program is distributed in the hope that it will be useful, *
-* but WITHOUT ANY WARRANTY; without even the implied warranty of *
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
-* GNU General Public License for more details. *
-* *
-* You should have received a copy of the GNU General Public License *
-* along with this program; if not, write to the *
-* Free Software Foundation, Inc., *
-* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
-***************************************************************************/
-
-.file "twofish-i586-asm.S"
-.text
-
-#include <asm/asm-offsets.h>
-
-/* return address at 0 */
-
-#define in_blk 12 /* input byte array address parameter */
-#define out_blk 8 /* output byte array address parameter */
-#define ctx 4 /* Twofish context structure */
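-/* the prologue of each function below pushes four registers, so the
- * arguments sit 16 bytes higher, hence the "+16(%esp)" addressing used
- * in the bodies
- */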
-
-#define a_offset 0
-#define b_offset 4
-#define c_offset 8
-#define d_offset 12
-
-/* Structure of the crypto context struct */
-
-#define s0 0 /* S0 Array 256 Words each */
-#define s1 1024 /* S1 Array */
-#define s2 2048 /* S2 Array */
-#define s3 3072 /* S3 Array */
-#define w 4096 /* 8 whitening keys (word) */
-#define k 4128 /* key 1-32 ( word ) */
-
-/* define a few register aliases to allow macro substitution */
-
-#define R0D %eax
-#define R0B %al
-#define R0H %ah
-
-#define R1D %ebx
-#define R1B %bl
-#define R1H %bh
-
-#define R2D %ecx
-#define R2B %cl
-#define R2H %ch
-
-#define R3D %edx
-#define R3B %dl
-#define R3H %dh
-
-
-/* performs input whitening */
-#define input_whitening(src,context,offset)\
- xor w+offset(context), src;
-
-/* performs input whitening */
-#define output_whitening(src,context,offset)\
- xor w+16+offset(context), src;
-
-/*
- * a: input register containing a (rotated 16)
- * b: input register containing b
- * c: input register containing c
- * d: input register containing d (already rol $1)
- * operations on a and b are interleaved to increase performance
- */
-#define encrypt_round(a,b,c,d,round)\
- push d ## D;\
- movzx b ## B, %edi;\
- mov s1(%ebp,%edi,4),d ## D;\
- movzx a ## B, %edi;\
- mov s2(%ebp,%edi,4),%esi;\
- movzx b ## H, %edi;\
- ror $16, b ## D;\
- xor s2(%ebp,%edi,4),d ## D;\
- movzx a ## H, %edi;\
- ror $16, a ## D;\
- xor s3(%ebp,%edi,4),%esi;\
- movzx b ## B, %edi;\
- xor s3(%ebp,%edi,4),d ## D;\
- movzx a ## B, %edi;\
- xor (%ebp,%edi,4), %esi;\
- movzx b ## H, %edi;\
- ror $15, b ## D;\
- xor (%ebp,%edi,4), d ## D;\
- movzx a ## H, %edi;\
- xor s1(%ebp,%edi,4),%esi;\
- pop %edi;\
- add d ## D, %esi;\
- add %esi, d ## D;\
- add k+round(%ebp), %esi;\
- xor %esi, c ## D;\
- rol $15, c ## D;\
- add k+4+round(%ebp),d ## D;\
- xor %edi, d ## D;
-
-/*
- * a: input register containing a (rotated 16)
- * b: input register containing b
- * c: input register containing c
- * d: input register containing d (already rol $1)
- * operations on a and b are interleaved to increase performance
- * the last round uses different rotations to prepare the output
- */
-#define encrypt_last_round(a,b,c,d,round)\
- push d ## D;\
- movzx b ## B, %edi;\
- mov s1(%ebp,%edi,4),d ## D;\
- movzx a ## B, %edi;\
- mov s2(%ebp,%edi,4),%esi;\
- movzx b ## H, %edi;\
- ror $16, b ## D;\
- xor s2(%ebp,%edi,4),d ## D;\
- movzx a ## H, %edi;\
- ror $16, a ## D;\
- xor s3(%ebp,%edi,4),%esi;\
- movzx b ## B, %edi;\
- xor s3(%ebp,%edi,4),d ## D;\
- movzx a ## B, %edi;\
- xor (%ebp,%edi,4), %esi;\
- movzx b ## H, %edi;\
- ror $16, b ## D;\
- xor (%ebp,%edi,4), d ## D;\
- movzx a ## H, %edi;\
- xor s1(%ebp,%edi,4),%esi;\
- pop %edi;\
- add d ## D, %esi;\
- add %esi, d ## D;\
- add k+round(%ebp), %esi;\
- xor %esi, c ## D;\
- ror $1, c ## D;\
- add k+4+round(%ebp),d ## D;\
- xor %edi, d ## D;
-
-/*
- * a: input register containing a
- * b: input register containing b (rotated 16)
- * c: input register containing c
- * d: input register containing d (already rol $1)
- * operations on a and b are interleaved to increase performance
- */
-#define decrypt_round(a,b,c,d,round)\
- push c ## D;\
- movzx a ## B, %edi;\
- mov (%ebp,%edi,4), c ## D;\
- movzx b ## B, %edi;\
- mov s3(%ebp,%edi,4),%esi;\
- movzx a ## H, %edi;\
- ror $16, a ## D;\
- xor s1(%ebp,%edi,4),c ## D;\
- movzx b ## H, %edi;\
- ror $16, b ## D;\
- xor (%ebp,%edi,4), %esi;\
- movzx a ## B, %edi;\
- xor s2(%ebp,%edi,4),c ## D;\
- movzx b ## B, %edi;\
- xor s1(%ebp,%edi,4),%esi;\
- movzx a ## H, %edi;\
- ror $15, a ## D;\
- xor s3(%ebp,%edi,4),c ## D;\
- movzx b ## H, %edi;\
- xor s2(%ebp,%edi,4),%esi;\
- pop %edi;\
- add %esi, c ## D;\
- add c ## D, %esi;\
- add k+round(%ebp), c ## D;\
- xor %edi, c ## D;\
- add k+4+round(%ebp),%esi;\
- xor %esi, d ## D;\
- rol $15, d ## D;
-
-/*
- * a: input register containing a
- * b: input register containing b (rotated 16)
- * c: input register containing c
- * d: input register containing d (already rol $1)
- * operations on a and b are interleaved to increase performance
- * the last round uses different rotations to prepare the output
- */
-#define decrypt_last_round(a,b,c,d,round)\
- push c ## D;\
- movzx a ## B, %edi;\
- mov (%ebp,%edi,4), c ## D;\
- movzx b ## B, %edi;\
- mov s3(%ebp,%edi,4),%esi;\
- movzx a ## H, %edi;\
- ror $16, a ## D;\
- xor s1(%ebp,%edi,4),c ## D;\
- movzx b ## H, %edi;\
- ror $16, b ## D;\
- xor (%ebp,%edi,4), %esi;\
- movzx a ## B, %edi;\
- xor s2(%ebp,%edi,4),c ## D;\
- movzx b ## B, %edi;\
- xor s1(%ebp,%edi,4),%esi;\
- movzx a ## H, %edi;\
- ror $16, a ## D;\
- xor s3(%ebp,%edi,4),c ## D;\
- movzx b ## H, %edi;\
- xor s2(%ebp,%edi,4),%esi;\
- pop %edi;\
- add %esi, c ## D;\
- add c ## D, %esi;\
- add k+round(%ebp), c ## D;\
- xor %edi, c ## D;\
- add k+4+round(%ebp),%esi;\
- xor %esi, d ## D;\
- ror $1, d ## D;
-
-.align 4
-.global twofish_enc_blk
-.global twofish_dec_blk
-
-twofish_enc_blk:
- push %ebp /* save registers according to calling convention*/
- push %ebx
- push %esi
- push %edi
-
- mov ctx + 16(%esp), %ebp /* abuse the base pointer: set new base
- * pointer to the ctx address */
- mov in_blk+16(%esp),%edi /* input address in edi */
-
- mov (%edi), %eax
- mov b_offset(%edi), %ebx
- mov c_offset(%edi), %ecx
- mov d_offset(%edi), %edx
- input_whitening(%eax,%ebp,a_offset)
- ror $16, %eax
- input_whitening(%ebx,%ebp,b_offset)
- input_whitening(%ecx,%ebp,c_offset)
- input_whitening(%edx,%ebp,d_offset)
- rol $1, %edx
-
- encrypt_round(R0,R1,R2,R3,0);
- encrypt_round(R2,R3,R0,R1,8);
- encrypt_round(R0,R1,R2,R3,2*8);
- encrypt_round(R2,R3,R0,R1,3*8);
- encrypt_round(R0,R1,R2,R3,4*8);
- encrypt_round(R2,R3,R0,R1,5*8);
- encrypt_round(R0,R1,R2,R3,6*8);
- encrypt_round(R2,R3,R0,R1,7*8);
- encrypt_round(R0,R1,R2,R3,8*8);
- encrypt_round(R2,R3,R0,R1,9*8);
- encrypt_round(R0,R1,R2,R3,10*8);
- encrypt_round(R2,R3,R0,R1,11*8);
- encrypt_round(R0,R1,R2,R3,12*8);
- encrypt_round(R2,R3,R0,R1,13*8);
- encrypt_round(R0,R1,R2,R3,14*8);
- encrypt_last_round(R2,R3,R0,R1,15*8);
-
- output_whitening(%eax,%ebp,c_offset)
- output_whitening(%ebx,%ebp,d_offset)
- output_whitening(%ecx,%ebp,a_offset)
- output_whitening(%edx,%ebp,b_offset)
- mov out_blk+16(%esp),%edi;
- mov %eax, c_offset(%edi)
- mov %ebx, d_offset(%edi)
- mov %ecx, (%edi)
- mov %edx, b_offset(%edi)
-
- pop %edi
- pop %esi
- pop %ebx
- pop %ebp
- mov $1, %eax
- ret
-
-twofish_dec_blk:
- push %ebp /* save registers according to calling convention*/
- push %ebx
- push %esi
- push %edi
-
-
- mov ctx + 16(%esp), %ebp /* abuse the base pointer: set new base
- * pointer to the ctx address */
- mov in_blk+16(%esp),%edi /* input address in edi */
-
- mov (%edi), %eax
- mov b_offset(%edi), %ebx
- mov c_offset(%edi), %ecx
- mov d_offset(%edi), %edx
- output_whitening(%eax,%ebp,a_offset)
- output_whitening(%ebx,%ebp,b_offset)
- ror $16, %ebx
- output_whitening(%ecx,%ebp,c_offset)
- output_whitening(%edx,%ebp,d_offset)
- rol $1, %ecx
-
- decrypt_round(R0,R1,R2,R3,15*8);
- decrypt_round(R2,R3,R0,R1,14*8);
- decrypt_round(R0,R1,R2,R3,13*8);
- decrypt_round(R2,R3,R0,R1,12*8);
- decrypt_round(R0,R1,R2,R3,11*8);
- decrypt_round(R2,R3,R0,R1,10*8);
- decrypt_round(R0,R1,R2,R3,9*8);
- decrypt_round(R2,R3,R0,R1,8*8);
- decrypt_round(R0,R1,R2,R3,7*8);
- decrypt_round(R2,R3,R0,R1,6*8);
- decrypt_round(R0,R1,R2,R3,5*8);
- decrypt_round(R2,R3,R0,R1,4*8);
- decrypt_round(R0,R1,R2,R3,3*8);
- decrypt_round(R2,R3,R0,R1,2*8);
- decrypt_round(R0,R1,R2,R3,1*8);
- decrypt_last_round(R2,R3,R0,R1,0);
-
- input_whitening(%eax,%ebp,c_offset)
- input_whitening(%ebx,%ebp,d_offset)
- input_whitening(%ecx,%ebp,a_offset)
- input_whitening(%edx,%ebp,b_offset)
- mov out_blk+16(%esp),%edi;
- mov %eax, c_offset(%edi)
- mov %ebx, d_offset(%edi)
- mov %ecx, (%edi)
- mov %edx, b_offset(%edi)
-
- pop %edi
- pop %esi
- pop %ebx
- pop %ebp
- mov $1, %eax
- ret
diff --git a/ANDROID_3.4.5/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/ANDROID_3.4.5/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
deleted file mode 100644
index 5b012a2c..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+++ /dev/null
@@ -1,316 +0,0 @@
-/*
- * Twofish Cipher 3-way parallel algorithm (x86_64)
- *
- * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
- */
-
-.file "twofish-x86_64-asm-3way.S"
-.text
-
-/* structure of crypto context */
-#define s0 0
-#define s1 1024
-#define s2 2048
-#define s3 3072
-#define w 4096
-#define k 4128
-
-/**********************************************************************
- 3-way twofish
- **********************************************************************/
-#define CTX %rdi
-#define RIO %rdx
-
-#define RAB0 %rax
-#define RAB1 %rbx
-#define RAB2 %rcx
-
-#define RAB0d %eax
-#define RAB1d %ebx
-#define RAB2d %ecx
-
-#define RAB0bh %ah
-#define RAB1bh %bh
-#define RAB2bh %ch
-
-#define RAB0bl %al
-#define RAB1bl %bl
-#define RAB2bl %cl
-
-#define RCD0 %r8
-#define RCD1 %r9
-#define RCD2 %r10
-
-#define RCD0d %r8d
-#define RCD1d %r9d
-#define RCD2d %r10d
-
-#define RX0 %rbp
-#define RX1 %r11
-#define RX2 %r12
-
-#define RX0d %ebp
-#define RX1d %r11d
-#define RX2d %r12d
-
-#define RY0 %r13
-#define RY1 %r14
-#define RY2 %r15
-
-#define RY0d %r13d
-#define RY1d %r14d
-#define RY2d %r15d
-
-#define RT0 %rdx
-#define RT1 %rsi
-
-#define RT0d %edx
-#define RT1d %esi
-
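-/* look up the two low bytes of ab in the 256-entry tables T0/T1, combine
- * the results into dst with op1/op2, and rotate ab so the next byte pair
- * is in place
- */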
-#define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \
- movzbl ab ## bl, tmp2 ## d; \
- movzbl ab ## bh, tmp1 ## d; \
- rorq $(rot), ab; \
- op1##l T0(CTX, tmp2, 4), dst ## d; \
- op2##l T1(CTX, tmp1, 4), dst ## d;
-
-/*
- * Combined G1 & G2 function. Reordered with the help of rotates so that the
- * moves come at the beginning.
- */
-#define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \
- /* G1,1 && G2,1 */ \
- do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \
- do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \
- \
- do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \
- do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \
- \
- do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \
- do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \
- \
- /* G1,2 && G2,2 */ \
- do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
- do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
- xchgq cd ## 0, ab ## 0; \
- \
- do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
- do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
- xchgq cd ## 1, ab ## 1; \
- \
- do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
- do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
- xchgq cd ## 2, ab ## 2;
-
-#define enc_round_end(ab, x, y, n) \
- addl y ## d, x ## d; \
- addl x ## d, y ## d; \
- addl k+4*(2*(n))(CTX), x ## d; \
- xorl ab ## d, x ## d; \
- addl k+4*(2*(n)+1)(CTX), y ## d; \
- shrq $32, ab; \
- roll $1, ab ## d; \
- xorl y ## d, ab ## d; \
- shlq $32, ab; \
- rorl $1, x ## d; \
- orq x, ab;
-
-#define dec_round_end(ba, x, y, n) \
- addl y ## d, x ## d; \
- addl x ## d, y ## d; \
- addl k+4*(2*(n))(CTX), x ## d; \
- addl k+4*(2*(n)+1)(CTX), y ## d; \
- xorl ba ## d, y ## d; \
- shrq $32, ba; \
- roll $1, ba ## d; \
- xorl x ## d, ba ## d; \
- shlq $32, ba; \
- rorl $1, y ## d; \
- orq y, ba;
-
-#define encrypt_round3(ab, cd, n) \
- g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \
- \
- enc_round_end(ab ## 0, RX0, RY0, n); \
- enc_round_end(ab ## 1, RX1, RY1, n); \
- enc_round_end(ab ## 2, RX2, RY2, n);
-
-#define decrypt_round3(ba, dc, n) \
- g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \
- \
- dec_round_end(ba ## 0, RX0, RY0, n); \
- dec_round_end(ba ## 1, RX1, RY1, n); \
- dec_round_end(ba ## 2, RX2, RY2, n);
-
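-/* one cycle is two rounds; decryption applies them with the round indices
- * reversed
- */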
-#define encrypt_cycle3(ab, cd, n) \
- encrypt_round3(ab, cd, n*2); \
- encrypt_round3(ab, cd, (n*2)+1);
-
-#define decrypt_cycle3(ba, dc, n) \
- decrypt_round3(ba, dc, (n*2)+1); \
- decrypt_round3(ba, dc, (n*2));
-
-#define inpack3(in, n, xy, m) \
- movq 4*(n)(in), xy ## 0; \
- xorq w+4*m(CTX), xy ## 0; \
- \
- movq 4*(4+(n))(in), xy ## 1; \
- xorq w+4*m(CTX), xy ## 1; \
- \
- movq 4*(8+(n))(in), xy ## 2; \
- xorq w+4*m(CTX), xy ## 2;
-
-#define outunpack3(op, out, n, xy, m) \
- xorq w+4*m(CTX), xy ## 0; \
- op ## q xy ## 0, 4*(n)(out); \
- \
- xorq w+4*m(CTX), xy ## 1; \
- op ## q xy ## 1, 4*(4+(n))(out); \
- \
- xorq w+4*m(CTX), xy ## 2; \
- op ## q xy ## 2, 4*(8+(n))(out);
-
-#define inpack_enc3() \
- inpack3(RIO, 0, RAB, 0); \
- inpack3(RIO, 2, RCD, 2);
-
-#define outunpack_enc3(op) \
- outunpack3(op, RIO, 2, RAB, 6); \
- outunpack3(op, RIO, 0, RCD, 4);
-
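-/* rorq $32 swaps the two 32-bit halves of each quadword: decryption
- * consumes the word pairs in the opposite order from encryption
- */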
-#define inpack_dec3() \
- inpack3(RIO, 0, RAB, 4); \
- rorq $32, RAB0; \
- rorq $32, RAB1; \
- rorq $32, RAB2; \
- inpack3(RIO, 2, RCD, 6); \
- rorq $32, RCD0; \
- rorq $32, RCD1; \
- rorq $32, RCD2;
-
-#define outunpack_dec3() \
- rorq $32, RCD0; \
- rorq $32, RCD1; \
- rorq $32, RCD2; \
- outunpack3(mov, RIO, 0, RCD, 0); \
- rorq $32, RAB0; \
- rorq $32, RAB1; \
- rorq $32, RAB2; \
- outunpack3(mov, RIO, 2, RAB, 2);
-
-.align 8
-.global __twofish_enc_blk_3way
-.type __twofish_enc_blk_3way,@function;
-
-__twofish_enc_blk_3way:
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src, RIO
- * %rcx: bool, if true: xor output
- */
- pushq %r15;
- pushq %r14;
- pushq %r13;
- pushq %r12;
- pushq %rbp;
- pushq %rbx;
-
- pushq %rcx; /* bool xor */
- pushq %rsi; /* dst */
-
- inpack_enc3();
-
- encrypt_cycle3(RAB, RCD, 0);
- encrypt_cycle3(RAB, RCD, 1);
- encrypt_cycle3(RAB, RCD, 2);
- encrypt_cycle3(RAB, RCD, 3);
- encrypt_cycle3(RAB, RCD, 4);
- encrypt_cycle3(RAB, RCD, 5);
- encrypt_cycle3(RAB, RCD, 6);
- encrypt_cycle3(RAB, RCD, 7);
-
- popq RIO; /* dst */
- popq %rbp; /* bool xor */
-
- testb %bpl, %bpl;
- jnz __enc_xor3;
-
- outunpack_enc3(mov);
-
- popq %rbx;
- popq %rbp;
- popq %r12;
- popq %r13;
- popq %r14;
- popq %r15;
- ret;
-
-__enc_xor3:
- outunpack_enc3(xor);
-
- popq %rbx;
- popq %rbp;
- popq %r12;
- popq %r13;
- popq %r14;
- popq %r15;
- ret;
-
-.global twofish_dec_blk_3way
-.type twofish_dec_blk_3way,@function;
-
-twofish_dec_blk_3way:
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src, RIO
- */
- pushq %r15;
- pushq %r14;
- pushq %r13;
- pushq %r12;
- pushq %rbp;
- pushq %rbx;
-
- pushq %rsi; /* dst */
-
- inpack_dec3();
-
- decrypt_cycle3(RAB, RCD, 7);
- decrypt_cycle3(RAB, RCD, 6);
- decrypt_cycle3(RAB, RCD, 5);
- decrypt_cycle3(RAB, RCD, 4);
- decrypt_cycle3(RAB, RCD, 3);
- decrypt_cycle3(RAB, RCD, 2);
- decrypt_cycle3(RAB, RCD, 1);
- decrypt_cycle3(RAB, RCD, 0);
-
- popq RIO; /* dst */
-
- outunpack_dec3();
-
- popq %rbx;
- popq %rbp;
- popq %r12;
- popq %r13;
- popq %r14;
- popq %r15;
- ret;
-
diff --git a/ANDROID_3.4.5/arch/x86/crypto/twofish-x86_64-asm_64.S b/ANDROID_3.4.5/arch/x86/crypto/twofish-x86_64-asm_64.S
deleted file mode 100644
index 7bcf3fcc..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ /dev/null
@@ -1,322 +0,0 @@
-/***************************************************************************
-* Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> *
-* *
-* This program is free software; you can redistribute it and/or modify *
-* it under the terms of the GNU General Public License as published by *
-* the Free Software Foundation; either version 2 of the License, or *
-* (at your option) any later version. *
-* *
-* This program is distributed in the hope that it will be useful, *
-* but WITHOUT ANY WARRANTY; without even the implied warranty of *
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
-* GNU General Public License for more details. *
-* *
-* You should have received a copy of the GNU General Public License *
-* along with this program; if not, write to the *
-* Free Software Foundation, Inc., *
-* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
-***************************************************************************/
-
-.file "twofish-x86_64-asm.S"
-.text
-
-#include <asm/asm-offsets.h>
-
-#define a_offset 0
-#define b_offset 4
-#define c_offset 8
-#define d_offset 12
-
-/* Layout of the twofish crypto context structure */
-
-#define s0 0 /* S0 array, 256 words each */
-#define s1 1024 /* S1 array */
-#define s2 2048 /* S2 array */
-#define s3 3072 /* S3 array */
-#define w 4096 /* 8 whitening keys (words) */
-#define k 4128 /* round keys 1-32 (words) */
-
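These byte offsets match a context made of four 1024-byte s-box tables followed by the whitening and round keys; reconstructed as a C structure (a sketch inferred from the offsets above):

struct twofish_ctx {
        u32 s[4][256];  /* key-dependent s-boxes: s0..s3 at 0/1024/2048/3072 */
        u32 w[8];       /* 8 whitening words at offset 4096 */
        u32 k[32];      /* 32 round-key words at offset 4128 */
};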
-/* define a few register aliases to allow macro substitution */
-
-#define R0 %rax
-#define R0D %eax
-#define R0B %al
-#define R0H %ah
-
-#define R1 %rbx
-#define R1D %ebx
-#define R1B %bl
-#define R1H %bh
-
-#define R2 %rcx
-#define R2D %ecx
-#define R2B %cl
-#define R2H %ch
-
-#define R3 %rdx
-#define R3D %edx
-#define R3B %dl
-#define R3H %dh
-
-
-/* performs input whitening */
-#define input_whitening(src,context,offset)\
- xor w+offset(context), src;
-
-/* performs output whitening */
-#define output_whitening(src,context,offset)\
- xor w+16+offset(context), src;
-
-
-/*
- * a: input register containing a (rotated 16)
- * b: input register containing b
- * c: input register containing c
- * d: input register containing d (already rol $1)
- * operations on a and b are interleaved to increase performance
- */
-#define encrypt_round(a,b,c,d,round)\
- movzx b ## B, %edi;\
- mov s1(%r11,%rdi,4),%r8d;\
- movzx a ## B, %edi;\
- mov s2(%r11,%rdi,4),%r9d;\
- movzx b ## H, %edi;\
- ror $16, b ## D;\
- xor s2(%r11,%rdi,4),%r8d;\
- movzx a ## H, %edi;\
- ror $16, a ## D;\
- xor s3(%r11,%rdi,4),%r9d;\
- movzx b ## B, %edi;\
- xor s3(%r11,%rdi,4),%r8d;\
- movzx a ## B, %edi;\
- xor (%r11,%rdi,4), %r9d;\
- movzx b ## H, %edi;\
- ror $15, b ## D;\
- xor (%r11,%rdi,4), %r8d;\
- movzx a ## H, %edi;\
- xor s1(%r11,%rdi,4),%r9d;\
- add %r8d, %r9d;\
- add %r9d, %r8d;\
- add k+round(%r11), %r9d;\
- xor %r9d, c ## D;\
- rol $15, c ## D;\
- add k+4+round(%r11),%r8d;\
- xor %r8d, d ## D;
-
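Stripped of the interleaving and of the pre-rotated register convention, this macro computes one textbook Twofish round. A hedged C rendering (ror32/rol32 as in <linux/bitops.h>; with the context layout sketched earlier, ctx->k holds subkeys K8..K39, so k[2r] and k[2r+1] correspond to the asm's k+round and k+4+round with round = 8*r bytes):

static void twofish_round_model(u32 *a, u32 *b, u32 *c, u32 *d,
                                const struct twofish_ctx *ctx, int r)
{
        u32 x = *a, y = *b, t0, t1;

        /* t0 = g(a), t1 = g(rol(b, 8)), via the four key-dependent tables */
        t0 = ctx->s[0][x & 0xff] ^ ctx->s[1][(x >> 8) & 0xff] ^
             ctx->s[2][(x >> 16) & 0xff] ^ ctx->s[3][x >> 24];
        t1 = ctx->s[1][y & 0xff] ^ ctx->s[2][(y >> 8) & 0xff] ^
             ctx->s[3][(y >> 16) & 0xff] ^ ctx->s[0][y >> 24];

        t0 += t1;       /* pseudo-Hadamard transform: t0+t1 and t0+2*t1 */
        t1 += t0;

        *c = ror32(*c ^ (t0 + ctx->k[2 * r]), 1);
        *d = rol32(*d, 1) ^ (t1 + ctx->k[2 * r + 1]);
}

In the assembler version the word entering the next round as a is kept rotated by 16, so the closing rol $15 fuses the textbook ror $1 with that ror $16; encrypt_last_round drops the convention and ends in a plain ror $1.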
-/*
- * a: input register containing a (rotated 16)
- * b: input register containing b
- * c: input register containing c
- * d: input register containing d (already rol $1)
- * operations on a and b are interleaved to increase performance
- * during the round a and b are prepared for the output whitening
- */
-#define encrypt_last_round(a,b,c,d,round)\
- mov b ## D, %r10d;\
- shl $32, %r10;\
- movzx b ## B, %edi;\
- mov s1(%r11,%rdi,4),%r8d;\
- movzx a ## B, %edi;\
- mov s2(%r11,%rdi,4),%r9d;\
- movzx b ## H, %edi;\
- ror $16, b ## D;\
- xor s2(%r11,%rdi,4),%r8d;\
- movzx a ## H, %edi;\
- ror $16, a ## D;\
- xor s3(%r11,%rdi,4),%r9d;\
- movzx b ## B, %edi;\
- xor s3(%r11,%rdi,4),%r8d;\
- movzx a ## B, %edi;\
- xor (%r11,%rdi,4), %r9d;\
- xor a, %r10;\
- movzx b ## H, %edi;\
- xor (%r11,%rdi,4), %r8d;\
- movzx a ## H, %edi;\
- xor s1(%r11,%rdi,4),%r9d;\
- add %r8d, %r9d;\
- add %r9d, %r8d;\
- add k+round(%r11), %r9d;\
- xor %r9d, c ## D;\
- ror $1, c ## D;\
- add k+4+round(%r11),%r8d;\
- xor %r8d, d ## D
-
-/*
- * a: input register containing a
- * b: input register containing b (rotated 16)
- * c: input register containing c (already rol $1)
- * d: input register containing d
- * operations on a and b are interleaved to increase performance
- */
-#define decrypt_round(a,b,c,d,round)\
- movzx a ## B, %edi;\
- mov (%r11,%rdi,4), %r9d;\
- movzx b ## B, %edi;\
- mov s3(%r11,%rdi,4),%r8d;\
- movzx a ## H, %edi;\
- ror $16, a ## D;\
- xor s1(%r11,%rdi,4),%r9d;\
- movzx b ## H, %edi;\
- ror $16, b ## D;\
- xor (%r11,%rdi,4), %r8d;\
- movzx a ## B, %edi;\
- xor s2(%r11,%rdi,4),%r9d;\
- movzx b ## B, %edi;\
- xor s1(%r11,%rdi,4),%r8d;\
- movzx a ## H, %edi;\
- ror $15, a ## D;\
- xor s3(%r11,%rdi,4),%r9d;\
- movzx b ## H, %edi;\
- xor s2(%r11,%rdi,4),%r8d;\
- add %r8d, %r9d;\
- add %r9d, %r8d;\
- add k+round(%r11), %r9d;\
- xor %r9d, c ## D;\
- add k+4+round(%r11),%r8d;\
- xor %r8d, d ## D;\
- rol $15, d ## D;
-
-/*
- * a: input register containing a
- * b: input register containing b
- * c: input register containing c (already rol $1)
- * d: input register containing d
- * operations on a and b are interleaved to increase performance
- * during the round a and b are prepared for the output whitening
- */
-#define decrypt_last_round(a,b,c,d,round)\
- movzx a ## B, %edi;\
- mov (%r11,%rdi,4), %r9d;\
- movzx b ## B, %edi;\
- mov s3(%r11,%rdi,4),%r8d;\
- movzx b ## H, %edi;\
- ror $16, b ## D;\
- xor (%r11,%rdi,4), %r8d;\
- movzx a ## H, %edi;\
- mov b ## D, %r10d;\
- shl $32, %r10;\
- xor a, %r10;\
- ror $16, a ## D;\
- xor s1(%r11,%rdi,4),%r9d;\
- movzx b ## B, %edi;\
- xor s1(%r11,%rdi,4),%r8d;\
- movzx a ## B, %edi;\
- xor s2(%r11,%rdi,4),%r9d;\
- movzx b ## H, %edi;\
- xor s2(%r11,%rdi,4),%r8d;\
- movzx a ## H, %edi;\
- xor s3(%r11,%rdi,4),%r9d;\
- add %r8d, %r9d;\
- add %r9d, %r8d;\
- add k+round(%r11), %r9d;\
- xor %r9d, c ## D;\
- add k+4+round(%r11),%r8d;\
- xor %r8d, d ## D;\
- ror $1, d ## D;
-
-.align 8
-.global twofish_enc_blk
-.global twofish_dec_blk
-
-twofish_enc_blk:
- pushq R1
-
- /* %rdi contains the ctx address */
- /* %rsi contains the output address */
- /* %rdx contains the input address */
-	/* the ctx address is moved to %r11 to free a non-REX register
-	   as the target for the 8-bit high-byte operations */
- mov %rdi, %r11
-
- movq (R3), R1
- movq 8(R3), R3
- input_whitening(R1,%r11,a_offset)
- input_whitening(R3,%r11,c_offset)
- mov R1D, R0D
- rol $16, R0D
- shr $32, R1
- mov R3D, R2D
- shr $32, R3
- rol $1, R3D
-
- encrypt_round(R0,R1,R2,R3,0);
- encrypt_round(R2,R3,R0,R1,8);
- encrypt_round(R0,R1,R2,R3,2*8);
- encrypt_round(R2,R3,R0,R1,3*8);
- encrypt_round(R0,R1,R2,R3,4*8);
- encrypt_round(R2,R3,R0,R1,5*8);
- encrypt_round(R0,R1,R2,R3,6*8);
- encrypt_round(R2,R3,R0,R1,7*8);
- encrypt_round(R0,R1,R2,R3,8*8);
- encrypt_round(R2,R3,R0,R1,9*8);
- encrypt_round(R0,R1,R2,R3,10*8);
- encrypt_round(R2,R3,R0,R1,11*8);
- encrypt_round(R0,R1,R2,R3,12*8);
- encrypt_round(R2,R3,R0,R1,13*8);
- encrypt_round(R0,R1,R2,R3,14*8);
- encrypt_last_round(R2,R3,R0,R1,15*8);
-
-
- output_whitening(%r10,%r11,a_offset)
- movq %r10, (%rsi)
-
- shl $32, R1
- xor R0, R1
-
- output_whitening(R1,%r11,c_offset)
- movq R1, 8(%rsi)
-
- popq R1
- movq $1,%rax
- ret
-
-twofish_dec_blk:
- pushq R1
-
- /* %rdi contains the ctx address */
- /* %rsi contains the output address */
- /* %rdx contains the input address */
-	/* the ctx address is moved to %r11 to free a non-REX register
-	   as the target for the 8-bit high-byte operations */
- mov %rdi, %r11
-
- movq (R3), R1
- movq 8(R3), R3
- output_whitening(R1,%r11,a_offset)
- output_whitening(R3,%r11,c_offset)
- mov R1D, R0D
- shr $32, R1
- rol $16, R1D
- mov R3D, R2D
- shr $32, R3
- rol $1, R2D
-
- decrypt_round(R0,R1,R2,R3,15*8);
- decrypt_round(R2,R3,R0,R1,14*8);
- decrypt_round(R0,R1,R2,R3,13*8);
- decrypt_round(R2,R3,R0,R1,12*8);
- decrypt_round(R0,R1,R2,R3,11*8);
- decrypt_round(R2,R3,R0,R1,10*8);
- decrypt_round(R0,R1,R2,R3,9*8);
- decrypt_round(R2,R3,R0,R1,8*8);
- decrypt_round(R0,R1,R2,R3,7*8);
- decrypt_round(R2,R3,R0,R1,6*8);
- decrypt_round(R0,R1,R2,R3,5*8);
- decrypt_round(R2,R3,R0,R1,4*8);
- decrypt_round(R0,R1,R2,R3,3*8);
- decrypt_round(R2,R3,R0,R1,2*8);
- decrypt_round(R0,R1,R2,R3,1*8);
- decrypt_last_round(R2,R3,R0,R1,0);
-
- input_whitening(%r10,%r11,a_offset)
- movq %r10, (%rsi)
-
- shl $32, R1
- xor R0, R1
-
- input_whitening(R1,%r11,c_offset)
- movq R1, 8(%rsi)
-
- popq R1
- movq $1,%rax
- ret
diff --git a/ANDROID_3.4.5/arch/x86/crypto/twofish_glue.c b/ANDROID_3.4.5/arch/x86/crypto/twofish_glue.c
deleted file mode 100644
index 359ae084..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/twofish_glue.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Glue code for the assembler-optimized version of Twofish
- *
- * Originally Twofish for GPG
- * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
- * 256-bit key length added March 20, 1999
- * Some modifications to reduce the text size by Werner Koch, April, 1998
- * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
- * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
- *
- * The original author has disclaimed all copyright interest in this
- * code and thus put it in the public domain. The subsequent authors
- * have put this under the GNU General Public License.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
- * This code is a "clean room" implementation, written from the paper
- * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
- * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
- * through http://www.counterpane.com/twofish.html
- *
- * For background information on multiplication in finite fields, used for
- * the matrix operations in the key schedule, see the book _Contemporary
- * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
- * Third Edition.
- */
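The finite-field background matters because Twofish's MDS matrix and the RS code in its key schedule multiply bytes in GF(2^8) modulo fixed primitive polynomials (0x169 for the MDS matrix, 0x14d for the RS code). A small illustrative multiply, not taken from the kernel sources:

static u8 gf256_mul(u8 a8, u8 b, unsigned int poly)
{
        unsigned int a = a8, r = 0;

        while (b) {
                if (b & 1)
                        r ^= a;         /* add (XOR) the current multiple */
                a <<= 1;
                if (a & 0x100)
                        a ^= poly;      /* reduce back below x^8 */
                b >>= 1;
        }
        return (u8)r;
}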
-
-#include <crypto/twofish.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
-
-asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
-EXPORT_SYMBOL_GPL(twofish_enc_blk);
-asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
-EXPORT_SYMBOL_GPL(twofish_dec_blk);
-
-static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- twofish_enc_blk(crypto_tfm_ctx(tfm), dst, src);
-}
-
-static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- twofish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
-}
-
-static struct crypto_alg alg = {
- .cra_name = "twofish",
- .cra_driver_name = "twofish-asm",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(alg.cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = TF_MIN_KEY_SIZE,
- .cia_max_keysize = TF_MAX_KEY_SIZE,
- .cia_setkey = twofish_setkey,
- .cia_encrypt = twofish_encrypt,
- .cia_decrypt = twofish_decrypt
- }
- }
-};
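Once registered, the cipher is reachable by name through the kernel crypto API, which normally resolves to the highest-priority implementation (here twofish-asm, priority 200, over the generic C version). A hedged usage sketch for the single-block cipher interface of this kernel era, error handling elided:

        struct crypto_cipher *tfm;
        u8 key[32], in[TF_BLOCK_SIZE], out[TF_BLOCK_SIZE];

        tfm = crypto_alloc_cipher("twofish", 0, 0);
        if (!IS_ERR(tfm)) {
                crypto_cipher_setkey(tfm, key, sizeof(key));
                crypto_cipher_encrypt_one(tfm, out, in);
                crypto_free_cipher(tfm);
        }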
-
-static int __init init(void)
-{
- return crypto_register_alg(&alg);
-}
-
-static void __exit fini(void)
-{
- crypto_unregister_alg(&alg);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Twofish Cipher Algorithm, asm optimized");
-MODULE_ALIAS("twofish");
-MODULE_ALIAS("twofish-asm");
diff --git a/ANDROID_3.4.5/arch/x86/crypto/twofish_glue_3way.c b/ANDROID_3.4.5/arch/x86/crypto/twofish_glue_3way.c
deleted file mode 100644
index 922ab24c..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/twofish_glue_3way.c
+++ /dev/null
@@ -1,695 +0,0 @@
-/*
- * Glue code for the 3-way parallel, assembler-optimized version of Twofish
- *
- * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
- * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- * CTR part based on code (crypto/ctr.c) by:
- * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
- * USA
- *
- */
-
-#include <asm/processor.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <crypto/algapi.h>
-#include <crypto/twofish.h>
-#include <crypto/b128ops.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
-
-/* regular block cipher functions from twofish_x86_64 module */
-asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
-
-/* 3-way parallel cipher functions */
-asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
-
-static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __twofish_enc_blk_3way(ctx, dst, src, false);
-}
-
-static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __twofish_enc_blk_3way(ctx, dst, src, true);
-}
-
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
- void (*fn)(struct twofish_ctx *, u8 *, const u8 *),
- void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *))
-{
- struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = TF_BLOCK_SIZE;
- unsigned int nbytes;
- int err;
-
- err = blkcipher_walk_virt(desc, walk);
-
- while ((nbytes = walk->nbytes)) {
- u8 *wsrc = walk->src.virt.addr;
- u8 *wdst = walk->dst.virt.addr;
-
-		/* Process three-block batches */
- if (nbytes >= bsize * 3) {
- do {
- fn_3way(ctx, wdst, wsrc);
-
- wsrc += bsize * 3;
- wdst += bsize * 3;
- nbytes -= bsize * 3;
- } while (nbytes >= bsize * 3);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- fn(ctx, wdst, wsrc);
-
- wsrc += bsize;
- wdst += bsize;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- err = blkcipher_walk_done(desc, walk, nbytes);
- }
-
- return err;
-}
-
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, twofish_enc_blk, twofish_enc_blk_3way);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way);
-}
-
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = TF_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 *iv = (u128 *)walk->iv;
-
- do {
- u128_xor(dst, src, iv);
- twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
- iv = dst;
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
- u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
- return nbytes;
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
-
- while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_encrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- return err;
-}
-
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = TF_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 ivs[3 - 1];
- u128 last_iv;
-
- /* Start of the last block. */
- src += nbytes / bsize - 1;
- dst += nbytes / bsize - 1;
-
- last_iv = *src;
-
-	/* Process three-block batches */
- if (nbytes >= bsize * 3) {
- do {
- nbytes -= bsize * (3 - 1);
- src -= 3 - 1;
- dst -= 3 - 1;
-
- ivs[0] = src[0];
- ivs[1] = src[1];
-
- twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
-
- u128_xor(dst + 1, dst + 1, ivs + 0);
- u128_xor(dst + 2, dst + 2, ivs + 1);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- goto done;
-
- u128_xor(dst, dst, src - 1);
- src -= 1;
- dst -= 1;
- } while (nbytes >= bsize * 3);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- for (;;) {
- twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- break;
-
- u128_xor(dst, dst, src - 1);
- src -= 1;
- dst -= 1;
- }
-
-done:
- u128_xor(dst, dst, (u128 *)walk->iv);
- *(u128 *)walk->iv = last_iv;
-
- return nbytes;
-}
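__cbc_decrypt walks from the last block toward the first because dst and src may alias: a forward in-place pass would overwrite each ciphertext block before the following block could use it as its chaining value. A simplified single-block model of the same ordering (helper name hypothetical):

static void cbc_decrypt_model(struct twofish_ctx *ctx, u128 *dst,
                              const u128 *src, unsigned int nblocks,
                              const u128 *iv)
{
        unsigned int i;

        for (i = nblocks - 1; i > 0; i--) {
                /* src[i - 1] is still ciphertext when it is XORed in */
                twofish_dec_blk(ctx, (u8 *)&dst[i], (const u8 *)&src[i]);
                u128_xor(&dst[i], &dst[i], &src[i - 1]);
        }
        twofish_dec_blk(ctx, (u8 *)&dst[0], (const u8 *)&src[0]);
        u128_xor(&dst[0], &dst[0], iv);
}

The real function additionally remembers the last ciphertext block so it can become the IV for the next walk segment.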
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
-
- while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_decrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- return err;
-}
-
-static inline void u128_to_be128(be128 *dst, const u128 *src)
-{
- dst->a = cpu_to_be64(src->a);
- dst->b = cpu_to_be64(src->b);
-}
-
-static inline void be128_to_u128(u128 *dst, const be128 *src)
-{
- dst->a = be64_to_cpu(src->a);
- dst->b = be64_to_cpu(src->b);
-}
-
-static inline void u128_inc(u128 *i)
-{
- i->b++;
- if (!i->b)
- i->a++;
-}
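Together these helpers treat the big-endian IV as a native-order 128-bit counter. Advancing it by one looks like this (a sketch using the helpers above; function name hypothetical):

static void ctr_bump(u8 iv[TF_BLOCK_SIZE])
{
        u128 ctr;

        be128_to_u128(&ctr, (const be128 *)iv); /* wire -> native order */
        u128_inc(&ctr);                         /* 128-bit ++ with carry */
        u128_to_be128((be128 *)iv, &ctr);       /* native -> wire order */
}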
-
-static void ctr_crypt_final(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- u8 *ctrblk = walk->iv;
- u8 keystream[TF_BLOCK_SIZE];
- u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
-
- twofish_enc_blk(ctx, keystream, ctrblk);
- crypto_xor(keystream, src, nbytes);
- memcpy(dst, keystream, nbytes);
-
- crypto_inc(ctrblk, TF_BLOCK_SIZE);
-}
-
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = TF_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 ctrblk;
- be128 ctrblocks[3];
-
- be128_to_u128(&ctrblk, (be128 *)walk->iv);
-
-	/* Process three-block batches */
- if (nbytes >= bsize * 3) {
- do {
- if (dst != src) {
- dst[0] = src[0];
- dst[1] = src[1];
- dst[2] = src[2];
- }
-
- /* create ctrblks for parallel encrypt */
- u128_to_be128(&ctrblocks[0], &ctrblk);
- u128_inc(&ctrblk);
- u128_to_be128(&ctrblocks[1], &ctrblk);
- u128_inc(&ctrblk);
- u128_to_be128(&ctrblocks[2], &ctrblk);
- u128_inc(&ctrblk);
-
- twofish_enc_blk_xor_3way(ctx, (u8 *)dst,
- (u8 *)ctrblocks);
-
- src += 3;
- dst += 3;
- nbytes -= bsize * 3;
- } while (nbytes >= bsize * 3);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- if (dst != src)
- *dst = *src;
-
- u128_to_be128(&ctrblocks[0], &ctrblk);
- u128_inc(&ctrblk);
-
- twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
- u128_xor(dst, dst, (u128 *)ctrblocks);
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- u128_to_be128((be128 *)walk->iv, &ctrblk);
- return nbytes;
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE);
-
- while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) {
- nbytes = __ctr_crypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- if (walk.nbytes) {
- ctr_crypt_final(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, 0);
- }
-
- return err;
-}
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
- const unsigned int bsize = TF_BLOCK_SIZE;
- struct twofish_ctx *ctx = priv;
- int i;
-
- if (nbytes == 3 * bsize) {
- twofish_enc_blk_3way(ctx, srcdst, srcdst);
- return;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- twofish_enc_blk(ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
- const unsigned int bsize = TF_BLOCK_SIZE;
- struct twofish_ctx *ctx = priv;
- int i;
-
- if (nbytes == 3 * bsize) {
- twofish_dec_blk_3way(ctx, srcdst, srcdst);
- return;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- twofish_dec_blk(ctx, srcdst, srcdst);
-}
-
-struct twofish_lrw_ctx {
- struct lrw_table_ctx lrw_table;
- struct twofish_ctx twofish_ctx;
-};
-
-static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
- int err;
-
- err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE,
- &tfm->crt_flags);
- if (err)
- return err;
-
- return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[3];
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &ctx->twofish_ctx,
- .crypt_fn = encrypt_callback,
- };
-
- return lrw_crypt(desc, dst, src, nbytes, &req);
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[3];
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &ctx->twofish_ctx,
- .crypt_fn = decrypt_callback,
- };
-
- return lrw_crypt(desc, dst, src, nbytes, &req);
-}
-
-static void lrw_exit_tfm(struct crypto_tfm *tfm)
-{
- struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
- lrw_free_table(&ctx->lrw_table);
-}
-
-struct twofish_xts_ctx {
- struct twofish_ctx tweak_ctx;
- struct twofish_ctx crypt_ctx;
-};
-
-static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm);
- u32 *flags = &tfm->crt_flags;
- int err;
-
-	/* the xts key consists of two keys of equal size concatenated,
-	 * therefore the total length must be even
- */
- if (keylen % 2) {
- *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
-
- /* first half of xts-key is for crypt */
- err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
- if (err)
- return err;
-
- /* second half of xts-key is for tweak */
- return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
- flags);
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[3];
- struct xts_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .tweak_ctx = &ctx->tweak_ctx,
- .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
- .crypt_ctx = &ctx->crypt_ctx,
- .crypt_fn = encrypt_callback,
- };
-
- return xts_crypt(desc, dst, src, nbytes, &req);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[3];
- struct xts_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .tweak_ctx = &ctx->tweak_ctx,
- .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
- .crypt_ctx = &ctx->crypt_ctx,
- .crypt_fn = decrypt_callback,
- };
-
- return xts_crypt(desc, dst, src, nbytes, &req);
-}
-
-static struct crypto_alg tf_algs[5] = { {
- .cra_name = "ecb(twofish)",
- .cra_driver_name = "ecb-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(tf_algs[0].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .setkey = twofish_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(twofish)",
- .cra_driver_name = "cbc-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(tf_algs[1].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = twofish_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(twofish)",
- .cra_driver_name = "ctr-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(tf_algs[2].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = twofish_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-}, {
- .cra_name = "lrw(twofish)",
- .cra_driver_name = "lrw-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list),
- .cra_exit = lrw_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = lrw_twofish_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-}, {
- .cra_name = "xts(twofish)",
- .cra_driver_name = "xts-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(tf_algs[4].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE * 2,
- .max_keysize = TF_MAX_KEY_SIZE * 2,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = xts_twofish_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
- },
-} };
-
-static bool is_blacklisted_cpu(void)
-{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
- return false;
-
- if (boot_cpu_data.x86 == 0x06 &&
- (boot_cpu_data.x86_model == 0x1c ||
- boot_cpu_data.x86_model == 0x26 ||
- boot_cpu_data.x86_model == 0x36)) {
- /*
-	 * On Atom, twofish-3way is slower than the original assembler
-	 * implementation. Twofish-3way trades away some per-block
-	 * performance by storing blocks in 64-bit registers so that
-	 * three blocks can be processed in parallel. On out-of-order
-	 * CPUs the parallelism wins back more than was traded away,
-	 * but Atom does not benefit from it and should therefore be
-	 * blacklisted.
- */
- return true;
- }
-
- if (boot_cpu_data.x86 == 0x0f) {
- /*
-	 * On Pentium 4, twofish-3way is slower than the original
-	 * assembler implementation because of the excessive use of
-	 * 64-bit rotates and left-shifts (which are very slow on P4)
-	 * needed to store and handle a 128-bit block in two 64-bit
-	 * registers.
- */
- return true;
- }
-
- return false;
-}
-
-static int force;
-module_param(force, int, 0);
-MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
-
-static int __init init(void)
-{
- if (!force && is_blacklisted_cpu()) {
- printk(KERN_INFO
- "twofish-x86_64-3way: performance on this CPU "
- "would be suboptimal: disabling "
- "twofish-x86_64-3way.\n");
- return -ENODEV;
- }
-
- return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
-}
-
-static void __exit fini(void)
-{
- crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");
-MODULE_ALIAS("twofish");
-MODULE_ALIAS("twofish-asm");