Diffstat (limited to 'ANDROID_3.4.5/arch/x86/crypto')
27 files changed, 0 insertions, 15955 deletions
diff --git a/ANDROID_3.4.5/arch/x86/crypto/Makefile b/ANDROID_3.4.5/arch/x86/crypto/Makefile
deleted file mode 100644
index e191ac04..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/Makefile
+++ /dev/null
@@ -1,45 +0,0 @@
-#
-# Arch-specific CryptoAPI modules.
-#
-
-obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
-obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
-obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
-obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
-
-obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
-obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
-obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
-obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
-obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
-obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
-obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
-obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
-obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
-
-obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
-obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
-
-aes-i586-y := aes-i586-asm_32.o aes_glue.o
-twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
-salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
-serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
-
-aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
-camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
-blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
-twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
-twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
-salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
-serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
-
-aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
-
-ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
-
-# enable AVX support only when $(AS) can actually assemble the instructions
-ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes)
-AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT
-CFLAGS_sha1_ssse3_glue.o += -DSHA1_ENABLE_AVX_SUPPORT
-endif
-sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
diff --git a/ANDROID_3.4.5/arch/x86/crypto/aes-i586-asm_32.S b/ANDROID_3.4.5/arch/x86/crypto/aes-i586-asm_32.S
deleted file mode 100644
index b949ec2f..00000000
--- a/ANDROID_3.4.5/arch/x86/crypto/aes-i586-asm_32.S
+++ /dev/null
@@ -1,367 +0,0 @@
-// -------------------------------------------------------------------------
-// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
-// All rights reserved.
-//
-// LICENSE TERMS
-//
-// The free distribution and use of this software in both source and binary
-// form is allowed (with or without changes) provided that:
-//
-// 1. distributions of this source code include the above copyright
-//    notice, this list of conditions and the following disclaimer//
-//
-// 2. distributions in binary form include the above copyright
-//    notice, this list of conditions and the following disclaimer
-//    in the documentation and/or other associated materials//
-//
-// 3. the copyright holder's name is not used to endorse products
-//    built using this software without specific written permission.
-//
-//
-// ALTERNATIVELY, provided that this notice is retained in full, this product
-// may be distributed under the terms of the GNU General Public License (GPL),
-// in which case the provisions of the GPL apply INSTEAD OF those given above.
-//
-// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
-// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
-
-// DISCLAIMER
-//
-// This software is provided 'as is' with no explicit or implied warranties
-// in respect of its properties including, but not limited to, correctness
-// and fitness for purpose.
-// -------------------------------------------------------------------------
-// Issue Date: 29/07/2002
-
-.file "aes-i586-asm.S"
-.text
-
-#include <asm/asm-offsets.h>
-
-#define tlen 1024	// length of each of 4 'xor' arrays (256 32-bit words)
-
-/* offsets to parameters with one register pushed onto stack */
-#define ctx 8
-#define out_blk 12
-#define in_blk 16
-
-/* offsets in crypto_aes_ctx structure */
-#define klen (480)
-#define ekey (0)
-#define dkey (240)
-
-// register mapping for encrypt and decrypt subroutines
-
-#define r0 eax
-#define r1 ebx
-#define r2 ecx
-#define r3 edx
-#define r4 esi
-#define r5 edi
-
-#define eaxl al
-#define eaxh ah
-#define ebxl bl
-#define ebxh bh
-#define ecxl cl
-#define ecxh ch
-#define edxl dl
-#define edxh dh
-
-#define _h(reg) reg##h
-#define h(reg) _h(reg)
-
-#define _l(reg) reg##l
-#define l(reg) _l(reg)
-
-// This macro takes a 32-bit word representing a column and uses
-// each of its four bytes to index into four tables of 256 32-bit
-// words to obtain values that are then xored into the appropriate
-// output registers r0, r1, r4 or r5.
-
-// Parameters:
-// table	table base address
-// %1	out_state[0]
-// %2	out_state[1]
-// %3	out_state[2]
-// %4	out_state[3]
-// idx	input register for the round (destroyed)
-// tmp	scratch register for the round
-// sched	key schedule
-
-#define do_col(table, a1,a2,a3,a4, idx, tmp)	\
-	movzx %l(idx),%tmp;			\
-	xor table(,%tmp,4),%a1;			\
-	movzx %h(idx),%tmp;			\
-	shr $16,%idx;				\
-	xor table+tlen(,%tmp,4),%a2;		\
-	movzx %l(idx),%tmp;			\
-	movzx %h(idx),%idx;			\
-	xor table+2*tlen(,%tmp,4),%a3;		\
-	xor table+3*tlen(,%idx,4),%a4;
-
-// initialise output registers from the key schedule
-// NB1: original value of a3 is in idx on exit
-// NB2: original values of a1,a2,a4 aren't used
-#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched)	\
-	mov 0 sched,%a1;			\
-	movzx %l(idx),%tmp;			\
-	mov 12 sched,%a2;			\
-	xor table(,%tmp,4),%a1;			\
-	mov 4 sched,%a4;			\
-	movzx %h(idx),%tmp;			\
-	shr $16,%idx;				\
-	xor table+tlen(,%tmp,4),%a2;		\
-	movzx %l(idx),%tmp;			\
-	movzx %h(idx),%idx;			\
-	xor table+3*tlen(,%idx,4),%a4;		\
-	mov %a3,%idx;				\
-	mov 8 sched,%a3;			\
-	xor table+2*tlen(,%tmp,4),%a3;
-
-// initialise output registers from the key schedule
-// NB1: original value of a3 is in idx on exit
-// NB2: original values of a1,a2,a4 aren't used
-#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched)	\
-	mov 0 sched,%a1;			\
-	movzx %l(idx),%tmp;			\
-	mov 4 sched,%a2;			\
-	xor table(,%tmp,4),%a1;			\
-	mov 12 sched,%a4;			\
-	movzx %h(idx),%tmp;			\
-	shr $16,%idx;				\
-	xor table+tlen(,%tmp,4),%a2;		\
-	movzx %l(idx),%tmp;			\
-	movzx %h(idx),%idx;			\
-	xor table+3*tlen(,%idx,4),%a4;		\
-	mov %a3,%idx;				\
-	mov 8 sched,%a3;			\
-	xor table+2*tlen(,%tmp,4),%a3;
-
-
-// original Gladman had conditional saves to MMX regs.
-#define save(a1, a2)		\
-	mov %a2,4*a1(%esp)
-
-#define restore(a1, a2)		\
-	mov 4*a2(%esp),%a1
-
-// These macros perform a forward encryption cycle. They are entered with
-// the first previous round column values in r0,r1,r4,r5 and
-// exit with the final values in the same registers, using stack
-// for temporary storage.
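The do_col macro above is the classic four-table AES round step: each byte of a 32-bit column indexes one of four consecutive 256-entry u32 tables (each tlen = 1024 bytes long), and each lookup is XORed into a different output column. A minimal C sketch of the same access pattern, with hypothetical names (the real tables are crypto_ft_tab for encryption and crypto_it_tab for decryption; callers rotate which registers receive each result):

    #include <stdint.h>

    /* Sketch of one do_col step: byte 0 of the column hits the first
     * table, byte 1 the second, and so on, each result XORed into a
     * different output column (hypothetical helper, not part of the
     * deleted file). */
    static void do_col_sketch(const uint32_t tab[4][256], uint32_t col,
                              uint32_t out[4])
    {
        out[0] ^= tab[0][col & 0xff];         /* movzx %l(idx)          */
        out[1] ^= tab[1][(col >> 8) & 0xff];  /* movzx %h(idx)          */
        out[2] ^= tab[2][(col >> 16) & 0xff]; /* after shr $16, %l(idx) */
        out[3] ^= tab[3][col >> 24];          /* after shr $16, %h(idx) */
    }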
- -// round column values -// on entry: r0,r1,r4,r5 -// on exit: r2,r1,r4,r5 -#define fwd_rnd1(arg, table) \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \ - do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \ - restore(r0,0); \ - do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \ - restore(r0,1); \ - do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */ - -// round column values -// on entry: r2,r1,r4,r5 -// on exit: r0,r1,r4,r5 -#define fwd_rnd2(arg, table) \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \ - do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \ - restore(r2,0); \ - do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \ - restore(r2,1); \ - do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */ - -// These macros performs an inverse encryption cycle. They are entered with -// the first previous round column values in r0,r1,r4,r5 and -// exit with the final values in the same registers, using stack -// for temporary storage - -// round column values -// on entry: r0,r1,r4,r5 -// on exit: r2,r1,r4,r5 -#define inv_rnd1(arg, table) \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \ - do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \ - restore(r0,0); \ - do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \ - restore(r0,1); \ - do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */ - -// round column values -// on entry: r2,r1,r4,r5 -// on exit: r0,r1,r4,r5 -#define inv_rnd2(arg, table) \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \ - do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \ - restore(r2,0); \ - do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \ - restore(r2,1); \ - do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */ - -// AES (Rijndael) Encryption Subroutine -/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ - -.global aes_enc_blk - -.extern crypto_ft_tab -.extern crypto_fl_tab - -.align 4 - -aes_enc_blk: - push %ebp - mov ctx(%esp),%ebp - -// CAUTION: the order and the values used in these assigns -// rely on the register mappings - -1: push %ebx - mov in_blk+4(%esp),%r2 - push %esi - mov klen(%ebp),%r3 // key size - push %edi -#if ekey != 0 - lea ekey(%ebp),%ebp // key pointer -#endif - -// input four columns and xor in first round key - - mov (%r2),%r0 - mov 4(%r2),%r1 - mov 8(%r2),%r4 - mov 12(%r2),%r5 - xor (%ebp),%r0 - xor 4(%ebp),%r1 - xor 8(%ebp),%r4 - xor 12(%ebp),%r5 - - sub $8,%esp // space for register saves on stack - add $16,%ebp // increment to next round key - cmp $24,%r3 - jb 4f // 10 rounds for 128-bit key - lea 32(%ebp),%ebp - je 3f // 12 rounds for 192-bit key - lea 32(%ebp),%ebp - -2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key - fwd_rnd2( -48(%ebp), crypto_ft_tab) -3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key - fwd_rnd2( -16(%ebp), crypto_ft_tab) -4: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key - fwd_rnd2( +16(%ebp), crypto_ft_tab) - fwd_rnd1( +32(%ebp), crypto_ft_tab) - fwd_rnd2( +48(%ebp), crypto_ft_tab) - fwd_rnd1( +64(%ebp), crypto_ft_tab) - fwd_rnd2( +80(%ebp), crypto_ft_tab) - fwd_rnd1( +96(%ebp), crypto_ft_tab) - fwd_rnd2(+112(%ebp), crypto_ft_tab) - fwd_rnd1(+128(%ebp), crypto_ft_tab) - fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a 
different table - -// move final values to the output array. CAUTION: the -// order of these assigns rely on the register mappings - - add $8,%esp - mov out_blk+12(%esp),%ebp - mov %r5,12(%ebp) - pop %edi - mov %r4,8(%ebp) - pop %esi - mov %r1,4(%ebp) - pop %ebx - mov %r0,(%ebp) - pop %ebp - ret - -// AES (Rijndael) Decryption Subroutine -/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ - -.global aes_dec_blk - -.extern crypto_it_tab -.extern crypto_il_tab - -.align 4 - -aes_dec_blk: - push %ebp - mov ctx(%esp),%ebp - -// CAUTION: the order and the values used in these assigns -// rely on the register mappings - -1: push %ebx - mov in_blk+4(%esp),%r2 - push %esi - mov klen(%ebp),%r3 // key size - push %edi -#if dkey != 0 - lea dkey(%ebp),%ebp // key pointer -#endif - -// input four columns and xor in first round key - - mov (%r2),%r0 - mov 4(%r2),%r1 - mov 8(%r2),%r4 - mov 12(%r2),%r5 - xor (%ebp),%r0 - xor 4(%ebp),%r1 - xor 8(%ebp),%r4 - xor 12(%ebp),%r5 - - sub $8,%esp // space for register saves on stack - add $16,%ebp // increment to next round key - cmp $24,%r3 - jb 4f // 10 rounds for 128-bit key - lea 32(%ebp),%ebp - je 3f // 12 rounds for 192-bit key - lea 32(%ebp),%ebp - -2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key - inv_rnd2( -48(%ebp), crypto_it_tab) -3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key - inv_rnd2( -16(%ebp), crypto_it_tab) -4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key - inv_rnd2( +16(%ebp), crypto_it_tab) - inv_rnd1( +32(%ebp), crypto_it_tab) - inv_rnd2( +48(%ebp), crypto_it_tab) - inv_rnd1( +64(%ebp), crypto_it_tab) - inv_rnd2( +80(%ebp), crypto_it_tab) - inv_rnd1( +96(%ebp), crypto_it_tab) - inv_rnd2(+112(%ebp), crypto_it_tab) - inv_rnd1(+128(%ebp), crypto_it_tab) - inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table - -// move final values to the output array. CAUTION: the -// order of these assigns rely on the register mappings - - add $8,%esp - mov out_blk+12(%esp),%ebp - mov %r5,12(%ebp) - pop %edi - mov %r4,8(%ebp) - pop %esi - mov %r1,4(%ebp) - pop %ebx - mov %r0,(%ebp) - pop %ebp - ret diff --git a/ANDROID_3.4.5/arch/x86/crypto/aes-x86_64-asm_64.S b/ANDROID_3.4.5/arch/x86/crypto/aes-x86_64-asm_64.S deleted file mode 100644 index 5b577d5a..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/aes-x86_64-asm_64.S +++ /dev/null @@ -1,188 +0,0 @@ -/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64 - * - * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de> - * - * License: - * This code can be distributed under the terms of the GNU General Public - * License (GPL) Version 2 provided that the above header down to and - * including this sentence is retained in full. 
- */ - -.extern crypto_ft_tab -.extern crypto_it_tab -.extern crypto_fl_tab -.extern crypto_il_tab - -.text - -#include <asm/asm-offsets.h> - -#define R1 %rax -#define R1E %eax -#define R1X %ax -#define R1H %ah -#define R1L %al -#define R2 %rbx -#define R2E %ebx -#define R2X %bx -#define R2H %bh -#define R2L %bl -#define R3 %rcx -#define R3E %ecx -#define R3X %cx -#define R3H %ch -#define R3L %cl -#define R4 %rdx -#define R4E %edx -#define R4X %dx -#define R4H %dh -#define R4L %dl -#define R5 %rsi -#define R5E %esi -#define R6 %rdi -#define R6E %edi -#define R7 %rbp -#define R7E %ebp -#define R8 %r8 -#define R9 %r9 -#define R10 %r10 -#define R11 %r11 - -#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \ - .global FUNC; \ - .type FUNC,@function; \ - .align 8; \ -FUNC: movq r1,r2; \ - movq r3,r4; \ - leaq KEY+48(r8),r9; \ - movq r10,r11; \ - movl (r7),r5 ## E; \ - movl 4(r7),r1 ## E; \ - movl 8(r7),r6 ## E; \ - movl 12(r7),r7 ## E; \ - movl 480(r8),r10 ## E; \ - xorl -48(r9),r5 ## E; \ - xorl -44(r9),r1 ## E; \ - xorl -40(r9),r6 ## E; \ - xorl -36(r9),r7 ## E; \ - cmpl $24,r10 ## E; \ - jb B128; \ - leaq 32(r9),r9; \ - je B192; \ - leaq 32(r9),r9; - -#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \ - movq r1,r2; \ - movq r3,r4; \ - movl r5 ## E,(r9); \ - movl r6 ## E,4(r9); \ - movl r7 ## E,8(r9); \ - movl r8 ## E,12(r9); \ - ret; - -#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \ - movzbl r2 ## H,r5 ## E; \ - movzbl r2 ## L,r6 ## E; \ - movl TAB+1024(,r5,4),r5 ## E;\ - movw r4 ## X,r2 ## X; \ - movl TAB(,r6,4),r6 ## E; \ - roll $16,r2 ## E; \ - shrl $16,r4 ## E; \ - movzbl r4 ## H,r7 ## E; \ - movzbl r4 ## L,r4 ## E; \ - xorl OFFSET(r8),ra ## E; \ - xorl OFFSET+4(r8),rb ## E; \ - xorl TAB+3072(,r7,4),r5 ## E;\ - xorl TAB+2048(,r4,4),r6 ## E;\ - movzbl r1 ## L,r7 ## E; \ - movzbl r1 ## H,r4 ## E; \ - movl TAB+1024(,r4,4),r4 ## E;\ - movw r3 ## X,r1 ## X; \ - roll $16,r1 ## E; \ - shrl $16,r3 ## E; \ - xorl TAB(,r7,4),r5 ## E; \ - movzbl r3 ## H,r7 ## E; \ - movzbl r3 ## L,r3 ## E; \ - xorl TAB+3072(,r7,4),r4 ## E;\ - xorl TAB+2048(,r3,4),r5 ## E;\ - movzbl r1 ## H,r7 ## E; \ - movzbl r1 ## L,r3 ## E; \ - shrl $16,r1 ## E; \ - xorl TAB+3072(,r7,4),r6 ## E;\ - movl TAB+2048(,r3,4),r3 ## E;\ - movzbl r1 ## H,r7 ## E; \ - movzbl r1 ## L,r1 ## E; \ - xorl TAB+1024(,r7,4),r6 ## E;\ - xorl TAB(,r1,4),r3 ## E; \ - movzbl r2 ## H,r1 ## E; \ - movzbl r2 ## L,r7 ## E; \ - shrl $16,r2 ## E; \ - xorl TAB+3072(,r1,4),r3 ## E;\ - xorl TAB+2048(,r7,4),r4 ## E;\ - movzbl r2 ## H,r1 ## E; \ - movzbl r2 ## L,r2 ## E; \ - xorl OFFSET+8(r8),rc ## E; \ - xorl OFFSET+12(r8),rd ## E; \ - xorl TAB+1024(,r1,4),r3 ## E;\ - xorl TAB(,r2,4),r4 ## E; - -#define move_regs(r1,r2,r3,r4) \ - movl r3 ## E,r1 ## E; \ - movl r4 ## E,r2 ## E; - -#define entry(FUNC,KEY,B128,B192) \ - prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11) - -#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11) - -#define encrypt_round(TAB,OFFSET) \ - round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \ - move_regs(R1,R2,R5,R6) - -#define encrypt_final(TAB,OFFSET) \ - round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) - -#define decrypt_round(TAB,OFFSET) \ - round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \ - move_regs(R1,R2,R5,R6) - -#define decrypt_final(TAB,OFFSET) \ - round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) - -/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */ - - entry(aes_enc_blk,0,enc128,enc192) - encrypt_round(crypto_ft_tab,-96) - 
encrypt_round(crypto_ft_tab,-80) -enc192: encrypt_round(crypto_ft_tab,-64) - encrypt_round(crypto_ft_tab,-48) -enc128: encrypt_round(crypto_ft_tab,-32) - encrypt_round(crypto_ft_tab,-16) - encrypt_round(crypto_ft_tab, 0) - encrypt_round(crypto_ft_tab, 16) - encrypt_round(crypto_ft_tab, 32) - encrypt_round(crypto_ft_tab, 48) - encrypt_round(crypto_ft_tab, 64) - encrypt_round(crypto_ft_tab, 80) - encrypt_round(crypto_ft_tab, 96) - encrypt_final(crypto_fl_tab,112) - return - -/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ - - entry(aes_dec_blk,240,dec128,dec192) - decrypt_round(crypto_it_tab,-96) - decrypt_round(crypto_it_tab,-80) -dec192: decrypt_round(crypto_it_tab,-64) - decrypt_round(crypto_it_tab,-48) -dec128: decrypt_round(crypto_it_tab,-32) - decrypt_round(crypto_it_tab,-16) - decrypt_round(crypto_it_tab, 0) - decrypt_round(crypto_it_tab, 16) - decrypt_round(crypto_it_tab, 32) - decrypt_round(crypto_it_tab, 48) - decrypt_round(crypto_it_tab, 64) - decrypt_round(crypto_it_tab, 80) - decrypt_round(crypto_it_tab, 96) - decrypt_final(crypto_il_tab,112) - return diff --git a/ANDROID_3.4.5/arch/x86/crypto/aes_glue.c b/ANDROID_3.4.5/arch/x86/crypto/aes_glue.c deleted file mode 100644 index 8efcf42a..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/aes_glue.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Glue Code for the asm optimized version of the AES Cipher Algorithm - * - */ - -#include <linux/module.h> -#include <crypto/aes.h> -#include <asm/aes.h> - -asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); -asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); - -void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) -{ - aes_enc_blk(ctx, dst, src); -} -EXPORT_SYMBOL_GPL(crypto_aes_encrypt_x86); - -void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) -{ - aes_dec_blk(ctx, dst, src); -} -EXPORT_SYMBOL_GPL(crypto_aes_decrypt_x86); - -static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - aes_enc_blk(crypto_tfm_ctx(tfm), dst, src); -} - -static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - aes_dec_blk(crypto_tfm_ctx(tfm), dst, src); -} - -static struct crypto_alg aes_alg = { - .cra_name = "aes", - .cra_driver_name = "aes-asm", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = crypto_aes_set_key, - .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt - } - } -}; - -static int __init aes_init(void) -{ - return crypto_register_alg(&aes_alg); -} - -static void __exit aes_fini(void) -{ - crypto_unregister_alg(&aes_alg); -} - -module_init(aes_init); -module_exit(aes_fini); - -MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("aes"); -MODULE_ALIAS("aes-asm"); diff --git a/ANDROID_3.4.5/arch/x86/crypto/aesni-intel_asm.S b/ANDROID_3.4.5/arch/x86/crypto/aesni-intel_asm.S deleted file mode 100644 index 3470624d..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/aesni-intel_asm.S +++ /dev/null @@ -1,2618 +0,0 @@ -/* - * Implement AES algorithm in Intel AES-NI instructions. 
- * - * The white paper of AES-NI instructions can be downloaded from: - * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf - * - * Copyright (C) 2008, Intel Corp. - * Author: Huang Ying <ying.huang@intel.com> - * Vinodh Gopal <vinodh.gopal@intel.com> - * Kahraman Akdemir - * - * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD - * interface for 64-bit kernels. - * Authors: Erdinc Ozturk (erdinc.ozturk@intel.com) - * Aidan O'Mahony (aidan.o.mahony@intel.com) - * Adrian Hoban <adrian.hoban@intel.com> - * James Guilford (james.guilford@intel.com) - * Gabriele Paoloni <gabriele.paoloni@intel.com> - * Tadeusz Struk (tadeusz.struk@intel.com) - * Wajdi Feghali (wajdi.k.feghali@intel.com) - * Copyright (c) 2010, Intel Corporation. - * - * Ported x86_64 version to x86: - * Author: Mathias Krause <minipli@googlemail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <linux/linkage.h> -#include <asm/inst.h> - -#ifdef __x86_64__ -.data -POLY: .octa 0xC2000000000000000000000000000001 -TWOONE: .octa 0x00000001000000000000000000000001 - -# order of these constants should not change. -# more specifically, ALL_F should follow SHIFT_MASK, -# and ZERO should follow ALL_F - -SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F -MASK1: .octa 0x0000000000000000ffffffffffffffff -MASK2: .octa 0xffffffffffffffff0000000000000000 -SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 -ALL_F: .octa 0xffffffffffffffffffffffffffffffff -ZERO: .octa 0x00000000000000000000000000000000 -ONE: .octa 0x00000000000000000000000000000001 -F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0 -dec: .octa 0x1 -enc: .octa 0x2 - - -.text - - -#define STACK_OFFSET 8*3 -#define HashKey 16*0 // store HashKey <<1 mod poly here -#define HashKey_2 16*1 // store HashKey^2 <<1 mod poly here -#define HashKey_3 16*2 // store HashKey^3 <<1 mod poly here -#define HashKey_4 16*3 // store HashKey^4 <<1 mod poly here -#define HashKey_k 16*4 // store XOR of High 64 bits and Low 64 - // bits of HashKey <<1 mod poly here - //(for Karatsuba purposes) -#define HashKey_2_k 16*5 // store XOR of High 64 bits and Low 64 - // bits of HashKey^2 <<1 mod poly here - // (for Karatsuba purposes) -#define HashKey_3_k 16*6 // store XOR of High 64 bits and Low 64 - // bits of HashKey^3 <<1 mod poly here - // (for Karatsuba purposes) -#define HashKey_4_k 16*7 // store XOR of High 64 bits and Low 64 - // bits of HashKey^4 <<1 mod poly here - // (for Karatsuba purposes) -#define VARIABLE_OFFSET 16*8 - -#define arg1 rdi -#define arg2 rsi -#define arg3 rdx -#define arg4 rcx -#define arg5 r8 -#define arg6 r9 -#define arg7 STACK_OFFSET+8(%r14) -#define arg8 STACK_OFFSET+16(%r14) -#define arg9 STACK_OFFSET+24(%r14) -#define arg10 STACK_OFFSET+32(%r14) -#endif - - -#define STATE1 %xmm0 -#define STATE2 %xmm4 -#define STATE3 %xmm5 -#define STATE4 %xmm6 -#define STATE STATE1 -#define IN1 %xmm1 -#define IN2 %xmm7 -#define IN3 %xmm8 -#define IN4 %xmm9 -#define IN IN1 -#define KEY %xmm2 -#define IV %xmm3 - -#define BSWAP_MASK %xmm10 -#define CTR %xmm11 -#define INC %xmm12 - -#ifdef __x86_64__ -#define AREG %rax -#define KEYP %rdi -#define OUTP %rsi -#define UKEYP OUTP -#define INP %rdx -#define LEN %rcx -#define IVP %r8 -#define KLEN %r9d -#define T1 %r10 -#define TKEYP T1 -#define T2 %r11 -#define TCTR_LOW 
T2 -#else -#define AREG %eax -#define KEYP %edi -#define OUTP AREG -#define UKEYP OUTP -#define INP %edx -#define LEN %esi -#define IVP %ebp -#define KLEN %ebx -#define T1 %ecx -#define TKEYP T1 -#endif - - -#ifdef __x86_64__ -/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) -* -* -* Input: A and B (128-bits each, bit-reflected) -* Output: C = A*B*x mod poly, (i.e. >>1 ) -* To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input -* GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. -* -*/ -.macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5 - movdqa \GH, \TMP1 - pshufd $78, \GH, \TMP2 - pshufd $78, \HK, \TMP3 - pxor \GH, \TMP2 # TMP2 = a1+a0 - pxor \HK, \TMP3 # TMP3 = b1+b0 - PCLMULQDQ 0x11, \HK, \TMP1 # TMP1 = a1*b1 - PCLMULQDQ 0x00, \HK, \GH # GH = a0*b0 - PCLMULQDQ 0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0) - pxor \GH, \TMP2 - pxor \TMP1, \TMP2 # TMP2 = (a0*b0)+(a1*b0) - movdqa \TMP2, \TMP3 - pslldq $8, \TMP3 # left shift TMP3 2 DWs - psrldq $8, \TMP2 # right shift TMP2 2 DWs - pxor \TMP3, \GH - pxor \TMP2, \TMP1 # TMP2:GH holds the result of GH*HK - - # first phase of the reduction - - movdqa \GH, \TMP2 - movdqa \GH, \TMP3 - movdqa \GH, \TMP4 # copy GH into TMP2,TMP3 and TMP4 - # in in order to perform - # independent shifts - pslld $31, \TMP2 # packed right shift <<31 - pslld $30, \TMP3 # packed right shift <<30 - pslld $25, \TMP4 # packed right shift <<25 - pxor \TMP3, \TMP2 # xor the shifted versions - pxor \TMP4, \TMP2 - movdqa \TMP2, \TMP5 - psrldq $4, \TMP5 # right shift TMP5 1 DW - pslldq $12, \TMP2 # left shift TMP2 3 DWs - pxor \TMP2, \GH - - # second phase of the reduction - - movdqa \GH,\TMP2 # copy GH into TMP2,TMP3 and TMP4 - # in in order to perform - # independent shifts - movdqa \GH,\TMP3 - movdqa \GH,\TMP4 - psrld $1,\TMP2 # packed left shift >>1 - psrld $2,\TMP3 # packed left shift >>2 - psrld $7,\TMP4 # packed left shift >>7 - pxor \TMP3,\TMP2 # xor the shifted versions - pxor \TMP4,\TMP2 - pxor \TMP5, \TMP2 - pxor \TMP2, \GH - pxor \TMP1, \GH # result is in TMP1 -.endm - -/* -* if a = number of total plaintext bytes -* b = floor(a/16) -* num_initial_blocks = b mod 4 -* encrypt the initial num_initial_blocks blocks and apply ghash on -* the ciphertext -* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers -* are clobbered -* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified -*/ - - -.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ -XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation - mov arg7, %r10 # %r10 = AAD - mov arg8, %r12 # %r12 = aadLen - mov %r12, %r11 - pxor %xmm\i, %xmm\i -_get_AAD_loop\num_initial_blocks\operation: - movd (%r10), \TMP1 - pslldq $12, \TMP1 - psrldq $4, %xmm\i - pxor \TMP1, %xmm\i - add $4, %r10 - sub $4, %r12 - jne _get_AAD_loop\num_initial_blocks\operation - cmp $16, %r11 - je _get_AAD_loop2_done\num_initial_blocks\operation - mov $16, %r12 -_get_AAD_loop2\num_initial_blocks\operation: - psrldq $4, %xmm\i - sub $4, %r12 - cmp %r11, %r12 - jne _get_AAD_loop2\num_initial_blocks\operation -_get_AAD_loop2_done\num_initial_blocks\operation: - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data - - xor %r11, %r11 # initialise the data pointer offset as zero - - # start AES for num_initial_blocks blocks - - mov %arg5, %rax # %rax = *Y0 - movdqu (%rax), \XMM0 # XMM0 = Y0 - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM0 - -.if (\i == 5) || (\i == 6) || (\i == 7) -.irpc index, \i_seq - 
paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, %xmm\index - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap - -.endr -.irpc index, \i_seq - pxor 16*0(%arg1), %xmm\index -.endr -.irpc index, \i_seq - movaps 0x10(%rdi), \TMP1 - AESENC \TMP1, %xmm\index # Round 1 -.endr -.irpc index, \i_seq - movaps 0x20(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x30(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x40(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x50(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x60(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x70(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x80(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x90(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0xa0(%arg1), \TMP1 - AESENCLAST \TMP1, %xmm\index # Round 10 -.endr -.irpc index, \i_seq - movdqu (%arg3 , %r11, 1), \TMP1 - pxor \TMP1, %xmm\index - movdqu %xmm\index, (%arg2 , %r11, 1) - # write back plaintext/ciphertext for num_initial_blocks - add $16, %r11 - - movdqa \TMP1, %xmm\index - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, %xmm\index - - # prepare plaintext/ciphertext for GHASH computation -.endr -.endif - GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - # apply GHASH on num_initial_blocks blocks - -.if \i == 5 - pxor %xmm5, %xmm6 - GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - pxor %xmm6, %xmm7 - GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - pxor %xmm7, %xmm8 - GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 -.elseif \i == 6 - pxor %xmm6, %xmm7 - GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - pxor %xmm7, %xmm8 - GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 -.elseif \i == 7 - pxor %xmm7, %xmm8 - GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 -.endif - cmp $64, %r13 - jl _initial_blocks_done\num_initial_blocks\operation - # no need for precomputed values -/* -* -* Precomputations for HashKey parallel with encryption of first 4 blocks. 
-* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i -*/ - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, \XMM1 - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap - - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, \XMM2 - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap - - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, \XMM3 - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap - - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, \XMM4 - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap - - pxor 16*0(%arg1), \XMM1 - pxor 16*0(%arg1), \XMM2 - pxor 16*0(%arg1), \XMM3 - pxor 16*0(%arg1), \XMM4 - movdqa \TMP3, \TMP5 - pshufd $78, \TMP3, \TMP1 - pxor \TMP3, \TMP1 - movdqa \TMP1, HashKey_k(%rsp) - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^2<<1 (mod poly) - movdqa \TMP5, HashKey_2(%rsp) -# HashKey_2 = HashKey^2<<1 (mod poly) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_2_k(%rsp) -.irpc index, 1234 # do 4 rounds - movaps 0x10*\index(%arg1), \TMP1 - AESENC \TMP1, \XMM1 - AESENC \TMP1, \XMM2 - AESENC \TMP1, \XMM3 - AESENC \TMP1, \XMM4 -.endr - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_3(%rsp) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_3_k(%rsp) -.irpc index, 56789 # do next 5 rounds - movaps 0x10*\index(%arg1), \TMP1 - AESENC \TMP1, \XMM1 - AESENC \TMP1, \XMM2 - AESENC \TMP1, \XMM3 - AESENC \TMP1, \XMM4 -.endr - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_4(%rsp) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_4_k(%rsp) - movaps 0xa0(%arg1), \TMP2 - AESENCLAST \TMP2, \XMM1 - AESENCLAST \TMP2, \XMM2 - AESENCLAST \TMP2, \XMM3 - AESENCLAST \TMP2, \XMM4 - movdqu 16*0(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM1 - movdqu \XMM1, 16*0(%arg2 , %r11 , 1) - movdqa \TMP1, \XMM1 - movdqu 16*1(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM2 - movdqu \XMM2, 16*1(%arg2 , %r11 , 1) - movdqa \TMP1, \XMM2 - movdqu 16*2(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM3 - movdqu \XMM3, 16*2(%arg2 , %r11 , 1) - movdqa \TMP1, \XMM3 - movdqu 16*3(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM4 - movdqu \XMM4, 16*3(%arg2 , %r11 , 1) - movdqa \TMP1, \XMM4 - add $64, %r11 - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap - pxor \XMMDst, \XMM1 -# combine GHASHed value with the corresponding ciphertext - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap - -_initial_blocks_done\num_initial_blocks\operation: - -.endm - - -/* -* if a = number of total plaintext bytes -* b = floor(a/16) -* num_initial_blocks = b mod 4 -* encrypt the initial num_initial_blocks blocks and apply ghash on -* the ciphertext -* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers -* are clobbered -* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified -*/ - - -.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ -XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation - mov arg7, %r10 # %r10 = AAD - mov arg8, %r12 # %r12 = aadLen - mov %r12, %r11 - pxor %xmm\i, %xmm\i 
-_get_AAD_loop\num_initial_blocks\operation: - movd (%r10), \TMP1 - pslldq $12, \TMP1 - psrldq $4, %xmm\i - pxor \TMP1, %xmm\i - add $4, %r10 - sub $4, %r12 - jne _get_AAD_loop\num_initial_blocks\operation - cmp $16, %r11 - je _get_AAD_loop2_done\num_initial_blocks\operation - mov $16, %r12 -_get_AAD_loop2\num_initial_blocks\operation: - psrldq $4, %xmm\i - sub $4, %r12 - cmp %r11, %r12 - jne _get_AAD_loop2\num_initial_blocks\operation -_get_AAD_loop2_done\num_initial_blocks\operation: - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data - - xor %r11, %r11 # initialise the data pointer offset as zero - - # start AES for num_initial_blocks blocks - - mov %arg5, %rax # %rax = *Y0 - movdqu (%rax), \XMM0 # XMM0 = Y0 - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM0 - -.if (\i == 5) || (\i == 6) || (\i == 7) -.irpc index, \i_seq - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, %xmm\index - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap - -.endr -.irpc index, \i_seq - pxor 16*0(%arg1), %xmm\index -.endr -.irpc index, \i_seq - movaps 0x10(%rdi), \TMP1 - AESENC \TMP1, %xmm\index # Round 1 -.endr -.irpc index, \i_seq - movaps 0x20(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x30(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x40(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x50(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x60(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x70(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x80(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0x90(%arg1), \TMP1 - AESENC \TMP1, %xmm\index # Round 2 -.endr -.irpc index, \i_seq - movaps 0xa0(%arg1), \TMP1 - AESENCLAST \TMP1, %xmm\index # Round 10 -.endr -.irpc index, \i_seq - movdqu (%arg3 , %r11, 1), \TMP1 - pxor \TMP1, %xmm\index - movdqu %xmm\index, (%arg2 , %r11, 1) - # write back plaintext/ciphertext for num_initial_blocks - add $16, %r11 - - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, %xmm\index - - # prepare plaintext/ciphertext for GHASH computation -.endr -.endif - GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - # apply GHASH on num_initial_blocks blocks - -.if \i == 5 - pxor %xmm5, %xmm6 - GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - pxor %xmm6, %xmm7 - GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - pxor %xmm7, %xmm8 - GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 -.elseif \i == 6 - pxor %xmm6, %xmm7 - GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - pxor %xmm7, %xmm8 - GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 -.elseif \i == 7 - pxor %xmm7, %xmm8 - GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 -.endif - cmp $64, %r13 - jl _initial_blocks_done\num_initial_blocks\operation - # no need for precomputed values -/* -* -* Precomputations for HashKey parallel with encryption of first 4 blocks. 
-* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i -*/ - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, \XMM1 - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap - - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, \XMM2 - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap - - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, \XMM3 - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap - - paddd ONE(%rip), \XMM0 # INCR Y0 - movdqa \XMM0, \XMM4 - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap - - pxor 16*0(%arg1), \XMM1 - pxor 16*0(%arg1), \XMM2 - pxor 16*0(%arg1), \XMM3 - pxor 16*0(%arg1), \XMM4 - movdqa \TMP3, \TMP5 - pshufd $78, \TMP3, \TMP1 - pxor \TMP3, \TMP1 - movdqa \TMP1, HashKey_k(%rsp) - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^2<<1 (mod poly) - movdqa \TMP5, HashKey_2(%rsp) -# HashKey_2 = HashKey^2<<1 (mod poly) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_2_k(%rsp) -.irpc index, 1234 # do 4 rounds - movaps 0x10*\index(%arg1), \TMP1 - AESENC \TMP1, \XMM1 - AESENC \TMP1, \XMM2 - AESENC \TMP1, \XMM3 - AESENC \TMP1, \XMM4 -.endr - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_3(%rsp) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_3_k(%rsp) -.irpc index, 56789 # do next 5 rounds - movaps 0x10*\index(%arg1), \TMP1 - AESENC \TMP1, \XMM1 - AESENC \TMP1, \XMM2 - AESENC \TMP1, \XMM3 - AESENC \TMP1, \XMM4 -.endr - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_4(%rsp) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_4_k(%rsp) - movaps 0xa0(%arg1), \TMP2 - AESENCLAST \TMP2, \XMM1 - AESENCLAST \TMP2, \XMM2 - AESENCLAST \TMP2, \XMM3 - AESENCLAST \TMP2, \XMM4 - movdqu 16*0(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM1 - movdqu 16*1(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM2 - movdqu 16*2(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM3 - movdqu 16*3(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM4 - movdqu \XMM1, 16*0(%arg2 , %r11 , 1) - movdqu \XMM2, 16*1(%arg2 , %r11 , 1) - movdqu \XMM3, 16*2(%arg2 , %r11 , 1) - movdqu \XMM4, 16*3(%arg2 , %r11 , 1) - - add $64, %r11 - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap - pxor \XMMDst, \XMM1 -# combine GHASHed value with the corresponding ciphertext - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap - movdqa SHUF_MASK(%rip), %xmm14 - PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap - -_initial_blocks_done\num_initial_blocks\operation: - -.endm - -/* -* encrypt 4 blocks at a time -* ghash the 4 previously encrypted ciphertext blocks -* arg1, %arg2, %arg3 are used as pointers only, not modified -* %r11 is the data offset value -*/ -.macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \ -TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation - - movdqa \XMM1, \XMM5 - movdqa \XMM2, \XMM6 - movdqa \XMM3, \XMM7 - movdqa \XMM4, \XMM8 - - movdqa SHUF_MASK(%rip), %xmm15 - # multiply TMP5 * HashKey using karatsuba - - movdqa \XMM5, \TMP4 - pshufd $78, \XMM5, \TMP6 - pxor \XMM5, \TMP6 - paddd ONE(%rip), \XMM0 # INCR CNT - movdqa HashKey_4(%rsp), \TMP5 - PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 - 
movdqa \XMM0, \XMM1 - paddd ONE(%rip), \XMM0 # INCR CNT - movdqa \XMM0, \XMM2 - paddd ONE(%rip), \XMM0 # INCR CNT - movdqa \XMM0, \XMM3 - paddd ONE(%rip), \XMM0 # INCR CNT - movdqa \XMM0, \XMM4 - PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap - PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 - PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap - PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap - PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap - - pxor (%arg1), \XMM1 - pxor (%arg1), \XMM2 - pxor (%arg1), \XMM3 - pxor (%arg1), \XMM4 - movdqa HashKey_4_k(%rsp), \TMP5 - PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) - movaps 0x10(%arg1), \TMP1 - AESENC \TMP1, \XMM1 # Round 1 - AESENC \TMP1, \XMM2 - AESENC \TMP1, \XMM3 - AESENC \TMP1, \XMM4 - movaps 0x20(%arg1), \TMP1 - AESENC \TMP1, \XMM1 # Round 2 - AESENC \TMP1, \XMM2 - AESENC \TMP1, \XMM3 - AESENC \TMP1, \XMM4 - movdqa \XMM6, \TMP1 - pshufd $78, \XMM6, \TMP2 - pxor \XMM6, \TMP2 - movdqa HashKey_3(%rsp), \TMP5 - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 - movaps 0x30(%arg1), \TMP3 - AESENC \TMP3, \XMM1 # Round 3 - AESENC \TMP3, \XMM2 - AESENC \TMP3, \XMM3 - AESENC \TMP3, \XMM4 - PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0 - movaps 0x40(%arg1), \TMP3 - AESENC \TMP3, \XMM1 # Round 4 - AESENC \TMP3, \XMM2 - AESENC \TMP3, \XMM3 - AESENC \TMP3, \XMM4 - movdqa HashKey_3_k(%rsp), \TMP5 - PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) - movaps 0x50(%arg1), \TMP3 - AESENC \TMP3, \XMM1 # Round 5 - AESENC \TMP3, \XMM2 - AESENC \TMP3, \XMM3 - AESENC \TMP3, \XMM4 - pxor \TMP1, \TMP4 -# accumulate the results in TMP4:XMM5, TMP6 holds the middle part - pxor \XMM6, \XMM5 - pxor \TMP2, \TMP6 - movdqa \XMM7, \TMP1 - pshufd $78, \XMM7, \TMP2 - pxor \XMM7, \TMP2 - movdqa HashKey_2(%rsp ), \TMP5 - - # Multiply TMP5 * HashKey using karatsuba - - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 - movaps 0x60(%arg1), \TMP3 - AESENC \TMP3, \XMM1 # Round 6 - AESENC \TMP3, \XMM2 - AESENC \TMP3, \XMM3 - AESENC \TMP3, \XMM4 - PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0 - movaps 0x70(%arg1), \TMP3 - AESENC \TMP3, \XMM1 # Round 7 - AESENC \TMP3, \XMM2 - AESENC \TMP3, \XMM3 - AESENC \TMP3, \XMM4 - movdqa HashKey_2_k(%rsp), \TMP5 - PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) - movaps 0x80(%arg1), \TMP3 - AESENC \TMP3, \XMM1 # Round 8 - AESENC \TMP3, \XMM2 - AESENC \TMP3, \XMM3 - AESENC \TMP3, \XMM4 - pxor \TMP1, \TMP4 -# accumulate the results in TMP4:XMM5, TMP6 holds the middle part - pxor \XMM7, \XMM5 - pxor \TMP2, \TMP6 - - # Multiply XMM8 * HashKey - # XMM8 and TMP5 hold the values for the two operands - - movdqa \XMM8, \TMP1 - pshufd $78, \XMM8, \TMP2 - pxor \XMM8, \TMP2 - movdqa HashKey(%rsp), \TMP5 - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 - movaps 0x90(%arg1), \TMP3 - AESENC \TMP3, \XMM1 # Round 9 - AESENC \TMP3, \XMM2 - AESENC \TMP3, \XMM3 - AESENC \TMP3, \XMM4 - PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 - movaps 0xa0(%arg1), \TMP3 - AESENCLAST \TMP3, \XMM1 # Round 10 - AESENCLAST \TMP3, \XMM2 - AESENCLAST \TMP3, \XMM3 - AESENCLAST \TMP3, \XMM4 - movdqa HashKey_k(%rsp), \TMP5 - PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) - movdqu (%arg3,%r11,1), \TMP3 - pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK - movdqu 16(%arg3,%r11,1), \TMP3 - pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK - movdqu 32(%arg3,%r11,1), \TMP3 - pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK - movdqu 48(%arg3,%r11,1), \TMP3 - pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK - movdqu \XMM1, (%arg2,%r11,1) # Write to the ciphertext buffer - movdqu \XMM2, 
16(%arg2,%r11,1) # Write to the ciphertext buffer - movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer - movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer - PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap - PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap - PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap - PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap - - pxor \TMP4, \TMP1 - pxor \XMM8, \XMM5 - pxor \TMP6, \TMP2 - pxor \TMP1, \TMP2 - pxor \XMM5, \TMP2 - movdqa \TMP2, \TMP3 - pslldq $8, \TMP3 # left shift TMP3 2 DWs - psrldq $8, \TMP2 # right shift TMP2 2 DWs - pxor \TMP3, \XMM5 - pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 - - # first phase of reduction - - movdqa \XMM5, \TMP2 - movdqa \XMM5, \TMP3 - movdqa \XMM5, \TMP4 -# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently - pslld $31, \TMP2 # packed right shift << 31 - pslld $30, \TMP3 # packed right shift << 30 - pslld $25, \TMP4 # packed right shift << 25 - pxor \TMP3, \TMP2 # xor the shifted versions - pxor \TMP4, \TMP2 - movdqa \TMP2, \TMP5 - psrldq $4, \TMP5 # right shift T5 1 DW - pslldq $12, \TMP2 # left shift T2 3 DWs - pxor \TMP2, \XMM5 - - # second phase of reduction - - movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 - movdqa \XMM5,\TMP3 - movdqa \XMM5,\TMP4 - psrld $1, \TMP2 # packed left shift >>1 - psrld $2, \TMP3 # packed left shift >>2 - psrld $7, \TMP4 # packed left shift >>7 - pxor \TMP3,\TMP2 # xor the shifted versions - pxor \TMP4,\TMP2 - pxor \TMP5, \TMP2 - pxor \TMP2, \XMM5 - pxor \TMP1, \XMM5 # result is in TMP1 - - pxor \XMM5, \XMM1 -.endm - -/* -* decrypt 4 blocks at a time -* ghash the 4 previously decrypted ciphertext blocks -* arg1, %arg2, %arg3 are used as pointers only, not modified -* %r11 is the data offset value -*/ -.macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \ -TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation - - movdqa \XMM1, \XMM5 - movdqa \XMM2, \XMM6 - movdqa \XMM3, \XMM7 - movdqa \XMM4, \XMM8 - - movdqa SHUF_MASK(%rip), %xmm15 - # multiply TMP5 * HashKey using karatsuba - - movdqa \XMM5, \TMP4 - pshufd $78, \XMM5, \TMP6 - pxor \XMM5, \TMP6 - paddd ONE(%rip), \XMM0 # INCR CNT - movdqa HashKey_4(%rsp), \TMP5 - PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 - movdqa \XMM0, \XMM1 - paddd ONE(%rip), \XMM0 # INCR CNT - movdqa \XMM0, \XMM2 - paddd ONE(%rip), \XMM0 # INCR CNT - movdqa \XMM0, \XMM3 - paddd ONE(%rip), \XMM0 # INCR CNT - movdqa \XMM0, \XMM4 - PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap - PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 - PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap - PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap - PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap - - pxor (%arg1), \XMM1 - pxor (%arg1), \XMM2 - pxor (%arg1), \XMM3 - pxor (%arg1), \XMM4 - movdqa HashKey_4_k(%rsp), \TMP5 - PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) - movaps 0x10(%arg1), \TMP1 - AESENC \TMP1, \XMM1 # Round 1 - AESENC \TMP1, \XMM2 - AESENC \TMP1, \XMM3 - AESENC \TMP1, \XMM4 - movaps 0x20(%arg1), \TMP1 - AESENC \TMP1, \XMM1 # Round 2 - AESENC \TMP1, \XMM2 - AESENC \TMP1, \XMM3 - AESENC \TMP1, \XMM4 - movdqa \XMM6, \TMP1 - pshufd $78, \XMM6, \TMP2 - pxor \XMM6, \TMP2 - movdqa HashKey_3(%rsp), \TMP5 - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 - movaps 0x30(%arg1), \TMP3 - AESENC \TMP3, \XMM1 # Round 3 - AESENC \TMP3, \XMM2 - AESENC \TMP3, \XMM3 - AESENC \TMP3, \XMM4 - PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0 - movaps 0x40(%arg1), \TMP3 - AESENC \TMP3, \XMM1 # 
Round 4 - AESENC \TMP3, \XMM2 - AESENC \TMP3, \XMM3 - AESENC \TMP3, \XMM4 - movdqa HashKey_3_k(%rsp), \TMP5 - PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) - movaps 0x50(%arg1), \TMP3 - AESENC \TMP3, \XMM1 # Round 5 - AESENC \TMP3, \XMM2 - AESENC \TMP3, \XMM3 - AESENC \TMP3, \XMM4 - pxor \TMP1, \TMP4 -# accumulate the results in TMP4:XMM5, TMP6 holds the middle part - pxor \XMM6, \XMM5 - pxor \TMP2, \TMP6 - movdqa \XMM7, \TMP1 - pshufd $78, \XMM7, \TMP2 - pxor \XMM7, \TMP2 - movdqa HashKey_2(%rsp ), \TMP5 - - # Multiply TMP5 * HashKey using karatsuba - - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 - movaps 0x60(%arg1), \TMP3 - AESENC \TMP3, \XMM1 # Round 6 - AESENC \TMP3, \XMM2 - AESENC \TMP3, \XMM3 - AESENC \TMP3, \XMM4 - PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0 - movaps 0x70(%arg1), \TMP3 - AESENC \TMP3, \XMM1 # Round 7 - AESENC \TMP3, \XMM2 - AESENC \TMP3, \XMM3 - AESENC \TMP3, \XMM4 - movdqa HashKey_2_k(%rsp), \TMP5 - PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) - movaps 0x80(%arg1), \TMP3 - AESENC \TMP3, \XMM1 # Round 8 - AESENC \TMP3, \XMM2 - AESENC \TMP3, \XMM3 - AESENC \TMP3, \XMM4 - pxor \TMP1, \TMP4 -# accumulate the results in TMP4:XMM5, TMP6 holds the middle part - pxor \XMM7, \XMM5 - pxor \TMP2, \TMP6 - - # Multiply XMM8 * HashKey - # XMM8 and TMP5 hold the values for the two operands - - movdqa \XMM8, \TMP1 - pshufd $78, \XMM8, \TMP2 - pxor \XMM8, \TMP2 - movdqa HashKey(%rsp), \TMP5 - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 - movaps 0x90(%arg1), \TMP3 - AESENC \TMP3, \XMM1 # Round 9 - AESENC \TMP3, \XMM2 - AESENC \TMP3, \XMM3 - AESENC \TMP3, \XMM4 - PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 - movaps 0xa0(%arg1), \TMP3 - AESENCLAST \TMP3, \XMM1 # Round 10 - AESENCLAST \TMP3, \XMM2 - AESENCLAST \TMP3, \XMM3 - AESENCLAST \TMP3, \XMM4 - movdqa HashKey_k(%rsp), \TMP5 - PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) - movdqu (%arg3,%r11,1), \TMP3 - pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK - movdqu \XMM1, (%arg2,%r11,1) # Write to plaintext buffer - movdqa \TMP3, \XMM1 - movdqu 16(%arg3,%r11,1), \TMP3 - pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK - movdqu \XMM2, 16(%arg2,%r11,1) # Write to plaintext buffer - movdqa \TMP3, \XMM2 - movdqu 32(%arg3,%r11,1), \TMP3 - pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK - movdqu \XMM3, 32(%arg2,%r11,1) # Write to plaintext buffer - movdqa \TMP3, \XMM3 - movdqu 48(%arg3,%r11,1), \TMP3 - pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK - movdqu \XMM4, 48(%arg2,%r11,1) # Write to plaintext buffer - movdqa \TMP3, \XMM4 - PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap - PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap - PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap - PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap - - pxor \TMP4, \TMP1 - pxor \XMM8, \XMM5 - pxor \TMP6, \TMP2 - pxor \TMP1, \TMP2 - pxor \XMM5, \TMP2 - movdqa \TMP2, \TMP3 - pslldq $8, \TMP3 # left shift TMP3 2 DWs - psrldq $8, \TMP2 # right shift TMP2 2 DWs - pxor \TMP3, \XMM5 - pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 - - # first phase of reduction - - movdqa \XMM5, \TMP2 - movdqa \XMM5, \TMP3 - movdqa \XMM5, \TMP4 -# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently - pslld $31, \TMP2 # packed right shift << 31 - pslld $30, \TMP3 # packed right shift << 30 - pslld $25, \TMP4 # packed right shift << 25 - pxor \TMP3, \TMP2 # xor the shifted versions - pxor \TMP4, \TMP2 - movdqa \TMP2, \TMP5 - psrldq $4, \TMP5 # right shift T5 1 DW - pslldq $12, \TMP2 # left shift T2 3 DWs - pxor \TMP2, \XMM5 - - 
# second phase of reduction - - movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 - movdqa \XMM5,\TMP3 - movdqa \XMM5,\TMP4 - psrld $1, \TMP2 # packed left shift >>1 - psrld $2, \TMP3 # packed left shift >>2 - psrld $7, \TMP4 # packed left shift >>7 - pxor \TMP3,\TMP2 # xor the shifted versions - pxor \TMP4,\TMP2 - pxor \TMP5, \TMP2 - pxor \TMP2, \XMM5 - pxor \TMP1, \XMM5 # result is in TMP1 - - pxor \XMM5, \XMM1 -.endm - -/* GHASH the last 4 ciphertext blocks. */ -.macro GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \ -TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst - - # Multiply TMP6 * HashKey (using Karatsuba) - - movdqa \XMM1, \TMP6 - pshufd $78, \XMM1, \TMP2 - pxor \XMM1, \TMP2 - movdqa HashKey_4(%rsp), \TMP5 - PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1 - PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0 - movdqa HashKey_4_k(%rsp), \TMP4 - PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) - movdqa \XMM1, \XMMDst - movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1 - - # Multiply TMP1 * HashKey (using Karatsuba) - - movdqa \XMM2, \TMP1 - pshufd $78, \XMM2, \TMP2 - pxor \XMM2, \TMP2 - movdqa HashKey_3(%rsp), \TMP5 - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 - PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0 - movdqa HashKey_3_k(%rsp), \TMP4 - PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) - pxor \TMP1, \TMP6 - pxor \XMM2, \XMMDst - pxor \TMP2, \XMM1 -# results accumulated in TMP6, XMMDst, XMM1 - - # Multiply TMP1 * HashKey (using Karatsuba) - - movdqa \XMM3, \TMP1 - pshufd $78, \XMM3, \TMP2 - pxor \XMM3, \TMP2 - movdqa HashKey_2(%rsp), \TMP5 - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 - PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0 - movdqa HashKey_2_k(%rsp), \TMP4 - PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) - pxor \TMP1, \TMP6 - pxor \XMM3, \XMMDst - pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1 - - # Multiply TMP1 * HashKey (using Karatsuba) - movdqa \XMM4, \TMP1 - pshufd $78, \XMM4, \TMP2 - pxor \XMM4, \TMP2 - movdqa HashKey(%rsp), \TMP5 - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 - PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0 - movdqa HashKey_k(%rsp), \TMP4 - PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) - pxor \TMP1, \TMP6 - pxor \XMM4, \XMMDst - pxor \XMM1, \TMP2 - pxor \TMP6, \TMP2 - pxor \XMMDst, \TMP2 - # middle section of the temp results combined as in karatsuba algorithm - movdqa \TMP2, \TMP4 - pslldq $8, \TMP4 # left shift TMP4 2 DWs - psrldq $8, \TMP2 # right shift TMP2 2 DWs - pxor \TMP4, \XMMDst - pxor \TMP2, \TMP6 -# TMP6:XMMDst holds the result of the accumulated carry-less multiplications - # first phase of the reduction - movdqa \XMMDst, \TMP2 - movdqa \XMMDst, \TMP3 - movdqa \XMMDst, \TMP4 -# move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently - pslld $31, \TMP2 # packed right shifting << 31 - pslld $30, \TMP3 # packed right shifting << 30 - pslld $25, \TMP4 # packed right shifting << 25 - pxor \TMP3, \TMP2 # xor the shifted versions - pxor \TMP4, \TMP2 - movdqa \TMP2, \TMP7 - psrldq $4, \TMP7 # right shift TMP7 1 DW - pslldq $12, \TMP2 # left shift TMP2 3 DWs - pxor \TMP2, \XMMDst - - # second phase of the reduction - movdqa \XMMDst, \TMP2 - # make 3 copies of XMMDst for doing 3 shift operations - movdqa \XMMDst, \TMP3 - movdqa \XMMDst, \TMP4 - psrld $1, \TMP2 # packed left shift >> 1 - psrld $2, \TMP3 # packed left shift >> 2 - psrld $7, \TMP4 # packed left shift >> 7 - pxor \TMP3, \TMP2 # xor the shifted versions - pxor \TMP4, \TMP2 - pxor \TMP7, \TMP2 - pxor \TMP2, \XMMDst - pxor \TMP6, \XMMDst # 
reduced result is in XMMDst -.endm - -/* Encryption of a single block done*/ -.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1 - - pxor (%arg1), \XMM0 - movaps 16(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 32(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 48(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 64(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 80(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 96(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 112(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 128(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 144(%arg1), \TMP1 - AESENC \TMP1, \XMM0 - movaps 160(%arg1), \TMP1 - AESENCLAST \TMP1, \XMM0 -.endm - - -/***************************************************************************** -* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. -* u8 *out, // Plaintext output. Encrypt in-place is allowed. -* const u8 *in, // Ciphertext input -* u64 plaintext_len, // Length of data in bytes for decryption. -* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) -* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) -* // concatenated with 0x00000001. 16-byte aligned pointer. -* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. -* const u8 *aad, // Additional Authentication Data (AAD) -* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes -* u8 *auth_tag, // Authenticated Tag output. The driver will compare this to the -* // given authentication tag and only return the plaintext if they match. -* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 -* // (most likely), 12 or 8. -* -* Assumptions: -* -* keys: -* keys are pre-expanded and aligned to 16 bytes. we are using the first -* set of 11 keys in the data structure void *aes_ctx -* -* iv: -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | Salt (From the SA) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | Initialization Vector | -* | (This is the sequence number from IPSec header) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x1 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* -* -* AAD: -* AAD padded to 128 bits with 0 -* for example, assume AAD is a u32 vector -* -* if AAD is 8 bytes: -* AAD[3] = {A0, A1}; -* padded AAD in xmm register = {A1 A0 0 0} -* -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | SPI (A1) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 32-bit Sequence Number (A0) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x0 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* AAD Format with 32-bit Sequence Number -* -* if AAD is 12 bytes: -* AAD[3] = {A0, A1, A2}; -* padded AAD in xmm register = {A2 A1 A0 0} -* -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | SPI (A2) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 64-bit Extended Sequence Number {A1,A0} | -* | | -* 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x0 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* AAD Format with 64-bit Extended Sequence Number -* -* aadLen: -* from the definition of the spec, aadLen can only be 8 or 12 bytes. -* The code supports 16 too but for other sizes, the code will fail. -* -* TLen: -* from the definition of the spec, TLen can only be 8, 12 or 16 bytes. -* For other sizes, the code will fail. -* -* poly = x^128 + x^127 + x^126 + x^121 + 1 -* -*****************************************************************************/ - -ENTRY(aesni_gcm_dec) - push %r12 - push %r13 - push %r14 - mov %rsp, %r14 -/* -* states of %xmm registers %xmm6:%xmm15 not saved -* all %xmm registers are clobbered -*/ - sub $VARIABLE_OFFSET, %rsp - and $~63, %rsp # align rsp to 64 bytes - mov %arg6, %r12 - movdqu (%r12), %xmm13 # %xmm13 = HashKey - movdqa SHUF_MASK(%rip), %xmm2 - PSHUFB_XMM %xmm2, %xmm13 - - -# Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH) - - movdqa %xmm13, %xmm2 - psllq $1, %xmm13 - psrlq $63, %xmm2 - movdqa %xmm2, %xmm1 - pslldq $8, %xmm2 - psrldq $8, %xmm1 - por %xmm2, %xmm13 - - # Reduction - - pshufd $0x24, %xmm1, %xmm2 - pcmpeqd TWOONE(%rip), %xmm2 - pand POLY(%rip), %xmm2 - pxor %xmm2, %xmm13 # %xmm13 holds the HashKey<<1 (mod poly) - - - # Decrypt first few blocks - - movdqa %xmm13, HashKey(%rsp) # store HashKey<<1 (mod poly) - mov %arg4, %r13 # save the number of bytes of plaintext/ciphertext - and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) - mov %r13, %r12 - and $(3<<4), %r12 - jz _initial_num_blocks_is_0_decrypt - cmp $(2<<4), %r12 - jb _initial_num_blocks_is_1_decrypt - je _initial_num_blocks_is_2_decrypt -_initial_num_blocks_is_3_decrypt: - INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec - sub $48, %r13 - jmp _initial_blocks_decrypted -_initial_num_blocks_is_2_decrypt: - INITIAL_BLOCKS_DEC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec - sub $32, %r13 - jmp _initial_blocks_decrypted -_initial_num_blocks_is_1_decrypt: - INITIAL_BLOCKS_DEC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec - sub $16, %r13 - jmp _initial_blocks_decrypted -_initial_num_blocks_is_0_decrypt: - INITIAL_BLOCKS_DEC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec -_initial_blocks_decrypted: - cmp $0, %r13 - je _zero_cipher_left_decrypt - sub $64, %r13 - je _four_cipher_left_decrypt -_decrypt_by_4: - GHASH_4_ENCRYPT_4_PARALLEL_DEC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ -%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec - add $64, %r11 - sub $64, %r13 - jne _decrypt_by_4 -_four_cipher_left_decrypt: - GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ -%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 -_zero_cipher_left_decrypt: - mov %arg4, %r13 - and $15, %r13 # %r13 = arg4 (mod 16) - je _multiple_of_16_bytes_decrypt - - # Handle the last <16 byte block separately - - paddd ONE(%rip), %xmm0 # increment CNT to get Yn - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm0 - - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) - sub $16, %r11 - add %r13, %r11 - movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte block - lea SHIFT_MASK+16(%rip), %r12 - sub %r13, %r12 -# adjust the shuffle mask pointer to be able to 
shift 16-%r13 bytes -# (%r13 is the number of bytes in plaintext mod 16) - movdqu (%r12), %xmm2 # get the appropriate shuffle mask - PSHUFB_XMM %xmm2, %xmm1 # right shift 16-%r13 butes - - movdqa %xmm1, %xmm2 - pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn) - movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 - # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0 - pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0 - pand %xmm1, %xmm2 - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10 ,%xmm2 - - pxor %xmm2, %xmm8 - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - # GHASH computation for the last <16 byte block - sub %r13, %r11 - add $16, %r11 - - # output %r13 bytes - MOVQ_R64_XMM %xmm0, %rax - cmp $8, %r13 - jle _less_than_8_bytes_left_decrypt - mov %rax, (%arg2 , %r11, 1) - add $8, %r11 - psrldq $8, %xmm0 - MOVQ_R64_XMM %xmm0, %rax - sub $8, %r13 -_less_than_8_bytes_left_decrypt: - mov %al, (%arg2, %r11, 1) - add $1, %r11 - shr $8, %rax - sub $1, %r13 - jne _less_than_8_bytes_left_decrypt -_multiple_of_16_bytes_decrypt: - mov arg8, %r12 # %r13 = aadLen (number of bytes) - shl $3, %r12 # convert into number of bits - movd %r12d, %xmm15 # len(A) in %xmm15 - shl $3, %arg4 # len(C) in bits (*128) - MOVQ_R64_XMM %arg4, %xmm1 - pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 - pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) - pxor %xmm15, %xmm8 - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - # final GHASH computation - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm8 - - mov %arg5, %rax # %rax = *Y0 - movdqu (%rax), %xmm0 # %xmm0 = Y0 - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) - pxor %xmm8, %xmm0 -_return_T_decrypt: - mov arg9, %r10 # %r10 = authTag - mov arg10, %r11 # %r11 = auth_tag_len - cmp $16, %r11 - je _T_16_decrypt - cmp $12, %r11 - je _T_12_decrypt -_T_8_decrypt: - MOVQ_R64_XMM %xmm0, %rax - mov %rax, (%r10) - jmp _return_T_done_decrypt -_T_12_decrypt: - MOVQ_R64_XMM %xmm0, %rax - mov %rax, (%r10) - psrldq $8, %xmm0 - movd %xmm0, %eax - mov %eax, 8(%r10) - jmp _return_T_done_decrypt -_T_16_decrypt: - movdqu %xmm0, (%r10) -_return_T_done_decrypt: - mov %r14, %rsp - pop %r14 - pop %r13 - pop %r12 - ret - - -/***************************************************************************** -* void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. -* u8 *out, // Ciphertext output. Encrypt in-place is allowed. -* const u8 *in, // Plaintext input -* u64 plaintext_len, // Length of data in bytes for encryption. -* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) -* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) -* // concatenated with 0x00000001. 16-byte aligned pointer. -* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. -* const u8 *aad, // Additional Authentication Data (AAD) -* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes -* u8 *auth_tag, // Authenticated Tag output. -* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), -* // 12 or 8. -* -* Assumptions: -* -* keys: -* keys are pre-expanded and aligned to 16 bytes. 
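
For reference, the operation GHASH_MUL and GHASH_LAST_4 implement is multiplication in GF(2^128) with the GCM polynomial x^128 + x^7 + x^2 + x + 1; the "poly = x^128 + x^127 + x^126 + x^121 + 1" noted in the header comments is the same polynomial written for GCM's reflected bit order, which is why the two reduction phases fold with shift counts of 1, 2 and 7 (and their 32-bit complements 31, 30 and 25). A bit-by-bit C model of the multiply, for checking the PCLMULQDQ/Karatsuba path against the spec; u128, u128_load and gf128_mul are illustrative names, not part of this file:

	#include <stdint.h>

	typedef struct { uint64_t hi, lo; } u128;	/* hi = bytes 0..7 of the block */

	static u128 u128_load(const uint8_t b[16])
	{
		u128 r = { 0, 0 };
		for (int i = 0; i < 8; i++) {
			r.hi = (r.hi << 8) | b[i];
			r.lo = (r.lo << 8) | b[i + 8];
		}
		return r;
	}

	/* Bit-by-bit GHASH multiply, Z = X * H, per NIST SP 800-38D. */
	static u128 gf128_mul(u128 x, u128 h)
	{
		u128 z = { 0, 0 }, v = h;

		for (int i = 0; i < 128; i++) {
			/* scan the bits of X from the most significant end */
			uint64_t bit = (i < 64) ? (x.hi >> (63 - i)) & 1
						: (x.lo >> (127 - i)) & 1;
			if (bit) {
				z.hi ^= v.hi;
				z.lo ^= v.lo;
			}
			/* V *= x (mod poly): shift right one bit; if a bit
			 * fell off, fold it back in as 11100001 << 120 */
			uint64_t carry = v.lo & 1;
			v.lo = (v.lo >> 1) | (v.hi << 63);
			v.hi >>= 1;
			if (carry)
				v.hi ^= 0xe100000000000000ULL;
		}
		return z;
	}

The assembly computes the same products four blocks at a time with PCLMULQDQ and Karatsuba, deferring the modular fold to the two reduction phases above.
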
we are using the -* first set of 11 keys in the data structure void *aes_ctx -* -* -* iv: -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | Salt (From the SA) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | Initialization Vector | -* | (This is the sequence number from IPSec header) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x1 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* -* -* AAD: -* AAD padded to 128 bits with 0 -* for example, assume AAD is a u32 vector -* -* if AAD is 8 bytes: -* AAD[3] = {A0, A1}; -* padded AAD in xmm register = {A1 A0 0 0} -* -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | SPI (A1) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 32-bit Sequence Number (A0) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x0 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* AAD Format with 32-bit Sequence Number -* -* if AAD is 12 bytes: -* AAD[3] = {A0, A1, A2}; -* padded AAD in xmm register = {A2 A1 A0 0} -* -* 0 1 2 3 -* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | SPI (A2) | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 64-bit Extended Sequence Number {A1,A0} | -* | | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* | 0x0 | -* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -* -* AAD Format with 64-bit Extended Sequence Number -* -* aadLen: -* from the definition of the spec, aadLen can only be 8 or 12 bytes. -* The code supports 16 too but for other sizes, the code will fail. -* -* TLen: -* from the definition of the spec, TLen can only be 8, 12 or 16 bytes. -* For other sizes, the code will fail. 
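
The iv diagram above describes a block the caller must hand in fully formed: salt, IV, then the block counter. A minimal C sketch of assembling that pre-counter block; rfc4106_build_j0 is an illustrative name, the driver itself does this inline in __driver_rfc4106_encrypt from ctx->nonce and req->iv:

	#include <stdint.h>
	#include <string.h>

	static void rfc4106_build_j0(uint8_t j0[16], const uint8_t salt[4],
				     const uint8_t iv[8])
	{
		memcpy(j0, salt, 4);	/* 4-byte salt from the SA */
		memcpy(j0 + 4, iv, 8);	/* 8-byte IV from the ESP payload */
		j0[12] = 0;
		j0[13] = 0;
		j0[14] = 0;
		j0[15] = 1;		/* 0x00000001, big endian */
	}
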
-* -* poly = x^128 + x^127 + x^126 + x^121 + 1 -***************************************************************************/ -ENTRY(aesni_gcm_enc) - push %r12 - push %r13 - push %r14 - mov %rsp, %r14 -# -# states of %xmm registers %xmm6:%xmm15 not saved -# all %xmm registers are clobbered -# - sub $VARIABLE_OFFSET, %rsp - and $~63, %rsp - mov %arg6, %r12 - movdqu (%r12), %xmm13 - movdqa SHUF_MASK(%rip), %xmm2 - PSHUFB_XMM %xmm2, %xmm13 - - -# precompute HashKey<<1 mod poly from the HashKey (required for GHASH) - - movdqa %xmm13, %xmm2 - psllq $1, %xmm13 - psrlq $63, %xmm2 - movdqa %xmm2, %xmm1 - pslldq $8, %xmm2 - psrldq $8, %xmm1 - por %xmm2, %xmm13 - - # reduce HashKey<<1 - - pshufd $0x24, %xmm1, %xmm2 - pcmpeqd TWOONE(%rip), %xmm2 - pand POLY(%rip), %xmm2 - pxor %xmm2, %xmm13 - movdqa %xmm13, HashKey(%rsp) - mov %arg4, %r13 # %xmm13 holds HashKey<<1 (mod poly) - and $-16, %r13 - mov %r13, %r12 - - # Encrypt first few blocks - - and $(3<<4), %r12 - jz _initial_num_blocks_is_0_encrypt - cmp $(2<<4), %r12 - jb _initial_num_blocks_is_1_encrypt - je _initial_num_blocks_is_2_encrypt -_initial_num_blocks_is_3_encrypt: - INITIAL_BLOCKS_ENC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc - sub $48, %r13 - jmp _initial_blocks_encrypted -_initial_num_blocks_is_2_encrypt: - INITIAL_BLOCKS_ENC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc - sub $32, %r13 - jmp _initial_blocks_encrypted -_initial_num_blocks_is_1_encrypt: - INITIAL_BLOCKS_ENC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc - sub $16, %r13 - jmp _initial_blocks_encrypted -_initial_num_blocks_is_0_encrypt: - INITIAL_BLOCKS_ENC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc -_initial_blocks_encrypted: - - # Main loop - Encrypt remaining blocks - - cmp $0, %r13 - je _zero_cipher_left_encrypt - sub $64, %r13 - je _four_cipher_left_encrypt -_encrypt_by_4_encrypt: - GHASH_4_ENCRYPT_4_PARALLEL_ENC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ -%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc - add $64, %r11 - sub $64, %r13 - jne _encrypt_by_4_encrypt -_four_cipher_left_encrypt: - GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ -%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 -_zero_cipher_left_encrypt: - mov %arg4, %r13 - and $15, %r13 # %r13 = arg4 (mod 16) - je _multiple_of_16_bytes_encrypt - - # Handle the last <16 Byte block separately - paddd ONE(%rip), %xmm0 # INCR CNT to get Yn - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm0 - - - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) - sub $16, %r11 - add %r13, %r11 - movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte blocks - lea SHIFT_MASK+16(%rip), %r12 - sub %r13, %r12 - # adjust the shuffle mask pointer to be able to shift 16-r13 bytes - # (%r13 is the number of bytes in plaintext mod 16) - movdqu (%r12), %xmm2 # get the appropriate shuffle mask - PSHUFB_XMM %xmm2, %xmm1 # shift right 16-r13 byte - pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn) - movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 - # get the appropriate mask to mask out top 16-r13 bytes of xmm0 - pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10,%xmm0 - - pxor %xmm0, %xmm8 - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - # GHASH computation for the last <16 
byte block - sub %r13, %r11 - add $16, %r11 - - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm0 - - # shuffle xmm0 back to output as ciphertext - - # Output %r13 bytes - MOVQ_R64_XMM %xmm0, %rax - cmp $8, %r13 - jle _less_than_8_bytes_left_encrypt - mov %rax, (%arg2 , %r11, 1) - add $8, %r11 - psrldq $8, %xmm0 - MOVQ_R64_XMM %xmm0, %rax - sub $8, %r13 -_less_than_8_bytes_left_encrypt: - mov %al, (%arg2, %r11, 1) - add $1, %r11 - shr $8, %rax - sub $1, %r13 - jne _less_than_8_bytes_left_encrypt -_multiple_of_16_bytes_encrypt: - mov arg8, %r12 # %r12 = addLen (number of bytes) - shl $3, %r12 - movd %r12d, %xmm15 # len(A) in %xmm15 - shl $3, %arg4 # len(C) in bits (*128) - MOVQ_R64_XMM %arg4, %xmm1 - pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 - pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) - pxor %xmm15, %xmm8 - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - # final GHASH computation - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm8 # perform a 16 byte swap - - mov %arg5, %rax # %rax = *Y0 - movdqu (%rax), %xmm0 # %xmm0 = Y0 - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0) - pxor %xmm8, %xmm0 -_return_T_encrypt: - mov arg9, %r10 # %r10 = authTag - mov arg10, %r11 # %r11 = auth_tag_len - cmp $16, %r11 - je _T_16_encrypt - cmp $12, %r11 - je _T_12_encrypt -_T_8_encrypt: - MOVQ_R64_XMM %xmm0, %rax - mov %rax, (%r10) - jmp _return_T_done_encrypt -_T_12_encrypt: - MOVQ_R64_XMM %xmm0, %rax - mov %rax, (%r10) - psrldq $8, %xmm0 - movd %xmm0, %eax - mov %eax, 8(%r10) - jmp _return_T_done_encrypt -_T_16_encrypt: - movdqu %xmm0, (%r10) -_return_T_done_encrypt: - mov %r14, %rsp - pop %r14 - pop %r13 - pop %r12 - ret - -#endif - - -_key_expansion_128: -_key_expansion_256a: - pshufd $0b11111111, %xmm1, %xmm1 - shufps $0b00010000, %xmm0, %xmm4 - pxor %xmm4, %xmm0 - shufps $0b10001100, %xmm0, %xmm4 - pxor %xmm4, %xmm0 - pxor %xmm1, %xmm0 - movaps %xmm0, (TKEYP) - add $0x10, TKEYP - ret - -.align 4 -_key_expansion_192a: - pshufd $0b01010101, %xmm1, %xmm1 - shufps $0b00010000, %xmm0, %xmm4 - pxor %xmm4, %xmm0 - shufps $0b10001100, %xmm0, %xmm4 - pxor %xmm4, %xmm0 - pxor %xmm1, %xmm0 - - movaps %xmm2, %xmm5 - movaps %xmm2, %xmm6 - pslldq $4, %xmm5 - pshufd $0b11111111, %xmm0, %xmm3 - pxor %xmm3, %xmm2 - pxor %xmm5, %xmm2 - - movaps %xmm0, %xmm1 - shufps $0b01000100, %xmm0, %xmm6 - movaps %xmm6, (TKEYP) - shufps $0b01001110, %xmm2, %xmm1 - movaps %xmm1, 0x10(TKEYP) - add $0x20, TKEYP - ret - -.align 4 -_key_expansion_192b: - pshufd $0b01010101, %xmm1, %xmm1 - shufps $0b00010000, %xmm0, %xmm4 - pxor %xmm4, %xmm0 - shufps $0b10001100, %xmm0, %xmm4 - pxor %xmm4, %xmm0 - pxor %xmm1, %xmm0 - - movaps %xmm2, %xmm5 - pslldq $4, %xmm5 - pshufd $0b11111111, %xmm0, %xmm3 - pxor %xmm3, %xmm2 - pxor %xmm5, %xmm2 - - movaps %xmm0, (TKEYP) - add $0x10, TKEYP - ret - -.align 4 -_key_expansion_256b: - pshufd $0b10101010, %xmm1, %xmm1 - shufps $0b00010000, %xmm2, %xmm4 - pxor %xmm4, %xmm2 - shufps $0b10001100, %xmm2, %xmm4 - pxor %xmm4, %xmm2 - pxor %xmm1, %xmm2 - movaps %xmm2, (TKEYP) - add $0x10, TKEYP - ret - -/* - * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, - * unsigned int key_len) - */ -ENTRY(aesni_set_key) -#ifndef __x86_64__ - pushl KEYP - movl 8(%esp), KEYP # ctx - movl 12(%esp), UKEYP # in_key - movl 16(%esp), %edx # key_len -#endif - movups (UKEYP), %xmm0 # user key (first 16 bytes) - movaps %xmm0, (KEYP) - lea 0x10(KEYP), TKEYP # key addr - movl %edx, 480(KEYP) - pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x - cmp $24, %dl - jb 
.Lenc_key128 - je .Lenc_key192 - movups 0x10(UKEYP), %xmm2 # other user key - movaps %xmm2, (TKEYP) - add $0x10, TKEYP - AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 - call _key_expansion_256a - AESKEYGENASSIST 0x1 %xmm0 %xmm1 - call _key_expansion_256b - AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 - call _key_expansion_256a - AESKEYGENASSIST 0x2 %xmm0 %xmm1 - call _key_expansion_256b - AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 - call _key_expansion_256a - AESKEYGENASSIST 0x4 %xmm0 %xmm1 - call _key_expansion_256b - AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 - call _key_expansion_256a - AESKEYGENASSIST 0x8 %xmm0 %xmm1 - call _key_expansion_256b - AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 - call _key_expansion_256a - AESKEYGENASSIST 0x10 %xmm0 %xmm1 - call _key_expansion_256b - AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 - call _key_expansion_256a - AESKEYGENASSIST 0x20 %xmm0 %xmm1 - call _key_expansion_256b - AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 - call _key_expansion_256a - jmp .Ldec_key -.Lenc_key192: - movq 0x10(UKEYP), %xmm2 # other user key - AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 - call _key_expansion_192a - AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 - call _key_expansion_192b - AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 - call _key_expansion_192a - AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 - call _key_expansion_192b - AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 - call _key_expansion_192a - AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 - call _key_expansion_192b - AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 - call _key_expansion_192a - AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8 - call _key_expansion_192b - jmp .Ldec_key -.Lenc_key128: - AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1 - call _key_expansion_128 - AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2 - call _key_expansion_128 - AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3 - call _key_expansion_128 - AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4 - call _key_expansion_128 - AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5 - call _key_expansion_128 - AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6 - call _key_expansion_128 - AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7 - call _key_expansion_128 - AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8 - call _key_expansion_128 - AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9 - call _key_expansion_128 - AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 - call _key_expansion_128 -.Ldec_key: - sub $0x10, TKEYP - movaps (KEYP), %xmm0 - movaps (TKEYP), %xmm1 - movaps %xmm0, 240(TKEYP) - movaps %xmm1, 240(KEYP) - add $0x10, KEYP - lea 240-16(TKEYP), UKEYP -.align 4 -.Ldec_key_loop: - movaps (KEYP), %xmm0 - AESIMC %xmm0 %xmm1 - movaps %xmm1, (UKEYP) - add $0x10, KEYP - sub $0x10, UKEYP - cmp TKEYP, KEYP - jb .Ldec_key_loop - xor AREG, AREG -#ifndef __x86_64__ - popl KEYP -#endif - ret - -/* - * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) - */ -ENTRY(aesni_enc) -#ifndef __x86_64__ - pushl KEYP - pushl KLEN - movl 12(%esp), KEYP - movl 16(%esp), OUTP - movl 20(%esp), INP -#endif - movl 480(KEYP), KLEN # key length - movups (INP), STATE # input - call _aesni_enc1 - movups STATE, (OUTP) # output -#ifndef __x86_64__ - popl KLEN - popl KEYP -#endif - ret - -/* - * _aesni_enc1: internal ABI - * input: - * KEYP: key struct pointer - * KLEN: round count - * STATE: initial state (input) - * output: - * STATE: finial state (output) - * changed: - * KEY - * TKEYP (T1) - */ -.align 4 -_aesni_enc1: - movaps (KEYP), KEY # key - mov KEYP, TKEYP - pxor KEY, STATE # round 0 - add $0x30, TKEYP - cmp $24, KLEN - jb .Lenc128 - lea 
0x20(TKEYP), TKEYP - je .Lenc192 - add $0x20, TKEYP - movaps -0x60(TKEYP), KEY - AESENC KEY STATE - movaps -0x50(TKEYP), KEY - AESENC KEY STATE -.align 4 -.Lenc192: - movaps -0x40(TKEYP), KEY - AESENC KEY STATE - movaps -0x30(TKEYP), KEY - AESENC KEY STATE -.align 4 -.Lenc128: - movaps -0x20(TKEYP), KEY - AESENC KEY STATE - movaps -0x10(TKEYP), KEY - AESENC KEY STATE - movaps (TKEYP), KEY - AESENC KEY STATE - movaps 0x10(TKEYP), KEY - AESENC KEY STATE - movaps 0x20(TKEYP), KEY - AESENC KEY STATE - movaps 0x30(TKEYP), KEY - AESENC KEY STATE - movaps 0x40(TKEYP), KEY - AESENC KEY STATE - movaps 0x50(TKEYP), KEY - AESENC KEY STATE - movaps 0x60(TKEYP), KEY - AESENC KEY STATE - movaps 0x70(TKEYP), KEY - AESENCLAST KEY STATE - ret - -/* - * _aesni_enc4: internal ABI - * input: - * KEYP: key struct pointer - * KLEN: round count - * STATE1: initial state (input) - * STATE2 - * STATE3 - * STATE4 - * output: - * STATE1: finial state (output) - * STATE2 - * STATE3 - * STATE4 - * changed: - * KEY - * TKEYP (T1) - */ -.align 4 -_aesni_enc4: - movaps (KEYP), KEY # key - mov KEYP, TKEYP - pxor KEY, STATE1 # round 0 - pxor KEY, STATE2 - pxor KEY, STATE3 - pxor KEY, STATE4 - add $0x30, TKEYP - cmp $24, KLEN - jb .L4enc128 - lea 0x20(TKEYP), TKEYP - je .L4enc192 - add $0x20, TKEYP - movaps -0x60(TKEYP), KEY - AESENC KEY STATE1 - AESENC KEY STATE2 - AESENC KEY STATE3 - AESENC KEY STATE4 - movaps -0x50(TKEYP), KEY - AESENC KEY STATE1 - AESENC KEY STATE2 - AESENC KEY STATE3 - AESENC KEY STATE4 -#.align 4 -.L4enc192: - movaps -0x40(TKEYP), KEY - AESENC KEY STATE1 - AESENC KEY STATE2 - AESENC KEY STATE3 - AESENC KEY STATE4 - movaps -0x30(TKEYP), KEY - AESENC KEY STATE1 - AESENC KEY STATE2 - AESENC KEY STATE3 - AESENC KEY STATE4 -#.align 4 -.L4enc128: - movaps -0x20(TKEYP), KEY - AESENC KEY STATE1 - AESENC KEY STATE2 - AESENC KEY STATE3 - AESENC KEY STATE4 - movaps -0x10(TKEYP), KEY - AESENC KEY STATE1 - AESENC KEY STATE2 - AESENC KEY STATE3 - AESENC KEY STATE4 - movaps (TKEYP), KEY - AESENC KEY STATE1 - AESENC KEY STATE2 - AESENC KEY STATE3 - AESENC KEY STATE4 - movaps 0x10(TKEYP), KEY - AESENC KEY STATE1 - AESENC KEY STATE2 - AESENC KEY STATE3 - AESENC KEY STATE4 - movaps 0x20(TKEYP), KEY - AESENC KEY STATE1 - AESENC KEY STATE2 - AESENC KEY STATE3 - AESENC KEY STATE4 - movaps 0x30(TKEYP), KEY - AESENC KEY STATE1 - AESENC KEY STATE2 - AESENC KEY STATE3 - AESENC KEY STATE4 - movaps 0x40(TKEYP), KEY - AESENC KEY STATE1 - AESENC KEY STATE2 - AESENC KEY STATE3 - AESENC KEY STATE4 - movaps 0x50(TKEYP), KEY - AESENC KEY STATE1 - AESENC KEY STATE2 - AESENC KEY STATE3 - AESENC KEY STATE4 - movaps 0x60(TKEYP), KEY - AESENC KEY STATE1 - AESENC KEY STATE2 - AESENC KEY STATE3 - AESENC KEY STATE4 - movaps 0x70(TKEYP), KEY - AESENCLAST KEY STATE1 # last round - AESENCLAST KEY STATE2 - AESENCLAST KEY STATE3 - AESENCLAST KEY STATE4 - ret - -/* - * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) - */ -ENTRY(aesni_dec) -#ifndef __x86_64__ - pushl KEYP - pushl KLEN - movl 12(%esp), KEYP - movl 16(%esp), OUTP - movl 20(%esp), INP -#endif - mov 480(KEYP), KLEN # key length - add $240, KEYP - movups (INP), STATE # input - call _aesni_dec1 - movups STATE, (OUTP) #output -#ifndef __x86_64__ - popl KLEN - popl KEYP -#endif - ret - -/* - * _aesni_dec1: internal ABI - * input: - * KEYP: key struct pointer - * KLEN: key length - * STATE: initial state (input) - * output: - * STATE: finial state (output) - * changed: - * KEY - * TKEYP (T1) - */ -.align 4 -_aesni_dec1: - movaps (KEYP), KEY # key - mov KEYP, TKEYP - pxor 
KEY, STATE # round 0 - add $0x30, TKEYP - cmp $24, KLEN - jb .Ldec128 - lea 0x20(TKEYP), TKEYP - je .Ldec192 - add $0x20, TKEYP - movaps -0x60(TKEYP), KEY - AESDEC KEY STATE - movaps -0x50(TKEYP), KEY - AESDEC KEY STATE -.align 4 -.Ldec192: - movaps -0x40(TKEYP), KEY - AESDEC KEY STATE - movaps -0x30(TKEYP), KEY - AESDEC KEY STATE -.align 4 -.Ldec128: - movaps -0x20(TKEYP), KEY - AESDEC KEY STATE - movaps -0x10(TKEYP), KEY - AESDEC KEY STATE - movaps (TKEYP), KEY - AESDEC KEY STATE - movaps 0x10(TKEYP), KEY - AESDEC KEY STATE - movaps 0x20(TKEYP), KEY - AESDEC KEY STATE - movaps 0x30(TKEYP), KEY - AESDEC KEY STATE - movaps 0x40(TKEYP), KEY - AESDEC KEY STATE - movaps 0x50(TKEYP), KEY - AESDEC KEY STATE - movaps 0x60(TKEYP), KEY - AESDEC KEY STATE - movaps 0x70(TKEYP), KEY - AESDECLAST KEY STATE - ret - -/* - * _aesni_dec4: internal ABI - * input: - * KEYP: key struct pointer - * KLEN: key length - * STATE1: initial state (input) - * STATE2 - * STATE3 - * STATE4 - * output: - * STATE1: finial state (output) - * STATE2 - * STATE3 - * STATE4 - * changed: - * KEY - * TKEYP (T1) - */ -.align 4 -_aesni_dec4: - movaps (KEYP), KEY # key - mov KEYP, TKEYP - pxor KEY, STATE1 # round 0 - pxor KEY, STATE2 - pxor KEY, STATE3 - pxor KEY, STATE4 - add $0x30, TKEYP - cmp $24, KLEN - jb .L4dec128 - lea 0x20(TKEYP), TKEYP - je .L4dec192 - add $0x20, TKEYP - movaps -0x60(TKEYP), KEY - AESDEC KEY STATE1 - AESDEC KEY STATE2 - AESDEC KEY STATE3 - AESDEC KEY STATE4 - movaps -0x50(TKEYP), KEY - AESDEC KEY STATE1 - AESDEC KEY STATE2 - AESDEC KEY STATE3 - AESDEC KEY STATE4 -.align 4 -.L4dec192: - movaps -0x40(TKEYP), KEY - AESDEC KEY STATE1 - AESDEC KEY STATE2 - AESDEC KEY STATE3 - AESDEC KEY STATE4 - movaps -0x30(TKEYP), KEY - AESDEC KEY STATE1 - AESDEC KEY STATE2 - AESDEC KEY STATE3 - AESDEC KEY STATE4 -.align 4 -.L4dec128: - movaps -0x20(TKEYP), KEY - AESDEC KEY STATE1 - AESDEC KEY STATE2 - AESDEC KEY STATE3 - AESDEC KEY STATE4 - movaps -0x10(TKEYP), KEY - AESDEC KEY STATE1 - AESDEC KEY STATE2 - AESDEC KEY STATE3 - AESDEC KEY STATE4 - movaps (TKEYP), KEY - AESDEC KEY STATE1 - AESDEC KEY STATE2 - AESDEC KEY STATE3 - AESDEC KEY STATE4 - movaps 0x10(TKEYP), KEY - AESDEC KEY STATE1 - AESDEC KEY STATE2 - AESDEC KEY STATE3 - AESDEC KEY STATE4 - movaps 0x20(TKEYP), KEY - AESDEC KEY STATE1 - AESDEC KEY STATE2 - AESDEC KEY STATE3 - AESDEC KEY STATE4 - movaps 0x30(TKEYP), KEY - AESDEC KEY STATE1 - AESDEC KEY STATE2 - AESDEC KEY STATE3 - AESDEC KEY STATE4 - movaps 0x40(TKEYP), KEY - AESDEC KEY STATE1 - AESDEC KEY STATE2 - AESDEC KEY STATE3 - AESDEC KEY STATE4 - movaps 0x50(TKEYP), KEY - AESDEC KEY STATE1 - AESDEC KEY STATE2 - AESDEC KEY STATE3 - AESDEC KEY STATE4 - movaps 0x60(TKEYP), KEY - AESDEC KEY STATE1 - AESDEC KEY STATE2 - AESDEC KEY STATE3 - AESDEC KEY STATE4 - movaps 0x70(TKEYP), KEY - AESDECLAST KEY STATE1 # last round - AESDECLAST KEY STATE2 - AESDECLAST KEY STATE3 - AESDECLAST KEY STATE4 - ret - -/* - * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, - * size_t len) - */ -ENTRY(aesni_ecb_enc) -#ifndef __x86_64__ - pushl LEN - pushl KEYP - pushl KLEN - movl 16(%esp), KEYP - movl 20(%esp), OUTP - movl 24(%esp), INP - movl 28(%esp), LEN -#endif - test LEN, LEN # check length - jz .Lecb_enc_ret - mov 480(KEYP), KLEN - cmp $16, LEN - jb .Lecb_enc_ret - cmp $64, LEN - jb .Lecb_enc_loop1 -.align 4 -.Lecb_enc_loop4: - movups (INP), STATE1 - movups 0x10(INP), STATE2 - movups 0x20(INP), STATE3 - movups 0x30(INP), STATE4 - call _aesni_enc4 - movups STATE1, (OUTP) - movups STATE2, 0x10(OUTP) - movups 
STATE3, 0x20(OUTP) - movups STATE4, 0x30(OUTP) - sub $64, LEN - add $64, INP - add $64, OUTP - cmp $64, LEN - jge .Lecb_enc_loop4 - cmp $16, LEN - jb .Lecb_enc_ret -.align 4 -.Lecb_enc_loop1: - movups (INP), STATE1 - call _aesni_enc1 - movups STATE1, (OUTP) - sub $16, LEN - add $16, INP - add $16, OUTP - cmp $16, LEN - jge .Lecb_enc_loop1 -.Lecb_enc_ret: -#ifndef __x86_64__ - popl KLEN - popl KEYP - popl LEN -#endif - ret - -/* - * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, - * size_t len); - */ -ENTRY(aesni_ecb_dec) -#ifndef __x86_64__ - pushl LEN - pushl KEYP - pushl KLEN - movl 16(%esp), KEYP - movl 20(%esp), OUTP - movl 24(%esp), INP - movl 28(%esp), LEN -#endif - test LEN, LEN - jz .Lecb_dec_ret - mov 480(KEYP), KLEN - add $240, KEYP - cmp $16, LEN - jb .Lecb_dec_ret - cmp $64, LEN - jb .Lecb_dec_loop1 -.align 4 -.Lecb_dec_loop4: - movups (INP), STATE1 - movups 0x10(INP), STATE2 - movups 0x20(INP), STATE3 - movups 0x30(INP), STATE4 - call _aesni_dec4 - movups STATE1, (OUTP) - movups STATE2, 0x10(OUTP) - movups STATE3, 0x20(OUTP) - movups STATE4, 0x30(OUTP) - sub $64, LEN - add $64, INP - add $64, OUTP - cmp $64, LEN - jge .Lecb_dec_loop4 - cmp $16, LEN - jb .Lecb_dec_ret -.align 4 -.Lecb_dec_loop1: - movups (INP), STATE1 - call _aesni_dec1 - movups STATE1, (OUTP) - sub $16, LEN - add $16, INP - add $16, OUTP - cmp $16, LEN - jge .Lecb_dec_loop1 -.Lecb_dec_ret: -#ifndef __x86_64__ - popl KLEN - popl KEYP - popl LEN -#endif - ret - -/* - * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, - * size_t len, u8 *iv) - */ -ENTRY(aesni_cbc_enc) -#ifndef __x86_64__ - pushl IVP - pushl LEN - pushl KEYP - pushl KLEN - movl 20(%esp), KEYP - movl 24(%esp), OUTP - movl 28(%esp), INP - movl 32(%esp), LEN - movl 36(%esp), IVP -#endif - cmp $16, LEN - jb .Lcbc_enc_ret - mov 480(KEYP), KLEN - movups (IVP), STATE # load iv as initial state -.align 4 -.Lcbc_enc_loop: - movups (INP), IN # load input - pxor IN, STATE - call _aesni_enc1 - movups STATE, (OUTP) # store output - sub $16, LEN - add $16, INP - add $16, OUTP - cmp $16, LEN - jge .Lcbc_enc_loop - movups STATE, (IVP) -.Lcbc_enc_ret: -#ifndef __x86_64__ - popl KLEN - popl KEYP - popl LEN - popl IVP -#endif - ret - -/* - * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, - * size_t len, u8 *iv) - */ -ENTRY(aesni_cbc_dec) -#ifndef __x86_64__ - pushl IVP - pushl LEN - pushl KEYP - pushl KLEN - movl 20(%esp), KEYP - movl 24(%esp), OUTP - movl 28(%esp), INP - movl 32(%esp), LEN - movl 36(%esp), IVP -#endif - cmp $16, LEN - jb .Lcbc_dec_just_ret - mov 480(KEYP), KLEN - add $240, KEYP - movups (IVP), IV - cmp $64, LEN - jb .Lcbc_dec_loop1 -.align 4 -.Lcbc_dec_loop4: - movups (INP), IN1 - movaps IN1, STATE1 - movups 0x10(INP), IN2 - movaps IN2, STATE2 -#ifdef __x86_64__ - movups 0x20(INP), IN3 - movaps IN3, STATE3 - movups 0x30(INP), IN4 - movaps IN4, STATE4 -#else - movups 0x20(INP), IN1 - movaps IN1, STATE3 - movups 0x30(INP), IN2 - movaps IN2, STATE4 -#endif - call _aesni_dec4 - pxor IV, STATE1 -#ifdef __x86_64__ - pxor IN1, STATE2 - pxor IN2, STATE3 - pxor IN3, STATE4 - movaps IN4, IV -#else - pxor IN1, STATE4 - movaps IN2, IV - movups (INP), IN1 - pxor IN1, STATE2 - movups 0x10(INP), IN2 - pxor IN2, STATE3 -#endif - movups STATE1, (OUTP) - movups STATE2, 0x10(OUTP) - movups STATE3, 0x20(OUTP) - movups STATE4, 0x30(OUTP) - sub $64, LEN - add $64, INP - add $64, OUTP - cmp $64, LEN - jge .Lcbc_dec_loop4 - cmp $16, LEN - jb .Lcbc_dec_ret -.align 4 -.Lcbc_dec_loop1: - movups (INP), IN - 
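
The ECB entry points above share one shape: a four-block fast path through _aesni_enc4/_aesni_dec4 and a one-block tail loop, with the caller guaranteeing a whole number of blocks. The control flow of .Lecb_enc_loop4/.Lecb_enc_loop1 in rough C form; aes_enc1 and aes_enc4 are hypothetical stand-ins for the internal one- and four-block helpers:

	#include <stddef.h>
	#include <stdint.h>

	void aes_enc1(const void *ks, const uint8_t *in, uint8_t *out);	/* models _aesni_enc1 */
	void aes_enc4(const void *ks, const uint8_t *in, uint8_t *out);	/* models _aesni_enc4 */

	static void ecb_encrypt_model(const void *ks, const uint8_t *in,
				      uint8_t *out, size_t len)
	{
		/* bulk 64-byte iterations first */
		while (len >= 64) {
			aes_enc4(ks, in, out);
			in += 64; out += 64; len -= 64;
		}
		/* then whole 16-byte blocks one at a time */
		while (len >= 16) {
			aes_enc1(ks, in, out);
			in += 16; out += 16; len -= 16;
		}
	}
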
movaps IN, STATE - call _aesni_dec1 - pxor IV, STATE - movups STATE, (OUTP) - movaps IN, IV - sub $16, LEN - add $16, INP - add $16, OUTP - cmp $16, LEN - jge .Lcbc_dec_loop1 -.Lcbc_dec_ret: - movups IV, (IVP) -.Lcbc_dec_just_ret: -#ifndef __x86_64__ - popl KLEN - popl KEYP - popl LEN - popl IVP -#endif - ret - -#ifdef __x86_64__ -.align 16 -.Lbswap_mask: - .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 - -/* - * _aesni_inc_init: internal ABI - * setup registers used by _aesni_inc - * input: - * IV - * output: - * CTR: == IV, in little endian - * TCTR_LOW: == lower qword of CTR - * INC: == 1, in little endian - * BSWAP_MASK == endian swapping mask - */ -.align 4 -_aesni_inc_init: - movaps .Lbswap_mask, BSWAP_MASK - movaps IV, CTR - PSHUFB_XMM BSWAP_MASK CTR - mov $1, TCTR_LOW - MOVQ_R64_XMM TCTR_LOW INC - MOVQ_R64_XMM CTR TCTR_LOW - ret - -/* - * _aesni_inc: internal ABI - * Increase IV by 1, IV is in big endian - * input: - * IV - * CTR: == IV, in little endian - * TCTR_LOW: == lower qword of CTR - * INC: == 1, in little endian - * BSWAP_MASK == endian swapping mask - * output: - * IV: Increase by 1 - * changed: - * CTR: == output IV, in little endian - * TCTR_LOW: == lower qword of CTR - */ -.align 4 -_aesni_inc: - paddq INC, CTR - add $1, TCTR_LOW - jnc .Linc_low - pslldq $8, INC - paddq INC, CTR - psrldq $8, INC -.Linc_low: - movaps CTR, IV - PSHUFB_XMM BSWAP_MASK IV - ret - -/* - * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, - * size_t len, u8 *iv) - */ -ENTRY(aesni_ctr_enc) - cmp $16, LEN - jb .Lctr_enc_just_ret - mov 480(KEYP), KLEN - movups (IVP), IV - call _aesni_inc_init - cmp $64, LEN - jb .Lctr_enc_loop1 -.align 4 -.Lctr_enc_loop4: - movaps IV, STATE1 - call _aesni_inc - movups (INP), IN1 - movaps IV, STATE2 - call _aesni_inc - movups 0x10(INP), IN2 - movaps IV, STATE3 - call _aesni_inc - movups 0x20(INP), IN3 - movaps IV, STATE4 - call _aesni_inc - movups 0x30(INP), IN4 - call _aesni_enc4 - pxor IN1, STATE1 - movups STATE1, (OUTP) - pxor IN2, STATE2 - movups STATE2, 0x10(OUTP) - pxor IN3, STATE3 - movups STATE3, 0x20(OUTP) - pxor IN4, STATE4 - movups STATE4, 0x30(OUTP) - sub $64, LEN - add $64, INP - add $64, OUTP - cmp $64, LEN - jge .Lctr_enc_loop4 - cmp $16, LEN - jb .Lctr_enc_ret -.align 4 -.Lctr_enc_loop1: - movaps IV, STATE - call _aesni_inc - movups (INP), IN - call _aesni_enc1 - pxor IN, STATE - movups STATE, (OUTP) - sub $16, LEN - add $16, INP - add $16, OUTP - cmp $16, LEN - jge .Lctr_enc_loop1 -.Lctr_enc_ret: - movups IV, (IVP) -.Lctr_enc_just_ret: - ret -#endif diff --git a/ANDROID_3.4.5/arch/x86/crypto/aesni-intel_glue.c b/ANDROID_3.4.5/arch/x86/crypto/aesni-intel_glue.c deleted file mode 100644 index c799352e..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/aesni-intel_glue.c +++ /dev/null @@ -1,1389 +0,0 @@ -/* - * Support for Intel AES-NI instructions. This file contains glue - * code, the real AES implementation is in intel-aes_asm.S. - * - * Copyright (C) 2008, Intel Corp. - * Author: Huang Ying <ying.huang@intel.com> - * - * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD - * interface for 64-bit kernels. - * Authors: Adrian Hoban <adrian.hoban@intel.com> - * Gabriele Paoloni <gabriele.paoloni@intel.com> - * Tadeusz Struk (tadeusz.struk@intel.com) - * Aidan O'Mahony (aidan.o.mahony@intel.com) - * Copyright (c) 2010, Intel Corporation. 
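
_aesni_inc above keeps a little-endian copy of the counter so it can use paddq, and the jnc branch propagates the carry into the high quadword; the net effect on the big-endian IV is a plain 128-bit increment. A byte-wise C equivalent (ctr_inc_model is an illustrative name):

	#include <stdint.h>

	static void ctr_inc_model(uint8_t ctr[16])
	{
		int i;

		for (i = 15; i >= 0; i--)
			if (++ctr[i] != 0)	/* stop once a byte did not wrap */
				break;
	}
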
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <linux/hardirq.h> -#include <linux/types.h> -#include <linux/crypto.h> -#include <linux/module.h> -#include <linux/err.h> -#include <crypto/algapi.h> -#include <crypto/aes.h> -#include <crypto/cryptd.h> -#include <crypto/ctr.h> -#include <asm/cpu_device_id.h> -#include <asm/i387.h> -#include <asm/aes.h> -#include <crypto/scatterwalk.h> -#include <crypto/internal/aead.h> -#include <linux/workqueue.h> -#include <linux/spinlock.h> - -#if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE) -#define HAS_CTR -#endif - -#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE) -#define HAS_LRW -#endif - -#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) -#define HAS_PCBC -#endif - -#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE) -#define HAS_XTS -#endif - -struct async_aes_ctx { - struct cryptd_ablkcipher *cryptd_tfm; -}; - -/* This data is stored at the end of the crypto_tfm struct. - * It's a type of per "session" data storage location. - * This needs to be 16 byte aligned. - */ -struct aesni_rfc4106_gcm_ctx { - u8 hash_subkey[16]; - struct crypto_aes_ctx aes_key_expanded; - u8 nonce[4]; - struct cryptd_aead *cryptd_tfm; -}; - -struct aesni_gcm_set_hash_subkey_result { - int err; - struct completion completion; -}; - -struct aesni_hash_subkey_req_data { - u8 iv[16]; - struct aesni_gcm_set_hash_subkey_result result; - struct scatterlist sg; -}; - -#define AESNI_ALIGN (16) -#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) -#define RFC4106_HASH_SUBKEY_SIZE 16 - -asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, - unsigned int key_len); -asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in); -asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in); -asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len); -asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len); -asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len, u8 *iv); -asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len, u8 *iv); - -int crypto_fpu_init(void); -void crypto_fpu_exit(void); - -#ifdef CONFIG_X86_64 -asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, unsigned int len, u8 *iv); - -/* asmlinkage void aesni_gcm_enc() - * void *ctx, AES Key schedule. Starts on a 16 byte boundary. - * u8 *out, Ciphertext output. Encrypt in-place is allowed. - * const u8 *in, Plaintext input - * unsigned long plaintext_len, Length of data in bytes for encryption. - * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association) - * concatenated with 8 byte Initialisation Vector (from IPSec ESP - * Payload) concatenated with 0x00000001. 16-byte aligned pointer. - * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. - * const u8 *aad, Additional Authentication Data (AAD) - * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this - * is going to be 8 or 12 bytes - * u8 *auth_tag, Authenticated Tag output. - * unsigned long auth_tag_len), Authenticated Tag Length in bytes. 
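
Both GCM entry points in the assembly end the same way: the bit lengths of the AAD and the text are folded into the hash as len(A)||len(C), the result is whitened with E(K, Y0), and the tag is truncated to auth_tag_len. A compact C model of that tail, under the assumption of two helpers: ghash_mul stands in for GHASH_MUL and aes_encrypt_block for ENCRYPT_SINGLE_BLOCK; the names are illustrative:

	#include <stdint.h>
	#include <string.h>

	void ghash_mul(uint8_t ghash[16], const uint8_t h[16]);
	void aes_encrypt_block(const void *ks, const uint8_t in[16], uint8_t out[16]);

	static void gcm_finalize_model(const void *ks, const uint8_t h[16],
				       uint8_t ghash[16], const uint8_t j0[16],
				       uint64_t aad_len, uint64_t text_len,
				       uint8_t *tag, unsigned int tag_len /* 8, 12 or 16 */)
	{
		uint8_t len_block[16], ekj0[16];
		uint64_t abits = aad_len * 8, cbits = text_len * 8;
		int i;

		/* len(A) || len(C), each a 64-bit big-endian bit count */
		for (i = 0; i < 8; i++) {
			len_block[7 - i]  = (uint8_t)(abits >> (8 * i));
			len_block[15 - i] = (uint8_t)(cbits >> (8 * i));
		}
		for (i = 0; i < 16; i++)
			ghash[i] ^= len_block[i];
		ghash_mul(ghash, h);			/* final GHASH multiply */

		aes_encrypt_block(ks, j0, ekj0);	/* E(K, Y0) */
		for (i = 0; i < 16; i++)
			ekj0[i] ^= ghash[i];
		memcpy(tag, ekj0, tag_len);		/* truncate to 8/12/16 bytes */
	}
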
- * Valid values are 16 (most likely), 12 or 8. - */ -asmlinkage void aesni_gcm_enc(void *ctx, u8 *out, - const u8 *in, unsigned long plaintext_len, u8 *iv, - u8 *hash_subkey, const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len); - -/* asmlinkage void aesni_gcm_dec() - * void *ctx, AES Key schedule. Starts on a 16 byte boundary. - * u8 *out, Plaintext output. Decrypt in-place is allowed. - * const u8 *in, Ciphertext input - * unsigned long ciphertext_len, Length of data in bytes for decryption. - * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association) - * concatenated with 8 byte Initialisation Vector (from IPSec ESP - * Payload) concatenated with 0x00000001. 16-byte aligned pointer. - * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. - * const u8 *aad, Additional Authentication Data (AAD) - * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going - * to be 8 or 12 bytes - * u8 *auth_tag, Authenticated Tag output. - * unsigned long auth_tag_len) Authenticated Tag Length in bytes. - * Valid values are 16 (most likely), 12 or 8. - */ -asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, - const u8 *in, unsigned long ciphertext_len, u8 *iv, - u8 *hash_subkey, const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len); - -static inline struct -aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) -{ - return - (struct aesni_rfc4106_gcm_ctx *) - PTR_ALIGN((u8 *) - crypto_tfm_ctx(crypto_aead_tfm(tfm)), AESNI_ALIGN); -} -#endif - -static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) -{ - unsigned long addr = (unsigned long)raw_ctx; - unsigned long align = AESNI_ALIGN; - - if (align <= crypto_tfm_ctx_alignment()) - align = 1; - return (struct crypto_aes_ctx *)ALIGN(addr, align); -} - -static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, - const u8 *in_key, unsigned int key_len) -{ - struct crypto_aes_ctx *ctx = aes_ctx(raw_ctx); - u32 *flags = &tfm->crt_flags; - int err; - - if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 && - key_len != AES_KEYSIZE_256) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - - if (!irq_fpu_usable()) - err = crypto_aes_expand_key(ctx, in_key, key_len); - else { - kernel_fpu_begin(); - err = aesni_set_key(ctx, in_key, key_len); - kernel_fpu_end(); - } - - return err; -} - -static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len) -{ - return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len); -} - -static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - - if (!irq_fpu_usable()) - crypto_aes_encrypt_x86(ctx, dst, src); - else { - kernel_fpu_begin(); - aesni_enc(ctx, dst, src); - kernel_fpu_end(); - } -} - -static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - - if (!irq_fpu_usable()) - crypto_aes_decrypt_x86(ctx, dst, src); - else { - kernel_fpu_begin(); - aesni_dec(ctx, dst, src); - kernel_fpu_end(); - } -} - -static struct crypto_alg aesni_alg = { - .cra_name = "aes", - .cra_driver_name = "aes-aesni", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(aesni_alg.cra_list), - .cra_u = { - .cipher = { - 
.cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = aes_set_key, - .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt - } - } -}; - -static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - - aesni_enc(ctx, dst, src); -} - -static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - - aesni_dec(ctx, dst, src); -} - -static struct crypto_alg __aesni_alg = { - .cra_name = "__aes-aesni", - .cra_driver_name = "__driver-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(__aesni_alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = aes_set_key, - .cia_encrypt = __aes_encrypt, - .cia_decrypt = __aes_decrypt - } - } -}; - -static int ecb_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - kernel_fpu_begin(); - while ((nbytes = walk.nbytes)) { - aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, - nbytes & AES_BLOCK_MASK); - nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); - } - kernel_fpu_end(); - - return err; -} - -static int ecb_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - kernel_fpu_begin(); - while ((nbytes = walk.nbytes)) { - aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, - nbytes & AES_BLOCK_MASK); - nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); - } - kernel_fpu_end(); - - return err; -} - -static struct crypto_alg blk_ecb_alg = { - .cra_name = "__ecb-aes-aesni", - .cra_driver_name = "__driver-ecb-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = aes_set_key, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}; - -static int cbc_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - kernel_fpu_begin(); - while ((nbytes = walk.nbytes)) { - aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, - nbytes & 
AES_BLOCK_MASK, walk.iv); - nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); - } - kernel_fpu_end(); - - return err; -} - -static int cbc_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - kernel_fpu_begin(); - while ((nbytes = walk.nbytes)) { - aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, - nbytes & AES_BLOCK_MASK, walk.iv); - nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); - } - kernel_fpu_end(); - - return err; -} - -static struct crypto_alg blk_cbc_alg = { - .cra_name = "__cbc-aes-aesni", - .cra_driver_name = "__driver-cbc-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = aes_set_key, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}; - -#ifdef CONFIG_X86_64 -static void ctr_crypt_final(struct crypto_aes_ctx *ctx, - struct blkcipher_walk *walk) -{ - u8 *ctrblk = walk->iv; - u8 keystream[AES_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - aesni_enc(ctx, keystream, ctrblk); - crypto_xor(keystream, src, nbytes); - memcpy(dst, keystream, nbytes); - crypto_inc(ctrblk, AES_BLOCK_SIZE); -} - -static int ctr_crypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - kernel_fpu_begin(); - while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { - aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, - nbytes & AES_BLOCK_MASK, walk.iv); - nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); - } - if (walk.nbytes) { - ctr_crypt_final(ctx, &walk); - err = blkcipher_walk_done(desc, &walk, 0); - } - kernel_fpu_end(); - - return err; -} - -static struct crypto_alg blk_ctr_alg = { - .cra_name = "__ctr-aes-aesni", - .cra_driver_name = "__driver-ctr-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aes_set_key, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}; -#endif - -static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, - unsigned int key_len) -{ - struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; - int err; - - 
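
ctr_crypt_final() above is why the CTR template can advertise a block size of 1: the trailing sub-block is produced by encrypting the counter once and XORing only the bytes that remain, so no padding ever reaches the cipher. Roughly, assuming the same hypothetical one-block helper as before:

	#include <stdint.h>

	void aes_enc1(const void *ks, const uint8_t *in, uint8_t *out);	/* hypothetical */

	static void ctr_final_model(const void *ks, const uint8_t ctrblk[16],
				    const uint8_t *src, uint8_t *dst,
				    unsigned int nbytes /* < 16 */)
	{
		uint8_t keystream[16];
		unsigned int i;

		aes_enc1(ks, ctrblk, keystream);
		for (i = 0; i < nbytes; i++)
			dst[i] = src[i] ^ keystream[i];
	}
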
crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) - & CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(child, key, key_len); - crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) - & CRYPTO_TFM_RES_MASK); - return err; -} - -static int ablk_encrypt(struct ablkcipher_request *req) -{ - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - - if (!irq_fpu_usable()) { - struct ablkcipher_request *cryptd_req = - ablkcipher_request_ctx(req); - memcpy(cryptd_req, req, sizeof(*req)); - ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); - return crypto_ablkcipher_encrypt(cryptd_req); - } else { - struct blkcipher_desc desc; - desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); - desc.info = req->info; - desc.flags = 0; - return crypto_blkcipher_crt(desc.tfm)->encrypt( - &desc, req->dst, req->src, req->nbytes); - } -} - -static int ablk_decrypt(struct ablkcipher_request *req) -{ - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - - if (!irq_fpu_usable()) { - struct ablkcipher_request *cryptd_req = - ablkcipher_request_ctx(req); - memcpy(cryptd_req, req, sizeof(*req)); - ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); - return crypto_ablkcipher_decrypt(cryptd_req); - } else { - struct blkcipher_desc desc; - desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); - desc.info = req->info; - desc.flags = 0; - return crypto_blkcipher_crt(desc.tfm)->decrypt( - &desc, req->dst, req->src, req->nbytes); - } -} - -static void ablk_exit(struct crypto_tfm *tfm) -{ - struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - cryptd_free_ablkcipher(ctx->cryptd_tfm); -} - -static void ablk_init_common(struct crypto_tfm *tfm, - struct cryptd_ablkcipher *cryptd_tfm) -{ - struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - ctx->cryptd_tfm = cryptd_tfm; - tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + - crypto_ablkcipher_reqsize(&cryptd_tfm->base); -} - -static int ablk_ecb_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-aes-aesni", 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_ecb_alg = { - .cra_name = "ecb(aes)", - .cra_driver_name = "ecb-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list), - .cra_init = ablk_ecb_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}; - -static int ablk_cbc_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-aes-aesni", 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_cbc_alg = { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - 
.cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list), - .cra_init = ablk_cbc_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}; - -#ifdef CONFIG_X86_64 -static int ablk_ctr_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-aes-aesni", 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_ctr_alg = { - .cra_name = "ctr(aes)", - .cra_driver_name = "ctr-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list), - .cra_init = ablk_ctr_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}; - -#ifdef HAS_CTR -static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher( - "rfc3686(__driver-ctr-aes-aesni)", 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_rfc3686_ctr_alg = { - .cra_name = "rfc3686(ctr(aes))", - .cra_driver_name = "rfc3686-ctr-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_rfc3686_ctr_alg.cra_list), - .cra_init = ablk_rfc3686_ctr_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE+CTR_RFC3686_NONCE_SIZE, - .max_keysize = AES_MAX_KEY_SIZE+CTR_RFC3686_NONCE_SIZE, - .ivsize = CTR_RFC3686_IV_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - .geniv = "seqiv", - }, - }, -}; -#endif -#endif - -#ifdef HAS_LRW -static int ablk_lrw_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("fpu(lrw(__driver-aes-aesni))", - 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_lrw_alg = { - .cra_name = "lrw(aes)", - .cra_driver_name = "lrw-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list), - .cra_init = ablk_lrw_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, - .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, - .ivsize = 
AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}; -#endif - -#ifdef HAS_PCBC -static int ablk_pcbc_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("fpu(pcbc(__driver-aes-aesni))", - 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_pcbc_alg = { - .cra_name = "pcbc(aes)", - .cra_driver_name = "pcbc-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_pcbc_alg.cra_list), - .cra_init = ablk_pcbc_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}; -#endif - -#ifdef HAS_XTS -static int ablk_xts_init(struct crypto_tfm *tfm) -{ - struct cryptd_ablkcipher *cryptd_tfm; - - cryptd_tfm = cryptd_alloc_ablkcipher("fpu(xts(__driver-aes-aesni))", - 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ablk_init_common(tfm, cryptd_tfm); - return 0; -} - -static struct crypto_alg ablk_xts_alg = { - .cra_name = "xts(aes)", - .cra_driver_name = "xts-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_aes_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list), - .cra_init = ablk_xts_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}; -#endif - -#ifdef CONFIG_X86_64 -static int rfc4106_init(struct crypto_tfm *tfm) -{ - struct cryptd_aead *cryptd_tfm; - struct aesni_rfc4106_gcm_ctx *ctx = (struct aesni_rfc4106_gcm_ctx *) - PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); - struct crypto_aead *cryptd_child; - struct aesni_rfc4106_gcm_ctx *child_ctx; - cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - cryptd_child = cryptd_aead_child(cryptd_tfm); - child_ctx = aesni_rfc4106_gcm_ctx_get(cryptd_child); - memcpy(child_ctx, ctx, sizeof(*ctx)); - ctx->cryptd_tfm = cryptd_tfm; - tfm->crt_aead.reqsize = sizeof(struct aead_request) - + crypto_aead_reqsize(&cryptd_tfm->base); - return 0; -} - -static void rfc4106_exit(struct crypto_tfm *tfm) -{ - struct aesni_rfc4106_gcm_ctx *ctx = - (struct aesni_rfc4106_gcm_ctx *) - PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); - if (!IS_ERR(ctx->cryptd_tfm)) - cryptd_free_aead(ctx->cryptd_tfm); - return; -} - -static void -rfc4106_set_hash_subkey_done(struct crypto_async_request *req, int err) -{ - struct aesni_gcm_set_hash_subkey_result *result = req->data; - - if (err == -EINPROGRESS) - return; - result->err = err; - complete(&result->completion); -} - -static int -rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) -{ - struct crypto_ablkcipher *ctr_tfm; - struct ablkcipher_request *req; - int ret 
= -EINVAL;
-	struct aesni_hash_subkey_req_data *req_data;
-
-	ctr_tfm = crypto_alloc_ablkcipher("ctr(aes)", 0, 0);
-	if (IS_ERR(ctr_tfm))
-		return PTR_ERR(ctr_tfm);
-
-	crypto_ablkcipher_clear_flags(ctr_tfm, ~0);
-
-	ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len);
-	if (ret)
-		goto out_free_ablkcipher;
-
-	ret = -ENOMEM;
-	req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL);
-	if (!req)
-		goto out_free_ablkcipher;
-
-	req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
-	if (!req_data)
-		goto out_free_request;
-
-	memset(req_data->iv, 0, sizeof(req_data->iv));
-
-	/* Clear the hash sub key container to zero. */
-	/* We want to cipher all zeros to create the hash sub key. */
-	memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE);
-
-	init_completion(&req_data->result.completion);
-	sg_init_one(&req_data->sg, hash_subkey, RFC4106_HASH_SUBKEY_SIZE);
-	ablkcipher_request_set_tfm(req, ctr_tfm);
-	ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
-					CRYPTO_TFM_REQ_MAY_BACKLOG,
-					rfc4106_set_hash_subkey_done,
-					&req_data->result);
-
-	ablkcipher_request_set_crypt(req, &req_data->sg,
-		&req_data->sg, RFC4106_HASH_SUBKEY_SIZE, req_data->iv);
-
-	ret = crypto_ablkcipher_encrypt(req);
-	if (ret == -EINPROGRESS || ret == -EBUSY) {
-		ret = wait_for_completion_interruptible
-			(&req_data->result.completion);
-		if (!ret)
-			ret = req_data->result.err;
-	}
-	kfree(req_data);
-out_free_request:
-	ablkcipher_request_free(req);
-out_free_ablkcipher:
-	crypto_free_ablkcipher(ctr_tfm);
-	return ret;
-}
-
-static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
-			   unsigned int key_len)
-{
-	int ret = 0;
-	struct crypto_tfm *tfm = crypto_aead_tfm(parent);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
-	struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
-	struct aesni_rfc4106_gcm_ctx *child_ctx =
-		aesni_rfc4106_gcm_ctx_get(cryptd_child);
-	u8 *new_key_mem = NULL;
-
-	if (key_len < 4) {
-		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-	/* Account for the 4 byte nonce at the end. */
-	key_len -= 4;
-	if (key_len != AES_KEYSIZE_128) {
-		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce));
-	/* This must be on a 16 byte boundary! */
-	if ((unsigned long)(&(ctx->aes_key_expanded.key_enc[0])) % AESNI_ALIGN)
-		return -EINVAL;
-
-	if ((unsigned long)key % AESNI_ALIGN) {
-		/* key is not aligned: use an auxiliary aligned pointer */
-		new_key_mem = kmalloc(key_len+AESNI_ALIGN, GFP_KERNEL);
-		if (!new_key_mem)
-			return -ENOMEM;
-
-		new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
-		memcpy(new_key_mem, key, key_len);
-		key = new_key_mem;
-	}
-
-	if (!irq_fpu_usable())
-		ret = crypto_aes_expand_key(&(ctx->aes_key_expanded),
-		key, key_len);
-	else {
-		kernel_fpu_begin();
-		ret = aesni_set_key(&(ctx->aes_key_expanded), key, key_len);
-		kernel_fpu_end();
-	}
-	/* This must be on a 16 byte boundary! */
-	if ((unsigned long)(&(ctx->hash_subkey[0])) % AESNI_ALIGN) {
-		ret = -EINVAL;
-		goto exit;
-	}
-	ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
-	memcpy(child_ctx, ctx, sizeof(*ctx));
-exit:
-	kfree(new_key_mem);
-	return ret;
-}
-
-/* This is the Integrity Check Value (aka the authentication tag) length and
- * can be 8, 12 or 16 bytes long.
*/ -static int rfc4106_set_authsize(struct crypto_aead *parent, - unsigned int authsize) -{ - struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); - struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); - - switch (authsize) { - case 8: - case 12: - case 16: - break; - default: - return -EINVAL; - } - crypto_aead_crt(parent)->authsize = authsize; - crypto_aead_crt(cryptd_child)->authsize = authsize; - return 0; -} - -static int rfc4106_encrypt(struct aead_request *req) -{ - int ret; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); - - if (!irq_fpu_usable()) { - struct aead_request *cryptd_req = - (struct aead_request *) aead_request_ctx(req); - memcpy(cryptd_req, req, sizeof(*req)); - aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); - return crypto_aead_encrypt(cryptd_req); - } else { - struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); - kernel_fpu_begin(); - ret = cryptd_child->base.crt_aead.encrypt(req); - kernel_fpu_end(); - return ret; - } -} - -static int rfc4106_decrypt(struct aead_request *req) -{ - int ret; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); - - if (!irq_fpu_usable()) { - struct aead_request *cryptd_req = - (struct aead_request *) aead_request_ctx(req); - memcpy(cryptd_req, req, sizeof(*req)); - aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); - return crypto_aead_decrypt(cryptd_req); - } else { - struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); - kernel_fpu_begin(); - ret = cryptd_child->base.crt_aead.decrypt(req); - kernel_fpu_end(); - return ret; - } -} - -static struct crypto_alg rfc4106_alg = { - .cra_name = "rfc4106(gcm(aes))", - .cra_driver_name = "rfc4106-gcm-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN, - .cra_alignmask = 0, - .cra_type = &crypto_nivaead_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(rfc4106_alg.cra_list), - .cra_init = rfc4106_init, - .cra_exit = rfc4106_exit, - .cra_u = { - .aead = { - .setkey = rfc4106_set_key, - .setauthsize = rfc4106_set_authsize, - .encrypt = rfc4106_encrypt, - .decrypt = rfc4106_decrypt, - .geniv = "seqiv", - .ivsize = 8, - .maxauthsize = 16, - }, - }, -}; - -static int __driver_rfc4106_encrypt(struct aead_request *req) -{ - u8 one_entry_in_sg = 0; - u8 *src, *dst, *assoc; - __be32 counter = cpu_to_be32(1); - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); - void *aes_ctx = &(ctx->aes_key_expanded); - unsigned long auth_tag_len = crypto_aead_authsize(tfm); - u8 iv_tab[16+AESNI_ALIGN]; - u8* iv = (u8 *) PTR_ALIGN((u8 *)iv_tab, AESNI_ALIGN); - struct scatter_walk src_sg_walk; - struct scatter_walk assoc_sg_walk; - struct scatter_walk dst_sg_walk; - unsigned int i; - - /* Assuming we are supporting rfc4106 64-bit extended */ - /* sequence numbers We need to have the AAD length equal */ - /* to 8 or 12 bytes */ - if (unlikely(req->assoclen != 8 && req->assoclen != 12)) - return -EINVAL; - /* IV below built */ - for (i = 0; i < 4; i++) - *(iv+i) = ctx->nonce[i]; - for (i = 0; i < 8; i++) - *(iv+4+i) = req->iv[i]; - *((__be32 *)(iv+12)) = counter; - - if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) { - one_entry_in_sg = 1; - scatterwalk_start(&src_sg_walk, 
req->src); - scatterwalk_start(&assoc_sg_walk, req->assoc); - src = scatterwalk_map(&src_sg_walk); - assoc = scatterwalk_map(&assoc_sg_walk); - dst = src; - if (unlikely(req->src != req->dst)) { - scatterwalk_start(&dst_sg_walk, req->dst); - dst = scatterwalk_map(&dst_sg_walk); - } - - } else { - /* Allocate memory for src, dst, assoc */ - src = kmalloc(req->cryptlen + auth_tag_len + req->assoclen, - GFP_ATOMIC); - if (unlikely(!src)) - return -ENOMEM; - assoc = (src + req->cryptlen + auth_tag_len); - scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); - scatterwalk_map_and_copy(assoc, req->assoc, 0, - req->assoclen, 0); - dst = src; - } - - aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, - ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst - + ((unsigned long)req->cryptlen), auth_tag_len); - - /* The authTag (aka the Integrity Check Value) needs to be written - * back to the packet. */ - if (one_entry_in_sg) { - if (unlikely(req->src != req->dst)) { - scatterwalk_unmap(dst); - scatterwalk_done(&dst_sg_walk, 0, 0); - } - scatterwalk_unmap(src); - scatterwalk_unmap(assoc); - scatterwalk_done(&src_sg_walk, 0, 0); - scatterwalk_done(&assoc_sg_walk, 0, 0); - } else { - scatterwalk_map_and_copy(dst, req->dst, 0, - req->cryptlen + auth_tag_len, 1); - kfree(src); - } - return 0; -} - -static int __driver_rfc4106_decrypt(struct aead_request *req) -{ - u8 one_entry_in_sg = 0; - u8 *src, *dst, *assoc; - unsigned long tempCipherLen = 0; - __be32 counter = cpu_to_be32(1); - int retval = 0; - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); - void *aes_ctx = &(ctx->aes_key_expanded); - unsigned long auth_tag_len = crypto_aead_authsize(tfm); - u8 iv_and_authTag[32+AESNI_ALIGN]; - u8 *iv = (u8 *) PTR_ALIGN((u8 *)iv_and_authTag, AESNI_ALIGN); - u8 *authTag = iv + 16; - struct scatter_walk src_sg_walk; - struct scatter_walk assoc_sg_walk; - struct scatter_walk dst_sg_walk; - unsigned int i; - - if (unlikely((req->cryptlen < auth_tag_len) || - (req->assoclen != 8 && req->assoclen != 12))) - return -EINVAL; - /* Assuming we are supporting rfc4106 64-bit extended */ - /* sequence numbers We need to have the AAD length */ - /* equal to 8 or 12 bytes */ - - tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len); - /* IV below built */ - for (i = 0; i < 4; i++) - *(iv+i) = ctx->nonce[i]; - for (i = 0; i < 8; i++) - *(iv+4+i) = req->iv[i]; - *((__be32 *)(iv+12)) = counter; - - if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) { - one_entry_in_sg = 1; - scatterwalk_start(&src_sg_walk, req->src); - scatterwalk_start(&assoc_sg_walk, req->assoc); - src = scatterwalk_map(&src_sg_walk); - assoc = scatterwalk_map(&assoc_sg_walk); - dst = src; - if (unlikely(req->src != req->dst)) { - scatterwalk_start(&dst_sg_walk, req->dst); - dst = scatterwalk_map(&dst_sg_walk); - } - - } else { - /* Allocate memory for src, dst, assoc */ - src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC); - if (!src) - return -ENOMEM; - assoc = (src + req->cryptlen + auth_tag_len); - scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); - scatterwalk_map_and_copy(assoc, req->assoc, 0, - req->assoclen, 0); - dst = src; - } - - aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv, - ctx->hash_subkey, assoc, (unsigned long)req->assoclen, - authTag, auth_tag_len); - - /* Compare generated tag with passed in tag. */ - retval = memcmp(src + tempCipherLen, authTag, auth_tag_len) ? 
- -EBADMSG : 0; - - if (one_entry_in_sg) { - if (unlikely(req->src != req->dst)) { - scatterwalk_unmap(dst); - scatterwalk_done(&dst_sg_walk, 0, 0); - } - scatterwalk_unmap(src); - scatterwalk_unmap(assoc); - scatterwalk_done(&src_sg_walk, 0, 0); - scatterwalk_done(&assoc_sg_walk, 0, 0); - } else { - scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1); - kfree(src); - } - return retval; -} - -static struct crypto_alg __rfc4106_alg = { - .cra_name = "__gcm-aes-aesni", - .cra_driver_name = "__driver-gcm-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_AEAD, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN, - .cra_alignmask = 0, - .cra_type = &crypto_aead_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(__rfc4106_alg.cra_list), - .cra_u = { - .aead = { - .encrypt = __driver_rfc4106_encrypt, - .decrypt = __driver_rfc4106_decrypt, - }, - }, -}; -#endif - - -static const struct x86_cpu_id aesni_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_AES), - {} -}; -MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); - -static int __init aesni_init(void) -{ - int err; - - if (!x86_match_cpu(aesni_cpu_id)) - return -ENODEV; - - if ((err = crypto_fpu_init())) - goto fpu_err; - if ((err = crypto_register_alg(&aesni_alg))) - goto aes_err; - if ((err = crypto_register_alg(&__aesni_alg))) - goto __aes_err; - if ((err = crypto_register_alg(&blk_ecb_alg))) - goto blk_ecb_err; - if ((err = crypto_register_alg(&blk_cbc_alg))) - goto blk_cbc_err; - if ((err = crypto_register_alg(&ablk_ecb_alg))) - goto ablk_ecb_err; - if ((err = crypto_register_alg(&ablk_cbc_alg))) - goto ablk_cbc_err; -#ifdef CONFIG_X86_64 - if ((err = crypto_register_alg(&blk_ctr_alg))) - goto blk_ctr_err; - if ((err = crypto_register_alg(&ablk_ctr_alg))) - goto ablk_ctr_err; - if ((err = crypto_register_alg(&__rfc4106_alg))) - goto __aead_gcm_err; - if ((err = crypto_register_alg(&rfc4106_alg))) - goto aead_gcm_err; -#ifdef HAS_CTR - if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) - goto ablk_rfc3686_ctr_err; -#endif -#endif -#ifdef HAS_LRW - if ((err = crypto_register_alg(&ablk_lrw_alg))) - goto ablk_lrw_err; -#endif -#ifdef HAS_PCBC - if ((err = crypto_register_alg(&ablk_pcbc_alg))) - goto ablk_pcbc_err; -#endif -#ifdef HAS_XTS - if ((err = crypto_register_alg(&ablk_xts_alg))) - goto ablk_xts_err; -#endif - return err; - -#ifdef HAS_XTS -ablk_xts_err: -#endif -#ifdef HAS_PCBC - crypto_unregister_alg(&ablk_pcbc_alg); -ablk_pcbc_err: -#endif -#ifdef HAS_LRW - crypto_unregister_alg(&ablk_lrw_alg); -ablk_lrw_err: -#endif -#ifdef CONFIG_X86_64 -#ifdef HAS_CTR - crypto_unregister_alg(&ablk_rfc3686_ctr_alg); -ablk_rfc3686_ctr_err: -#endif - crypto_unregister_alg(&rfc4106_alg); -aead_gcm_err: - crypto_unregister_alg(&__rfc4106_alg); -__aead_gcm_err: - crypto_unregister_alg(&ablk_ctr_alg); -ablk_ctr_err: - crypto_unregister_alg(&blk_ctr_alg); -blk_ctr_err: -#endif - crypto_unregister_alg(&ablk_cbc_alg); -ablk_cbc_err: - crypto_unregister_alg(&ablk_ecb_alg); -ablk_ecb_err: - crypto_unregister_alg(&blk_cbc_alg); -blk_cbc_err: - crypto_unregister_alg(&blk_ecb_alg); -blk_ecb_err: - crypto_unregister_alg(&__aesni_alg); -__aes_err: - crypto_unregister_alg(&aesni_alg); -aes_err: -fpu_err: - return err; -} - -static void __exit aesni_exit(void) -{ -#ifdef HAS_XTS - crypto_unregister_alg(&ablk_xts_alg); -#endif -#ifdef HAS_PCBC - crypto_unregister_alg(&ablk_pcbc_alg); -#endif -#ifdef HAS_LRW - crypto_unregister_alg(&ablk_lrw_alg); -#endif -#ifdef CONFIG_X86_64 -#ifdef HAS_CTR - 
crypto_unregister_alg(&ablk_rfc3686_ctr_alg); -#endif - crypto_unregister_alg(&rfc4106_alg); - crypto_unregister_alg(&__rfc4106_alg); - crypto_unregister_alg(&ablk_ctr_alg); - crypto_unregister_alg(&blk_ctr_alg); -#endif - crypto_unregister_alg(&ablk_cbc_alg); - crypto_unregister_alg(&ablk_ecb_alg); - crypto_unregister_alg(&blk_cbc_alg); - crypto_unregister_alg(&blk_ecb_alg); - crypto_unregister_alg(&__aesni_alg); - crypto_unregister_alg(&aesni_alg); - - crypto_fpu_exit(); -} - -module_init(aesni_init); -module_exit(aesni_exit); - -MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("aes"); diff --git a/ANDROID_3.4.5/arch/x86/crypto/blowfish-x86_64-asm_64.S b/ANDROID_3.4.5/arch/x86/crypto/blowfish-x86_64-asm_64.S deleted file mode 100644 index 391d245d..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/blowfish-x86_64-asm_64.S +++ /dev/null @@ -1,390 +0,0 @@ -/* - * Blowfish Cipher Algorithm (x86_64) - * - * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - */ - -.file "blowfish-x86_64-asm.S" -.text - -/* structure of crypto context */ -#define p 0 -#define s0 ((16 + 2) * 4) -#define s1 ((16 + 2 + (1 * 256)) * 4) -#define s2 ((16 + 2 + (2 * 256)) * 4) -#define s3 ((16 + 2 + (3 * 256)) * 4) - -/* register macros */ -#define CTX %rdi -#define RIO %rsi - -#define RX0 %rax -#define RX1 %rbx -#define RX2 %rcx -#define RX3 %rdx - -#define RX0d %eax -#define RX1d %ebx -#define RX2d %ecx -#define RX3d %edx - -#define RX0bl %al -#define RX1bl %bl -#define RX2bl %cl -#define RX3bl %dl - -#define RX0bh %ah -#define RX1bh %bh -#define RX2bh %ch -#define RX3bh %dh - -#define RT0 %rbp -#define RT1 %rsi -#define RT2 %r8 -#define RT3 %r9 - -#define RT0d %ebp -#define RT1d %esi -#define RT2d %r8d -#define RT3d %r9d - -#define RKEY %r10 - -/*********************************************************************** - * 1-way blowfish - ***********************************************************************/ -#define F() \ - rorq $16, RX0; \ - movzbl RX0bh, RT0d; \ - movzbl RX0bl, RT1d; \ - rolq $16, RX0; \ - movl s0(CTX,RT0,4), RT0d; \ - addl s1(CTX,RT1,4), RT0d; \ - movzbl RX0bh, RT1d; \ - movzbl RX0bl, RT2d; \ - rolq $32, RX0; \ - xorl s2(CTX,RT1,4), RT0d; \ - addl s3(CTX,RT2,4), RT0d; \ - xorq RT0, RX0; - -#define add_roundkey_enc(n) \ - xorq p+4*(n)(CTX), RX0; - -#define round_enc(n) \ - add_roundkey_enc(n); \ - \ - F(); \ - F(); - -#define add_roundkey_dec(n) \ - movq p+4*(n-1)(CTX), RT0; \ - rorq $32, RT0; \ - xorq RT0, RX0; - -#define round_dec(n) \ - add_roundkey_dec(n); \ - \ - F(); \ - F(); \ - -#define read_block() \ - movq (RIO), RX0; \ - rorq $32, RX0; \ - bswapq RX0; - -#define write_block() \ - bswapq RX0; \ - movq RX0, (RIO); - -#define xor_block() \ - bswapq RX0; \ - xorq RX0, (RIO); - 
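The F() macro above is the Blowfish round function: four key-dependent S-box lookups combined with 32-bit adds and xors, where the rorq/rolq shuffling exists only so that every S-box index can be read straight from a byte subregister (%al/%ah style) instead of shifting and masking. A minimal portable C sketch of the same function follows, assuming a hypothetical bf_sboxes struct in place of the s0..s3 table offsets the asm addresses off CTX:

#include <stdint.h>

/* Hypothetical container for the four 256-entry Blowfish S-boxes
 * (the asm reaches them via the s0..s3 offsets relative to CTX). */
struct bf_sboxes {
	uint32_t s0[256], s1[256], s2[256], s3[256];
};

/* F(x) = ((s0[x>>24] + s1[(x>>16)&0xff]) ^ s2[(x>>8)&0xff]) + s3[x&0xff],
 * with all arithmetic mod 2^32; the result is then xored into the other
 * half of the 64-bit block, which is what the final xorq in F() does. */
static uint32_t blowfish_f(const struct bf_sboxes *sb, uint32_t x)
{
	uint32_t h = sb->s0[x >> 24] + sb->s1[(x >> 16) & 0xff];

	return (h ^ sb->s2[(x >> 8) & 0xff]) + sb->s3[x & 0xff];
}
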
-.align 8 -.global __blowfish_enc_blk -.type __blowfish_enc_blk,@function; - -__blowfish_enc_blk: - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: bool, if true: xor output - */ - movq %rbp, %r11; - - movq %rsi, %r10; - movq %rdx, RIO; - - read_block(); - - round_enc(0); - round_enc(2); - round_enc(4); - round_enc(6); - round_enc(8); - round_enc(10); - round_enc(12); - round_enc(14); - add_roundkey_enc(16); - - movq %r11, %rbp; - - movq %r10, RIO; - test %cl, %cl; - jnz __enc_xor; - - write_block(); - ret; -__enc_xor: - xor_block(); - ret; - -.align 8 -.global blowfish_dec_blk -.type blowfish_dec_blk,@function; - -blowfish_dec_blk: - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - movq %rbp, %r11; - - movq %rsi, %r10; - movq %rdx, RIO; - - read_block(); - - round_dec(17); - round_dec(15); - round_dec(13); - round_dec(11); - round_dec(9); - round_dec(7); - round_dec(5); - round_dec(3); - add_roundkey_dec(1); - - movq %r10, RIO; - write_block(); - - movq %r11, %rbp; - - ret; - -/********************************************************************** - 4-way blowfish, four blocks parallel - **********************************************************************/ - -/* F() for 4-way. Slower when used alone/1-way, but faster when used - * parallel/4-way (tested on AMD Phenom II & Intel Xeon E7330). - */ -#define F4(x) \ - movzbl x ## bh, RT1d; \ - movzbl x ## bl, RT3d; \ - rorq $16, x; \ - movzbl x ## bh, RT0d; \ - movzbl x ## bl, RT2d; \ - rorq $16, x; \ - movl s0(CTX,RT0,4), RT0d; \ - addl s1(CTX,RT2,4), RT0d; \ - xorl s2(CTX,RT1,4), RT0d; \ - addl s3(CTX,RT3,4), RT0d; \ - xorq RT0, x; - -#define add_preloaded_roundkey4() \ - xorq RKEY, RX0; \ - xorq RKEY, RX1; \ - xorq RKEY, RX2; \ - xorq RKEY, RX3; - -#define preload_roundkey_enc(n) \ - movq p+4*(n)(CTX), RKEY; - -#define add_roundkey_enc4(n) \ - add_preloaded_roundkey4(); \ - preload_roundkey_enc(n + 2); - -#define round_enc4(n) \ - add_roundkey_enc4(n); \ - \ - F4(RX0); \ - F4(RX1); \ - F4(RX2); \ - F4(RX3); \ - \ - F4(RX0); \ - F4(RX1); \ - F4(RX2); \ - F4(RX3); - -#define preload_roundkey_dec(n) \ - movq p+4*((n)-1)(CTX), RKEY; \ - rorq $32, RKEY; - -#define add_roundkey_dec4(n) \ - add_preloaded_roundkey4(); \ - preload_roundkey_dec(n - 2); - -#define round_dec4(n) \ - add_roundkey_dec4(n); \ - \ - F4(RX0); \ - F4(RX1); \ - F4(RX2); \ - F4(RX3); \ - \ - F4(RX0); \ - F4(RX1); \ - F4(RX2); \ - F4(RX3); - -#define read_block4() \ - movq (RIO), RX0; \ - rorq $32, RX0; \ - bswapq RX0; \ - \ - movq 8(RIO), RX1; \ - rorq $32, RX1; \ - bswapq RX1; \ - \ - movq 16(RIO), RX2; \ - rorq $32, RX2; \ - bswapq RX2; \ - \ - movq 24(RIO), RX3; \ - rorq $32, RX3; \ - bswapq RX3; - -#define write_block4() \ - bswapq RX0; \ - movq RX0, (RIO); \ - \ - bswapq RX1; \ - movq RX1, 8(RIO); \ - \ - bswapq RX2; \ - movq RX2, 16(RIO); \ - \ - bswapq RX3; \ - movq RX3, 24(RIO); - -#define xor_block4() \ - bswapq RX0; \ - xorq RX0, (RIO); \ - \ - bswapq RX1; \ - xorq RX1, 8(RIO); \ - \ - bswapq RX2; \ - xorq RX2, 16(RIO); \ - \ - bswapq RX3; \ - xorq RX3, 24(RIO); - -.align 8 -.global __blowfish_enc_blk_4way -.type __blowfish_enc_blk_4way,@function; - -__blowfish_enc_blk_4way: - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: bool, if true: xor output - */ - pushq %rbp; - pushq %rbx; - pushq %rcx; - - preload_roundkey_enc(0); - - movq %rsi, %r11; - movq %rdx, RIO; - - read_block4(); - - round_enc4(0); - round_enc4(2); - round_enc4(4); - round_enc4(6); - round_enc4(8); - round_enc4(10); - round_enc4(12); - 
round_enc4(14); - add_preloaded_roundkey4(); - - popq %rbp; - movq %r11, RIO; - - test %bpl, %bpl; - jnz __enc_xor4; - - write_block4(); - - popq %rbx; - popq %rbp; - ret; - -__enc_xor4: - xor_block4(); - - popq %rbx; - popq %rbp; - ret; - -.align 8 -.global blowfish_dec_blk_4way -.type blowfish_dec_blk_4way,@function; - -blowfish_dec_blk_4way: - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - pushq %rbp; - pushq %rbx; - preload_roundkey_dec(17); - - movq %rsi, %r11; - movq %rdx, RIO; - - read_block4(); - - round_dec4(17); - round_dec4(15); - round_dec4(13); - round_dec4(11); - round_dec4(9); - round_dec4(7); - round_dec4(5); - round_dec4(3); - add_preloaded_roundkey4(); - - movq %r11, RIO; - write_block4(); - - popq %rbx; - popq %rbp; - - ret; - diff --git a/ANDROID_3.4.5/arch/x86/crypto/blowfish_glue.c b/ANDROID_3.4.5/arch/x86/crypto/blowfish_glue.c deleted file mode 100644 index 7967474d..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/blowfish_glue.c +++ /dev/null @@ -1,489 +0,0 @@ -/* - * Glue Code for assembler optimized version of Blowfish - * - * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> - * - * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: - * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> - * CTR part based on code (crypto/ctr.c) by: - * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - */ - -#include <asm/processor.h> -#include <crypto/blowfish.h> -#include <linux/crypto.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/types.h> -#include <crypto/algapi.h> - -/* regular block cipher functions */ -asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, - bool xor); -asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); - -/* 4-way parallel cipher functions */ -asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, - const u8 *src); - -static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) -{ - __blowfish_enc_blk(ctx, dst, src, false); -} - -static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst, - const u8 *src) -{ - __blowfish_enc_blk(ctx, dst, src, true); -} - -static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, - const u8 *src) -{ - __blowfish_enc_blk_4way(ctx, dst, src, false); -} - -static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst, - const u8 *src) -{ - __blowfish_enc_blk_4way(ctx, dst, src, true); -} - -static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src); -} - -static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src); -} - -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - void (*fn)(struct bf_ctx *, u8 *, const u8 *), - void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *)) -{ - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = BF_BLOCK_SIZE; - unsigned int nbytes; - int err; - - err = blkcipher_walk_virt(desc, walk); - - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; - - /* Process four block batch */ - if (nbytes >= bsize * 4) { - do { - fn_4way(ctx, wdst, wsrc); - - wsrc += bsize * 4; - wdst += bsize * 4; - nbytes -= bsize * 4; - } while (nbytes >= bsize * 4); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - fn(ctx, wdst, wsrc); - - wsrc += bsize; - wdst += bsize; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - err = blkcipher_walk_done(desc, walk, nbytes); - } - - return err; -} - -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way); -} - -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = BF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - u64 *iv = (u64 *)walk->iv; - - do { - 
*dst = *src ^ *iv; - blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - *(u64 *)walk->iv = *iv; - return nbytes; -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} - -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = BF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - u64 ivs[4 - 1]; - u64 last_iv; - - /* Start of the last block. */ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - /* Process four block batch */ - if (nbytes >= bsize * 4) { - do { - nbytes -= bsize * 4 - bsize; - src -= 4 - 1; - dst -= 4 - 1; - - ivs[0] = src[0]; - ivs[1] = src[1]; - ivs[2] = src[2]; - - blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src); - - dst[1] ^= ivs[0]; - dst[2] ^= ivs[1]; - dst[3] ^= ivs[2]; - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - *dst ^= *(src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * 4); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - for (;;) { - blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - break; - - *dst ^= *(src - 1); - src -= 1; - dst -= 1; - } - -done: - *dst ^= *(u64 *)walk->iv; - *(u64 *)walk->iv = last_iv; - - return nbytes; -} - -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} - -static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk) -{ - u8 *ctrblk = walk->iv; - u8 keystream[BF_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - blowfish_enc_blk(ctx, keystream, ctrblk); - crypto_xor(keystream, src, nbytes); - memcpy(dst, keystream, nbytes); - - crypto_inc(ctrblk, BF_BLOCK_SIZE); -} - -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = BF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); - __be64 ctrblocks[4]; - - /* Process four block batch */ - if (nbytes >= bsize * 4) { - do { - if (dst != src) { - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; - } - - /* create ctrblks for parallel encrypt */ - ctrblocks[0] = cpu_to_be64(ctrblk++); - ctrblocks[1] = cpu_to_be64(ctrblk++); - ctrblocks[2] = cpu_to_be64(ctrblk++); - ctrblocks[3] = cpu_to_be64(ctrblk++); - - blowfish_enc_blk_xor_4way(ctx, (u8 *)dst, - (u8 *)ctrblocks); - - src += 4; - dst += 4; - } while ((nbytes -= bsize * 4) >= bsize * 
4); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - if (dst != src) - *dst = *src; - - ctrblocks[0] = cpu_to_be64(ctrblk++); - - blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks); - - src += 1; - dst += 1; - } while ((nbytes -= bsize) >= bsize); - -done: - *(__be64 *)walk->iv = cpu_to_be64(ctrblk); - return nbytes; -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE); - - while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) { - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - if (walk.nbytes) { - ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk); - err = blkcipher_walk_done(desc, &walk, 0); - } - - return err; -} - -static struct crypto_alg bf_algs[4] = { { - .cra_name = "blowfish", - .cra_driver_name = "blowfish-asm", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_algs[0].cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = BF_MIN_KEY_SIZE, - .cia_max_keysize = BF_MAX_KEY_SIZE, - .cia_setkey = blowfish_setkey, - .cia_encrypt = blowfish_encrypt, - .cia_decrypt = blowfish_decrypt, - } - } -}, { - .cra_name = "ecb(blowfish)", - .cra_driver_name = "ecb-blowfish-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_algs[1].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .setkey = blowfish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(blowfish)", - .cra_driver_name = "cbc-blowfish-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_algs[2].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .setkey = blowfish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(blowfish)", - .cra_driver_name = "ctr-blowfish-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_algs[3].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .setkey = blowfish_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -} }; - -static bool is_blacklisted_cpu(void) -{ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) - return false; - - if (boot_cpu_data.x86 == 0x0f) { - /* - * On Pentium 4, blowfish-x86_64 is slower than generic C - * implementation because use of 64bit rotates (which are really - * slow on P4). Therefore blacklist P4s. 
- */ - return true; - } - - return false; -} - -static int force; -module_param(force, int, 0); -MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); - -static int __init init(void) -{ - if (!force && is_blacklisted_cpu()) { - printk(KERN_INFO - "blowfish-x86_64: performance on this CPU " - "would be suboptimal: disabling " - "blowfish-x86_64.\n"); - return -ENODEV; - } - - return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs)); -} - -static void __exit fini(void) -{ - crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs)); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized"); -MODULE_ALIAS("blowfish"); -MODULE_ALIAS("blowfish-asm"); diff --git a/ANDROID_3.4.5/arch/x86/crypto/camellia-x86_64-asm_64.S b/ANDROID_3.4.5/arch/x86/crypto/camellia-x86_64-asm_64.S deleted file mode 100644 index 0b337433..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/camellia-x86_64-asm_64.S +++ /dev/null @@ -1,520 +0,0 @@ -/* - * Camellia Cipher Algorithm (x86_64) - * - * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - */ - -.file "camellia-x86_64-asm_64.S" -.text - -.extern camellia_sp10011110; -.extern camellia_sp22000222; -.extern camellia_sp03303033; -.extern camellia_sp00444404; -.extern camellia_sp02220222; -.extern camellia_sp30333033; -.extern camellia_sp44044404; -.extern camellia_sp11101110; - -#define sp10011110 camellia_sp10011110 -#define sp22000222 camellia_sp22000222 -#define sp03303033 camellia_sp03303033 -#define sp00444404 camellia_sp00444404 -#define sp02220222 camellia_sp02220222 -#define sp30333033 camellia_sp30333033 -#define sp44044404 camellia_sp44044404 -#define sp11101110 camellia_sp11101110 - -#define CAMELLIA_TABLE_BYTE_LEN 272 - -/* struct camellia_ctx: */ -#define key_table 0 -#define key_length CAMELLIA_TABLE_BYTE_LEN - -/* register macros */ -#define CTX %rdi -#define RIO %rsi -#define RIOd %esi - -#define RAB0 %rax -#define RCD0 %rcx -#define RAB1 %rbx -#define RCD1 %rdx - -#define RAB0d %eax -#define RCD0d %ecx -#define RAB1d %ebx -#define RCD1d %edx - -#define RAB0bl %al -#define RCD0bl %cl -#define RAB1bl %bl -#define RCD1bl %dl - -#define RAB0bh %ah -#define RCD0bh %ch -#define RAB1bh %bh -#define RCD1bh %dh - -#define RT0 %rsi -#define RT1 %rbp -#define RT2 %r8 - -#define RT0d %esi -#define RT1d %ebp -#define RT2d %r8d - -#define RT2bl %r8b - -#define RXOR %r9 -#define RRBP %r10 -#define RDST %r11 - -#define RXORd %r9d -#define RXORbl %r9b - -#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \ - movzbl ab ## bl, tmp2 ## d; \ - movzbl ab ## bh, tmp1 ## d; \ - rorq $16, ab; \ - xorq T0(, tmp2, 8), dst; \ - xorq T1(, tmp1, 8), dst; - -/********************************************************************** - 1-way camellia - 
**********************************************************************/ -#define roundsm(ab, subkey, cd) \ - movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \ - \ - xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \ - xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \ - xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \ - xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \ - \ - xorq RT2, cd ## 0; - -#define fls(l, r, kl, kr) \ - movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \ - andl l ## 0d, RT0d; \ - roll $1, RT0d; \ - shlq $32, RT0; \ - xorq RT0, l ## 0; \ - movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \ - orq r ## 0, RT1; \ - shrq $32, RT1; \ - xorq RT1, r ## 0; \ - \ - movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \ - orq l ## 0, RT2; \ - shrq $32, RT2; \ - xorq RT2, l ## 0; \ - movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \ - andl r ## 0d, RT0d; \ - roll $1, RT0d; \ - shlq $32, RT0; \ - xorq RT0, r ## 0; - -#define enc_rounds(i) \ - roundsm(RAB, i + 2, RCD); \ - roundsm(RCD, i + 3, RAB); \ - roundsm(RAB, i + 4, RCD); \ - roundsm(RCD, i + 5, RAB); \ - roundsm(RAB, i + 6, RCD); \ - roundsm(RCD, i + 7, RAB); - -#define enc_fls(i) \ - fls(RAB, RCD, i + 0, i + 1); - -#define enc_inpack() \ - movq (RIO), RAB0; \ - bswapq RAB0; \ - rolq $32, RAB0; \ - movq 4*2(RIO), RCD0; \ - bswapq RCD0; \ - rorq $32, RCD0; \ - xorq key_table(CTX), RAB0; - -#define enc_outunpack(op, max) \ - xorq key_table(CTX, max, 8), RCD0; \ - rorq $32, RCD0; \ - bswapq RCD0; \ - op ## q RCD0, (RIO); \ - rolq $32, RAB0; \ - bswapq RAB0; \ - op ## q RAB0, 4*2(RIO); - -#define dec_rounds(i) \ - roundsm(RAB, i + 7, RCD); \ - roundsm(RCD, i + 6, RAB); \ - roundsm(RAB, i + 5, RCD); \ - roundsm(RCD, i + 4, RAB); \ - roundsm(RAB, i + 3, RCD); \ - roundsm(RCD, i + 2, RAB); - -#define dec_fls(i) \ - fls(RAB, RCD, i + 1, i + 0); - -#define dec_inpack(max) \ - movq (RIO), RAB0; \ - bswapq RAB0; \ - rolq $32, RAB0; \ - movq 4*2(RIO), RCD0; \ - bswapq RCD0; \ - rorq $32, RCD0; \ - xorq key_table(CTX, max, 8), RAB0; - -#define dec_outunpack() \ - xorq key_table(CTX), RCD0; \ - rorq $32, RCD0; \ - bswapq RCD0; \ - movq RCD0, (RIO); \ - rolq $32, RAB0; \ - bswapq RAB0; \ - movq RAB0, 4*2(RIO); - -.global __camellia_enc_blk; -.type __camellia_enc_blk,@function; - -__camellia_enc_blk: - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: bool xor - */ - movq %rbp, RRBP; - - movq %rcx, RXOR; - movq %rsi, RDST; - movq %rdx, RIO; - - enc_inpack(); - - enc_rounds(0); - enc_fls(8); - enc_rounds(8); - enc_fls(16); - enc_rounds(16); - movl $24, RT1d; /* max */ - - cmpb $16, key_length(CTX); - je __enc_done; - - enc_fls(24); - enc_rounds(24); - movl $32, RT1d; /* max */ - -__enc_done: - testb RXORbl, RXORbl; - movq RDST, RIO; - - jnz __enc_xor; - - enc_outunpack(mov, RT1); - - movq RRBP, %rbp; - ret; - -__enc_xor: - enc_outunpack(xor, RT1); - - movq RRBP, %rbp; - ret; - -.global camellia_dec_blk; -.type camellia_dec_blk,@function; - -camellia_dec_blk: - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - cmpl $16, key_length(CTX); - movl $32, RT2d; - movl $24, RXORd; - cmovel RXORd, RT2d; /* max */ - - movq %rbp, RRBP; - movq %rsi, RDST; - movq %rdx, RIO; - - dec_inpack(RT2); - - cmpb $24, RT2bl; - je __dec_rounds16; - - dec_rounds(24); - dec_fls(24); - -__dec_rounds16: - dec_rounds(16); - dec_fls(16); - dec_rounds(8); - dec_fls(8); - dec_rounds(0); - - movq RDST, RIO; - - dec_outunpack(); - - movq RRBP, %rbp; - ret; - 
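The fls macro above packs Camellia's FL and FL^-1 layer into one sequence: it applies FL to the l register pair using subkey kl and FL^-1 to the r register pair using subkey kr, with the subkey loads interleaved for scheduling. As a reference for what each half computes, here is a short C sketch of the textbook definitions from RFC 3713 (helper names are illustrative, not from this file):

#include <stdint.h>

static inline uint32_t rol32_1(uint32_t x)
{
	return (x << 1) | (x >> 31);
}

/* FL: input X = XL||XR, subkey KE = KL||KR, all halves 32-bit. */
static uint64_t camellia_fl(uint64_t x, uint64_t ke)
{
	uint32_t xl = (uint32_t)(x >> 32), xr = (uint32_t)x;
	uint32_t kl = (uint32_t)(ke >> 32), kr = (uint32_t)ke;

	xr ^= rol32_1(xl & kl);	/* and, rotate left by one, xor */
	xl ^= (xr | kr);	/* or, xor */
	return ((uint64_t)xl << 32) | xr;
}

/* FL^-1 undoes FL by applying the same two steps in reverse order. */
static uint64_t camellia_fl_inv(uint64_t y, uint64_t ke)
{
	uint32_t yl = (uint32_t)(y >> 32), yr = (uint32_t)y;
	uint32_t kl = (uint32_t)(ke >> 32), kr = (uint32_t)ke;

	yl ^= (yr | kr);
	yr ^= rol32_1(yl & kl);
	return ((uint64_t)yl << 32) | yr;
}
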
-/********************************************************************** - 2-way camellia - **********************************************************************/ -#define roundsm2(ab, subkey, cd) \ - movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \ - xorq RT2, cd ## 1; \ - \ - xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \ - xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \ - xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \ - xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \ - \ - xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \ - xorq RT2, cd ## 0; \ - xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \ - xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \ - xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1); - -#define fls2(l, r, kl, kr) \ - movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \ - andl l ## 0d, RT0d; \ - roll $1, RT0d; \ - shlq $32, RT0; \ - xorq RT0, l ## 0; \ - movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \ - orq r ## 0, RT1; \ - shrq $32, RT1; \ - xorq RT1, r ## 0; \ - \ - movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \ - andl l ## 1d, RT2d; \ - roll $1, RT2d; \ - shlq $32, RT2; \ - xorq RT2, l ## 1; \ - movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \ - orq r ## 1, RT0; \ - shrq $32, RT0; \ - xorq RT0, r ## 1; \ - \ - movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \ - orq l ## 0, RT1; \ - shrq $32, RT1; \ - xorq RT1, l ## 0; \ - movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \ - andl r ## 0d, RT2d; \ - roll $1, RT2d; \ - shlq $32, RT2; \ - xorq RT2, r ## 0; \ - \ - movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \ - orq l ## 1, RT0; \ - shrq $32, RT0; \ - xorq RT0, l ## 1; \ - movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \ - andl r ## 1d, RT1d; \ - roll $1, RT1d; \ - shlq $32, RT1; \ - xorq RT1, r ## 1; - -#define enc_rounds2(i) \ - roundsm2(RAB, i + 2, RCD); \ - roundsm2(RCD, i + 3, RAB); \ - roundsm2(RAB, i + 4, RCD); \ - roundsm2(RCD, i + 5, RAB); \ - roundsm2(RAB, i + 6, RCD); \ - roundsm2(RCD, i + 7, RAB); - -#define enc_fls2(i) \ - fls2(RAB, RCD, i + 0, i + 1); - -#define enc_inpack2() \ - movq (RIO), RAB0; \ - bswapq RAB0; \ - rorq $32, RAB0; \ - movq 4*2(RIO), RCD0; \ - bswapq RCD0; \ - rolq $32, RCD0; \ - xorq key_table(CTX), RAB0; \ - \ - movq 8*2(RIO), RAB1; \ - bswapq RAB1; \ - rorq $32, RAB1; \ - movq 12*2(RIO), RCD1; \ - bswapq RCD1; \ - rolq $32, RCD1; \ - xorq key_table(CTX), RAB1; - -#define enc_outunpack2(op, max) \ - xorq key_table(CTX, max, 8), RCD0; \ - rolq $32, RCD0; \ - bswapq RCD0; \ - op ## q RCD0, (RIO); \ - rorq $32, RAB0; \ - bswapq RAB0; \ - op ## q RAB0, 4*2(RIO); \ - \ - xorq key_table(CTX, max, 8), RCD1; \ - rolq $32, RCD1; \ - bswapq RCD1; \ - op ## q RCD1, 8*2(RIO); \ - rorq $32, RAB1; \ - bswapq RAB1; \ - op ## q RAB1, 12*2(RIO); - -#define dec_rounds2(i) \ - roundsm2(RAB, i + 7, RCD); \ - roundsm2(RCD, i + 6, RAB); \ - roundsm2(RAB, i + 5, RCD); \ - roundsm2(RCD, i + 4, RAB); \ - roundsm2(RAB, i + 3, RCD); \ - roundsm2(RCD, i + 2, RAB); - -#define dec_fls2(i) \ - fls2(RAB, RCD, i + 1, i + 0); - -#define dec_inpack2(max) \ - movq (RIO), RAB0; \ - bswapq RAB0; \ - rorq $32, RAB0; \ - movq 4*2(RIO), RCD0; \ - bswapq RCD0; \ - rolq $32, RCD0; \ - xorq key_table(CTX, max, 8), RAB0; \ - \ - movq 8*2(RIO), RAB1; \ - bswapq RAB1; \ - rorq $32, RAB1; \ - movq 12*2(RIO), RCD1; \ - bswapq RCD1; \ - rolq $32, RCD1; \ - xorq key_table(CTX, max, 8), RAB1; - -#define dec_outunpack2() \ - xorq key_table(CTX), RCD0; \ - rolq $32, RCD0; \ - bswapq 
RCD0; \ - movq RCD0, (RIO); \ - rorq $32, RAB0; \ - bswapq RAB0; \ - movq RAB0, 4*2(RIO); \ - \ - xorq key_table(CTX), RCD1; \ - rolq $32, RCD1; \ - bswapq RCD1; \ - movq RCD1, 8*2(RIO); \ - rorq $32, RAB1; \ - bswapq RAB1; \ - movq RAB1, 12*2(RIO); - -.global __camellia_enc_blk_2way; -.type __camellia_enc_blk_2way,@function; - -__camellia_enc_blk_2way: - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: bool xor - */ - pushq %rbx; - - movq %rbp, RRBP; - movq %rcx, RXOR; - movq %rsi, RDST; - movq %rdx, RIO; - - enc_inpack2(); - - enc_rounds2(0); - enc_fls2(8); - enc_rounds2(8); - enc_fls2(16); - enc_rounds2(16); - movl $24, RT2d; /* max */ - - cmpb $16, key_length(CTX); - je __enc2_done; - - enc_fls2(24); - enc_rounds2(24); - movl $32, RT2d; /* max */ - -__enc2_done: - test RXORbl, RXORbl; - movq RDST, RIO; - jnz __enc2_xor; - - enc_outunpack2(mov, RT2); - - movq RRBP, %rbp; - popq %rbx; - ret; - -__enc2_xor: - enc_outunpack2(xor, RT2); - - movq RRBP, %rbp; - popq %rbx; - ret; - -.global camellia_dec_blk_2way; -.type camellia_dec_blk_2way,@function; - -camellia_dec_blk_2way: - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - cmpl $16, key_length(CTX); - movl $32, RT2d; - movl $24, RXORd; - cmovel RXORd, RT2d; /* max */ - - movq %rbx, RXOR; - movq %rbp, RRBP; - movq %rsi, RDST; - movq %rdx, RIO; - - dec_inpack2(RT2); - - cmpb $24, RT2bl; - je __dec2_rounds16; - - dec_rounds2(24); - dec_fls2(24); - -__dec2_rounds16: - dec_rounds2(16); - dec_fls2(16); - dec_rounds2(8); - dec_fls2(8); - dec_rounds2(0); - - movq RDST, RIO; - - dec_outunpack2(); - - movq RRBP, %rbp; - movq RXOR, %rbx; - ret; diff --git a/ANDROID_3.4.5/arch/x86/crypto/camellia_glue.c b/ANDROID_3.4.5/arch/x86/crypto/camellia_glue.c deleted file mode 100644 index 3306dc0b..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/camellia_glue.c +++ /dev/null @@ -1,1952 +0,0 @@ -/* - * Glue Code for assembler optimized version of Camellia - * - * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> - * - * Camellia parts based on code by: - * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation) - * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: - * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> - * CTR part based on code (crypto/ctr.c) by: - * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - */ - -#include <asm/processor.h> -#include <asm/unaligned.h> -#include <linux/crypto.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/types.h> -#include <crypto/algapi.h> -#include <crypto/b128ops.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> - -#define CAMELLIA_MIN_KEY_SIZE 16 -#define CAMELLIA_MAX_KEY_SIZE 32 -#define CAMELLIA_BLOCK_SIZE 16 -#define CAMELLIA_TABLE_BYTE_LEN 272 - -struct camellia_ctx { - u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; - u32 key_length; -}; - -/* regular block cipher functions */ -asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); - -/* 2-way parallel cipher functions */ -asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); - -static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) -{ - __camellia_enc_blk(ctx, dst, src, false); -} - -static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) -{ - __camellia_enc_blk(ctx, dst, src, true); -} - -static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) -{ - __camellia_enc_blk_2way(ctx, dst, src, false); -} - -static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) -{ - __camellia_enc_blk_2way(ctx, dst, src, true); -} - -static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - camellia_enc_blk(crypto_tfm_ctx(tfm), dst, src); -} - -static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - camellia_dec_blk(crypto_tfm_ctx(tfm), dst, src); -} - -/* camellia sboxes */ -const u64 camellia_sp10011110[256] = { - 0x7000007070707000, 0x8200008282828200, 0x2c00002c2c2c2c00, - 0xec0000ecececec00, 0xb30000b3b3b3b300, 0x2700002727272700, - 0xc00000c0c0c0c000, 0xe50000e5e5e5e500, 0xe40000e4e4e4e400, - 0x8500008585858500, 0x5700005757575700, 0x3500003535353500, - 0xea0000eaeaeaea00, 0x0c00000c0c0c0c00, 0xae0000aeaeaeae00, - 0x4100004141414100, 0x2300002323232300, 0xef0000efefefef00, - 0x6b00006b6b6b6b00, 0x9300009393939300, 0x4500004545454500, - 0x1900001919191900, 0xa50000a5a5a5a500, 0x2100002121212100, - 0xed0000edededed00, 0x0e00000e0e0e0e00, 0x4f00004f4f4f4f00, - 0x4e00004e4e4e4e00, 0x1d00001d1d1d1d00, 0x6500006565656500, - 0x9200009292929200, 0xbd0000bdbdbdbd00, 0x8600008686868600, - 0xb80000b8b8b8b800, 0xaf0000afafafaf00, 0x8f00008f8f8f8f00, - 0x7c00007c7c7c7c00, 0xeb0000ebebebeb00, 0x1f00001f1f1f1f00, - 0xce0000cececece00, 0x3e00003e3e3e3e00, 0x3000003030303000, - 0xdc0000dcdcdcdc00, 0x5f00005f5f5f5f00, 0x5e00005e5e5e5e00, - 0xc50000c5c5c5c500, 0x0b00000b0b0b0b00, 0x1a00001a1a1a1a00, - 0xa60000a6a6a6a600, 0xe10000e1e1e1e100, 0x3900003939393900, - 0xca0000cacacaca00, 0xd50000d5d5d5d500, 0x4700004747474700, - 0x5d00005d5d5d5d00, 0x3d00003d3d3d3d00, 0xd90000d9d9d9d900, - 0x0100000101010100, 0x5a00005a5a5a5a00, 0xd60000d6d6d6d600, - 0x5100005151515100, 0x5600005656565600, 0x6c00006c6c6c6c00, - 0x4d00004d4d4d4d00, 0x8b00008b8b8b8b00, 0x0d00000d0d0d0d00, - 0x9a00009a9a9a9a00, 0x6600006666666600, 
0xfb0000fbfbfbfb00, - 0xcc0000cccccccc00, 0xb00000b0b0b0b000, 0x2d00002d2d2d2d00, - 0x7400007474747400, 0x1200001212121200, 0x2b00002b2b2b2b00, - 0x2000002020202000, 0xf00000f0f0f0f000, 0xb10000b1b1b1b100, - 0x8400008484848400, 0x9900009999999900, 0xdf0000dfdfdfdf00, - 0x4c00004c4c4c4c00, 0xcb0000cbcbcbcb00, 0xc20000c2c2c2c200, - 0x3400003434343400, 0x7e00007e7e7e7e00, 0x7600007676767600, - 0x0500000505050500, 0x6d00006d6d6d6d00, 0xb70000b7b7b7b700, - 0xa90000a9a9a9a900, 0x3100003131313100, 0xd10000d1d1d1d100, - 0x1700001717171700, 0x0400000404040400, 0xd70000d7d7d7d700, - 0x1400001414141400, 0x5800005858585800, 0x3a00003a3a3a3a00, - 0x6100006161616100, 0xde0000dededede00, 0x1b00001b1b1b1b00, - 0x1100001111111100, 0x1c00001c1c1c1c00, 0x3200003232323200, - 0x0f00000f0f0f0f00, 0x9c00009c9c9c9c00, 0x1600001616161600, - 0x5300005353535300, 0x1800001818181800, 0xf20000f2f2f2f200, - 0x2200002222222200, 0xfe0000fefefefe00, 0x4400004444444400, - 0xcf0000cfcfcfcf00, 0xb20000b2b2b2b200, 0xc30000c3c3c3c300, - 0xb50000b5b5b5b500, 0x7a00007a7a7a7a00, 0x9100009191919100, - 0x2400002424242400, 0x0800000808080800, 0xe80000e8e8e8e800, - 0xa80000a8a8a8a800, 0x6000006060606000, 0xfc0000fcfcfcfc00, - 0x6900006969696900, 0x5000005050505000, 0xaa0000aaaaaaaa00, - 0xd00000d0d0d0d000, 0xa00000a0a0a0a000, 0x7d00007d7d7d7d00, - 0xa10000a1a1a1a100, 0x8900008989898900, 0x6200006262626200, - 0x9700009797979700, 0x5400005454545400, 0x5b00005b5b5b5b00, - 0x1e00001e1e1e1e00, 0x9500009595959500, 0xe00000e0e0e0e000, - 0xff0000ffffffff00, 0x6400006464646400, 0xd20000d2d2d2d200, - 0x1000001010101000, 0xc40000c4c4c4c400, 0x0000000000000000, - 0x4800004848484800, 0xa30000a3a3a3a300, 0xf70000f7f7f7f700, - 0x7500007575757500, 0xdb0000dbdbdbdb00, 0x8a00008a8a8a8a00, - 0x0300000303030300, 0xe60000e6e6e6e600, 0xda0000dadadada00, - 0x0900000909090900, 0x3f00003f3f3f3f00, 0xdd0000dddddddd00, - 0x9400009494949400, 0x8700008787878700, 0x5c00005c5c5c5c00, - 0x8300008383838300, 0x0200000202020200, 0xcd0000cdcdcdcd00, - 0x4a00004a4a4a4a00, 0x9000009090909000, 0x3300003333333300, - 0x7300007373737300, 0x6700006767676700, 0xf60000f6f6f6f600, - 0xf30000f3f3f3f300, 0x9d00009d9d9d9d00, 0x7f00007f7f7f7f00, - 0xbf0000bfbfbfbf00, 0xe20000e2e2e2e200, 0x5200005252525200, - 0x9b00009b9b9b9b00, 0xd80000d8d8d8d800, 0x2600002626262600, - 0xc80000c8c8c8c800, 0x3700003737373700, 0xc60000c6c6c6c600, - 0x3b00003b3b3b3b00, 0x8100008181818100, 0x9600009696969600, - 0x6f00006f6f6f6f00, 0x4b00004b4b4b4b00, 0x1300001313131300, - 0xbe0000bebebebe00, 0x6300006363636300, 0x2e00002e2e2e2e00, - 0xe90000e9e9e9e900, 0x7900007979797900, 0xa70000a7a7a7a700, - 0x8c00008c8c8c8c00, 0x9f00009f9f9f9f00, 0x6e00006e6e6e6e00, - 0xbc0000bcbcbcbc00, 0x8e00008e8e8e8e00, 0x2900002929292900, - 0xf50000f5f5f5f500, 0xf90000f9f9f9f900, 0xb60000b6b6b6b600, - 0x2f00002f2f2f2f00, 0xfd0000fdfdfdfd00, 0xb40000b4b4b4b400, - 0x5900005959595900, 0x7800007878787800, 0x9800009898989800, - 0x0600000606060600, 0x6a00006a6a6a6a00, 0xe70000e7e7e7e700, - 0x4600004646464600, 0x7100007171717100, 0xba0000babababa00, - 0xd40000d4d4d4d400, 0x2500002525252500, 0xab0000abababab00, - 0x4200004242424200, 0x8800008888888800, 0xa20000a2a2a2a200, - 0x8d00008d8d8d8d00, 0xfa0000fafafafa00, 0x7200007272727200, - 0x0700000707070700, 0xb90000b9b9b9b900, 0x5500005555555500, - 0xf80000f8f8f8f800, 0xee0000eeeeeeee00, 0xac0000acacacac00, - 0x0a00000a0a0a0a00, 0x3600003636363600, 0x4900004949494900, - 0x2a00002a2a2a2a00, 0x6800006868686800, 0x3c00003c3c3c3c00, - 0x3800003838383800, 0xf10000f1f1f1f100, 0xa40000a4a4a4a400, 
- 0x4000004040404000, 0x2800002828282800, 0xd30000d3d3d3d300, - 0x7b00007b7b7b7b00, 0xbb0000bbbbbbbb00, 0xc90000c9c9c9c900, - 0x4300004343434300, 0xc10000c1c1c1c100, 0x1500001515151500, - 0xe30000e3e3e3e300, 0xad0000adadadad00, 0xf40000f4f4f4f400, - 0x7700007777777700, 0xc70000c7c7c7c700, 0x8000008080808000, - 0x9e00009e9e9e9e00, -}; - -const u64 camellia_sp22000222[256] = { - 0xe0e0000000e0e0e0, 0x0505000000050505, 0x5858000000585858, - 0xd9d9000000d9d9d9, 0x6767000000676767, 0x4e4e0000004e4e4e, - 0x8181000000818181, 0xcbcb000000cbcbcb, 0xc9c9000000c9c9c9, - 0x0b0b0000000b0b0b, 0xaeae000000aeaeae, 0x6a6a0000006a6a6a, - 0xd5d5000000d5d5d5, 0x1818000000181818, 0x5d5d0000005d5d5d, - 0x8282000000828282, 0x4646000000464646, 0xdfdf000000dfdfdf, - 0xd6d6000000d6d6d6, 0x2727000000272727, 0x8a8a0000008a8a8a, - 0x3232000000323232, 0x4b4b0000004b4b4b, 0x4242000000424242, - 0xdbdb000000dbdbdb, 0x1c1c0000001c1c1c, 0x9e9e0000009e9e9e, - 0x9c9c0000009c9c9c, 0x3a3a0000003a3a3a, 0xcaca000000cacaca, - 0x2525000000252525, 0x7b7b0000007b7b7b, 0x0d0d0000000d0d0d, - 0x7171000000717171, 0x5f5f0000005f5f5f, 0x1f1f0000001f1f1f, - 0xf8f8000000f8f8f8, 0xd7d7000000d7d7d7, 0x3e3e0000003e3e3e, - 0x9d9d0000009d9d9d, 0x7c7c0000007c7c7c, 0x6060000000606060, - 0xb9b9000000b9b9b9, 0xbebe000000bebebe, 0xbcbc000000bcbcbc, - 0x8b8b0000008b8b8b, 0x1616000000161616, 0x3434000000343434, - 0x4d4d0000004d4d4d, 0xc3c3000000c3c3c3, 0x7272000000727272, - 0x9595000000959595, 0xabab000000ababab, 0x8e8e0000008e8e8e, - 0xbaba000000bababa, 0x7a7a0000007a7a7a, 0xb3b3000000b3b3b3, - 0x0202000000020202, 0xb4b4000000b4b4b4, 0xadad000000adadad, - 0xa2a2000000a2a2a2, 0xacac000000acacac, 0xd8d8000000d8d8d8, - 0x9a9a0000009a9a9a, 0x1717000000171717, 0x1a1a0000001a1a1a, - 0x3535000000353535, 0xcccc000000cccccc, 0xf7f7000000f7f7f7, - 0x9999000000999999, 0x6161000000616161, 0x5a5a0000005a5a5a, - 0xe8e8000000e8e8e8, 0x2424000000242424, 0x5656000000565656, - 0x4040000000404040, 0xe1e1000000e1e1e1, 0x6363000000636363, - 0x0909000000090909, 0x3333000000333333, 0xbfbf000000bfbfbf, - 0x9898000000989898, 0x9797000000979797, 0x8585000000858585, - 0x6868000000686868, 0xfcfc000000fcfcfc, 0xecec000000ececec, - 0x0a0a0000000a0a0a, 0xdada000000dadada, 0x6f6f0000006f6f6f, - 0x5353000000535353, 0x6262000000626262, 0xa3a3000000a3a3a3, - 0x2e2e0000002e2e2e, 0x0808000000080808, 0xafaf000000afafaf, - 0x2828000000282828, 0xb0b0000000b0b0b0, 0x7474000000747474, - 0xc2c2000000c2c2c2, 0xbdbd000000bdbdbd, 0x3636000000363636, - 0x2222000000222222, 0x3838000000383838, 0x6464000000646464, - 0x1e1e0000001e1e1e, 0x3939000000393939, 0x2c2c0000002c2c2c, - 0xa6a6000000a6a6a6, 0x3030000000303030, 0xe5e5000000e5e5e5, - 0x4444000000444444, 0xfdfd000000fdfdfd, 0x8888000000888888, - 0x9f9f0000009f9f9f, 0x6565000000656565, 0x8787000000878787, - 0x6b6b0000006b6b6b, 0xf4f4000000f4f4f4, 0x2323000000232323, - 0x4848000000484848, 0x1010000000101010, 0xd1d1000000d1d1d1, - 0x5151000000515151, 0xc0c0000000c0c0c0, 0xf9f9000000f9f9f9, - 0xd2d2000000d2d2d2, 0xa0a0000000a0a0a0, 0x5555000000555555, - 0xa1a1000000a1a1a1, 0x4141000000414141, 0xfafa000000fafafa, - 0x4343000000434343, 0x1313000000131313, 0xc4c4000000c4c4c4, - 0x2f2f0000002f2f2f, 0xa8a8000000a8a8a8, 0xb6b6000000b6b6b6, - 0x3c3c0000003c3c3c, 0x2b2b0000002b2b2b, 0xc1c1000000c1c1c1, - 0xffff000000ffffff, 0xc8c8000000c8c8c8, 0xa5a5000000a5a5a5, - 0x2020000000202020, 0x8989000000898989, 0x0000000000000000, - 0x9090000000909090, 0x4747000000474747, 0xefef000000efefef, - 0xeaea000000eaeaea, 0xb7b7000000b7b7b7, 0x1515000000151515, - 
0x0606000000060606, 0xcdcd000000cdcdcd, 0xb5b5000000b5b5b5, - 0x1212000000121212, 0x7e7e0000007e7e7e, 0xbbbb000000bbbbbb, - 0x2929000000292929, 0x0f0f0000000f0f0f, 0xb8b8000000b8b8b8, - 0x0707000000070707, 0x0404000000040404, 0x9b9b0000009b9b9b, - 0x9494000000949494, 0x2121000000212121, 0x6666000000666666, - 0xe6e6000000e6e6e6, 0xcece000000cecece, 0xeded000000ededed, - 0xe7e7000000e7e7e7, 0x3b3b0000003b3b3b, 0xfefe000000fefefe, - 0x7f7f0000007f7f7f, 0xc5c5000000c5c5c5, 0xa4a4000000a4a4a4, - 0x3737000000373737, 0xb1b1000000b1b1b1, 0x4c4c0000004c4c4c, - 0x9191000000919191, 0x6e6e0000006e6e6e, 0x8d8d0000008d8d8d, - 0x7676000000767676, 0x0303000000030303, 0x2d2d0000002d2d2d, - 0xdede000000dedede, 0x9696000000969696, 0x2626000000262626, - 0x7d7d0000007d7d7d, 0xc6c6000000c6c6c6, 0x5c5c0000005c5c5c, - 0xd3d3000000d3d3d3, 0xf2f2000000f2f2f2, 0x4f4f0000004f4f4f, - 0x1919000000191919, 0x3f3f0000003f3f3f, 0xdcdc000000dcdcdc, - 0x7979000000797979, 0x1d1d0000001d1d1d, 0x5252000000525252, - 0xebeb000000ebebeb, 0xf3f3000000f3f3f3, 0x6d6d0000006d6d6d, - 0x5e5e0000005e5e5e, 0xfbfb000000fbfbfb, 0x6969000000696969, - 0xb2b2000000b2b2b2, 0xf0f0000000f0f0f0, 0x3131000000313131, - 0x0c0c0000000c0c0c, 0xd4d4000000d4d4d4, 0xcfcf000000cfcfcf, - 0x8c8c0000008c8c8c, 0xe2e2000000e2e2e2, 0x7575000000757575, - 0xa9a9000000a9a9a9, 0x4a4a0000004a4a4a, 0x5757000000575757, - 0x8484000000848484, 0x1111000000111111, 0x4545000000454545, - 0x1b1b0000001b1b1b, 0xf5f5000000f5f5f5, 0xe4e4000000e4e4e4, - 0x0e0e0000000e0e0e, 0x7373000000737373, 0xaaaa000000aaaaaa, - 0xf1f1000000f1f1f1, 0xdddd000000dddddd, 0x5959000000595959, - 0x1414000000141414, 0x6c6c0000006c6c6c, 0x9292000000929292, - 0x5454000000545454, 0xd0d0000000d0d0d0, 0x7878000000787878, - 0x7070000000707070, 0xe3e3000000e3e3e3, 0x4949000000494949, - 0x8080000000808080, 0x5050000000505050, 0xa7a7000000a7a7a7, - 0xf6f6000000f6f6f6, 0x7777000000777777, 0x9393000000939393, - 0x8686000000868686, 0x8383000000838383, 0x2a2a0000002a2a2a, - 0xc7c7000000c7c7c7, 0x5b5b0000005b5b5b, 0xe9e9000000e9e9e9, - 0xeeee000000eeeeee, 0x8f8f0000008f8f8f, 0x0101000000010101, - 0x3d3d0000003d3d3d, -}; - -const u64 camellia_sp03303033[256] = { - 0x0038380038003838, 0x0041410041004141, 0x0016160016001616, - 0x0076760076007676, 0x00d9d900d900d9d9, 0x0093930093009393, - 0x0060600060006060, 0x00f2f200f200f2f2, 0x0072720072007272, - 0x00c2c200c200c2c2, 0x00abab00ab00abab, 0x009a9a009a009a9a, - 0x0075750075007575, 0x0006060006000606, 0x0057570057005757, - 0x00a0a000a000a0a0, 0x0091910091009191, 0x00f7f700f700f7f7, - 0x00b5b500b500b5b5, 0x00c9c900c900c9c9, 0x00a2a200a200a2a2, - 0x008c8c008c008c8c, 0x00d2d200d200d2d2, 0x0090900090009090, - 0x00f6f600f600f6f6, 0x0007070007000707, 0x00a7a700a700a7a7, - 0x0027270027002727, 0x008e8e008e008e8e, 0x00b2b200b200b2b2, - 0x0049490049004949, 0x00dede00de00dede, 0x0043430043004343, - 0x005c5c005c005c5c, 0x00d7d700d700d7d7, 0x00c7c700c700c7c7, - 0x003e3e003e003e3e, 0x00f5f500f500f5f5, 0x008f8f008f008f8f, - 0x0067670067006767, 0x001f1f001f001f1f, 0x0018180018001818, - 0x006e6e006e006e6e, 0x00afaf00af00afaf, 0x002f2f002f002f2f, - 0x00e2e200e200e2e2, 0x0085850085008585, 0x000d0d000d000d0d, - 0x0053530053005353, 0x00f0f000f000f0f0, 0x009c9c009c009c9c, - 0x0065650065006565, 0x00eaea00ea00eaea, 0x00a3a300a300a3a3, - 0x00aeae00ae00aeae, 0x009e9e009e009e9e, 0x00ecec00ec00ecec, - 0x0080800080008080, 0x002d2d002d002d2d, 0x006b6b006b006b6b, - 0x00a8a800a800a8a8, 0x002b2b002b002b2b, 0x0036360036003636, - 0x00a6a600a600a6a6, 0x00c5c500c500c5c5, 0x0086860086008686, - 
0x004d4d004d004d4d, 0x0033330033003333, 0x00fdfd00fd00fdfd, - 0x0066660066006666, 0x0058580058005858, 0x0096960096009696, - 0x003a3a003a003a3a, 0x0009090009000909, 0x0095950095009595, - 0x0010100010001010, 0x0078780078007878, 0x00d8d800d800d8d8, - 0x0042420042004242, 0x00cccc00cc00cccc, 0x00efef00ef00efef, - 0x0026260026002626, 0x00e5e500e500e5e5, 0x0061610061006161, - 0x001a1a001a001a1a, 0x003f3f003f003f3f, 0x003b3b003b003b3b, - 0x0082820082008282, 0x00b6b600b600b6b6, 0x00dbdb00db00dbdb, - 0x00d4d400d400d4d4, 0x0098980098009898, 0x00e8e800e800e8e8, - 0x008b8b008b008b8b, 0x0002020002000202, 0x00ebeb00eb00ebeb, - 0x000a0a000a000a0a, 0x002c2c002c002c2c, 0x001d1d001d001d1d, - 0x00b0b000b000b0b0, 0x006f6f006f006f6f, 0x008d8d008d008d8d, - 0x0088880088008888, 0x000e0e000e000e0e, 0x0019190019001919, - 0x0087870087008787, 0x004e4e004e004e4e, 0x000b0b000b000b0b, - 0x00a9a900a900a9a9, 0x000c0c000c000c0c, 0x0079790079007979, - 0x0011110011001111, 0x007f7f007f007f7f, 0x0022220022002222, - 0x00e7e700e700e7e7, 0x0059590059005959, 0x00e1e100e100e1e1, - 0x00dada00da00dada, 0x003d3d003d003d3d, 0x00c8c800c800c8c8, - 0x0012120012001212, 0x0004040004000404, 0x0074740074007474, - 0x0054540054005454, 0x0030300030003030, 0x007e7e007e007e7e, - 0x00b4b400b400b4b4, 0x0028280028002828, 0x0055550055005555, - 0x0068680068006868, 0x0050500050005050, 0x00bebe00be00bebe, - 0x00d0d000d000d0d0, 0x00c4c400c400c4c4, 0x0031310031003131, - 0x00cbcb00cb00cbcb, 0x002a2a002a002a2a, 0x00adad00ad00adad, - 0x000f0f000f000f0f, 0x00caca00ca00caca, 0x0070700070007070, - 0x00ffff00ff00ffff, 0x0032320032003232, 0x0069690069006969, - 0x0008080008000808, 0x0062620062006262, 0x0000000000000000, - 0x0024240024002424, 0x00d1d100d100d1d1, 0x00fbfb00fb00fbfb, - 0x00baba00ba00baba, 0x00eded00ed00eded, 0x0045450045004545, - 0x0081810081008181, 0x0073730073007373, 0x006d6d006d006d6d, - 0x0084840084008484, 0x009f9f009f009f9f, 0x00eeee00ee00eeee, - 0x004a4a004a004a4a, 0x00c3c300c300c3c3, 0x002e2e002e002e2e, - 0x00c1c100c100c1c1, 0x0001010001000101, 0x00e6e600e600e6e6, - 0x0025250025002525, 0x0048480048004848, 0x0099990099009999, - 0x00b9b900b900b9b9, 0x00b3b300b300b3b3, 0x007b7b007b007b7b, - 0x00f9f900f900f9f9, 0x00cece00ce00cece, 0x00bfbf00bf00bfbf, - 0x00dfdf00df00dfdf, 0x0071710071007171, 0x0029290029002929, - 0x00cdcd00cd00cdcd, 0x006c6c006c006c6c, 0x0013130013001313, - 0x0064640064006464, 0x009b9b009b009b9b, 0x0063630063006363, - 0x009d9d009d009d9d, 0x00c0c000c000c0c0, 0x004b4b004b004b4b, - 0x00b7b700b700b7b7, 0x00a5a500a500a5a5, 0x0089890089008989, - 0x005f5f005f005f5f, 0x00b1b100b100b1b1, 0x0017170017001717, - 0x00f4f400f400f4f4, 0x00bcbc00bc00bcbc, 0x00d3d300d300d3d3, - 0x0046460046004646, 0x00cfcf00cf00cfcf, 0x0037370037003737, - 0x005e5e005e005e5e, 0x0047470047004747, 0x0094940094009494, - 0x00fafa00fa00fafa, 0x00fcfc00fc00fcfc, 0x005b5b005b005b5b, - 0x0097970097009797, 0x00fefe00fe00fefe, 0x005a5a005a005a5a, - 0x00acac00ac00acac, 0x003c3c003c003c3c, 0x004c4c004c004c4c, - 0x0003030003000303, 0x0035350035003535, 0x00f3f300f300f3f3, - 0x0023230023002323, 0x00b8b800b800b8b8, 0x005d5d005d005d5d, - 0x006a6a006a006a6a, 0x0092920092009292, 0x00d5d500d500d5d5, - 0x0021210021002121, 0x0044440044004444, 0x0051510051005151, - 0x00c6c600c600c6c6, 0x007d7d007d007d7d, 0x0039390039003939, - 0x0083830083008383, 0x00dcdc00dc00dcdc, 0x00aaaa00aa00aaaa, - 0x007c7c007c007c7c, 0x0077770077007777, 0x0056560056005656, - 0x0005050005000505, 0x001b1b001b001b1b, 0x00a4a400a400a4a4, - 0x0015150015001515, 0x0034340034003434, 0x001e1e001e001e1e, - 0x001c1c001c001c1c, 
0x00f8f800f800f8f8, 0x0052520052005252, - 0x0020200020002020, 0x0014140014001414, 0x00e9e900e900e9e9, - 0x00bdbd00bd00bdbd, 0x00dddd00dd00dddd, 0x00e4e400e400e4e4, - 0x00a1a100a100a1a1, 0x00e0e000e000e0e0, 0x008a8a008a008a8a, - 0x00f1f100f100f1f1, 0x00d6d600d600d6d6, 0x007a7a007a007a7a, - 0x00bbbb00bb00bbbb, 0x00e3e300e300e3e3, 0x0040400040004040, - 0x004f4f004f004f4f, -}; - -const u64 camellia_sp00444404[256] = { - 0x0000707070700070, 0x00002c2c2c2c002c, 0x0000b3b3b3b300b3, - 0x0000c0c0c0c000c0, 0x0000e4e4e4e400e4, 0x0000575757570057, - 0x0000eaeaeaea00ea, 0x0000aeaeaeae00ae, 0x0000232323230023, - 0x00006b6b6b6b006b, 0x0000454545450045, 0x0000a5a5a5a500a5, - 0x0000edededed00ed, 0x00004f4f4f4f004f, 0x00001d1d1d1d001d, - 0x0000929292920092, 0x0000868686860086, 0x0000afafafaf00af, - 0x00007c7c7c7c007c, 0x00001f1f1f1f001f, 0x00003e3e3e3e003e, - 0x0000dcdcdcdc00dc, 0x00005e5e5e5e005e, 0x00000b0b0b0b000b, - 0x0000a6a6a6a600a6, 0x0000393939390039, 0x0000d5d5d5d500d5, - 0x00005d5d5d5d005d, 0x0000d9d9d9d900d9, 0x00005a5a5a5a005a, - 0x0000515151510051, 0x00006c6c6c6c006c, 0x00008b8b8b8b008b, - 0x00009a9a9a9a009a, 0x0000fbfbfbfb00fb, 0x0000b0b0b0b000b0, - 0x0000747474740074, 0x00002b2b2b2b002b, 0x0000f0f0f0f000f0, - 0x0000848484840084, 0x0000dfdfdfdf00df, 0x0000cbcbcbcb00cb, - 0x0000343434340034, 0x0000767676760076, 0x00006d6d6d6d006d, - 0x0000a9a9a9a900a9, 0x0000d1d1d1d100d1, 0x0000040404040004, - 0x0000141414140014, 0x00003a3a3a3a003a, 0x0000dededede00de, - 0x0000111111110011, 0x0000323232320032, 0x00009c9c9c9c009c, - 0x0000535353530053, 0x0000f2f2f2f200f2, 0x0000fefefefe00fe, - 0x0000cfcfcfcf00cf, 0x0000c3c3c3c300c3, 0x00007a7a7a7a007a, - 0x0000242424240024, 0x0000e8e8e8e800e8, 0x0000606060600060, - 0x0000696969690069, 0x0000aaaaaaaa00aa, 0x0000a0a0a0a000a0, - 0x0000a1a1a1a100a1, 0x0000626262620062, 0x0000545454540054, - 0x00001e1e1e1e001e, 0x0000e0e0e0e000e0, 0x0000646464640064, - 0x0000101010100010, 0x0000000000000000, 0x0000a3a3a3a300a3, - 0x0000757575750075, 0x00008a8a8a8a008a, 0x0000e6e6e6e600e6, - 0x0000090909090009, 0x0000dddddddd00dd, 0x0000878787870087, - 0x0000838383830083, 0x0000cdcdcdcd00cd, 0x0000909090900090, - 0x0000737373730073, 0x0000f6f6f6f600f6, 0x00009d9d9d9d009d, - 0x0000bfbfbfbf00bf, 0x0000525252520052, 0x0000d8d8d8d800d8, - 0x0000c8c8c8c800c8, 0x0000c6c6c6c600c6, 0x0000818181810081, - 0x00006f6f6f6f006f, 0x0000131313130013, 0x0000636363630063, - 0x0000e9e9e9e900e9, 0x0000a7a7a7a700a7, 0x00009f9f9f9f009f, - 0x0000bcbcbcbc00bc, 0x0000292929290029, 0x0000f9f9f9f900f9, - 0x00002f2f2f2f002f, 0x0000b4b4b4b400b4, 0x0000787878780078, - 0x0000060606060006, 0x0000e7e7e7e700e7, 0x0000717171710071, - 0x0000d4d4d4d400d4, 0x0000abababab00ab, 0x0000888888880088, - 0x00008d8d8d8d008d, 0x0000727272720072, 0x0000b9b9b9b900b9, - 0x0000f8f8f8f800f8, 0x0000acacacac00ac, 0x0000363636360036, - 0x00002a2a2a2a002a, 0x00003c3c3c3c003c, 0x0000f1f1f1f100f1, - 0x0000404040400040, 0x0000d3d3d3d300d3, 0x0000bbbbbbbb00bb, - 0x0000434343430043, 0x0000151515150015, 0x0000adadadad00ad, - 0x0000777777770077, 0x0000808080800080, 0x0000828282820082, - 0x0000ecececec00ec, 0x0000272727270027, 0x0000e5e5e5e500e5, - 0x0000858585850085, 0x0000353535350035, 0x00000c0c0c0c000c, - 0x0000414141410041, 0x0000efefefef00ef, 0x0000939393930093, - 0x0000191919190019, 0x0000212121210021, 0x00000e0e0e0e000e, - 0x00004e4e4e4e004e, 0x0000656565650065, 0x0000bdbdbdbd00bd, - 0x0000b8b8b8b800b8, 0x00008f8f8f8f008f, 0x0000ebebebeb00eb, - 0x0000cececece00ce, 0x0000303030300030, 0x00005f5f5f5f005f, - 0x0000c5c5c5c500c5, 
0x00001a1a1a1a001a, 0x0000e1e1e1e100e1, - 0x0000cacacaca00ca, 0x0000474747470047, 0x00003d3d3d3d003d, - 0x0000010101010001, 0x0000d6d6d6d600d6, 0x0000565656560056, - 0x00004d4d4d4d004d, 0x00000d0d0d0d000d, 0x0000666666660066, - 0x0000cccccccc00cc, 0x00002d2d2d2d002d, 0x0000121212120012, - 0x0000202020200020, 0x0000b1b1b1b100b1, 0x0000999999990099, - 0x00004c4c4c4c004c, 0x0000c2c2c2c200c2, 0x00007e7e7e7e007e, - 0x0000050505050005, 0x0000b7b7b7b700b7, 0x0000313131310031, - 0x0000171717170017, 0x0000d7d7d7d700d7, 0x0000585858580058, - 0x0000616161610061, 0x00001b1b1b1b001b, 0x00001c1c1c1c001c, - 0x00000f0f0f0f000f, 0x0000161616160016, 0x0000181818180018, - 0x0000222222220022, 0x0000444444440044, 0x0000b2b2b2b200b2, - 0x0000b5b5b5b500b5, 0x0000919191910091, 0x0000080808080008, - 0x0000a8a8a8a800a8, 0x0000fcfcfcfc00fc, 0x0000505050500050, - 0x0000d0d0d0d000d0, 0x00007d7d7d7d007d, 0x0000898989890089, - 0x0000979797970097, 0x00005b5b5b5b005b, 0x0000959595950095, - 0x0000ffffffff00ff, 0x0000d2d2d2d200d2, 0x0000c4c4c4c400c4, - 0x0000484848480048, 0x0000f7f7f7f700f7, 0x0000dbdbdbdb00db, - 0x0000030303030003, 0x0000dadadada00da, 0x00003f3f3f3f003f, - 0x0000949494940094, 0x00005c5c5c5c005c, 0x0000020202020002, - 0x00004a4a4a4a004a, 0x0000333333330033, 0x0000676767670067, - 0x0000f3f3f3f300f3, 0x00007f7f7f7f007f, 0x0000e2e2e2e200e2, - 0x00009b9b9b9b009b, 0x0000262626260026, 0x0000373737370037, - 0x00003b3b3b3b003b, 0x0000969696960096, 0x00004b4b4b4b004b, - 0x0000bebebebe00be, 0x00002e2e2e2e002e, 0x0000797979790079, - 0x00008c8c8c8c008c, 0x00006e6e6e6e006e, 0x00008e8e8e8e008e, - 0x0000f5f5f5f500f5, 0x0000b6b6b6b600b6, 0x0000fdfdfdfd00fd, - 0x0000595959590059, 0x0000989898980098, 0x00006a6a6a6a006a, - 0x0000464646460046, 0x0000babababa00ba, 0x0000252525250025, - 0x0000424242420042, 0x0000a2a2a2a200a2, 0x0000fafafafa00fa, - 0x0000070707070007, 0x0000555555550055, 0x0000eeeeeeee00ee, - 0x00000a0a0a0a000a, 0x0000494949490049, 0x0000686868680068, - 0x0000383838380038, 0x0000a4a4a4a400a4, 0x0000282828280028, - 0x00007b7b7b7b007b, 0x0000c9c9c9c900c9, 0x0000c1c1c1c100c1, - 0x0000e3e3e3e300e3, 0x0000f4f4f4f400f4, 0x0000c7c7c7c700c7, - 0x00009e9e9e9e009e, -}; - -const u64 camellia_sp02220222[256] = { - 0x00e0e0e000e0e0e0, 0x0005050500050505, 0x0058585800585858, - 0x00d9d9d900d9d9d9, 0x0067676700676767, 0x004e4e4e004e4e4e, - 0x0081818100818181, 0x00cbcbcb00cbcbcb, 0x00c9c9c900c9c9c9, - 0x000b0b0b000b0b0b, 0x00aeaeae00aeaeae, 0x006a6a6a006a6a6a, - 0x00d5d5d500d5d5d5, 0x0018181800181818, 0x005d5d5d005d5d5d, - 0x0082828200828282, 0x0046464600464646, 0x00dfdfdf00dfdfdf, - 0x00d6d6d600d6d6d6, 0x0027272700272727, 0x008a8a8a008a8a8a, - 0x0032323200323232, 0x004b4b4b004b4b4b, 0x0042424200424242, - 0x00dbdbdb00dbdbdb, 0x001c1c1c001c1c1c, 0x009e9e9e009e9e9e, - 0x009c9c9c009c9c9c, 0x003a3a3a003a3a3a, 0x00cacaca00cacaca, - 0x0025252500252525, 0x007b7b7b007b7b7b, 0x000d0d0d000d0d0d, - 0x0071717100717171, 0x005f5f5f005f5f5f, 0x001f1f1f001f1f1f, - 0x00f8f8f800f8f8f8, 0x00d7d7d700d7d7d7, 0x003e3e3e003e3e3e, - 0x009d9d9d009d9d9d, 0x007c7c7c007c7c7c, 0x0060606000606060, - 0x00b9b9b900b9b9b9, 0x00bebebe00bebebe, 0x00bcbcbc00bcbcbc, - 0x008b8b8b008b8b8b, 0x0016161600161616, 0x0034343400343434, - 0x004d4d4d004d4d4d, 0x00c3c3c300c3c3c3, 0x0072727200727272, - 0x0095959500959595, 0x00ababab00ababab, 0x008e8e8e008e8e8e, - 0x00bababa00bababa, 0x007a7a7a007a7a7a, 0x00b3b3b300b3b3b3, - 0x0002020200020202, 0x00b4b4b400b4b4b4, 0x00adadad00adadad, - 0x00a2a2a200a2a2a2, 0x00acacac00acacac, 0x00d8d8d800d8d8d8, - 0x009a9a9a009a9a9a, 
0x0017171700171717, 0x001a1a1a001a1a1a, - 0x0035353500353535, 0x00cccccc00cccccc, 0x00f7f7f700f7f7f7, - 0x0099999900999999, 0x0061616100616161, 0x005a5a5a005a5a5a, - 0x00e8e8e800e8e8e8, 0x0024242400242424, 0x0056565600565656, - 0x0040404000404040, 0x00e1e1e100e1e1e1, 0x0063636300636363, - 0x0009090900090909, 0x0033333300333333, 0x00bfbfbf00bfbfbf, - 0x0098989800989898, 0x0097979700979797, 0x0085858500858585, - 0x0068686800686868, 0x00fcfcfc00fcfcfc, 0x00ececec00ececec, - 0x000a0a0a000a0a0a, 0x00dadada00dadada, 0x006f6f6f006f6f6f, - 0x0053535300535353, 0x0062626200626262, 0x00a3a3a300a3a3a3, - 0x002e2e2e002e2e2e, 0x0008080800080808, 0x00afafaf00afafaf, - 0x0028282800282828, 0x00b0b0b000b0b0b0, 0x0074747400747474, - 0x00c2c2c200c2c2c2, 0x00bdbdbd00bdbdbd, 0x0036363600363636, - 0x0022222200222222, 0x0038383800383838, 0x0064646400646464, - 0x001e1e1e001e1e1e, 0x0039393900393939, 0x002c2c2c002c2c2c, - 0x00a6a6a600a6a6a6, 0x0030303000303030, 0x00e5e5e500e5e5e5, - 0x0044444400444444, 0x00fdfdfd00fdfdfd, 0x0088888800888888, - 0x009f9f9f009f9f9f, 0x0065656500656565, 0x0087878700878787, - 0x006b6b6b006b6b6b, 0x00f4f4f400f4f4f4, 0x0023232300232323, - 0x0048484800484848, 0x0010101000101010, 0x00d1d1d100d1d1d1, - 0x0051515100515151, 0x00c0c0c000c0c0c0, 0x00f9f9f900f9f9f9, - 0x00d2d2d200d2d2d2, 0x00a0a0a000a0a0a0, 0x0055555500555555, - 0x00a1a1a100a1a1a1, 0x0041414100414141, 0x00fafafa00fafafa, - 0x0043434300434343, 0x0013131300131313, 0x00c4c4c400c4c4c4, - 0x002f2f2f002f2f2f, 0x00a8a8a800a8a8a8, 0x00b6b6b600b6b6b6, - 0x003c3c3c003c3c3c, 0x002b2b2b002b2b2b, 0x00c1c1c100c1c1c1, - 0x00ffffff00ffffff, 0x00c8c8c800c8c8c8, 0x00a5a5a500a5a5a5, - 0x0020202000202020, 0x0089898900898989, 0x0000000000000000, - 0x0090909000909090, 0x0047474700474747, 0x00efefef00efefef, - 0x00eaeaea00eaeaea, 0x00b7b7b700b7b7b7, 0x0015151500151515, - 0x0006060600060606, 0x00cdcdcd00cdcdcd, 0x00b5b5b500b5b5b5, - 0x0012121200121212, 0x007e7e7e007e7e7e, 0x00bbbbbb00bbbbbb, - 0x0029292900292929, 0x000f0f0f000f0f0f, 0x00b8b8b800b8b8b8, - 0x0007070700070707, 0x0004040400040404, 0x009b9b9b009b9b9b, - 0x0094949400949494, 0x0021212100212121, 0x0066666600666666, - 0x00e6e6e600e6e6e6, 0x00cecece00cecece, 0x00ededed00ededed, - 0x00e7e7e700e7e7e7, 0x003b3b3b003b3b3b, 0x00fefefe00fefefe, - 0x007f7f7f007f7f7f, 0x00c5c5c500c5c5c5, 0x00a4a4a400a4a4a4, - 0x0037373700373737, 0x00b1b1b100b1b1b1, 0x004c4c4c004c4c4c, - 0x0091919100919191, 0x006e6e6e006e6e6e, 0x008d8d8d008d8d8d, - 0x0076767600767676, 0x0003030300030303, 0x002d2d2d002d2d2d, - 0x00dedede00dedede, 0x0096969600969696, 0x0026262600262626, - 0x007d7d7d007d7d7d, 0x00c6c6c600c6c6c6, 0x005c5c5c005c5c5c, - 0x00d3d3d300d3d3d3, 0x00f2f2f200f2f2f2, 0x004f4f4f004f4f4f, - 0x0019191900191919, 0x003f3f3f003f3f3f, 0x00dcdcdc00dcdcdc, - 0x0079797900797979, 0x001d1d1d001d1d1d, 0x0052525200525252, - 0x00ebebeb00ebebeb, 0x00f3f3f300f3f3f3, 0x006d6d6d006d6d6d, - 0x005e5e5e005e5e5e, 0x00fbfbfb00fbfbfb, 0x0069696900696969, - 0x00b2b2b200b2b2b2, 0x00f0f0f000f0f0f0, 0x0031313100313131, - 0x000c0c0c000c0c0c, 0x00d4d4d400d4d4d4, 0x00cfcfcf00cfcfcf, - 0x008c8c8c008c8c8c, 0x00e2e2e200e2e2e2, 0x0075757500757575, - 0x00a9a9a900a9a9a9, 0x004a4a4a004a4a4a, 0x0057575700575757, - 0x0084848400848484, 0x0011111100111111, 0x0045454500454545, - 0x001b1b1b001b1b1b, 0x00f5f5f500f5f5f5, 0x00e4e4e400e4e4e4, - 0x000e0e0e000e0e0e, 0x0073737300737373, 0x00aaaaaa00aaaaaa, - 0x00f1f1f100f1f1f1, 0x00dddddd00dddddd, 0x0059595900595959, - 0x0014141400141414, 0x006c6c6c006c6c6c, 0x0092929200929292, - 0x0054545400545454, 0x00d0d0d000d0d0d0, 
0x0078787800787878, - 0x0070707000707070, 0x00e3e3e300e3e3e3, 0x0049494900494949, - 0x0080808000808080, 0x0050505000505050, 0x00a7a7a700a7a7a7, - 0x00f6f6f600f6f6f6, 0x0077777700777777, 0x0093939300939393, - 0x0086868600868686, 0x0083838300838383, 0x002a2a2a002a2a2a, - 0x00c7c7c700c7c7c7, 0x005b5b5b005b5b5b, 0x00e9e9e900e9e9e9, - 0x00eeeeee00eeeeee, 0x008f8f8f008f8f8f, 0x0001010100010101, - 0x003d3d3d003d3d3d, -}; - -const u64 camellia_sp30333033[256] = { - 0x3800383838003838, 0x4100414141004141, 0x1600161616001616, - 0x7600767676007676, 0xd900d9d9d900d9d9, 0x9300939393009393, - 0x6000606060006060, 0xf200f2f2f200f2f2, 0x7200727272007272, - 0xc200c2c2c200c2c2, 0xab00ababab00abab, 0x9a009a9a9a009a9a, - 0x7500757575007575, 0x0600060606000606, 0x5700575757005757, - 0xa000a0a0a000a0a0, 0x9100919191009191, 0xf700f7f7f700f7f7, - 0xb500b5b5b500b5b5, 0xc900c9c9c900c9c9, 0xa200a2a2a200a2a2, - 0x8c008c8c8c008c8c, 0xd200d2d2d200d2d2, 0x9000909090009090, - 0xf600f6f6f600f6f6, 0x0700070707000707, 0xa700a7a7a700a7a7, - 0x2700272727002727, 0x8e008e8e8e008e8e, 0xb200b2b2b200b2b2, - 0x4900494949004949, 0xde00dedede00dede, 0x4300434343004343, - 0x5c005c5c5c005c5c, 0xd700d7d7d700d7d7, 0xc700c7c7c700c7c7, - 0x3e003e3e3e003e3e, 0xf500f5f5f500f5f5, 0x8f008f8f8f008f8f, - 0x6700676767006767, 0x1f001f1f1f001f1f, 0x1800181818001818, - 0x6e006e6e6e006e6e, 0xaf00afafaf00afaf, 0x2f002f2f2f002f2f, - 0xe200e2e2e200e2e2, 0x8500858585008585, 0x0d000d0d0d000d0d, - 0x5300535353005353, 0xf000f0f0f000f0f0, 0x9c009c9c9c009c9c, - 0x6500656565006565, 0xea00eaeaea00eaea, 0xa300a3a3a300a3a3, - 0xae00aeaeae00aeae, 0x9e009e9e9e009e9e, 0xec00ececec00ecec, - 0x8000808080008080, 0x2d002d2d2d002d2d, 0x6b006b6b6b006b6b, - 0xa800a8a8a800a8a8, 0x2b002b2b2b002b2b, 0x3600363636003636, - 0xa600a6a6a600a6a6, 0xc500c5c5c500c5c5, 0x8600868686008686, - 0x4d004d4d4d004d4d, 0x3300333333003333, 0xfd00fdfdfd00fdfd, - 0x6600666666006666, 0x5800585858005858, 0x9600969696009696, - 0x3a003a3a3a003a3a, 0x0900090909000909, 0x9500959595009595, - 0x1000101010001010, 0x7800787878007878, 0xd800d8d8d800d8d8, - 0x4200424242004242, 0xcc00cccccc00cccc, 0xef00efefef00efef, - 0x2600262626002626, 0xe500e5e5e500e5e5, 0x6100616161006161, - 0x1a001a1a1a001a1a, 0x3f003f3f3f003f3f, 0x3b003b3b3b003b3b, - 0x8200828282008282, 0xb600b6b6b600b6b6, 0xdb00dbdbdb00dbdb, - 0xd400d4d4d400d4d4, 0x9800989898009898, 0xe800e8e8e800e8e8, - 0x8b008b8b8b008b8b, 0x0200020202000202, 0xeb00ebebeb00ebeb, - 0x0a000a0a0a000a0a, 0x2c002c2c2c002c2c, 0x1d001d1d1d001d1d, - 0xb000b0b0b000b0b0, 0x6f006f6f6f006f6f, 0x8d008d8d8d008d8d, - 0x8800888888008888, 0x0e000e0e0e000e0e, 0x1900191919001919, - 0x8700878787008787, 0x4e004e4e4e004e4e, 0x0b000b0b0b000b0b, - 0xa900a9a9a900a9a9, 0x0c000c0c0c000c0c, 0x7900797979007979, - 0x1100111111001111, 0x7f007f7f7f007f7f, 0x2200222222002222, - 0xe700e7e7e700e7e7, 0x5900595959005959, 0xe100e1e1e100e1e1, - 0xda00dadada00dada, 0x3d003d3d3d003d3d, 0xc800c8c8c800c8c8, - 0x1200121212001212, 0x0400040404000404, 0x7400747474007474, - 0x5400545454005454, 0x3000303030003030, 0x7e007e7e7e007e7e, - 0xb400b4b4b400b4b4, 0x2800282828002828, 0x5500555555005555, - 0x6800686868006868, 0x5000505050005050, 0xbe00bebebe00bebe, - 0xd000d0d0d000d0d0, 0xc400c4c4c400c4c4, 0x3100313131003131, - 0xcb00cbcbcb00cbcb, 0x2a002a2a2a002a2a, 0xad00adadad00adad, - 0x0f000f0f0f000f0f, 0xca00cacaca00caca, 0x7000707070007070, - 0xff00ffffff00ffff, 0x3200323232003232, 0x6900696969006969, - 0x0800080808000808, 0x6200626262006262, 0x0000000000000000, - 0x2400242424002424, 0xd100d1d1d100d1d1, 
0xfb00fbfbfb00fbfb, - 0xba00bababa00baba, 0xed00ededed00eded, 0x4500454545004545, - 0x8100818181008181, 0x7300737373007373, 0x6d006d6d6d006d6d, - 0x8400848484008484, 0x9f009f9f9f009f9f, 0xee00eeeeee00eeee, - 0x4a004a4a4a004a4a, 0xc300c3c3c300c3c3, 0x2e002e2e2e002e2e, - 0xc100c1c1c100c1c1, 0x0100010101000101, 0xe600e6e6e600e6e6, - 0x2500252525002525, 0x4800484848004848, 0x9900999999009999, - 0xb900b9b9b900b9b9, 0xb300b3b3b300b3b3, 0x7b007b7b7b007b7b, - 0xf900f9f9f900f9f9, 0xce00cecece00cece, 0xbf00bfbfbf00bfbf, - 0xdf00dfdfdf00dfdf, 0x7100717171007171, 0x2900292929002929, - 0xcd00cdcdcd00cdcd, 0x6c006c6c6c006c6c, 0x1300131313001313, - 0x6400646464006464, 0x9b009b9b9b009b9b, 0x6300636363006363, - 0x9d009d9d9d009d9d, 0xc000c0c0c000c0c0, 0x4b004b4b4b004b4b, - 0xb700b7b7b700b7b7, 0xa500a5a5a500a5a5, 0x8900898989008989, - 0x5f005f5f5f005f5f, 0xb100b1b1b100b1b1, 0x1700171717001717, - 0xf400f4f4f400f4f4, 0xbc00bcbcbc00bcbc, 0xd300d3d3d300d3d3, - 0x4600464646004646, 0xcf00cfcfcf00cfcf, 0x3700373737003737, - 0x5e005e5e5e005e5e, 0x4700474747004747, 0x9400949494009494, - 0xfa00fafafa00fafa, 0xfc00fcfcfc00fcfc, 0x5b005b5b5b005b5b, - 0x9700979797009797, 0xfe00fefefe00fefe, 0x5a005a5a5a005a5a, - 0xac00acacac00acac, 0x3c003c3c3c003c3c, 0x4c004c4c4c004c4c, - 0x0300030303000303, 0x3500353535003535, 0xf300f3f3f300f3f3, - 0x2300232323002323, 0xb800b8b8b800b8b8, 0x5d005d5d5d005d5d, - 0x6a006a6a6a006a6a, 0x9200929292009292, 0xd500d5d5d500d5d5, - 0x2100212121002121, 0x4400444444004444, 0x5100515151005151, - 0xc600c6c6c600c6c6, 0x7d007d7d7d007d7d, 0x3900393939003939, - 0x8300838383008383, 0xdc00dcdcdc00dcdc, 0xaa00aaaaaa00aaaa, - 0x7c007c7c7c007c7c, 0x7700777777007777, 0x5600565656005656, - 0x0500050505000505, 0x1b001b1b1b001b1b, 0xa400a4a4a400a4a4, - 0x1500151515001515, 0x3400343434003434, 0x1e001e1e1e001e1e, - 0x1c001c1c1c001c1c, 0xf800f8f8f800f8f8, 0x5200525252005252, - 0x2000202020002020, 0x1400141414001414, 0xe900e9e9e900e9e9, - 0xbd00bdbdbd00bdbd, 0xdd00dddddd00dddd, 0xe400e4e4e400e4e4, - 0xa100a1a1a100a1a1, 0xe000e0e0e000e0e0, 0x8a008a8a8a008a8a, - 0xf100f1f1f100f1f1, 0xd600d6d6d600d6d6, 0x7a007a7a7a007a7a, - 0xbb00bbbbbb00bbbb, 0xe300e3e3e300e3e3, 0x4000404040004040, - 0x4f004f4f4f004f4f, -}; - -const u64 camellia_sp44044404[256] = { - 0x7070007070700070, 0x2c2c002c2c2c002c, 0xb3b300b3b3b300b3, - 0xc0c000c0c0c000c0, 0xe4e400e4e4e400e4, 0x5757005757570057, - 0xeaea00eaeaea00ea, 0xaeae00aeaeae00ae, 0x2323002323230023, - 0x6b6b006b6b6b006b, 0x4545004545450045, 0xa5a500a5a5a500a5, - 0xeded00ededed00ed, 0x4f4f004f4f4f004f, 0x1d1d001d1d1d001d, - 0x9292009292920092, 0x8686008686860086, 0xafaf00afafaf00af, - 0x7c7c007c7c7c007c, 0x1f1f001f1f1f001f, 0x3e3e003e3e3e003e, - 0xdcdc00dcdcdc00dc, 0x5e5e005e5e5e005e, 0x0b0b000b0b0b000b, - 0xa6a600a6a6a600a6, 0x3939003939390039, 0xd5d500d5d5d500d5, - 0x5d5d005d5d5d005d, 0xd9d900d9d9d900d9, 0x5a5a005a5a5a005a, - 0x5151005151510051, 0x6c6c006c6c6c006c, 0x8b8b008b8b8b008b, - 0x9a9a009a9a9a009a, 0xfbfb00fbfbfb00fb, 0xb0b000b0b0b000b0, - 0x7474007474740074, 0x2b2b002b2b2b002b, 0xf0f000f0f0f000f0, - 0x8484008484840084, 0xdfdf00dfdfdf00df, 0xcbcb00cbcbcb00cb, - 0x3434003434340034, 0x7676007676760076, 0x6d6d006d6d6d006d, - 0xa9a900a9a9a900a9, 0xd1d100d1d1d100d1, 0x0404000404040004, - 0x1414001414140014, 0x3a3a003a3a3a003a, 0xdede00dedede00de, - 0x1111001111110011, 0x3232003232320032, 0x9c9c009c9c9c009c, - 0x5353005353530053, 0xf2f200f2f2f200f2, 0xfefe00fefefe00fe, - 0xcfcf00cfcfcf00cf, 0xc3c300c3c3c300c3, 0x7a7a007a7a7a007a, - 0x2424002424240024, 0xe8e800e8e8e800e8, 
0x6060006060600060, - 0x6969006969690069, 0xaaaa00aaaaaa00aa, 0xa0a000a0a0a000a0, - 0xa1a100a1a1a100a1, 0x6262006262620062, 0x5454005454540054, - 0x1e1e001e1e1e001e, 0xe0e000e0e0e000e0, 0x6464006464640064, - 0x1010001010100010, 0x0000000000000000, 0xa3a300a3a3a300a3, - 0x7575007575750075, 0x8a8a008a8a8a008a, 0xe6e600e6e6e600e6, - 0x0909000909090009, 0xdddd00dddddd00dd, 0x8787008787870087, - 0x8383008383830083, 0xcdcd00cdcdcd00cd, 0x9090009090900090, - 0x7373007373730073, 0xf6f600f6f6f600f6, 0x9d9d009d9d9d009d, - 0xbfbf00bfbfbf00bf, 0x5252005252520052, 0xd8d800d8d8d800d8, - 0xc8c800c8c8c800c8, 0xc6c600c6c6c600c6, 0x8181008181810081, - 0x6f6f006f6f6f006f, 0x1313001313130013, 0x6363006363630063, - 0xe9e900e9e9e900e9, 0xa7a700a7a7a700a7, 0x9f9f009f9f9f009f, - 0xbcbc00bcbcbc00bc, 0x2929002929290029, 0xf9f900f9f9f900f9, - 0x2f2f002f2f2f002f, 0xb4b400b4b4b400b4, 0x7878007878780078, - 0x0606000606060006, 0xe7e700e7e7e700e7, 0x7171007171710071, - 0xd4d400d4d4d400d4, 0xabab00ababab00ab, 0x8888008888880088, - 0x8d8d008d8d8d008d, 0x7272007272720072, 0xb9b900b9b9b900b9, - 0xf8f800f8f8f800f8, 0xacac00acacac00ac, 0x3636003636360036, - 0x2a2a002a2a2a002a, 0x3c3c003c3c3c003c, 0xf1f100f1f1f100f1, - 0x4040004040400040, 0xd3d300d3d3d300d3, 0xbbbb00bbbbbb00bb, - 0x4343004343430043, 0x1515001515150015, 0xadad00adadad00ad, - 0x7777007777770077, 0x8080008080800080, 0x8282008282820082, - 0xecec00ececec00ec, 0x2727002727270027, 0xe5e500e5e5e500e5, - 0x8585008585850085, 0x3535003535350035, 0x0c0c000c0c0c000c, - 0x4141004141410041, 0xefef00efefef00ef, 0x9393009393930093, - 0x1919001919190019, 0x2121002121210021, 0x0e0e000e0e0e000e, - 0x4e4e004e4e4e004e, 0x6565006565650065, 0xbdbd00bdbdbd00bd, - 0xb8b800b8b8b800b8, 0x8f8f008f8f8f008f, 0xebeb00ebebeb00eb, - 0xcece00cecece00ce, 0x3030003030300030, 0x5f5f005f5f5f005f, - 0xc5c500c5c5c500c5, 0x1a1a001a1a1a001a, 0xe1e100e1e1e100e1, - 0xcaca00cacaca00ca, 0x4747004747470047, 0x3d3d003d3d3d003d, - 0x0101000101010001, 0xd6d600d6d6d600d6, 0x5656005656560056, - 0x4d4d004d4d4d004d, 0x0d0d000d0d0d000d, 0x6666006666660066, - 0xcccc00cccccc00cc, 0x2d2d002d2d2d002d, 0x1212001212120012, - 0x2020002020200020, 0xb1b100b1b1b100b1, 0x9999009999990099, - 0x4c4c004c4c4c004c, 0xc2c200c2c2c200c2, 0x7e7e007e7e7e007e, - 0x0505000505050005, 0xb7b700b7b7b700b7, 0x3131003131310031, - 0x1717001717170017, 0xd7d700d7d7d700d7, 0x5858005858580058, - 0x6161006161610061, 0x1b1b001b1b1b001b, 0x1c1c001c1c1c001c, - 0x0f0f000f0f0f000f, 0x1616001616160016, 0x1818001818180018, - 0x2222002222220022, 0x4444004444440044, 0xb2b200b2b2b200b2, - 0xb5b500b5b5b500b5, 0x9191009191910091, 0x0808000808080008, - 0xa8a800a8a8a800a8, 0xfcfc00fcfcfc00fc, 0x5050005050500050, - 0xd0d000d0d0d000d0, 0x7d7d007d7d7d007d, 0x8989008989890089, - 0x9797009797970097, 0x5b5b005b5b5b005b, 0x9595009595950095, - 0xffff00ffffff00ff, 0xd2d200d2d2d200d2, 0xc4c400c4c4c400c4, - 0x4848004848480048, 0xf7f700f7f7f700f7, 0xdbdb00dbdbdb00db, - 0x0303000303030003, 0xdada00dadada00da, 0x3f3f003f3f3f003f, - 0x9494009494940094, 0x5c5c005c5c5c005c, 0x0202000202020002, - 0x4a4a004a4a4a004a, 0x3333003333330033, 0x6767006767670067, - 0xf3f300f3f3f300f3, 0x7f7f007f7f7f007f, 0xe2e200e2e2e200e2, - 0x9b9b009b9b9b009b, 0x2626002626260026, 0x3737003737370037, - 0x3b3b003b3b3b003b, 0x9696009696960096, 0x4b4b004b4b4b004b, - 0xbebe00bebebe00be, 0x2e2e002e2e2e002e, 0x7979007979790079, - 0x8c8c008c8c8c008c, 0x6e6e006e6e6e006e, 0x8e8e008e8e8e008e, - 0xf5f500f5f5f500f5, 0xb6b600b6b6b600b6, 0xfdfd00fdfdfd00fd, - 0x5959005959590059, 0x9898009898980098, 0x6a6a006a6a6a006a, 
- 0x4646004646460046, 0xbaba00bababa00ba, 0x2525002525250025, - 0x4242004242420042, 0xa2a200a2a2a200a2, 0xfafa00fafafa00fa, - 0x0707000707070007, 0x5555005555550055, 0xeeee00eeeeee00ee, - 0x0a0a000a0a0a000a, 0x4949004949490049, 0x6868006868680068, - 0x3838003838380038, 0xa4a400a4a4a400a4, 0x2828002828280028, - 0x7b7b007b7b7b007b, 0xc9c900c9c9c900c9, 0xc1c100c1c1c100c1, - 0xe3e300e3e3e300e3, 0xf4f400f4f4f400f4, 0xc7c700c7c7c700c7, - 0x9e9e009e9e9e009e, -}; - -const u64 camellia_sp11101110[256] = { - 0x7070700070707000, 0x8282820082828200, 0x2c2c2c002c2c2c00, - 0xececec00ececec00, 0xb3b3b300b3b3b300, 0x2727270027272700, - 0xc0c0c000c0c0c000, 0xe5e5e500e5e5e500, 0xe4e4e400e4e4e400, - 0x8585850085858500, 0x5757570057575700, 0x3535350035353500, - 0xeaeaea00eaeaea00, 0x0c0c0c000c0c0c00, 0xaeaeae00aeaeae00, - 0x4141410041414100, 0x2323230023232300, 0xefefef00efefef00, - 0x6b6b6b006b6b6b00, 0x9393930093939300, 0x4545450045454500, - 0x1919190019191900, 0xa5a5a500a5a5a500, 0x2121210021212100, - 0xededed00ededed00, 0x0e0e0e000e0e0e00, 0x4f4f4f004f4f4f00, - 0x4e4e4e004e4e4e00, 0x1d1d1d001d1d1d00, 0x6565650065656500, - 0x9292920092929200, 0xbdbdbd00bdbdbd00, 0x8686860086868600, - 0xb8b8b800b8b8b800, 0xafafaf00afafaf00, 0x8f8f8f008f8f8f00, - 0x7c7c7c007c7c7c00, 0xebebeb00ebebeb00, 0x1f1f1f001f1f1f00, - 0xcecece00cecece00, 0x3e3e3e003e3e3e00, 0x3030300030303000, - 0xdcdcdc00dcdcdc00, 0x5f5f5f005f5f5f00, 0x5e5e5e005e5e5e00, - 0xc5c5c500c5c5c500, 0x0b0b0b000b0b0b00, 0x1a1a1a001a1a1a00, - 0xa6a6a600a6a6a600, 0xe1e1e100e1e1e100, 0x3939390039393900, - 0xcacaca00cacaca00, 0xd5d5d500d5d5d500, 0x4747470047474700, - 0x5d5d5d005d5d5d00, 0x3d3d3d003d3d3d00, 0xd9d9d900d9d9d900, - 0x0101010001010100, 0x5a5a5a005a5a5a00, 0xd6d6d600d6d6d600, - 0x5151510051515100, 0x5656560056565600, 0x6c6c6c006c6c6c00, - 0x4d4d4d004d4d4d00, 0x8b8b8b008b8b8b00, 0x0d0d0d000d0d0d00, - 0x9a9a9a009a9a9a00, 0x6666660066666600, 0xfbfbfb00fbfbfb00, - 0xcccccc00cccccc00, 0xb0b0b000b0b0b000, 0x2d2d2d002d2d2d00, - 0x7474740074747400, 0x1212120012121200, 0x2b2b2b002b2b2b00, - 0x2020200020202000, 0xf0f0f000f0f0f000, 0xb1b1b100b1b1b100, - 0x8484840084848400, 0x9999990099999900, 0xdfdfdf00dfdfdf00, - 0x4c4c4c004c4c4c00, 0xcbcbcb00cbcbcb00, 0xc2c2c200c2c2c200, - 0x3434340034343400, 0x7e7e7e007e7e7e00, 0x7676760076767600, - 0x0505050005050500, 0x6d6d6d006d6d6d00, 0xb7b7b700b7b7b700, - 0xa9a9a900a9a9a900, 0x3131310031313100, 0xd1d1d100d1d1d100, - 0x1717170017171700, 0x0404040004040400, 0xd7d7d700d7d7d700, - 0x1414140014141400, 0x5858580058585800, 0x3a3a3a003a3a3a00, - 0x6161610061616100, 0xdedede00dedede00, 0x1b1b1b001b1b1b00, - 0x1111110011111100, 0x1c1c1c001c1c1c00, 0x3232320032323200, - 0x0f0f0f000f0f0f00, 0x9c9c9c009c9c9c00, 0x1616160016161600, - 0x5353530053535300, 0x1818180018181800, 0xf2f2f200f2f2f200, - 0x2222220022222200, 0xfefefe00fefefe00, 0x4444440044444400, - 0xcfcfcf00cfcfcf00, 0xb2b2b200b2b2b200, 0xc3c3c300c3c3c300, - 0xb5b5b500b5b5b500, 0x7a7a7a007a7a7a00, 0x9191910091919100, - 0x2424240024242400, 0x0808080008080800, 0xe8e8e800e8e8e800, - 0xa8a8a800a8a8a800, 0x6060600060606000, 0xfcfcfc00fcfcfc00, - 0x6969690069696900, 0x5050500050505000, 0xaaaaaa00aaaaaa00, - 0xd0d0d000d0d0d000, 0xa0a0a000a0a0a000, 0x7d7d7d007d7d7d00, - 0xa1a1a100a1a1a100, 0x8989890089898900, 0x6262620062626200, - 0x9797970097979700, 0x5454540054545400, 0x5b5b5b005b5b5b00, - 0x1e1e1e001e1e1e00, 0x9595950095959500, 0xe0e0e000e0e0e000, - 0xffffff00ffffff00, 0x6464640064646400, 0xd2d2d200d2d2d200, - 0x1010100010101000, 0xc4c4c400c4c4c400, 0x0000000000000000, - 
0x4848480048484800, 0xa3a3a300a3a3a300, 0xf7f7f700f7f7f700, - 0x7575750075757500, 0xdbdbdb00dbdbdb00, 0x8a8a8a008a8a8a00, - 0x0303030003030300, 0xe6e6e600e6e6e600, 0xdadada00dadada00, - 0x0909090009090900, 0x3f3f3f003f3f3f00, 0xdddddd00dddddd00, - 0x9494940094949400, 0x8787870087878700, 0x5c5c5c005c5c5c00, - 0x8383830083838300, 0x0202020002020200, 0xcdcdcd00cdcdcd00, - 0x4a4a4a004a4a4a00, 0x9090900090909000, 0x3333330033333300, - 0x7373730073737300, 0x6767670067676700, 0xf6f6f600f6f6f600, - 0xf3f3f300f3f3f300, 0x9d9d9d009d9d9d00, 0x7f7f7f007f7f7f00, - 0xbfbfbf00bfbfbf00, 0xe2e2e200e2e2e200, 0x5252520052525200, - 0x9b9b9b009b9b9b00, 0xd8d8d800d8d8d800, 0x2626260026262600, - 0xc8c8c800c8c8c800, 0x3737370037373700, 0xc6c6c600c6c6c600, - 0x3b3b3b003b3b3b00, 0x8181810081818100, 0x9696960096969600, - 0x6f6f6f006f6f6f00, 0x4b4b4b004b4b4b00, 0x1313130013131300, - 0xbebebe00bebebe00, 0x6363630063636300, 0x2e2e2e002e2e2e00, - 0xe9e9e900e9e9e900, 0x7979790079797900, 0xa7a7a700a7a7a700, - 0x8c8c8c008c8c8c00, 0x9f9f9f009f9f9f00, 0x6e6e6e006e6e6e00, - 0xbcbcbc00bcbcbc00, 0x8e8e8e008e8e8e00, 0x2929290029292900, - 0xf5f5f500f5f5f500, 0xf9f9f900f9f9f900, 0xb6b6b600b6b6b600, - 0x2f2f2f002f2f2f00, 0xfdfdfd00fdfdfd00, 0xb4b4b400b4b4b400, - 0x5959590059595900, 0x7878780078787800, 0x9898980098989800, - 0x0606060006060600, 0x6a6a6a006a6a6a00, 0xe7e7e700e7e7e700, - 0x4646460046464600, 0x7171710071717100, 0xbababa00bababa00, - 0xd4d4d400d4d4d400, 0x2525250025252500, 0xababab00ababab00, - 0x4242420042424200, 0x8888880088888800, 0xa2a2a200a2a2a200, - 0x8d8d8d008d8d8d00, 0xfafafa00fafafa00, 0x7272720072727200, - 0x0707070007070700, 0xb9b9b900b9b9b900, 0x5555550055555500, - 0xf8f8f800f8f8f800, 0xeeeeee00eeeeee00, 0xacacac00acacac00, - 0x0a0a0a000a0a0a00, 0x3636360036363600, 0x4949490049494900, - 0x2a2a2a002a2a2a00, 0x6868680068686800, 0x3c3c3c003c3c3c00, - 0x3838380038383800, 0xf1f1f100f1f1f100, 0xa4a4a400a4a4a400, - 0x4040400040404000, 0x2828280028282800, 0xd3d3d300d3d3d300, - 0x7b7b7b007b7b7b00, 0xbbbbbb00bbbbbb00, 0xc9c9c900c9c9c900, - 0x4343430043434300, 0xc1c1c100c1c1c100, 0x1515150015151500, - 0xe3e3e300e3e3e300, 0xadadad00adadad00, 0xf4f4f400f4f4f400, - 0x7777770077777700, 0xc7c7c700c7c7c700, 0x8080800080808000, - 0x9e9e9e009e9e9e00, -}; - -/* key constants */ -#define CAMELLIA_SIGMA1L (0xA09E667FL) -#define CAMELLIA_SIGMA1R (0x3BCC908BL) -#define CAMELLIA_SIGMA2L (0xB67AE858L) -#define CAMELLIA_SIGMA2R (0x4CAA73B2L) -#define CAMELLIA_SIGMA3L (0xC6EF372FL) -#define CAMELLIA_SIGMA3R (0xE94F82BEL) -#define CAMELLIA_SIGMA4L (0x54FF53A5L) -#define CAMELLIA_SIGMA4R (0xF1D36F1CL) -#define CAMELLIA_SIGMA5L (0x10E527FAL) -#define CAMELLIA_SIGMA5R (0xDE682D1DL) -#define CAMELLIA_SIGMA6L (0xB05688C2L) -#define CAMELLIA_SIGMA6R (0xB3E6C1FDL) - -/* macros */ -#define ROLDQ(l, r, bits) ({ \ - u64 t = l; \ - l = (l << bits) | (r >> (64 - bits)); \ - r = (r << bits) | (t >> (64 - bits)); \ -}) - -#define CAMELLIA_F(x, kl, kr, y) ({ \ - u64 ii = x ^ (((u64)kl << 32) | kr); \ - y = camellia_sp11101110[(uint8_t)ii]; \ - y ^= camellia_sp44044404[(uint8_t)(ii >> 8)]; \ - ii >>= 16; \ - y ^= camellia_sp30333033[(uint8_t)ii]; \ - y ^= camellia_sp02220222[(uint8_t)(ii >> 8)]; \ - ii >>= 16; \ - y ^= camellia_sp00444404[(uint8_t)ii]; \ - y ^= camellia_sp03303033[(uint8_t)(ii >> 8)]; \ - ii >>= 16; \ - y ^= camellia_sp22000222[(uint8_t)ii]; \ - y ^= camellia_sp10011110[(uint8_t)(ii >> 8)]; \ - y = ror64(y, 32); \ -}) - -#define SET_SUBKEY_LR(INDEX, sRL) (subkey[(INDEX)] = ror64((sRL), 32)) - -static void camellia_setup_tail(u64 
*subkey, u64 *subRL, int max) -{ - u64 kw4, tt; - u32 dw, tl, tr; - - /* absorb kw2 to other subkeys */ - /* round 2 */ - subRL[3] ^= subRL[1]; - /* round 4 */ - subRL[5] ^= subRL[1]; - /* round 6 */ - subRL[7] ^= subRL[1]; - - subRL[1] ^= (subRL[1] & ~subRL[9]) << 32; - /* modified for FLinv(kl2) */ - dw = (subRL[1] & subRL[9]) >> 32, - subRL[1] ^= rol32(dw, 1); - - /* round 8 */ - subRL[11] ^= subRL[1]; - /* round 10 */ - subRL[13] ^= subRL[1]; - /* round 12 */ - subRL[15] ^= subRL[1]; - - subRL[1] ^= (subRL[1] & ~subRL[17]) << 32; - /* modified for FLinv(kl4) */ - dw = (subRL[1] & subRL[17]) >> 32, - subRL[1] ^= rol32(dw, 1); - - /* round 14 */ - subRL[19] ^= subRL[1]; - /* round 16 */ - subRL[21] ^= subRL[1]; - /* round 18 */ - subRL[23] ^= subRL[1]; - - if (max == 24) { - /* kw3 */ - subRL[24] ^= subRL[1]; - - /* absorb kw4 to other subkeys */ - kw4 = subRL[25]; - } else { - subRL[1] ^= (subRL[1] & ~subRL[25]) << 32; - /* modified for FLinv(kl6) */ - dw = (subRL[1] & subRL[25]) >> 32, - subRL[1] ^= rol32(dw, 1); - - /* round 20 */ - subRL[27] ^= subRL[1]; - /* round 22 */ - subRL[29] ^= subRL[1]; - /* round 24 */ - subRL[31] ^= subRL[1]; - /* kw3 */ - subRL[32] ^= subRL[1]; - - /* absorb kw4 to other subkeys */ - kw4 = subRL[33]; - /* round 23 */ - subRL[30] ^= kw4; - /* round 21 */ - subRL[28] ^= kw4; - /* round 19 */ - subRL[26] ^= kw4; - - kw4 ^= (kw4 & ~subRL[24]) << 32; - /* modified for FL(kl5) */ - dw = (kw4 & subRL[24]) >> 32, - kw4 ^= rol32(dw, 1); - } - - /* round 17 */ - subRL[22] ^= kw4; - /* round 15 */ - subRL[20] ^= kw4; - /* round 13 */ - subRL[18] ^= kw4; - - kw4 ^= (kw4 & ~subRL[16]) << 32; - /* modified for FL(kl3) */ - dw = (kw4 & subRL[16]) >> 32, - kw4 ^= rol32(dw, 1); - - /* round 11 */ - subRL[14] ^= kw4; - /* round 9 */ - subRL[12] ^= kw4; - /* round 7 */ - subRL[10] ^= kw4; - - kw4 ^= (kw4 & ~subRL[8]) << 32; - /* modified for FL(kl1) */ - dw = (kw4 & subRL[8]) >> 32, - kw4 ^= rol32(dw, 1); - - /* round 5 */ - subRL[6] ^= kw4; - /* round 3 */ - subRL[4] ^= kw4; - /* round 1 */ - subRL[2] ^= kw4; - /* kw1 */ - subRL[0] ^= kw4; - - /* key XOR is end of F-function */ - SET_SUBKEY_LR(0, subRL[0] ^ subRL[2]); /* kw1 */ - SET_SUBKEY_LR(2, subRL[3]); /* round 1 */ - SET_SUBKEY_LR(3, subRL[2] ^ subRL[4]); /* round 2 */ - SET_SUBKEY_LR(4, subRL[3] ^ subRL[5]); /* round 3 */ - SET_SUBKEY_LR(5, subRL[4] ^ subRL[6]); /* round 4 */ - SET_SUBKEY_LR(6, subRL[5] ^ subRL[7]); /* round 5 */ - - tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]); - dw = tl & (subRL[8] >> 32), /* FL(kl1) */ - tr = subRL[10] ^ rol32(dw, 1); - tt = (tr | ((u64)tl << 32)); - - SET_SUBKEY_LR(7, subRL[6] ^ tt); /* round 6 */ - SET_SUBKEY_LR(8, subRL[8]); /* FL(kl1) */ - SET_SUBKEY_LR(9, subRL[9]); /* FLinv(kl2) */ - - tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]); - dw = tl & (subRL[9] >> 32), /* FLinv(kl2) */ - tr = subRL[7] ^ rol32(dw, 1); - tt = (tr | ((u64)tl << 32)); - - SET_SUBKEY_LR(10, subRL[11] ^ tt); /* round 7 */ - SET_SUBKEY_LR(11, subRL[10] ^ subRL[12]); /* round 8 */ - SET_SUBKEY_LR(12, subRL[11] ^ subRL[13]); /* round 9 */ - SET_SUBKEY_LR(13, subRL[12] ^ subRL[14]); /* round 10 */ - SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]); /* round 11 */ - - tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]); - dw = tl & (subRL[16] >> 32), /* FL(kl3) */ - tr = subRL[18] ^ rol32(dw, 1); - tt = (tr | ((u64)tl << 32)); - - SET_SUBKEY_LR(15, subRL[14] ^ tt); /* round 12 */ - SET_SUBKEY_LR(16, subRL[16]); /* FL(kl3) */ - SET_SUBKEY_LR(17, subRL[17]); /* FLinv(kl4) */ - - tl = (subRL[15] >> 32) ^ (subRL[15] & 
~subRL[17]); - dw = tl & (subRL[17] >> 32), /* FLinv(kl4) */ - tr = subRL[15] ^ rol32(dw, 1); - tt = (tr | ((u64)tl << 32)); - - SET_SUBKEY_LR(18, subRL[19] ^ tt); /* round 13 */ - SET_SUBKEY_LR(19, subRL[18] ^ subRL[20]); /* round 14 */ - SET_SUBKEY_LR(20, subRL[19] ^ subRL[21]); /* round 15 */ - SET_SUBKEY_LR(21, subRL[20] ^ subRL[22]); /* round 16 */ - SET_SUBKEY_LR(22, subRL[21] ^ subRL[23]); /* round 17 */ - - if (max == 24) { - SET_SUBKEY_LR(23, subRL[22]); /* round 18 */ - SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]); /* kw3 */ - } else { - tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]); - dw = tl & (subRL[24] >> 32), /* FL(kl5) */ - tr = subRL[26] ^ rol32(dw, 1); - tt = (tr | ((u64)tl << 32)); - - SET_SUBKEY_LR(23, subRL[22] ^ tt); /* round 18 */ - SET_SUBKEY_LR(24, subRL[24]); /* FL(kl5) */ - SET_SUBKEY_LR(25, subRL[25]); /* FLinv(kl6) */ - - tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]); - dw = tl & (subRL[25] >> 32), /* FLinv(kl6) */ - tr = subRL[23] ^ rol32(dw, 1); - tt = (tr | ((u64)tl << 32)); - - SET_SUBKEY_LR(26, subRL[27] ^ tt); /* round 19 */ - SET_SUBKEY_LR(27, subRL[26] ^ subRL[28]); /* round 20 */ - SET_SUBKEY_LR(28, subRL[27] ^ subRL[29]); /* round 21 */ - SET_SUBKEY_LR(29, subRL[28] ^ subRL[30]); /* round 22 */ - SET_SUBKEY_LR(30, subRL[29] ^ subRL[31]); /* round 23 */ - SET_SUBKEY_LR(31, subRL[30]); /* round 24 */ - SET_SUBKEY_LR(32, subRL[32] ^ subRL[31]); /* kw3 */ - } -} - -static void camellia_setup128(const unsigned char *key, u64 *subkey) -{ - u64 kl, kr, ww; - u64 subRL[26]; - - /** - * k == kl || kr (|| is concatenation) - */ - kl = get_unaligned_be64(key); - kr = get_unaligned_be64(key + 8); - - /* generate KL dependent subkeys */ - /* kw1 */ - subRL[0] = kl; - /* kw2 */ - subRL[1] = kr; - - /* rotation left shift 15bit */ - ROLDQ(kl, kr, 15); - - /* k3 */ - subRL[4] = kl; - /* k4 */ - subRL[5] = kr; - - /* rotation left shift 15+30bit */ - ROLDQ(kl, kr, 30); - - /* k7 */ - subRL[10] = kl; - /* k8 */ - subRL[11] = kr; - - /* rotation left shift 15+30+15bit */ - ROLDQ(kl, kr, 15); - - /* k10 */ - subRL[13] = kr; - /* rotation left shift 15+30+15+17 bit */ - ROLDQ(kl, kr, 17); - - /* kl3 */ - subRL[16] = kl; - /* kl4 */ - subRL[17] = kr; - - /* rotation left shift 15+30+15+17+17 bit */ - ROLDQ(kl, kr, 17); - - /* k13 */ - subRL[18] = kl; - /* k14 */ - subRL[19] = kr; - - /* rotation left shift 15+30+15+17+17+17 bit */ - ROLDQ(kl, kr, 17); - - /* k17 */ - subRL[22] = kl; - /* k18 */ - subRL[23] = kr; - - /* generate KA */ - kl = subRL[0]; - kr = subRL[1]; - CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww); - kr ^= ww; - CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl); - - /* current status == (kll, klr, w0, w1) */ - CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr); - kr ^= ww; - CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww); - kl ^= ww; - - /* generate KA dependent subkeys */ - /* k1, k2 */ - subRL[2] = kl; - subRL[3] = kr; - ROLDQ(kl, kr, 15); - /* k5,k6 */ - subRL[6] = kl; - subRL[7] = kr; - ROLDQ(kl, kr, 15); - /* kl1, kl2 */ - subRL[8] = kl; - subRL[9] = kr; - ROLDQ(kl, kr, 15); - /* k9 */ - subRL[12] = kl; - ROLDQ(kl, kr, 15); - /* k11, k12 */ - subRL[14] = kl; - subRL[15] = kr; - ROLDQ(kl, kr, 34); - /* k15, k16 */ - subRL[20] = kl; - subRL[21] = kr; - ROLDQ(kl, kr, 17); - /* kw3, kw4 */ - subRL[24] = kl; - subRL[25] = kr; - - camellia_setup_tail(subkey, subRL, 24); -} - -static void camellia_setup256(const unsigned char *key, u64 *subkey) -{ - u64 kl, kr; /* left half of key */ - u64 krl, krr; /* right half of key */ - u64 
ww; /* temporary variables */ - u64 subRL[34]; - - /** - * key = (kl || kr || krl || krr) (|| is concatenation) - */ - kl = get_unaligned_be64(key); - kr = get_unaligned_be64(key + 8); - krl = get_unaligned_be64(key + 16); - krr = get_unaligned_be64(key + 24); - - /* generate KL dependent subkeys */ - /* kw1 */ - subRL[0] = kl; - /* kw2 */ - subRL[1] = kr; - ROLDQ(kl, kr, 45); - /* k9 */ - subRL[12] = kl; - /* k10 */ - subRL[13] = kr; - ROLDQ(kl, kr, 15); - /* kl3 */ - subRL[16] = kl; - /* kl4 */ - subRL[17] = kr; - ROLDQ(kl, kr, 17); - /* k17 */ - subRL[22] = kl; - /* k18 */ - subRL[23] = kr; - ROLDQ(kl, kr, 34); - /* k23 */ - subRL[30] = kl; - /* k24 */ - subRL[31] = kr; - - /* generate KR dependent subkeys */ - ROLDQ(krl, krr, 15); - /* k3 */ - subRL[4] = krl; - /* k4 */ - subRL[5] = krr; - ROLDQ(krl, krr, 15); - /* kl1 */ - subRL[8] = krl; - /* kl2 */ - subRL[9] = krr; - ROLDQ(krl, krr, 30); - /* k13 */ - subRL[18] = krl; - /* k14 */ - subRL[19] = krr; - ROLDQ(krl, krr, 34); - /* k19 */ - subRL[26] = krl; - /* k20 */ - subRL[27] = krr; - ROLDQ(krl, krr, 34); - - /* generate KA */ - kl = subRL[0] ^ krl; - kr = subRL[1] ^ krr; - - CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww); - kr ^= ww; - CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl); - kl ^= krl; - CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr); - kr ^= ww ^ krr; - CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww); - kl ^= ww; - - /* generate KB */ - krl ^= kl; - krr ^= kr; - CAMELLIA_F(krl, CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, ww); - krr ^= ww; - CAMELLIA_F(krr, CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, ww); - krl ^= ww; - - /* generate KA dependent subkeys */ - ROLDQ(kl, kr, 15); - /* k5 */ - subRL[6] = kl; - /* k6 */ - subRL[7] = kr; - ROLDQ(kl, kr, 30); - /* k11 */ - subRL[14] = kl; - /* k12 */ - subRL[15] = kr; - /* rotation left shift 32bit */ - ROLDQ(kl, kr, 32); - /* kl5 */ - subRL[24] = kl; - /* kl6 */ - subRL[25] = kr; - /* rotation left shift 17 from k11,k12 -> k21,k22 */ - ROLDQ(kl, kr, 17); - /* k21 */ - subRL[28] = kl; - /* k22 */ - subRL[29] = kr; - - /* generate KB dependent subkeys */ - /* k1 */ - subRL[2] = krl; - /* k2 */ - subRL[3] = krr; - ROLDQ(krl, krr, 30); - /* k7 */ - subRL[10] = krl; - /* k8 */ - subRL[11] = krr; - ROLDQ(krl, krr, 30); - /* k15 */ - subRL[20] = krl; - /* k16 */ - subRL[21] = krr; - ROLDQ(krl, krr, 51); - /* kw3 */ - subRL[32] = krl; - /* kw4 */ - subRL[33] = krr; - - camellia_setup_tail(subkey, subRL, 32); -} - -static void camellia_setup192(const unsigned char *key, u64 *subkey) -{ - unsigned char kk[32]; - u64 krl, krr; - - memcpy(kk, key, 24); - memcpy((unsigned char *)&krl, key+16, 8); - krr = ~krl; - memcpy(kk+24, (unsigned char *)&krr, 8); - camellia_setup256(kk, subkey); -} - -static int __camellia_setkey(struct camellia_ctx *cctx, - const unsigned char *key, - unsigned int key_len, u32 *flags) -{ - if (key_len != 16 && key_len != 24 && key_len != 32) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - - cctx->key_length = key_len; - - switch (key_len) { - case 16: - camellia_setup128(key, cctx->key_table); - break; - case 24: - camellia_setup192(key, cctx->key_table); - break; - case 32: - camellia_setup256(key, cctx->key_table); - break; - } - - return 0; -} - -static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len) -{ - return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len, - &tfm->crt_flags); -} - -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - void (*fn)(struct 
camellia_ctx *, u8 *, const u8 *), - void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *)) -{ - struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = CAMELLIA_BLOCK_SIZE; - unsigned int nbytes; - int err; - - err = blkcipher_walk_virt(desc, walk); - - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; - - /* Process two block batch */ - if (nbytes >= bsize * 2) { - do { - fn_2way(ctx, wdst, wsrc); - - wsrc += bsize * 2; - wdst += bsize * 2; - nbytes -= bsize * 2; - } while (nbytes >= bsize * 2); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - fn(ctx, wdst, wsrc); - - wsrc += bsize; - wdst += bsize; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - err = blkcipher_walk_done(desc, walk, nbytes); - } - - return err; -} - -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way); -} - -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = CAMELLIA_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 *iv = (u128 *)walk->iv; - - do { - u128_xor(dst, src, iv); - camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); - return nbytes; -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} - -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = CAMELLIA_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 ivs[2 - 1]; - u128 last_iv; - - /* Start of the last block. 
*/ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - /* Process two block batch */ - if (nbytes >= bsize * 2) { - do { - nbytes -= bsize * (2 - 1); - src -= 2 - 1; - dst -= 2 - 1; - - ivs[0] = src[0]; - - camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src); - - u128_xor(dst + 1, dst + 1, ivs + 0); - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * 2); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - for (;;) { - camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - break; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } - -done: - u128_xor(dst, dst, (u128 *)walk->iv); - *(u128 *)walk->iv = last_iv; - - return nbytes; -} - -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} - -static inline void u128_to_be128(be128 *dst, const u128 *src) -{ - dst->a = cpu_to_be64(src->a); - dst->b = cpu_to_be64(src->b); -} - -static inline void be128_to_u128(u128 *dst, const be128 *src) -{ - dst->a = be64_to_cpu(src->a); - dst->b = be64_to_cpu(src->b); -} - -static inline void u128_inc(u128 *i) -{ - i->b++; - if (!i->b) - i->a++; -} - -static void ctr_crypt_final(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - u8 keystream[CAMELLIA_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - u128 ctrblk; - - memcpy(keystream, src, nbytes); - camellia_enc_blk_xor(ctx, keystream, walk->iv); - memcpy(dst, keystream, nbytes); - - be128_to_u128(&ctrblk, (be128 *)walk->iv); - u128_inc(&ctrblk); - u128_to_be128((be128 *)walk->iv, &ctrblk); -} - -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = CAMELLIA_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 ctrblk; - be128 ctrblocks[2]; - - be128_to_u128(&ctrblk, (be128 *)walk->iv); - - /* Process two block batch */ - if (nbytes >= bsize * 2) { - do { - if (dst != src) { - dst[0] = src[0]; - dst[1] = src[1]; - } - - /* create ctrblks for parallel encrypt */ - u128_to_be128(&ctrblocks[0], &ctrblk); - u128_inc(&ctrblk); - u128_to_be128(&ctrblocks[1], &ctrblk); - u128_inc(&ctrblk); - - camellia_enc_blk_xor_2way(ctx, (u8 *)dst, - (u8 *)ctrblocks); - - src += 2; - dst += 2; - nbytes -= bsize * 2; - } while (nbytes >= bsize * 2); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - if (dst != src) - *dst = *src; - - u128_to_be128(&ctrblocks[0], &ctrblk); - u128_inc(&ctrblk); - - camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks); - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - u128_to_be128((be128 *)walk->iv, &ctrblk); - return nbytes; -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - 
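/* - * Walk the scatterlists in CAMELLIA_BLOCK_SIZE chunks: full blocks are - * handled by __ctr_crypt() (two blocks per call where possible) and any - * trailing partial block is finished by ctr_crypt_final() with one extra - * block of keystream. - */ -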
blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE); - - while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) { - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - if (walk.nbytes) { - ctr_crypt_final(desc, &walk); - err = blkcipher_walk_done(desc, &walk, 0); - } - - return err; -} - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct camellia_ctx *ctx = priv; - int i; - - while (nbytes >= 2 * bsize) { - camellia_enc_blk_2way(ctx, srcdst, srcdst); - srcdst += bsize * 2; - nbytes -= bsize * 2; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_enc_blk(ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct camellia_ctx *ctx = priv; - int i; - - while (nbytes >= 2 * bsize) { - camellia_dec_blk_2way(ctx, srcdst, srcdst); - srcdst += bsize * 2; - nbytes -= bsize * 2; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_dec_blk(ctx, srcdst, srcdst); -} - -struct camellia_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct camellia_ctx camellia_ctx; -}; - -static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __camellia_setkey(&ctx->camellia_ctx, key, - keylen - CAMELLIA_BLOCK_SIZE, - &tfm->crt_flags); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, - key + keylen - CAMELLIA_BLOCK_SIZE); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[2 * 4]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &ctx->camellia_ctx, - .crypt_fn = encrypt_callback, - }; - - return lrw_crypt(desc, dst, src, nbytes, &req); -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[2 * 4]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &ctx->camellia_ctx, - .crypt_fn = decrypt_callback, - }; - - return lrw_crypt(desc, dst, src, nbytes, &req); -} - -static void lrw_exit_tfm(struct crypto_tfm *tfm) -{ - struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} - -struct camellia_xts_ctx { - struct camellia_ctx tweak_ctx; - struct camellia_ctx crypt_ctx; -}; - -static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; - int err; - - /* key consists of keys of equal size concatenated, therefore - * the length must be even - */ - if (keylen % 2) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - - /* first half of xts-key is for crypt */ - err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, - flags); -} - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist 
*dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[2 * 4]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), - .crypt_ctx = &ctx->crypt_ctx, - .crypt_fn = encrypt_callback, - }; - - return xts_crypt(desc, dst, src, nbytes, &req); -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[2 * 4]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), - .crypt_ctx = &ctx->crypt_ctx, - .crypt_fn = decrypt_callback, - }; - - return xts_crypt(desc, dst, src, nbytes, &req); -} - -static struct crypto_alg camellia_algs[6] = { { - .cra_name = "camellia", - .cra_driver_name = "camellia-asm", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[0].cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE, - .cia_max_keysize = CAMELLIA_MAX_KEY_SIZE, - .cia_setkey = camellia_setkey, - .cia_encrypt = camellia_encrypt, - .cia_decrypt = camellia_decrypt - } - } -}, { - .cra_name = "ecb(camellia)", - .cra_driver_name = "ecb-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[1].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(camellia)", - .cra_driver_name = "cbc-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[2].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(camellia)", - .cra_driver_name = "ctr-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[3].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "lrw(camellia)", - .cra_driver_name = "lrw-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_lrw_ctx), - .cra_alignmask = 0, - .cra_type = 
&crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[4].cra_list), - .cra_exit = lrw_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = lrw_camellia_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "xts(camellia)", - .cra_driver_name = "xts-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[5].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, - .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = xts_camellia_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -} }; - -static bool is_blacklisted_cpu(void) -{ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) - return false; - - if (boot_cpu_data.x86 == 0x0f) { - /* - * On Pentium 4, camellia-asm is slower than the original assembler - * implementation because of the excessive use of 64bit rotates and - * left-shifts (which are really slow on P4) needed to store and - * handle a 128bit block in two 64bit registers. - */ - return true; - } - - return false; -} - -static int force; -module_param(force, int, 0); -MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); - -static int __init init(void) -{ - if (!force && is_blacklisted_cpu()) { - printk(KERN_INFO - "camellia-x86_64: performance on this CPU " - "would be suboptimal: disabling " - "camellia-x86_64.\n"); - return -ENODEV; - } - - return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs)); -} - -static void __exit fini(void) -{ - crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs)); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized"); -MODULE_ALIAS("camellia"); -MODULE_ALIAS("camellia-asm"); diff --git a/ANDROID_3.4.5/arch/x86/crypto/crc32c-intel.c b/ANDROID_3.4.5/arch/x86/crypto/crc32c-intel.c deleted file mode 100644 index 493f9592..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/crc32c-intel.c +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Using the hardware-provided CRC32 instruction to accelerate CRC32C computation. - * CRC32C polynomial: 0x1EDC6F41 (BE) / 0x82F63B78 (LE) - * CRC32 is a new instruction in Intel SSE4.2; the reference can be found at: - * http://www.intel.com/products/processor/manuals/ - * Intel(R) 64 and IA-32 Architectures Software Developer's Manual - * Volume 2A: Instruction Set Reference, A-M - * - * Copyright (C) 2008 Intel Corporation - * Authors: Austin Zhang <austin_zhang@linux.intel.com> - * Kent Liu <kent.liu@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details.
- * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <crypto/internal/hash.h> - -#include <asm/cpufeature.h> -#include <asm/cpu_device_id.h> - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -#define SCALE_F sizeof(unsigned long) - -#ifdef CONFIG_X86_64 -#define REX_PRE "0x48, " -#else -#define REX_PRE -#endif - -static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length) -{ - while (length--) { - __asm__ __volatile__( - ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1" - :"=S"(crc) - :"0"(crc), "c"(*data) - ); - data++; - } - - return crc; -} - -static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len) -{ - unsigned int iquotient = len / SCALE_F; - unsigned int iremainder = len % SCALE_F; - unsigned long *ptmp = (unsigned long *)p; - - while (iquotient--) { - __asm__ __volatile__( - ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;" - :"=S"(crc) - :"0"(crc), "c"(*ptmp) - ); - ptmp++; - } - - if (iremainder) - crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp, - iremainder); - - return crc; -} - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set - * the seed. - */ -static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx = crypto_shash_ctx(hash); - - if (keylen != sizeof(u32)) { - crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - *mctx = le32_to_cpup((__le32 *)key); - return 0; -} - -static int crc32c_intel_init(struct shash_desc *desc) -{ - u32 *mctx = crypto_shash_ctx(desc->tfm); - u32 *crcp = shash_desc_ctx(desc); - - *crcp = *mctx; - - return 0; -} - -static int crc32c_intel_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp = shash_desc_ctx(desc); - - *crcp = crc32c_intel_le_hw(*crcp, data, len); - return 0; -} - -static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len, - u8 *out) -{ - *(__le32 *)out = ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len)); - return 0; -} - -static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32c_intel_final(struct shash_desc *desc, u8 *out) -{ - u32 *crcp = shash_desc_ctx(desc); - - *(__le32 *)out = ~cpu_to_le32p(crcp); - return 0; -} - -static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} - -static int crc32c_intel_cra_init(struct crypto_tfm *tfm) -{ - u32 *key = crypto_tfm_ctx(tfm); - - *key = ~0; - - return 0; -} - -static struct shash_alg alg = { - .setkey = crc32c_intel_setkey, - .init = crc32c_intel_init, - .update = crc32c_intel_update, - .final = crc32c_intel_final, - .finup = crc32c_intel_finup, - .digest = crc32c_intel_digest, - .descsize = sizeof(u32), - .digestsize = CHKSUM_DIGEST_SIZE, - .base = { - .cra_name = "crc32c", - .cra_driver_name = "crc32c-intel", - .cra_priority = 200, - .cra_blocksize = CHKSUM_BLOCK_SIZE, - .cra_ctxsize = sizeof(u32), - .cra_module = THIS_MODULE, - .cra_init = 
crc32c_intel_cra_init, - } -}; - -static const struct x86_cpu_id crc32c_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_XMM4_2), - {} -}; -MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id); - -static int __init crc32c_intel_mod_init(void) -{ - if (!x86_match_cpu(crc32c_cpu_id)) - return -ENODEV; - return crypto_register_shash(&alg); -} - -static void __exit crc32c_intel_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crc32c_intel_mod_init); -module_exit(crc32c_intel_mod_fini); - -MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.com>"); -MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware."); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS("crc32c"); -MODULE_ALIAS("crc32c-intel"); diff --git a/ANDROID_3.4.5/arch/x86/crypto/fpu.c b/ANDROID_3.4.5/arch/x86/crypto/fpu.c deleted file mode 100644 index 98d7a188..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/fpu.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * FPU: Wrapper for blkcipher touching fpu - * - * Copyright (c) Intel Corp. - * Author: Huang Ying <ying.huang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ - -#include <crypto/algapi.h> -#include <linux/err.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <asm/i387.h> - -struct crypto_fpu_ctx { - struct crypto_blkcipher *child; -}; - -static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key, - unsigned int keylen) -{ - struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent); - struct crypto_blkcipher *child = ctx->child; - int err; - - crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); - err = crypto_blkcipher_setkey(child, key, keylen); - crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); - return err; -} - -static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - int err; - struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm); - struct crypto_blkcipher *child = ctx->child; - struct blkcipher_desc desc = { - .tfm = child, - .info = desc_in->info, - .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP, - }; - - kernel_fpu_begin(); - err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes); - kernel_fpu_end(); - return err; -} - -static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - int err; - struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm); - struct crypto_blkcipher *child = ctx->child; - struct blkcipher_desc desc = { - .tfm = child, - .info = desc_in->info, - .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP, - }; - - kernel_fpu_begin(); - err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes); - kernel_fpu_end(); - return err; -} - -static int crypto_fpu_init_tfm(struct crypto_tfm *tfm) -{ - struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); - struct crypto_spawn *spawn = crypto_instance_ctx(inst); - struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm); - struct crypto_blkcipher *cipher; - - cipher = crypto_spawn_blkcipher(spawn); - if (IS_ERR(cipher)) - return 
PTR_ERR(cipher); - - ctx->child = cipher; - return 0; -} - -static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm) -{ - struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_free_blkcipher(ctx->child); -} - -static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb) -{ - struct crypto_instance *inst; - struct crypto_alg *alg; - int err; - - err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); - if (err) - return ERR_PTR(err); - - alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER, - CRYPTO_ALG_TYPE_MASK); - if (IS_ERR(alg)) - return ERR_CAST(alg); - - inst = crypto_alloc_instance("fpu", alg); - if (IS_ERR(inst)) - goto out_put_alg; - - inst->alg.cra_flags = alg->cra_flags; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = alg->cra_blocksize; - inst->alg.cra_alignmask = alg->cra_alignmask; - inst->alg.cra_type = alg->cra_type; - inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize; - inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize; - inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize; - inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx); - inst->alg.cra_init = crypto_fpu_init_tfm; - inst->alg.cra_exit = crypto_fpu_exit_tfm; - inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey; - inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt; - inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt; - -out_put_alg: - crypto_mod_put(alg); - return inst; -} - -static void crypto_fpu_free(struct crypto_instance *inst) -{ - crypto_drop_spawn(crypto_instance_ctx(inst)); - kfree(inst); -} - -static struct crypto_template crypto_fpu_tmpl = { - .name = "fpu", - .alloc = crypto_fpu_alloc, - .free = crypto_fpu_free, - .module = THIS_MODULE, -}; - -int __init crypto_fpu_init(void) -{ - return crypto_register_template(&crypto_fpu_tmpl); -} - -void __exit crypto_fpu_exit(void) -{ - crypto_unregister_template(&crypto_fpu_tmpl); -} diff --git a/ANDROID_3.4.5/arch/x86/crypto/ghash-clmulni-intel_asm.S b/ANDROID_3.4.5/arch/x86/crypto/ghash-clmulni-intel_asm.S deleted file mode 100644 index 1eb7f90c..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Accelerated GHASH implementation with Intel PCLMULQDQ-NI - * instructions. This file contains accelerated part of ghash - * implementation. More information about PCLMULQDQ can be found at: - * - * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ - * - * Copyright (c) 2009 Intel Corp. - * Author: Huang Ying <ying.huang@intel.com> - * Vinodh Gopal - * Erdinc Ozturk - * Deniz Karakoyunlu - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. 
- */ - -#include <linux/linkage.h> -#include <asm/inst.h> - -.data - -.align 16 -.Lbswap_mask: - .octa 0x000102030405060708090a0b0c0d0e0f -.Lpoly: - .octa 0xc2000000000000000000000000000001 -.Ltwo_one: - .octa 0x00000001000000000000000000000001 - -#define DATA %xmm0 -#define SHASH %xmm1 -#define T1 %xmm2 -#define T2 %xmm3 -#define T3 %xmm4 -#define BSWAP %xmm5 -#define IN1 %xmm6 - -.text - -/* - * __clmul_gf128mul_ble: internal ABI - * input: - * DATA: operand1 - * SHASH: operand2, hash_key << 1 mod poly - * output: - * DATA: operand1 * operand2 mod poly - * changed: - * T1 - * T2 - * T3 - */ -__clmul_gf128mul_ble: - movaps DATA, T1 - pshufd $0b01001110, DATA, T2 - pshufd $0b01001110, SHASH, T3 - pxor DATA, T2 - pxor SHASH, T3 - - PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0 - PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1 - PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0) - pxor DATA, T2 - pxor T1, T2 # T2 = a0 * b1 + a1 * b0 - - movaps T2, T3 - pslldq $8, T3 - psrldq $8, T2 - pxor T3, DATA - pxor T2, T1 # <T1:DATA> is result of - # carry-less multiplication - - # first phase of the reduction - movaps DATA, T3 - psllq $1, T3 - pxor DATA, T3 - psllq $5, T3 - pxor DATA, T3 - psllq $57, T3 - movaps T3, T2 - pslldq $8, T2 - psrldq $8, T3 - pxor T2, DATA - pxor T3, T1 - - # second phase of the reduction - movaps DATA, T2 - psrlq $5, T2 - pxor DATA, T2 - psrlq $1, T2 - pxor DATA, T2 - psrlq $1, T2 - pxor T2, T1 - pxor T1, DATA - ret - -/* void clmul_ghash_mul(char *dst, const be128 *shash) */ -ENTRY(clmul_ghash_mul) - movups (%rdi), DATA - movups (%rsi), SHASH - movaps .Lbswap_mask, BSWAP - PSHUFB_XMM BSWAP DATA - call __clmul_gf128mul_ble - PSHUFB_XMM BSWAP DATA - movups DATA, (%rdi) - ret - -/* - * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, - * const be128 *shash); - */ -ENTRY(clmul_ghash_update) - cmp $16, %rdx - jb .Lupdate_just_ret # check length - movaps .Lbswap_mask, BSWAP - movups (%rdi), DATA - movups (%rcx), SHASH - PSHUFB_XMM BSWAP DATA -.align 4 -.Lupdate_loop: - movups (%rsi), IN1 - PSHUFB_XMM BSWAP IN1 - pxor IN1, DATA - call __clmul_gf128mul_ble - sub $16, %rdx - add $16, %rsi - cmp $16, %rdx - jge .Lupdate_loop - PSHUFB_XMM BSWAP DATA - movups DATA, (%rdi) -.Lupdate_just_ret: - ret - -/* - * void clmul_ghash_setkey(be128 *shash, const u8 *key); - * - * Calculate hash_key << 1 mod poly - */ -ENTRY(clmul_ghash_setkey) - movaps .Lbswap_mask, BSWAP - movups (%rsi), %xmm0 - PSHUFB_XMM BSWAP %xmm0 - movaps %xmm0, %xmm1 - psllq $1, %xmm0 - psrlq $63, %xmm1 - movaps %xmm1, %xmm2 - pslldq $8, %xmm1 - psrldq $8, %xmm2 - por %xmm1, %xmm0 - # reduction - pshufd $0b00100100, %xmm2, %xmm1 - pcmpeqd .Ltwo_one, %xmm1 - pand .Lpoly, %xmm1 - pxor %xmm1, %xmm0 - movups %xmm0, (%rdi) - ret diff --git a/ANDROID_3.4.5/arch/x86/crypto/ghash-clmulni-intel_glue.c b/ANDROID_3.4.5/arch/x86/crypto/ghash-clmulni-intel_glue.c deleted file mode 100644 index b4bf0a63..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ /dev/null @@ -1,338 +0,0 @@ -/* - * Accelerated GHASH implementation with Intel PCLMULQDQ-NI - * instructions. This file contains glue code. - * - * Copyright (c) 2009 Intel Corp. - * Author: Huang Ying <ying.huang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. 
- */ - -#include <linux/err.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/crypto.h> -#include <crypto/algapi.h> -#include <crypto/cryptd.h> -#include <crypto/gf128mul.h> -#include <crypto/internal/hash.h> -#include <asm/i387.h> -#include <asm/cpu_device_id.h> - -#define GHASH_BLOCK_SIZE 16 -#define GHASH_DIGEST_SIZE 16 - -void clmul_ghash_mul(char *dst, const be128 *shash); - -void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, - const be128 *shash); - -void clmul_ghash_setkey(be128 *shash, const u8 *key); - -struct ghash_async_ctx { - struct cryptd_ahash *cryptd_tfm; -}; - -struct ghash_ctx { - be128 shash; -}; - -struct ghash_desc_ctx { - u8 buffer[GHASH_BLOCK_SIZE]; - u32 bytes; -}; - -static int ghash_init(struct shash_desc *desc) -{ - struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); - - memset(dctx, 0, sizeof(*dctx)); - - return 0; -} - -static int ghash_setkey(struct crypto_shash *tfm, - const u8 *key, unsigned int keylen) -{ - struct ghash_ctx *ctx = crypto_shash_ctx(tfm); - - if (keylen != GHASH_BLOCK_SIZE) { - crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - clmul_ghash_setkey(&ctx->shash, key); - - return 0; -} - -static int ghash_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen) -{ - struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); - struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); - u8 *dst = dctx->buffer; - - kernel_fpu_begin(); - if (dctx->bytes) { - int n = min(srclen, dctx->bytes); - u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes); - - dctx->bytes -= n; - srclen -= n; - - while (n--) - *pos++ ^= *src++; - - if (!dctx->bytes) - clmul_ghash_mul(dst, &ctx->shash); - } - - clmul_ghash_update(dst, src, srclen, &ctx->shash); - kernel_fpu_end(); - - if (srclen & 0xf) { - src += srclen - (srclen & 0xf); - srclen &= 0xf; - dctx->bytes = GHASH_BLOCK_SIZE - srclen; - while (srclen--) - *dst++ ^= *src++; - } - - return 0; -} - -static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) -{ - u8 *dst = dctx->buffer; - - if (dctx->bytes) { - u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes); - - while (dctx->bytes--) - *tmp++ ^= 0; - - kernel_fpu_begin(); - clmul_ghash_mul(dst, &ctx->shash); - kernel_fpu_end(); - } - - dctx->bytes = 0; -} - -static int ghash_final(struct shash_desc *desc, u8 *dst) -{ - struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); - struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); - u8 *buf = dctx->buffer; - - ghash_flush(ctx, dctx); - memcpy(dst, buf, GHASH_BLOCK_SIZE); - - return 0; -} - -static struct shash_alg ghash_alg = { - .digestsize = GHASH_DIGEST_SIZE, - .init = ghash_init, - .update = ghash_update, - .final = ghash_final, - .setkey = ghash_setkey, - .descsize = sizeof(struct ghash_desc_ctx), - .base = { - .cra_name = "__ghash", - .cra_driver_name = "__ghash-pclmulqdqni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, - .cra_blocksize = GHASH_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct ghash_ctx), - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list), - }, -}; - -static int ghash_async_init(struct ahash_request *req) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *cryptd_req = ahash_request_ctx(req); - struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - - if (!irq_fpu_usable()) { - memcpy(cryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(cryptd_req, 
&cryptd_tfm->base); - return crypto_ahash_init(cryptd_req); - } else { - struct shash_desc *desc = cryptd_shash_desc(cryptd_req); - struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); - - desc->tfm = child; - desc->flags = req->base.flags; - return crypto_shash_init(desc); - } -} - -static int ghash_async_update(struct ahash_request *req) -{ - struct ahash_request *cryptd_req = ahash_request_ctx(req); - - if (!irq_fpu_usable()) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); - struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - - memcpy(cryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); - return crypto_ahash_update(cryptd_req); - } else { - struct shash_desc *desc = cryptd_shash_desc(cryptd_req); - return shash_ahash_update(req, desc); - } -} - -static int ghash_async_final(struct ahash_request *req) -{ - struct ahash_request *cryptd_req = ahash_request_ctx(req); - - if (!irq_fpu_usable()) { - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); - struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - - memcpy(cryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); - return crypto_ahash_final(cryptd_req); - } else { - struct shash_desc *desc = cryptd_shash_desc(cryptd_req); - return crypto_shash_final(desc, req->result); - } -} - -static int ghash_async_digest(struct ahash_request *req) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *cryptd_req = ahash_request_ctx(req); - struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - - if (!irq_fpu_usable()) { - memcpy(cryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); - return crypto_ahash_digest(cryptd_req); - } else { - struct shash_desc *desc = cryptd_shash_desc(cryptd_req); - struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); - - desc->tfm = child; - desc->flags = req->base.flags; - return shash_ahash_digest(req, desc); - } -} - -static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, - unsigned int keylen) -{ - struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); - struct crypto_ahash *child = &ctx->cryptd_tfm->base; - int err; - - crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm) - & CRYPTO_TFM_REQ_MASK); - err = crypto_ahash_setkey(child, key, keylen); - crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child) - & CRYPTO_TFM_RES_MASK); - - return err; -} - -static int ghash_async_init_tfm(struct crypto_tfm *tfm) -{ - struct cryptd_ahash *cryptd_tfm; - struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); - - cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - ctx->cryptd_tfm = cryptd_tfm; - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct ahash_request) + - crypto_ahash_reqsize(&cryptd_tfm->base)); - - return 0; -} - -static void ghash_async_exit_tfm(struct crypto_tfm *tfm) -{ - struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); - - cryptd_free_ahash(ctx->cryptd_tfm); -} - -static struct ahash_alg ghash_async_alg = { - .init = ghash_async_init, - .update = ghash_async_update, - .final = ghash_async_final, - .setkey = ghash_async_setkey, - .digest = ghash_async_digest, - .halg = { - .digestsize = GHASH_DIGEST_SIZE, - .base = { - .cra_name = "ghash", - 
.cra_driver_name = "ghash-clmulni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, - .cra_blocksize = GHASH_BLOCK_SIZE, - .cra_type = &crypto_ahash_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list), - .cra_init = ghash_async_init_tfm, - .cra_exit = ghash_async_exit_tfm, - }, - }, -}; - -static const struct x86_cpu_id pcmul_cpu_id[] = { - X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */ - {} -}; -MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id); - -static int __init ghash_pclmulqdqni_mod_init(void) -{ - int err; - - if (!x86_match_cpu(pcmul_cpu_id)) - return -ENODEV; - - err = crypto_register_shash(&ghash_alg); - if (err) - goto err_out; - err = crypto_register_ahash(&ghash_async_alg); - if (err) - goto err_shash; - - return 0; - -err_shash: - crypto_unregister_shash(&ghash_alg); -err_out: - return err; -} - -static void __exit ghash_pclmulqdqni_mod_exit(void) -{ - crypto_unregister_ahash(&ghash_async_alg); - crypto_unregister_shash(&ghash_alg); -} - -module_init(ghash_pclmulqdqni_mod_init); -module_exit(ghash_pclmulqdqni_mod_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("GHASH Message Digest Algorithm, " - "accelerated by PCLMULQDQ-NI"); -MODULE_ALIAS("ghash"); diff --git a/ANDROID_3.4.5/arch/x86/crypto/salsa20-i586-asm_32.S b/ANDROID_3.4.5/arch/x86/crypto/salsa20-i586-asm_32.S deleted file mode 100644 index 72eb3066..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/salsa20-i586-asm_32.S +++ /dev/null @@ -1,1114 +0,0 @@ -# salsa20_pm.s version 20051229 -# D. J. Bernstein -# Public domain. - -# enter ECRYPT_encrypt_bytes -.text -.p2align 5 -.globl ECRYPT_encrypt_bytes -ECRYPT_encrypt_bytes: - mov %esp,%eax - and $31,%eax - add $256,%eax - sub %eax,%esp - # eax_stack = eax - movl %eax,80(%esp) - # ebx_stack = ebx - movl %ebx,84(%esp) - # esi_stack = esi - movl %esi,88(%esp) - # edi_stack = edi - movl %edi,92(%esp) - # ebp_stack = ebp - movl %ebp,96(%esp) - # x = arg1 - movl 4(%esp,%eax),%edx - # m = arg2 - movl 8(%esp,%eax),%esi - # out = arg3 - movl 12(%esp,%eax),%edi - # bytes = arg4 - movl 16(%esp,%eax),%ebx - # bytes -= 0 - sub $0,%ebx - # goto done if unsigned<= - jbe ._done -._start: - # in0 = *(uint32 *) (x + 0) - movl 0(%edx),%eax - # in1 = *(uint32 *) (x + 4) - movl 4(%edx),%ecx - # in2 = *(uint32 *) (x + 8) - movl 8(%edx),%ebp - # j0 = in0 - movl %eax,164(%esp) - # in3 = *(uint32 *) (x + 12) - movl 12(%edx),%eax - # j1 = in1 - movl %ecx,168(%esp) - # in4 = *(uint32 *) (x + 16) - movl 16(%edx),%ecx - # j2 = in2 - movl %ebp,172(%esp) - # in5 = *(uint32 *) (x + 20) - movl 20(%edx),%ebp - # j3 = in3 - movl %eax,176(%esp) - # in6 = *(uint32 *) (x + 24) - movl 24(%edx),%eax - # j4 = in4 - movl %ecx,180(%esp) - # in7 = *(uint32 *) (x + 28) - movl 28(%edx),%ecx - # j5 = in5 - movl %ebp,184(%esp) - # in8 = *(uint32 *) (x + 32) - movl 32(%edx),%ebp - # j6 = in6 - movl %eax,188(%esp) - # in9 = *(uint32 *) (x + 36) - movl 36(%edx),%eax - # j7 = in7 - movl %ecx,192(%esp) - # in10 = *(uint32 *) (x + 40) - movl 40(%edx),%ecx - # j8 = in8 - movl %ebp,196(%esp) - # in11 = *(uint32 *) (x + 44) - movl 44(%edx),%ebp - # j9 = in9 - movl %eax,200(%esp) - # in12 = *(uint32 *) (x + 48) - movl 48(%edx),%eax - # j10 = in10 - movl %ecx,204(%esp) - # in13 = *(uint32 *) (x + 52) - movl 52(%edx),%ecx - # j11 = in11 - movl %ebp,208(%esp) - # in14 = *(uint32 *) (x + 56) - movl 56(%edx),%ebp - # j12 = in12 - movl %eax,212(%esp) - # in15 = *(uint32 *) (x + 60) - movl 60(%edx),%eax - # j13 = in13 - movl %ecx,216(%esp) - # 
j14 = in14 - movl %ebp,220(%esp) - # j15 = in15 - movl %eax,224(%esp) - # x_backup = x - movl %edx,64(%esp) -._bytesatleast1: - # bytes - 64 - cmp $64,%ebx - # goto nocopy if unsigned>= - jae ._nocopy - # ctarget = out - movl %edi,228(%esp) - # out = &tmp - leal 0(%esp),%edi - # i = bytes - mov %ebx,%ecx - # while (i) { *out++ = *m++; --i } - rep movsb - # out = &tmp - leal 0(%esp),%edi - # m = &tmp - leal 0(%esp),%esi -._nocopy: - # out_backup = out - movl %edi,72(%esp) - # m_backup = m - movl %esi,68(%esp) - # bytes_backup = bytes - movl %ebx,76(%esp) - # in0 = j0 - movl 164(%esp),%eax - # in1 = j1 - movl 168(%esp),%ecx - # in2 = j2 - movl 172(%esp),%edx - # in3 = j3 - movl 176(%esp),%ebx - # x0 = in0 - movl %eax,100(%esp) - # x1 = in1 - movl %ecx,104(%esp) - # x2 = in2 - movl %edx,108(%esp) - # x3 = in3 - movl %ebx,112(%esp) - # in4 = j4 - movl 180(%esp),%eax - # in5 = j5 - movl 184(%esp),%ecx - # in6 = j6 - movl 188(%esp),%edx - # in7 = j7 - movl 192(%esp),%ebx - # x4 = in4 - movl %eax,116(%esp) - # x5 = in5 - movl %ecx,120(%esp) - # x6 = in6 - movl %edx,124(%esp) - # x7 = in7 - movl %ebx,128(%esp) - # in8 = j8 - movl 196(%esp),%eax - # in9 = j9 - movl 200(%esp),%ecx - # in10 = j10 - movl 204(%esp),%edx - # in11 = j11 - movl 208(%esp),%ebx - # x8 = in8 - movl %eax,132(%esp) - # x9 = in9 - movl %ecx,136(%esp) - # x10 = in10 - movl %edx,140(%esp) - # x11 = in11 - movl %ebx,144(%esp) - # in12 = j12 - movl 212(%esp),%eax - # in13 = j13 - movl 216(%esp),%ecx - # in14 = j14 - movl 220(%esp),%edx - # in15 = j15 - movl 224(%esp),%ebx - # x12 = in12 - movl %eax,148(%esp) - # x13 = in13 - movl %ecx,152(%esp) - # x14 = in14 - movl %edx,156(%esp) - # x15 = in15 - movl %ebx,160(%esp) - # i = 20 - mov $20,%ebp - # p = x0 - movl 100(%esp),%eax - # s = x5 - movl 120(%esp),%ecx - # t = x10 - movl 140(%esp),%edx - # w = x15 - movl 160(%esp),%ebx -._mainloop: - # x0 = p - movl %eax,100(%esp) - # x10 = t - movl %edx,140(%esp) - # p += x12 - addl 148(%esp),%eax - # x5 = s - movl %ecx,120(%esp) - # t += x6 - addl 124(%esp),%edx - # x15 = w - movl %ebx,160(%esp) - # r = x1 - movl 104(%esp),%esi - # r += s - add %ecx,%esi - # v = x11 - movl 144(%esp),%edi - # v += w - add %ebx,%edi - # p <<<= 7 - rol $7,%eax - # p ^= x4 - xorl 116(%esp),%eax - # t <<<= 7 - rol $7,%edx - # t ^= x14 - xorl 156(%esp),%edx - # r <<<= 7 - rol $7,%esi - # r ^= x9 - xorl 136(%esp),%esi - # v <<<= 7 - rol $7,%edi - # v ^= x3 - xorl 112(%esp),%edi - # x4 = p - movl %eax,116(%esp) - # x14 = t - movl %edx,156(%esp) - # p += x0 - addl 100(%esp),%eax - # x9 = r - movl %esi,136(%esp) - # t += x10 - addl 140(%esp),%edx - # x3 = v - movl %edi,112(%esp) - # p <<<= 9 - rol $9,%eax - # p ^= x8 - xorl 132(%esp),%eax - # t <<<= 9 - rol $9,%edx - # t ^= x2 - xorl 108(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 9 - rol $9,%ecx - # s ^= x13 - xorl 152(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 9 - rol $9,%ebx - # w ^= x7 - xorl 128(%esp),%ebx - # x8 = p - movl %eax,132(%esp) - # x2 = t - movl %edx,108(%esp) - # p += x4 - addl 116(%esp),%eax - # x13 = s - movl %ecx,152(%esp) - # t += x14 - addl 156(%esp),%edx - # x7 = w - movl %ebx,128(%esp) - # p <<<= 13 - rol $13,%eax - # p ^= x12 - xorl 148(%esp),%eax - # t <<<= 13 - rol $13,%edx - # t ^= x6 - xorl 124(%esp),%edx - # r += s - add %ecx,%esi - # r <<<= 13 - rol $13,%esi - # r ^= x1 - xorl 104(%esp),%esi - # v += w - add %ebx,%edi - # v <<<= 13 - rol $13,%edi - # v ^= x11 - xorl 144(%esp),%edi - # x12 = p - movl %eax,148(%esp) - # x6 = t - movl %edx,124(%esp) - # p += x8 - addl 
132(%esp),%eax - # x1 = r - movl %esi,104(%esp) - # t += x2 - addl 108(%esp),%edx - # x11 = v - movl %edi,144(%esp) - # p <<<= 18 - rol $18,%eax - # p ^= x0 - xorl 100(%esp),%eax - # t <<<= 18 - rol $18,%edx - # t ^= x10 - xorl 140(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 18 - rol $18,%ecx - # s ^= x5 - xorl 120(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 18 - rol $18,%ebx - # w ^= x15 - xorl 160(%esp),%ebx - # x0 = p - movl %eax,100(%esp) - # x10 = t - movl %edx,140(%esp) - # p += x3 - addl 112(%esp),%eax - # p <<<= 7 - rol $7,%eax - # x5 = s - movl %ecx,120(%esp) - # t += x9 - addl 136(%esp),%edx - # x15 = w - movl %ebx,160(%esp) - # r = x4 - movl 116(%esp),%esi - # r += s - add %ecx,%esi - # v = x14 - movl 156(%esp),%edi - # v += w - add %ebx,%edi - # p ^= x1 - xorl 104(%esp),%eax - # t <<<= 7 - rol $7,%edx - # t ^= x11 - xorl 144(%esp),%edx - # r <<<= 7 - rol $7,%esi - # r ^= x6 - xorl 124(%esp),%esi - # v <<<= 7 - rol $7,%edi - # v ^= x12 - xorl 148(%esp),%edi - # x1 = p - movl %eax,104(%esp) - # x11 = t - movl %edx,144(%esp) - # p += x0 - addl 100(%esp),%eax - # x6 = r - movl %esi,124(%esp) - # t += x10 - addl 140(%esp),%edx - # x12 = v - movl %edi,148(%esp) - # p <<<= 9 - rol $9,%eax - # p ^= x2 - xorl 108(%esp),%eax - # t <<<= 9 - rol $9,%edx - # t ^= x8 - xorl 132(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 9 - rol $9,%ecx - # s ^= x7 - xorl 128(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 9 - rol $9,%ebx - # w ^= x13 - xorl 152(%esp),%ebx - # x2 = p - movl %eax,108(%esp) - # x8 = t - movl %edx,132(%esp) - # p += x1 - addl 104(%esp),%eax - # x7 = s - movl %ecx,128(%esp) - # t += x11 - addl 144(%esp),%edx - # x13 = w - movl %ebx,152(%esp) - # p <<<= 13 - rol $13,%eax - # p ^= x3 - xorl 112(%esp),%eax - # t <<<= 13 - rol $13,%edx - # t ^= x9 - xorl 136(%esp),%edx - # r += s - add %ecx,%esi - # r <<<= 13 - rol $13,%esi - # r ^= x4 - xorl 116(%esp),%esi - # v += w - add %ebx,%edi - # v <<<= 13 - rol $13,%edi - # v ^= x14 - xorl 156(%esp),%edi - # x3 = p - movl %eax,112(%esp) - # x9 = t - movl %edx,136(%esp) - # p += x2 - addl 108(%esp),%eax - # x4 = r - movl %esi,116(%esp) - # t += x8 - addl 132(%esp),%edx - # x14 = v - movl %edi,156(%esp) - # p <<<= 18 - rol $18,%eax - # p ^= x0 - xorl 100(%esp),%eax - # t <<<= 18 - rol $18,%edx - # t ^= x10 - xorl 140(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 18 - rol $18,%ecx - # s ^= x5 - xorl 120(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 18 - rol $18,%ebx - # w ^= x15 - xorl 160(%esp),%ebx - # x0 = p - movl %eax,100(%esp) - # x10 = t - movl %edx,140(%esp) - # p += x12 - addl 148(%esp),%eax - # x5 = s - movl %ecx,120(%esp) - # t += x6 - addl 124(%esp),%edx - # x15 = w - movl %ebx,160(%esp) - # r = x1 - movl 104(%esp),%esi - # r += s - add %ecx,%esi - # v = x11 - movl 144(%esp),%edi - # v += w - add %ebx,%edi - # p <<<= 7 - rol $7,%eax - # p ^= x4 - xorl 116(%esp),%eax - # t <<<= 7 - rol $7,%edx - # t ^= x14 - xorl 156(%esp),%edx - # r <<<= 7 - rol $7,%esi - # r ^= x9 - xorl 136(%esp),%esi - # v <<<= 7 - rol $7,%edi - # v ^= x3 - xorl 112(%esp),%edi - # x4 = p - movl %eax,116(%esp) - # x14 = t - movl %edx,156(%esp) - # p += x0 - addl 100(%esp),%eax - # x9 = r - movl %esi,136(%esp) - # t += x10 - addl 140(%esp),%edx - # x3 = v - movl %edi,112(%esp) - # p <<<= 9 - rol $9,%eax - # p ^= x8 - xorl 132(%esp),%eax - # t <<<= 9 - rol $9,%edx - # t ^= x2 - xorl 108(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 9 - rol $9,%ecx - # s ^= x13 - xorl 152(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 9 - rol $9,%ebx - # w 
^= x7 - xorl 128(%esp),%ebx - # x8 = p - movl %eax,132(%esp) - # x2 = t - movl %edx,108(%esp) - # p += x4 - addl 116(%esp),%eax - # x13 = s - movl %ecx,152(%esp) - # t += x14 - addl 156(%esp),%edx - # x7 = w - movl %ebx,128(%esp) - # p <<<= 13 - rol $13,%eax - # p ^= x12 - xorl 148(%esp),%eax - # t <<<= 13 - rol $13,%edx - # t ^= x6 - xorl 124(%esp),%edx - # r += s - add %ecx,%esi - # r <<<= 13 - rol $13,%esi - # r ^= x1 - xorl 104(%esp),%esi - # v += w - add %ebx,%edi - # v <<<= 13 - rol $13,%edi - # v ^= x11 - xorl 144(%esp),%edi - # x12 = p - movl %eax,148(%esp) - # x6 = t - movl %edx,124(%esp) - # p += x8 - addl 132(%esp),%eax - # x1 = r - movl %esi,104(%esp) - # t += x2 - addl 108(%esp),%edx - # x11 = v - movl %edi,144(%esp) - # p <<<= 18 - rol $18,%eax - # p ^= x0 - xorl 100(%esp),%eax - # t <<<= 18 - rol $18,%edx - # t ^= x10 - xorl 140(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 18 - rol $18,%ecx - # s ^= x5 - xorl 120(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 18 - rol $18,%ebx - # w ^= x15 - xorl 160(%esp),%ebx - # x0 = p - movl %eax,100(%esp) - # x10 = t - movl %edx,140(%esp) - # p += x3 - addl 112(%esp),%eax - # p <<<= 7 - rol $7,%eax - # x5 = s - movl %ecx,120(%esp) - # t += x9 - addl 136(%esp),%edx - # x15 = w - movl %ebx,160(%esp) - # r = x4 - movl 116(%esp),%esi - # r += s - add %ecx,%esi - # v = x14 - movl 156(%esp),%edi - # v += w - add %ebx,%edi - # p ^= x1 - xorl 104(%esp),%eax - # t <<<= 7 - rol $7,%edx - # t ^= x11 - xorl 144(%esp),%edx - # r <<<= 7 - rol $7,%esi - # r ^= x6 - xorl 124(%esp),%esi - # v <<<= 7 - rol $7,%edi - # v ^= x12 - xorl 148(%esp),%edi - # x1 = p - movl %eax,104(%esp) - # x11 = t - movl %edx,144(%esp) - # p += x0 - addl 100(%esp),%eax - # x6 = r - movl %esi,124(%esp) - # t += x10 - addl 140(%esp),%edx - # x12 = v - movl %edi,148(%esp) - # p <<<= 9 - rol $9,%eax - # p ^= x2 - xorl 108(%esp),%eax - # t <<<= 9 - rol $9,%edx - # t ^= x8 - xorl 132(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 9 - rol $9,%ecx - # s ^= x7 - xorl 128(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 9 - rol $9,%ebx - # w ^= x13 - xorl 152(%esp),%ebx - # x2 = p - movl %eax,108(%esp) - # x8 = t - movl %edx,132(%esp) - # p += x1 - addl 104(%esp),%eax - # x7 = s - movl %ecx,128(%esp) - # t += x11 - addl 144(%esp),%edx - # x13 = w - movl %ebx,152(%esp) - # p <<<= 13 - rol $13,%eax - # p ^= x3 - xorl 112(%esp),%eax - # t <<<= 13 - rol $13,%edx - # t ^= x9 - xorl 136(%esp),%edx - # r += s - add %ecx,%esi - # r <<<= 13 - rol $13,%esi - # r ^= x4 - xorl 116(%esp),%esi - # v += w - add %ebx,%edi - # v <<<= 13 - rol $13,%edi - # v ^= x14 - xorl 156(%esp),%edi - # x3 = p - movl %eax,112(%esp) - # x9 = t - movl %edx,136(%esp) - # p += x2 - addl 108(%esp),%eax - # x4 = r - movl %esi,116(%esp) - # t += x8 - addl 132(%esp),%edx - # x14 = v - movl %edi,156(%esp) - # p <<<= 18 - rol $18,%eax - # p ^= x0 - xorl 100(%esp),%eax - # t <<<= 18 - rol $18,%edx - # t ^= x10 - xorl 140(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 18 - rol $18,%ecx - # s ^= x5 - xorl 120(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 18 - rol $18,%ebx - # w ^= x15 - xorl 160(%esp),%ebx - # i -= 4 - sub $4,%ebp - # goto mainloop if unsigned > - ja ._mainloop - # x0 = p - movl %eax,100(%esp) - # x5 = s - movl %ecx,120(%esp) - # x10 = t - movl %edx,140(%esp) - # x15 = w - movl %ebx,160(%esp) - # out = out_backup - movl 72(%esp),%edi - # m = m_backup - movl 68(%esp),%esi - # in0 = x0 - movl 100(%esp),%eax - # in1 = x1 - movl 104(%esp),%ecx - # in0 += j0 - addl 164(%esp),%eax - # in1 += j1 - addl 
168(%esp),%ecx - # in0 ^= *(uint32 *) (m + 0) - xorl 0(%esi),%eax - # in1 ^= *(uint32 *) (m + 4) - xorl 4(%esi),%ecx - # *(uint32 *) (out + 0) = in0 - movl %eax,0(%edi) - # *(uint32 *) (out + 4) = in1 - movl %ecx,4(%edi) - # in2 = x2 - movl 108(%esp),%eax - # in3 = x3 - movl 112(%esp),%ecx - # in2 += j2 - addl 172(%esp),%eax - # in3 += j3 - addl 176(%esp),%ecx - # in2 ^= *(uint32 *) (m + 8) - xorl 8(%esi),%eax - # in3 ^= *(uint32 *) (m + 12) - xorl 12(%esi),%ecx - # *(uint32 *) (out + 8) = in2 - movl %eax,8(%edi) - # *(uint32 *) (out + 12) = in3 - movl %ecx,12(%edi) - # in4 = x4 - movl 116(%esp),%eax - # in5 = x5 - movl 120(%esp),%ecx - # in4 += j4 - addl 180(%esp),%eax - # in5 += j5 - addl 184(%esp),%ecx - # in4 ^= *(uint32 *) (m + 16) - xorl 16(%esi),%eax - # in5 ^= *(uint32 *) (m + 20) - xorl 20(%esi),%ecx - # *(uint32 *) (out + 16) = in4 - movl %eax,16(%edi) - # *(uint32 *) (out + 20) = in5 - movl %ecx,20(%edi) - # in6 = x6 - movl 124(%esp),%eax - # in7 = x7 - movl 128(%esp),%ecx - # in6 += j6 - addl 188(%esp),%eax - # in7 += j7 - addl 192(%esp),%ecx - # in6 ^= *(uint32 *) (m + 24) - xorl 24(%esi),%eax - # in7 ^= *(uint32 *) (m + 28) - xorl 28(%esi),%ecx - # *(uint32 *) (out + 24) = in6 - movl %eax,24(%edi) - # *(uint32 *) (out + 28) = in7 - movl %ecx,28(%edi) - # in8 = x8 - movl 132(%esp),%eax - # in9 = x9 - movl 136(%esp),%ecx - # in8 += j8 - addl 196(%esp),%eax - # in9 += j9 - addl 200(%esp),%ecx - # in8 ^= *(uint32 *) (m + 32) - xorl 32(%esi),%eax - # in9 ^= *(uint32 *) (m + 36) - xorl 36(%esi),%ecx - # *(uint32 *) (out + 32) = in8 - movl %eax,32(%edi) - # *(uint32 *) (out + 36) = in9 - movl %ecx,36(%edi) - # in10 = x10 - movl 140(%esp),%eax - # in11 = x11 - movl 144(%esp),%ecx - # in10 += j10 - addl 204(%esp),%eax - # in11 += j11 - addl 208(%esp),%ecx - # in10 ^= *(uint32 *) (m + 40) - xorl 40(%esi),%eax - # in11 ^= *(uint32 *) (m + 44) - xorl 44(%esi),%ecx - # *(uint32 *) (out + 40) = in10 - movl %eax,40(%edi) - # *(uint32 *) (out + 44) = in11 - movl %ecx,44(%edi) - # in12 = x12 - movl 148(%esp),%eax - # in13 = x13 - movl 152(%esp),%ecx - # in12 += j12 - addl 212(%esp),%eax - # in13 += j13 - addl 216(%esp),%ecx - # in12 ^= *(uint32 *) (m + 48) - xorl 48(%esi),%eax - # in13 ^= *(uint32 *) (m + 52) - xorl 52(%esi),%ecx - # *(uint32 *) (out + 48) = in12 - movl %eax,48(%edi) - # *(uint32 *) (out + 52) = in13 - movl %ecx,52(%edi) - # in14 = x14 - movl 156(%esp),%eax - # in15 = x15 - movl 160(%esp),%ecx - # in14 += j14 - addl 220(%esp),%eax - # in15 += j15 - addl 224(%esp),%ecx - # in14 ^= *(uint32 *) (m + 56) - xorl 56(%esi),%eax - # in15 ^= *(uint32 *) (m + 60) - xorl 60(%esi),%ecx - # *(uint32 *) (out + 56) = in14 - movl %eax,56(%edi) - # *(uint32 *) (out + 60) = in15 - movl %ecx,60(%edi) - # bytes = bytes_backup - movl 76(%esp),%ebx - # in8 = j8 - movl 196(%esp),%eax - # in9 = j9 - movl 200(%esp),%ecx - # in8 += 1 - add $1,%eax - # in9 += 0 + carry - adc $0,%ecx - # j8 = in8 - movl %eax,196(%esp) - # j9 = in9 - movl %ecx,200(%esp) - # bytes - 64 - cmp $64,%ebx - # goto bytesatleast65 if unsigned> - ja ._bytesatleast65 - # goto bytesatleast64 if unsigned>= - jae ._bytesatleast64 - # m = out - mov %edi,%esi - # out = ctarget - movl 228(%esp),%edi - # i = bytes - mov %ebx,%ecx - # while (i) { *out++ = *m++; --i } - rep movsb -._bytesatleast64: - # x = x_backup - movl 64(%esp),%eax - # in8 = j8 - movl 196(%esp),%ecx - # in9 = j9 - movl 200(%esp),%edx - # *(uint32 *) (x + 32) = in8 - movl %ecx,32(%eax) - # *(uint32 *) (x + 36) = in9 - movl %edx,36(%eax) -._done: - # eax = eax_stack - 
movl 80(%esp),%eax - # ebx = ebx_stack - movl 84(%esp),%ebx - # esi = esi_stack - movl 88(%esp),%esi - # edi = edi_stack - movl 92(%esp),%edi - # ebp = ebp_stack - movl 96(%esp),%ebp - # leave - add %eax,%esp - ret -._bytesatleast65: - # bytes -= 64 - sub $64,%ebx - # out += 64 - add $64,%edi - # m += 64 - add $64,%esi - # goto bytesatleast1 - jmp ._bytesatleast1 -# enter ECRYPT_keysetup -.text -.p2align 5 -.globl ECRYPT_keysetup -ECRYPT_keysetup: - mov %esp,%eax - and $31,%eax - add $256,%eax - sub %eax,%esp - # eax_stack = eax - movl %eax,64(%esp) - # ebx_stack = ebx - movl %ebx,68(%esp) - # esi_stack = esi - movl %esi,72(%esp) - # edi_stack = edi - movl %edi,76(%esp) - # ebp_stack = ebp - movl %ebp,80(%esp) - # k = arg2 - movl 8(%esp,%eax),%ecx - # kbits = arg3 - movl 12(%esp,%eax),%edx - # x = arg1 - movl 4(%esp,%eax),%eax - # in1 = *(uint32 *) (k + 0) - movl 0(%ecx),%ebx - # in2 = *(uint32 *) (k + 4) - movl 4(%ecx),%esi - # in3 = *(uint32 *) (k + 8) - movl 8(%ecx),%edi - # in4 = *(uint32 *) (k + 12) - movl 12(%ecx),%ebp - # *(uint32 *) (x + 4) = in1 - movl %ebx,4(%eax) - # *(uint32 *) (x + 8) = in2 - movl %esi,8(%eax) - # *(uint32 *) (x + 12) = in3 - movl %edi,12(%eax) - # *(uint32 *) (x + 16) = in4 - movl %ebp,16(%eax) - # kbits - 256 - cmp $256,%edx - # goto kbits128 if unsigned< - jb ._kbits128 -._kbits256: - # in11 = *(uint32 *) (k + 16) - movl 16(%ecx),%edx - # in12 = *(uint32 *) (k + 20) - movl 20(%ecx),%ebx - # in13 = *(uint32 *) (k + 24) - movl 24(%ecx),%esi - # in14 = *(uint32 *) (k + 28) - movl 28(%ecx),%ecx - # *(uint32 *) (x + 44) = in11 - movl %edx,44(%eax) - # *(uint32 *) (x + 48) = in12 - movl %ebx,48(%eax) - # *(uint32 *) (x + 52) = in13 - movl %esi,52(%eax) - # *(uint32 *) (x + 56) = in14 - movl %ecx,56(%eax) - # in0 = 1634760805 - mov $1634760805,%ecx - # in5 = 857760878 - mov $857760878,%edx - # in10 = 2036477234 - mov $2036477234,%ebx - # in15 = 1797285236 - mov $1797285236,%esi - # *(uint32 *) (x + 0) = in0 - movl %ecx,0(%eax) - # *(uint32 *) (x + 20) = in5 - movl %edx,20(%eax) - # *(uint32 *) (x + 40) = in10 - movl %ebx,40(%eax) - # *(uint32 *) (x + 60) = in15 - movl %esi,60(%eax) - # goto keysetupdone - jmp ._keysetupdone -._kbits128: - # in11 = *(uint32 *) (k + 0) - movl 0(%ecx),%edx - # in12 = *(uint32 *) (k + 4) - movl 4(%ecx),%ebx - # in13 = *(uint32 *) (k + 8) - movl 8(%ecx),%esi - # in14 = *(uint32 *) (k + 12) - movl 12(%ecx),%ecx - # *(uint32 *) (x + 44) = in11 - movl %edx,44(%eax) - # *(uint32 *) (x + 48) = in12 - movl %ebx,48(%eax) - # *(uint32 *) (x + 52) = in13 - movl %esi,52(%eax) - # *(uint32 *) (x + 56) = in14 - movl %ecx,56(%eax) - # in0 = 1634760805 - mov $1634760805,%ecx - # in5 = 824206446 - mov $824206446,%edx - # in10 = 2036477238 - mov $2036477238,%ebx - # in15 = 1797285236 - mov $1797285236,%esi - # *(uint32 *) (x + 0) = in0 - movl %ecx,0(%eax) - # *(uint32 *) (x + 20) = in5 - movl %edx,20(%eax) - # *(uint32 *) (x + 40) = in10 - movl %ebx,40(%eax) - # *(uint32 *) (x + 60) = in15 - movl %esi,60(%eax) -._keysetupdone: - # eax = eax_stack - movl 64(%esp),%eax - # ebx = ebx_stack - movl 68(%esp),%ebx - # esi = esi_stack - movl 72(%esp),%esi - # edi = edi_stack - movl 76(%esp),%edi - # ebp = ebp_stack - movl 80(%esp),%ebp - # leave - add %eax,%esp - ret -# enter ECRYPT_ivsetup -.text -.p2align 5 -.globl ECRYPT_ivsetup -ECRYPT_ivsetup: - mov %esp,%eax - and $31,%eax - add $256,%eax - sub %eax,%esp - # eax_stack = eax - movl %eax,64(%esp) - # ebx_stack = ebx - movl %ebx,68(%esp) - # esi_stack = esi - movl %esi,72(%esp) - # edi_stack = edi - movl 
%edi,76(%esp) - # ebp_stack = ebp - movl %ebp,80(%esp) - # iv = arg2 - movl 8(%esp,%eax),%ecx - # x = arg1 - movl 4(%esp,%eax),%eax - # in6 = *(uint32 *) (iv + 0) - movl 0(%ecx),%edx - # in7 = *(uint32 *) (iv + 4) - movl 4(%ecx),%ecx - # in8 = 0 - mov $0,%ebx - # in9 = 0 - mov $0,%esi - # *(uint32 *) (x + 24) = in6 - movl %edx,24(%eax) - # *(uint32 *) (x + 28) = in7 - movl %ecx,28(%eax) - # *(uint32 *) (x + 32) = in8 - movl %ebx,32(%eax) - # *(uint32 *) (x + 36) = in9 - movl %esi,36(%eax) - # eax = eax_stack - movl 64(%esp),%eax - # ebx = ebx_stack - movl 68(%esp),%ebx - # esi = esi_stack - movl 72(%esp),%esi - # edi = edi_stack - movl 76(%esp),%edi - # ebp = ebp_stack - movl 80(%esp),%ebp - # leave - add %eax,%esp - ret diff --git a/ANDROID_3.4.5/arch/x86/crypto/salsa20-x86_64-asm_64.S b/ANDROID_3.4.5/arch/x86/crypto/salsa20-x86_64-asm_64.S deleted file mode 100644 index 6214a9b0..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/salsa20-x86_64-asm_64.S +++ /dev/null @@ -1,920 +0,0 @@ -# enter ECRYPT_encrypt_bytes -.text -.p2align 5 -.globl ECRYPT_encrypt_bytes -ECRYPT_encrypt_bytes: - mov %rsp,%r11 - and $31,%r11 - add $256,%r11 - sub %r11,%rsp - # x = arg1 - mov %rdi,%r8 - # m = arg2 - mov %rsi,%rsi - # out = arg3 - mov %rdx,%rdi - # bytes = arg4 - mov %rcx,%rdx - # unsigned>? bytes - 0 - cmp $0,%rdx - # comment:fp stack unchanged by jump - # goto done if !unsigned> - jbe ._done - # comment:fp stack unchanged by fallthrough -# start: -._start: - # r11_stack = r11 - movq %r11,0(%rsp) - # r12_stack = r12 - movq %r12,8(%rsp) - # r13_stack = r13 - movq %r13,16(%rsp) - # r14_stack = r14 - movq %r14,24(%rsp) - # r15_stack = r15 - movq %r15,32(%rsp) - # rbx_stack = rbx - movq %rbx,40(%rsp) - # rbp_stack = rbp - movq %rbp,48(%rsp) - # in0 = *(uint64 *) (x + 0) - movq 0(%r8),%rcx - # in2 = *(uint64 *) (x + 8) - movq 8(%r8),%r9 - # in4 = *(uint64 *) (x + 16) - movq 16(%r8),%rax - # in6 = *(uint64 *) (x + 24) - movq 24(%r8),%r10 - # in8 = *(uint64 *) (x + 32) - movq 32(%r8),%r11 - # in10 = *(uint64 *) (x + 40) - movq 40(%r8),%r12 - # in12 = *(uint64 *) (x + 48) - movq 48(%r8),%r13 - # in14 = *(uint64 *) (x + 56) - movq 56(%r8),%r14 - # j0 = in0 - movq %rcx,56(%rsp) - # j2 = in2 - movq %r9,64(%rsp) - # j4 = in4 - movq %rax,72(%rsp) - # j6 = in6 - movq %r10,80(%rsp) - # j8 = in8 - movq %r11,88(%rsp) - # j10 = in10 - movq %r12,96(%rsp) - # j12 = in12 - movq %r13,104(%rsp) - # j14 = in14 - movq %r14,112(%rsp) - # x_backup = x - movq %r8,120(%rsp) -# bytesatleast1: -._bytesatleast1: - # unsigned<? 
bytes - 64 - cmp $64,%rdx - # comment:fp stack unchanged by jump - # goto nocopy if !unsigned< - jae ._nocopy - # ctarget = out - movq %rdi,128(%rsp) - # out = &tmp - leaq 192(%rsp),%rdi - # i = bytes - mov %rdx,%rcx - # while (i) { *out++ = *m++; --i } - rep movsb - # out = &tmp - leaq 192(%rsp),%rdi - # m = &tmp - leaq 192(%rsp),%rsi - # comment:fp stack unchanged by fallthrough -# nocopy: -._nocopy: - # out_backup = out - movq %rdi,136(%rsp) - # m_backup = m - movq %rsi,144(%rsp) - # bytes_backup = bytes - movq %rdx,152(%rsp) - # x1 = j0 - movq 56(%rsp),%rdi - # x0 = x1 - mov %rdi,%rdx - # (uint64) x1 >>= 32 - shr $32,%rdi - # x3 = j2 - movq 64(%rsp),%rsi - # x2 = x3 - mov %rsi,%rcx - # (uint64) x3 >>= 32 - shr $32,%rsi - # x5 = j4 - movq 72(%rsp),%r8 - # x4 = x5 - mov %r8,%r9 - # (uint64) x5 >>= 32 - shr $32,%r8 - # x5_stack = x5 - movq %r8,160(%rsp) - # x7 = j6 - movq 80(%rsp),%r8 - # x6 = x7 - mov %r8,%rax - # (uint64) x7 >>= 32 - shr $32,%r8 - # x9 = j8 - movq 88(%rsp),%r10 - # x8 = x9 - mov %r10,%r11 - # (uint64) x9 >>= 32 - shr $32,%r10 - # x11 = j10 - movq 96(%rsp),%r12 - # x10 = x11 - mov %r12,%r13 - # x10_stack = x10 - movq %r13,168(%rsp) - # (uint64) x11 >>= 32 - shr $32,%r12 - # x13 = j12 - movq 104(%rsp),%r13 - # x12 = x13 - mov %r13,%r14 - # (uint64) x13 >>= 32 - shr $32,%r13 - # x15 = j14 - movq 112(%rsp),%r15 - # x14 = x15 - mov %r15,%rbx - # (uint64) x15 >>= 32 - shr $32,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # i = 20 - mov $20,%r15 -# mainloop: -._mainloop: - # i_backup = i - movq %r15,184(%rsp) - # x5 = x5_stack - movq 160(%rsp),%r15 - # a = x12 + x0 - lea (%r14,%rdx),%rbp - # (uint32) a <<<= 7 - rol $7,%ebp - # x4 ^= a - xor %rbp,%r9 - # b = x1 + x5 - lea (%rdi,%r15),%rbp - # (uint32) b <<<= 7 - rol $7,%ebp - # x9 ^= b - xor %rbp,%r10 - # a = x0 + x4 - lea (%rdx,%r9),%rbp - # (uint32) a <<<= 9 - rol $9,%ebp - # x8 ^= a - xor %rbp,%r11 - # b = x5 + x9 - lea (%r15,%r10),%rbp - # (uint32) b <<<= 9 - rol $9,%ebp - # x13 ^= b - xor %rbp,%r13 - # a = x4 + x8 - lea (%r9,%r11),%rbp - # (uint32) a <<<= 13 - rol $13,%ebp - # x12 ^= a - xor %rbp,%r14 - # b = x9 + x13 - lea (%r10,%r13),%rbp - # (uint32) b <<<= 13 - rol $13,%ebp - # x1 ^= b - xor %rbp,%rdi - # a = x8 + x12 - lea (%r11,%r14),%rbp - # (uint32) a <<<= 18 - rol $18,%ebp - # x0 ^= a - xor %rbp,%rdx - # b = x13 + x1 - lea (%r13,%rdi),%rbp - # (uint32) b <<<= 18 - rol $18,%ebp - # x5 ^= b - xor %rbp,%r15 - # x10 = x10_stack - movq 168(%rsp),%rbp - # x5_stack = x5 - movq %r15,160(%rsp) - # c = x6 + x10 - lea (%rax,%rbp),%r15 - # (uint32) c <<<= 7 - rol $7,%r15d - # x14 ^= c - xor %r15,%rbx - # c = x10 + x14 - lea (%rbp,%rbx),%r15 - # (uint32) c <<<= 9 - rol $9,%r15d - # x2 ^= c - xor %r15,%rcx - # c = x14 + x2 - lea (%rbx,%rcx),%r15 - # (uint32) c <<<= 13 - rol $13,%r15d - # x6 ^= c - xor %r15,%rax - # c = x2 + x6 - lea (%rcx,%rax),%r15 - # (uint32) c <<<= 18 - rol $18,%r15d - # x10 ^= c - xor %r15,%rbp - # x15 = x15_stack - movq 176(%rsp),%r15 - # x10_stack = x10 - movq %rbp,168(%rsp) - # d = x11 + x15 - lea (%r12,%r15),%rbp - # (uint32) d <<<= 7 - rol $7,%ebp - # x3 ^= d - xor %rbp,%rsi - # d = x15 + x3 - lea (%r15,%rsi),%rbp - # (uint32) d <<<= 9 - rol $9,%ebp - # x7 ^= d - xor %rbp,%r8 - # d = x3 + x7 - lea (%rsi,%r8),%rbp - # (uint32) d <<<= 13 - rol $13,%ebp - # x11 ^= d - xor %rbp,%r12 - # d = x7 + x11 - lea (%r8,%r12),%rbp - # (uint32) d <<<= 18 - rol $18,%ebp - # x15 ^= d - xor %rbp,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # x5 = x5_stack - movq 160(%rsp),%r15 - # a = x3 + x0 - lea 
(%rsi,%rdx),%rbp - # (uint32) a <<<= 7 - rol $7,%ebp - # x1 ^= a - xor %rbp,%rdi - # b = x4 + x5 - lea (%r9,%r15),%rbp - # (uint32) b <<<= 7 - rol $7,%ebp - # x6 ^= b - xor %rbp,%rax - # a = x0 + x1 - lea (%rdx,%rdi),%rbp - # (uint32) a <<<= 9 - rol $9,%ebp - # x2 ^= a - xor %rbp,%rcx - # b = x5 + x6 - lea (%r15,%rax),%rbp - # (uint32) b <<<= 9 - rol $9,%ebp - # x7 ^= b - xor %rbp,%r8 - # a = x1 + x2 - lea (%rdi,%rcx),%rbp - # (uint32) a <<<= 13 - rol $13,%ebp - # x3 ^= a - xor %rbp,%rsi - # b = x6 + x7 - lea (%rax,%r8),%rbp - # (uint32) b <<<= 13 - rol $13,%ebp - # x4 ^= b - xor %rbp,%r9 - # a = x2 + x3 - lea (%rcx,%rsi),%rbp - # (uint32) a <<<= 18 - rol $18,%ebp - # x0 ^= a - xor %rbp,%rdx - # b = x7 + x4 - lea (%r8,%r9),%rbp - # (uint32) b <<<= 18 - rol $18,%ebp - # x5 ^= b - xor %rbp,%r15 - # x10 = x10_stack - movq 168(%rsp),%rbp - # x5_stack = x5 - movq %r15,160(%rsp) - # c = x9 + x10 - lea (%r10,%rbp),%r15 - # (uint32) c <<<= 7 - rol $7,%r15d - # x11 ^= c - xor %r15,%r12 - # c = x10 + x11 - lea (%rbp,%r12),%r15 - # (uint32) c <<<= 9 - rol $9,%r15d - # x8 ^= c - xor %r15,%r11 - # c = x11 + x8 - lea (%r12,%r11),%r15 - # (uint32) c <<<= 13 - rol $13,%r15d - # x9 ^= c - xor %r15,%r10 - # c = x8 + x9 - lea (%r11,%r10),%r15 - # (uint32) c <<<= 18 - rol $18,%r15d - # x10 ^= c - xor %r15,%rbp - # x15 = x15_stack - movq 176(%rsp),%r15 - # x10_stack = x10 - movq %rbp,168(%rsp) - # d = x14 + x15 - lea (%rbx,%r15),%rbp - # (uint32) d <<<= 7 - rol $7,%ebp - # x12 ^= d - xor %rbp,%r14 - # d = x15 + x12 - lea (%r15,%r14),%rbp - # (uint32) d <<<= 9 - rol $9,%ebp - # x13 ^= d - xor %rbp,%r13 - # d = x12 + x13 - lea (%r14,%r13),%rbp - # (uint32) d <<<= 13 - rol $13,%ebp - # x14 ^= d - xor %rbp,%rbx - # d = x13 + x14 - lea (%r13,%rbx),%rbp - # (uint32) d <<<= 18 - rol $18,%ebp - # x15 ^= d - xor %rbp,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # x5 = x5_stack - movq 160(%rsp),%r15 - # a = x12 + x0 - lea (%r14,%rdx),%rbp - # (uint32) a <<<= 7 - rol $7,%ebp - # x4 ^= a - xor %rbp,%r9 - # b = x1 + x5 - lea (%rdi,%r15),%rbp - # (uint32) b <<<= 7 - rol $7,%ebp - # x9 ^= b - xor %rbp,%r10 - # a = x0 + x4 - lea (%rdx,%r9),%rbp - # (uint32) a <<<= 9 - rol $9,%ebp - # x8 ^= a - xor %rbp,%r11 - # b = x5 + x9 - lea (%r15,%r10),%rbp - # (uint32) b <<<= 9 - rol $9,%ebp - # x13 ^= b - xor %rbp,%r13 - # a = x4 + x8 - lea (%r9,%r11),%rbp - # (uint32) a <<<= 13 - rol $13,%ebp - # x12 ^= a - xor %rbp,%r14 - # b = x9 + x13 - lea (%r10,%r13),%rbp - # (uint32) b <<<= 13 - rol $13,%ebp - # x1 ^= b - xor %rbp,%rdi - # a = x8 + x12 - lea (%r11,%r14),%rbp - # (uint32) a <<<= 18 - rol $18,%ebp - # x0 ^= a - xor %rbp,%rdx - # b = x13 + x1 - lea (%r13,%rdi),%rbp - # (uint32) b <<<= 18 - rol $18,%ebp - # x5 ^= b - xor %rbp,%r15 - # x10 = x10_stack - movq 168(%rsp),%rbp - # x5_stack = x5 - movq %r15,160(%rsp) - # c = x6 + x10 - lea (%rax,%rbp),%r15 - # (uint32) c <<<= 7 - rol $7,%r15d - # x14 ^= c - xor %r15,%rbx - # c = x10 + x14 - lea (%rbp,%rbx),%r15 - # (uint32) c <<<= 9 - rol $9,%r15d - # x2 ^= c - xor %r15,%rcx - # c = x14 + x2 - lea (%rbx,%rcx),%r15 - # (uint32) c <<<= 13 - rol $13,%r15d - # x6 ^= c - xor %r15,%rax - # c = x2 + x6 - lea (%rcx,%rax),%r15 - # (uint32) c <<<= 18 - rol $18,%r15d - # x10 ^= c - xor %r15,%rbp - # x15 = x15_stack - movq 176(%rsp),%r15 - # x10_stack = x10 - movq %rbp,168(%rsp) - # d = x11 + x15 - lea (%r12,%r15),%rbp - # (uint32) d <<<= 7 - rol $7,%ebp - # x3 ^= d - xor %rbp,%rsi - # d = x15 + x3 - lea (%r15,%rsi),%rbp - # (uint32) d <<<= 9 - rol $9,%ebp - # x7 ^= d - xor %rbp,%r8 - # d = x3 + 
x7 - lea (%rsi,%r8),%rbp - # (uint32) d <<<= 13 - rol $13,%ebp - # x11 ^= d - xor %rbp,%r12 - # d = x7 + x11 - lea (%r8,%r12),%rbp - # (uint32) d <<<= 18 - rol $18,%ebp - # x15 ^= d - xor %rbp,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # x5 = x5_stack - movq 160(%rsp),%r15 - # a = x3 + x0 - lea (%rsi,%rdx),%rbp - # (uint32) a <<<= 7 - rol $7,%ebp - # x1 ^= a - xor %rbp,%rdi - # b = x4 + x5 - lea (%r9,%r15),%rbp - # (uint32) b <<<= 7 - rol $7,%ebp - # x6 ^= b - xor %rbp,%rax - # a = x0 + x1 - lea (%rdx,%rdi),%rbp - # (uint32) a <<<= 9 - rol $9,%ebp - # x2 ^= a - xor %rbp,%rcx - # b = x5 + x6 - lea (%r15,%rax),%rbp - # (uint32) b <<<= 9 - rol $9,%ebp - # x7 ^= b - xor %rbp,%r8 - # a = x1 + x2 - lea (%rdi,%rcx),%rbp - # (uint32) a <<<= 13 - rol $13,%ebp - # x3 ^= a - xor %rbp,%rsi - # b = x6 + x7 - lea (%rax,%r8),%rbp - # (uint32) b <<<= 13 - rol $13,%ebp - # x4 ^= b - xor %rbp,%r9 - # a = x2 + x3 - lea (%rcx,%rsi),%rbp - # (uint32) a <<<= 18 - rol $18,%ebp - # x0 ^= a - xor %rbp,%rdx - # b = x7 + x4 - lea (%r8,%r9),%rbp - # (uint32) b <<<= 18 - rol $18,%ebp - # x5 ^= b - xor %rbp,%r15 - # x10 = x10_stack - movq 168(%rsp),%rbp - # x5_stack = x5 - movq %r15,160(%rsp) - # c = x9 + x10 - lea (%r10,%rbp),%r15 - # (uint32) c <<<= 7 - rol $7,%r15d - # x11 ^= c - xor %r15,%r12 - # c = x10 + x11 - lea (%rbp,%r12),%r15 - # (uint32) c <<<= 9 - rol $9,%r15d - # x8 ^= c - xor %r15,%r11 - # c = x11 + x8 - lea (%r12,%r11),%r15 - # (uint32) c <<<= 13 - rol $13,%r15d - # x9 ^= c - xor %r15,%r10 - # c = x8 + x9 - lea (%r11,%r10),%r15 - # (uint32) c <<<= 18 - rol $18,%r15d - # x10 ^= c - xor %r15,%rbp - # x15 = x15_stack - movq 176(%rsp),%r15 - # x10_stack = x10 - movq %rbp,168(%rsp) - # d = x14 + x15 - lea (%rbx,%r15),%rbp - # (uint32) d <<<= 7 - rol $7,%ebp - # x12 ^= d - xor %rbp,%r14 - # d = x15 + x12 - lea (%r15,%r14),%rbp - # (uint32) d <<<= 9 - rol $9,%ebp - # x13 ^= d - xor %rbp,%r13 - # d = x12 + x13 - lea (%r14,%r13),%rbp - # (uint32) d <<<= 13 - rol $13,%ebp - # x14 ^= d - xor %rbp,%rbx - # d = x13 + x14 - lea (%r13,%rbx),%rbp - # (uint32) d <<<= 18 - rol $18,%ebp - # x15 ^= d - xor %rbp,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # i = i_backup - movq 184(%rsp),%r15 - # unsigned>? 
i -= 4 - sub $4,%r15 - # comment:fp stack unchanged by jump - # goto mainloop if unsigned> - ja ._mainloop - # (uint32) x2 += j2 - addl 64(%rsp),%ecx - # x3 <<= 32 - shl $32,%rsi - # x3 += j2 - addq 64(%rsp),%rsi - # (uint64) x3 >>= 32 - shr $32,%rsi - # x3 <<= 32 - shl $32,%rsi - # x2 += x3 - add %rsi,%rcx - # (uint32) x6 += j6 - addl 80(%rsp),%eax - # x7 <<= 32 - shl $32,%r8 - # x7 += j6 - addq 80(%rsp),%r8 - # (uint64) x7 >>= 32 - shr $32,%r8 - # x7 <<= 32 - shl $32,%r8 - # x6 += x7 - add %r8,%rax - # (uint32) x8 += j8 - addl 88(%rsp),%r11d - # x9 <<= 32 - shl $32,%r10 - # x9 += j8 - addq 88(%rsp),%r10 - # (uint64) x9 >>= 32 - shr $32,%r10 - # x9 <<= 32 - shl $32,%r10 - # x8 += x9 - add %r10,%r11 - # (uint32) x12 += j12 - addl 104(%rsp),%r14d - # x13 <<= 32 - shl $32,%r13 - # x13 += j12 - addq 104(%rsp),%r13 - # (uint64) x13 >>= 32 - shr $32,%r13 - # x13 <<= 32 - shl $32,%r13 - # x12 += x13 - add %r13,%r14 - # (uint32) x0 += j0 - addl 56(%rsp),%edx - # x1 <<= 32 - shl $32,%rdi - # x1 += j0 - addq 56(%rsp),%rdi - # (uint64) x1 >>= 32 - shr $32,%rdi - # x1 <<= 32 - shl $32,%rdi - # x0 += x1 - add %rdi,%rdx - # x5 = x5_stack - movq 160(%rsp),%rdi - # (uint32) x4 += j4 - addl 72(%rsp),%r9d - # x5 <<= 32 - shl $32,%rdi - # x5 += j4 - addq 72(%rsp),%rdi - # (uint64) x5 >>= 32 - shr $32,%rdi - # x5 <<= 32 - shl $32,%rdi - # x4 += x5 - add %rdi,%r9 - # x10 = x10_stack - movq 168(%rsp),%r8 - # (uint32) x10 += j10 - addl 96(%rsp),%r8d - # x11 <<= 32 - shl $32,%r12 - # x11 += j10 - addq 96(%rsp),%r12 - # (uint64) x11 >>= 32 - shr $32,%r12 - # x11 <<= 32 - shl $32,%r12 - # x10 += x11 - add %r12,%r8 - # x15 = x15_stack - movq 176(%rsp),%rdi - # (uint32) x14 += j14 - addl 112(%rsp),%ebx - # x15 <<= 32 - shl $32,%rdi - # x15 += j14 - addq 112(%rsp),%rdi - # (uint64) x15 >>= 32 - shr $32,%rdi - # x15 <<= 32 - shl $32,%rdi - # x14 += x15 - add %rdi,%rbx - # out = out_backup - movq 136(%rsp),%rdi - # m = m_backup - movq 144(%rsp),%rsi - # x0 ^= *(uint64 *) (m + 0) - xorq 0(%rsi),%rdx - # *(uint64 *) (out + 0) = x0 - movq %rdx,0(%rdi) - # x2 ^= *(uint64 *) (m + 8) - xorq 8(%rsi),%rcx - # *(uint64 *) (out + 8) = x2 - movq %rcx,8(%rdi) - # x4 ^= *(uint64 *) (m + 16) - xorq 16(%rsi),%r9 - # *(uint64 *) (out + 16) = x4 - movq %r9,16(%rdi) - # x6 ^= *(uint64 *) (m + 24) - xorq 24(%rsi),%rax - # *(uint64 *) (out + 24) = x6 - movq %rax,24(%rdi) - # x8 ^= *(uint64 *) (m + 32) - xorq 32(%rsi),%r11 - # *(uint64 *) (out + 32) = x8 - movq %r11,32(%rdi) - # x10 ^= *(uint64 *) (m + 40) - xorq 40(%rsi),%r8 - # *(uint64 *) (out + 40) = x10 - movq %r8,40(%rdi) - # x12 ^= *(uint64 *) (m + 48) - xorq 48(%rsi),%r14 - # *(uint64 *) (out + 48) = x12 - movq %r14,48(%rdi) - # x14 ^= *(uint64 *) (m + 56) - xorq 56(%rsi),%rbx - # *(uint64 *) (out + 56) = x14 - movq %rbx,56(%rdi) - # bytes = bytes_backup - movq 152(%rsp),%rdx - # in8 = j8 - movq 88(%rsp),%rcx - # in8 += 1 - add $1,%rcx - # j8 = in8 - movq %rcx,88(%rsp) - # unsigned>? unsigned<? 
bytes - 64 - cmp $64,%rdx - # comment:fp stack unchanged by jump - # goto bytesatleast65 if unsigned> - ja ._bytesatleast65 - # comment:fp stack unchanged by jump - # goto bytesatleast64 if !unsigned< - jae ._bytesatleast64 - # m = out - mov %rdi,%rsi - # out = ctarget - movq 128(%rsp),%rdi - # i = bytes - mov %rdx,%rcx - # while (i) { *out++ = *m++; --i } - rep movsb - # comment:fp stack unchanged by fallthrough -# bytesatleast64: -._bytesatleast64: - # x = x_backup - movq 120(%rsp),%rdi - # in8 = j8 - movq 88(%rsp),%rsi - # *(uint64 *) (x + 32) = in8 - movq %rsi,32(%rdi) - # r11 = r11_stack - movq 0(%rsp),%r11 - # r12 = r12_stack - movq 8(%rsp),%r12 - # r13 = r13_stack - movq 16(%rsp),%r13 - # r14 = r14_stack - movq 24(%rsp),%r14 - # r15 = r15_stack - movq 32(%rsp),%r15 - # rbx = rbx_stack - movq 40(%rsp),%rbx - # rbp = rbp_stack - movq 48(%rsp),%rbp - # comment:fp stack unchanged by fallthrough -# done: -._done: - # leave - add %r11,%rsp - mov %rdi,%rax - mov %rsi,%rdx - ret -# bytesatleast65: -._bytesatleast65: - # bytes -= 64 - sub $64,%rdx - # out += 64 - add $64,%rdi - # m += 64 - add $64,%rsi - # comment:fp stack unchanged by jump - # goto bytesatleast1 - jmp ._bytesatleast1 -# enter ECRYPT_keysetup -.text -.p2align 5 -.globl ECRYPT_keysetup -ECRYPT_keysetup: - mov %rsp,%r11 - and $31,%r11 - add $256,%r11 - sub %r11,%rsp - # k = arg2 - mov %rsi,%rsi - # kbits = arg3 - mov %rdx,%rdx - # x = arg1 - mov %rdi,%rdi - # in0 = *(uint64 *) (k + 0) - movq 0(%rsi),%r8 - # in2 = *(uint64 *) (k + 8) - movq 8(%rsi),%r9 - # *(uint64 *) (x + 4) = in0 - movq %r8,4(%rdi) - # *(uint64 *) (x + 12) = in2 - movq %r9,12(%rdi) - # unsigned<? kbits - 256 - cmp $256,%rdx - # comment:fp stack unchanged by jump - # goto kbits128 if unsigned< - jb ._kbits128 -# kbits256: -._kbits256: - # in10 = *(uint64 *) (k + 16) - movq 16(%rsi),%rdx - # in12 = *(uint64 *) (k + 24) - movq 24(%rsi),%rsi - # *(uint64 *) (x + 44) = in10 - movq %rdx,44(%rdi) - # *(uint64 *) (x + 52) = in12 - movq %rsi,52(%rdi) - # in0 = 1634760805 - mov $1634760805,%rsi - # in4 = 857760878 - mov $857760878,%rdx - # in10 = 2036477234 - mov $2036477234,%rcx - # in14 = 1797285236 - mov $1797285236,%r8 - # *(uint32 *) (x + 0) = in0 - movl %esi,0(%rdi) - # *(uint32 *) (x + 20) = in4 - movl %edx,20(%rdi) - # *(uint32 *) (x + 40) = in10 - movl %ecx,40(%rdi) - # *(uint32 *) (x + 60) = in14 - movl %r8d,60(%rdi) - # comment:fp stack unchanged by jump - # goto keysetupdone - jmp ._keysetupdone -# kbits128: -._kbits128: - # in10 = *(uint64 *) (k + 0) - movq 0(%rsi),%rdx - # in12 = *(uint64 *) (k + 8) - movq 8(%rsi),%rsi - # *(uint64 *) (x + 44) = in10 - movq %rdx,44(%rdi) - # *(uint64 *) (x + 52) = in12 - movq %rsi,52(%rdi) - # in0 = 1634760805 - mov $1634760805,%rsi - # in4 = 824206446 - mov $824206446,%rdx - # in10 = 2036477238 - mov $2036477238,%rcx - # in14 = 1797285236 - mov $1797285236,%r8 - # *(uint32 *) (x + 0) = in0 - movl %esi,0(%rdi) - # *(uint32 *) (x + 20) = in4 - movl %edx,20(%rdi) - # *(uint32 *) (x + 40) = in10 - movl %ecx,40(%rdi) - # *(uint32 *) (x + 60) = in14 - movl %r8d,60(%rdi) -# keysetupdone: -._keysetupdone: - # leave - add %r11,%rsp - mov %rdi,%rax - mov %rsi,%rdx - ret -# enter ECRYPT_ivsetup -.text -.p2align 5 -.globl ECRYPT_ivsetup -ECRYPT_ivsetup: - mov %rsp,%r11 - and $31,%r11 - add $256,%r11 - sub %r11,%rsp - # iv = arg2 - mov %rsi,%rsi - # x = arg1 - mov %rdi,%rdi - # in6 = *(uint64 *) (iv + 0) - movq 0(%rsi),%rsi - # in8 = 0 - mov $0,%r8 - # *(uint64 *) (x + 24) = in6 - movq %rsi,24(%rdi) - # *(uint64 *) (x + 32) = in8 - 
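
[Note on the Salsa20 core above.] The lea/rol/xor triples in the main loop are a fully unrolled Salsa20 double round: lea performs the 32-bit add, rol the rotate, and xor folds the result back into the state word. As a reference, one quarter-round in portable C might look like this (a sketch, not the kernel's code):

	#include <stdint.h>

	#define ROTL32(v, n)	(((v) << (n)) | ((v) >> (32 - (n))))

	/* Each line mirrors one lea/rol/xor group above, e.g. the
	 * "a = x12 + x0; a <<<= 7; x4 ^= a" sequence. */
	static void salsa20_quarterround(uint32_t *a, uint32_t *b,
					 uint32_t *c, uint32_t *d)
	{
		*b ^= ROTL32(*a + *d, 7);
		*c ^= ROTL32(*b + *a, 9);
		*d ^= ROTL32(*c + *b, 13);
		*a ^= ROTL32(*d + *c, 18);
	}

The immediates stored by ECRYPT_keysetup are not arbitrary: 1634760805, 857760878, 2036477234 and 1797285236 are the little-endian words of "expa", "nd 3", "2-by" and "te k" (sigma, "expand 32-byte k" for 256-bit keys), while the 128-bit path substitutes 824206446 ("nd 1") and 2036477238 ("6-by") to form tau, "expand 16-byte k".
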
movq %r8,32(%rdi) - # leave - add %r11,%rsp - mov %rdi,%rax - mov %rsi,%rdx - ret diff --git a/ANDROID_3.4.5/arch/x86/crypto/salsa20_glue.c b/ANDROID_3.4.5/arch/x86/crypto/salsa20_glue.c deleted file mode 100644 index bccb76d8..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/salsa20_glue.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Glue code for optimized assembly version of Salsa20. - * - * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com> - * - * The assembly codes are public domain assembly codes written by Daniel. J. - * Bernstein <djb@cr.yp.to>. The codes are modified to include indentation - * and to remove extraneous comments and functions that are not needed. - * - i586 version, renamed as salsa20-i586-asm_32.S - * available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s> - * - x86-64 version, renamed as salsa20-x86_64-asm_64.S - * available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ - -#include <crypto/algapi.h> -#include <linux/module.h> -#include <linux/crypto.h> - -#define SALSA20_IV_SIZE 8U -#define SALSA20_MIN_KEY_SIZE 16U -#define SALSA20_MAX_KEY_SIZE 32U - -// use the ECRYPT_* function names -#define salsa20_keysetup ECRYPT_keysetup -#define salsa20_ivsetup ECRYPT_ivsetup -#define salsa20_encrypt_bytes ECRYPT_encrypt_bytes - -struct salsa20_ctx -{ - u32 input[16]; -}; - -asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k, - u32 keysize, u32 ivsize); -asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv); -asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx, - const u8 *src, u8 *dst, u32 bytes); - -static int setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keysize) -{ - struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm); - salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8); - return 0; -} - -static int encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct blkcipher_walk walk; - struct crypto_blkcipher *tfm = desc->tfm; - struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm); - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, 64); - - salsa20_ivsetup(ctx, walk.iv); - - if (likely(walk.nbytes == nbytes)) - { - salsa20_encrypt_bytes(ctx, walk.src.virt.addr, - walk.dst.virt.addr, nbytes); - return blkcipher_walk_done(desc, &walk, 0); - } - - while (walk.nbytes >= 64) { - salsa20_encrypt_bytes(ctx, walk.src.virt.addr, - walk.dst.virt.addr, - walk.nbytes - (walk.nbytes % 64)); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64); - } - - if (walk.nbytes) { - salsa20_encrypt_bytes(ctx, walk.src.virt.addr, - walk.dst.virt.addr, walk.nbytes); - err = blkcipher_walk_done(desc, &walk, 0); - } - - return err; -} - -static struct crypto_alg alg = { - .cra_name = "salsa20", - .cra_driver_name = "salsa20-asm", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_type = &crypto_blkcipher_type, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct salsa20_ctx), - .cra_alignmask = 3, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), - .cra_u = { - .blkcipher = { - .setkey = setkey, - .encrypt = encrypt, - .decrypt = encrypt, - .min_keysize = SALSA20_MIN_KEY_SIZE, - .max_keysize 
= SALSA20_MAX_KEY_SIZE, - .ivsize = SALSA20_IV_SIZE, - } - } -}; - -static int __init init(void) -{ - return crypto_register_alg(&alg); -} - -static void __exit fini(void) -{ - crypto_unregister_alg(&alg); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)"); -MODULE_ALIAS("salsa20"); -MODULE_ALIAS("salsa20-asm"); diff --git a/ANDROID_3.4.5/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/ANDROID_3.4.5/arch/x86/crypto/serpent-sse2-i586-asm_32.S deleted file mode 100644 index c00053d4..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/serpent-sse2-i586-asm_32.S +++ /dev/null @@ -1,635 +0,0 @@ -/* - * Serpent Cipher 4-way parallel algorithm (i586/SSE2) - * - * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> - * - * Based on crypto/serpent.c by - * Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no> - * 2003 Herbert Valerio Riedel <hvr@gnu.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - */ - -.file "serpent-sse2-i586-asm_32.S" -.text - -#define arg_ctx 4 -#define arg_dst 8 -#define arg_src 12 -#define arg_xor 16 - -/********************************************************************** - 4-way SSE2 serpent - **********************************************************************/ -#define CTX %edx - -#define RA %xmm0 -#define RB %xmm1 -#define RC %xmm2 -#define RD %xmm3 -#define RE %xmm4 - -#define RT0 %xmm5 -#define RT1 %xmm6 - -#define RNOT %xmm7 - -#define get_key(i, j, t) \ - movd (4*(i)+(j))*4(CTX), t; \ - pshufd $0, t, t; - -#define K(x0, x1, x2, x3, x4, i) \ - get_key(i, 0, x4); \ - get_key(i, 1, RT0); \ - get_key(i, 2, RT1); \ - pxor x4, x0; \ - pxor RT0, x1; \ - pxor RT1, x2; \ - get_key(i, 3, x4); \ - pxor x4, x3; - -#define LK(x0, x1, x2, x3, x4, i) \ - movdqa x0, x4; \ - pslld $13, x0; \ - psrld $(32 - 13), x4; \ - por x4, x0; \ - pxor x0, x1; \ - movdqa x2, x4; \ - pslld $3, x2; \ - psrld $(32 - 3), x4; \ - por x4, x2; \ - pxor x2, x1; \ - movdqa x1, x4; \ - pslld $1, x1; \ - psrld $(32 - 1), x4; \ - por x4, x1; \ - movdqa x0, x4; \ - pslld $3, x4; \ - pxor x2, x3; \ - pxor x4, x3; \ - movdqa x3, x4; \ - pslld $7, x3; \ - psrld $(32 - 7), x4; \ - por x4, x3; \ - movdqa x1, x4; \ - pslld $7, x4; \ - pxor x1, x0; \ - pxor x3, x0; \ - pxor x3, x2; \ - pxor x4, x2; \ - movdqa x0, x4; \ - get_key(i, 1, RT0); \ - pxor RT0, x1; \ - get_key(i, 3, RT0); \ - pxor RT0, x3; \ - pslld $5, x0; \ - psrld $(32 - 5), x4; \ - por x4, x0; \ - movdqa x2, x4; \ - pslld $22, x2; \ - psrld $(32 - 22), x4; \ - por x4, x2; \ - get_key(i, 0, RT0); \ - pxor RT0, x0; \ - get_key(i, 2, RT0); \ - pxor RT0, x2; - -#define KL(x0, x1, x2, x3, x4, i) \ - K(x0, x1, x2, x3, x4, i); \ - movdqa x0, x4; \ - psrld $5, x0; \ - pslld $(32 - 5), x4; \ - por x4, x0; \ - movdqa x2, x4; \ - psrld $22, x2; \ - pslld $(32 - 22), x4; \ - por 
x4, x2; \ - pxor x3, x2; \ - pxor x3, x0; \ - movdqa x1, x4; \ - pslld $7, x4; \ - pxor x1, x0; \ - pxor x4, x2; \ - movdqa x1, x4; \ - psrld $1, x1; \ - pslld $(32 - 1), x4; \ - por x4, x1; \ - movdqa x3, x4; \ - psrld $7, x3; \ - pslld $(32 - 7), x4; \ - por x4, x3; \ - pxor x0, x1; \ - movdqa x0, x4; \ - pslld $3, x4; \ - pxor x4, x3; \ - movdqa x0, x4; \ - psrld $13, x0; \ - pslld $(32 - 13), x4; \ - por x4, x0; \ - pxor x2, x1; \ - pxor x2, x3; \ - movdqa x2, x4; \ - psrld $3, x2; \ - pslld $(32 - 3), x4; \ - por x4, x2; - -#define S0(x0, x1, x2, x3, x4) \ - movdqa x3, x4; \ - por x0, x3; \ - pxor x4, x0; \ - pxor x2, x4; \ - pxor RNOT, x4; \ - pxor x1, x3; \ - pand x0, x1; \ - pxor x4, x1; \ - pxor x0, x2; \ - pxor x3, x0; \ - por x0, x4; \ - pxor x2, x0; \ - pand x1, x2; \ - pxor x2, x3; \ - pxor RNOT, x1; \ - pxor x4, x2; \ - pxor x2, x1; - -#define S1(x0, x1, x2, x3, x4) \ - movdqa x1, x4; \ - pxor x0, x1; \ - pxor x3, x0; \ - pxor RNOT, x3; \ - pand x1, x4; \ - por x1, x0; \ - pxor x2, x3; \ - pxor x3, x0; \ - pxor x3, x1; \ - pxor x4, x3; \ - por x4, x1; \ - pxor x2, x4; \ - pand x0, x2; \ - pxor x1, x2; \ - por x0, x1; \ - pxor RNOT, x0; \ - pxor x2, x0; \ - pxor x1, x4; - -#define S2(x0, x1, x2, x3, x4) \ - pxor RNOT, x3; \ - pxor x0, x1; \ - movdqa x0, x4; \ - pand x2, x0; \ - pxor x3, x0; \ - por x4, x3; \ - pxor x1, x2; \ - pxor x1, x3; \ - pand x0, x1; \ - pxor x2, x0; \ - pand x3, x2; \ - por x1, x3; \ - pxor RNOT, x0; \ - pxor x0, x3; \ - pxor x0, x4; \ - pxor x2, x0; \ - por x2, x1; - -#define S3(x0, x1, x2, x3, x4) \ - movdqa x1, x4; \ - pxor x3, x1; \ - por x0, x3; \ - pand x0, x4; \ - pxor x2, x0; \ - pxor x1, x2; \ - pand x3, x1; \ - pxor x3, x2; \ - por x4, x0; \ - pxor x3, x4; \ - pxor x0, x1; \ - pand x3, x0; \ - pand x4, x3; \ - pxor x2, x3; \ - por x1, x4; \ - pand x1, x2; \ - pxor x3, x4; \ - pxor x3, x0; \ - pxor x2, x3; - -#define S4(x0, x1, x2, x3, x4) \ - movdqa x3, x4; \ - pand x0, x3; \ - pxor x4, x0; \ - pxor x2, x3; \ - por x4, x2; \ - pxor x1, x0; \ - pxor x3, x4; \ - por x0, x2; \ - pxor x1, x2; \ - pand x0, x1; \ - pxor x4, x1; \ - pand x2, x4; \ - pxor x3, x2; \ - pxor x0, x4; \ - por x1, x3; \ - pxor RNOT, x1; \ - pxor x0, x3; - -#define S5(x0, x1, x2, x3, x4) \ - movdqa x1, x4; \ - por x0, x1; \ - pxor x1, x2; \ - pxor RNOT, x3; \ - pxor x0, x4; \ - pxor x2, x0; \ - pand x4, x1; \ - por x3, x4; \ - pxor x0, x4; \ - pand x3, x0; \ - pxor x3, x1; \ - pxor x2, x3; \ - pxor x1, x0; \ - pand x4, x2; \ - pxor x2, x1; \ - pand x0, x2; \ - pxor x2, x3; - -#define S6(x0, x1, x2, x3, x4) \ - movdqa x1, x4; \ - pxor x0, x3; \ - pxor x2, x1; \ - pxor x0, x2; \ - pand x3, x0; \ - por x3, x1; \ - pxor RNOT, x4; \ - pxor x1, x0; \ - pxor x2, x1; \ - pxor x4, x3; \ - pxor x0, x4; \ - pand x0, x2; \ - pxor x1, x4; \ - pxor x3, x2; \ - pand x1, x3; \ - pxor x0, x3; \ - pxor x2, x1; - -#define S7(x0, x1, x2, x3, x4) \ - pxor RNOT, x1; \ - movdqa x1, x4; \ - pxor RNOT, x0; \ - pand x2, x1; \ - pxor x3, x1; \ - por x4, x3; \ - pxor x2, x4; \ - pxor x3, x2; \ - pxor x0, x3; \ - por x1, x0; \ - pand x0, x2; \ - pxor x4, x0; \ - pxor x3, x4; \ - pand x0, x3; \ - pxor x1, x4; \ - pxor x4, x2; \ - pxor x1, x3; \ - por x0, x4; \ - pxor x1, x4; - -#define SI0(x0, x1, x2, x3, x4) \ - movdqa x3, x4; \ - pxor x0, x1; \ - por x1, x3; \ - pxor x1, x4; \ - pxor RNOT, x0; \ - pxor x3, x2; \ - pxor x0, x3; \ - pand x1, x0; \ - pxor x2, x0; \ - pand x3, x2; \ - pxor x4, x3; \ - pxor x3, x2; \ - pxor x3, x1; \ - pand x0, x3; \ - pxor x0, x1; \ - pxor x2, x0; \ - pxor x3, x4; - 
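
[Note on the S-box macros.] The S0..S7 macros above and the inverse SI macros around them are Serpent's bitsliced S-boxes: once read_blocks (defined further below) has transposed the four input blocks, each xmm register holds the same state word of all four blocks, so a single run of the boolean network evaluates the 4-bit S-box for every bit position of all four blocks in parallel. A scalar C transcription of S0 makes the network easier to read (a sketch; AT&T operand order is "op src, dst", and RNOT is all-ones, so pxor RNOT is bitwise NOT):

	#include <stdint.h>

	/* Scalar model of the S0 macro above; x4 is scratch. */
	static void serpent_s0(uint32_t *x0, uint32_t *x1, uint32_t *x2,
			       uint32_t *x3, uint32_t *x4)
	{
		*x4 = *x3;	/* movdqa x3, x4   */
		*x3 |= *x0;	/* por    x0, x3   */
		*x0 ^= *x4;	/* pxor   x4, x0   */
		*x4 ^= *x2;	/* pxor   x2, x4   */
		*x4 = ~*x4;	/* pxor   RNOT, x4 */
		*x3 ^= *x1;	/* pxor   x1, x3   */
		*x1 &= *x0;	/* pand   x0, x1   */
		*x1 ^= *x4;	/* pxor   x4, x1   */
		*x2 ^= *x0;	/* pxor   x0, x2   */
		*x0 ^= *x3;	/* pxor   x3, x0   */
		*x4 |= *x0;	/* por    x0, x4   */
		*x0 ^= *x2;	/* pxor   x2, x0   */
		*x2 &= *x1;	/* pand   x1, x2   */
		*x3 ^= *x2;	/* pxor   x2, x3   */
		*x1 = ~*x1;	/* pxor   RNOT, x1 */
		*x2 ^= *x4;	/* pxor   x4, x2   */
		*x1 ^= *x2;	/* pxor   x2, x1   */
	}
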
-#define SI1(x0, x1, x2, x3, x4) \ - pxor x3, x1; \ - movdqa x0, x4; \ - pxor x2, x0; \ - pxor RNOT, x2; \ - por x1, x4; \ - pxor x3, x4; \ - pand x1, x3; \ - pxor x2, x1; \ - pand x4, x2; \ - pxor x1, x4; \ - por x3, x1; \ - pxor x0, x3; \ - pxor x0, x2; \ - por x4, x0; \ - pxor x4, x2; \ - pxor x0, x1; \ - pxor x1, x4; - -#define SI2(x0, x1, x2, x3, x4) \ - pxor x1, x2; \ - movdqa x3, x4; \ - pxor RNOT, x3; \ - por x2, x3; \ - pxor x4, x2; \ - pxor x0, x4; \ - pxor x1, x3; \ - por x2, x1; \ - pxor x0, x2; \ - pxor x4, x1; \ - por x3, x4; \ - pxor x3, x2; \ - pxor x2, x4; \ - pand x1, x2; \ - pxor x3, x2; \ - pxor x4, x3; \ - pxor x0, x4; - -#define SI3(x0, x1, x2, x3, x4) \ - pxor x1, x2; \ - movdqa x1, x4; \ - pand x2, x1; \ - pxor x0, x1; \ - por x4, x0; \ - pxor x3, x4; \ - pxor x3, x0; \ - por x1, x3; \ - pxor x2, x1; \ - pxor x3, x1; \ - pxor x2, x0; \ - pxor x3, x2; \ - pand x1, x3; \ - pxor x0, x1; \ - pand x2, x0; \ - pxor x3, x4; \ - pxor x0, x3; \ - pxor x1, x0; - -#define SI4(x0, x1, x2, x3, x4) \ - pxor x3, x2; \ - movdqa x0, x4; \ - pand x1, x0; \ - pxor x2, x0; \ - por x3, x2; \ - pxor RNOT, x4; \ - pxor x0, x1; \ - pxor x2, x0; \ - pand x4, x2; \ - pxor x0, x2; \ - por x4, x0; \ - pxor x3, x0; \ - pand x2, x3; \ - pxor x3, x4; \ - pxor x1, x3; \ - pand x0, x1; \ - pxor x1, x4; \ - pxor x3, x0; - -#define SI5(x0, x1, x2, x3, x4) \ - movdqa x1, x4; \ - por x2, x1; \ - pxor x4, x2; \ - pxor x3, x1; \ - pand x4, x3; \ - pxor x3, x2; \ - por x0, x3; \ - pxor RNOT, x0; \ - pxor x2, x3; \ - por x0, x2; \ - pxor x1, x4; \ - pxor x4, x2; \ - pand x0, x4; \ - pxor x1, x0; \ - pxor x3, x1; \ - pand x2, x0; \ - pxor x3, x2; \ - pxor x2, x0; \ - pxor x4, x2; \ - pxor x3, x4; - -#define SI6(x0, x1, x2, x3, x4) \ - pxor x2, x0; \ - movdqa x0, x4; \ - pand x3, x0; \ - pxor x3, x2; \ - pxor x2, x0; \ - pxor x1, x3; \ - por x4, x2; \ - pxor x3, x2; \ - pand x0, x3; \ - pxor RNOT, x0; \ - pxor x1, x3; \ - pand x2, x1; \ - pxor x0, x4; \ - pxor x4, x3; \ - pxor x2, x4; \ - pxor x1, x0; \ - pxor x0, x2; - -#define SI7(x0, x1, x2, x3, x4) \ - movdqa x3, x4; \ - pand x0, x3; \ - pxor x2, x0; \ - por x4, x2; \ - pxor x1, x4; \ - pxor RNOT, x0; \ - por x3, x1; \ - pxor x0, x4; \ - pand x2, x0; \ - pxor x1, x0; \ - pand x2, x1; \ - pxor x2, x3; \ - pxor x3, x4; \ - pand x3, x2; \ - por x0, x3; \ - pxor x4, x1; \ - pxor x4, x3; \ - pand x0, x4; \ - pxor x2, x4; - -#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ - movdqa x0, t2; \ - punpckldq x1, x0; \ - punpckhdq x1, t2; \ - movdqa x2, t1; \ - punpckhdq x3, x2; \ - punpckldq x3, t1; \ - movdqa x0, x1; \ - punpcklqdq t1, x0; \ - punpckhqdq t1, x1; \ - movdqa t2, x3; \ - punpcklqdq x2, t2; \ - punpckhqdq x2, x3; \ - movdqa t2, x2; - -#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ - movdqu (0*4*4)(in), x0; \ - movdqu (1*4*4)(in), x1; \ - movdqu (2*4*4)(in), x2; \ - movdqu (3*4*4)(in), x3; \ - \ - transpose_4x4(x0, x1, x2, x3, t0, t1, t2) - -#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ - transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ - \ - movdqu x0, (0*4*4)(out); \ - movdqu x1, (1*4*4)(out); \ - movdqu x2, (2*4*4)(out); \ - movdqu x3, (3*4*4)(out); - -#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ - transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ - \ - movdqu (0*4*4)(out), t0; \ - pxor t0, x0; \ - movdqu x0, (0*4*4)(out); \ - movdqu (1*4*4)(out), t0; \ - pxor t0, x1; \ - movdqu x1, (1*4*4)(out); \ - movdqu (2*4*4)(out), t0; \ - pxor t0, x2; \ - movdqu x2, (2*4*4)(out); \ - movdqu (3*4*4)(out), t0; \ - pxor t0, x3; \ - movdqu x3, 
(3*4*4)(out); - -.align 8 -.global __serpent_enc_blk_4way -.type __serpent_enc_blk_4way,@function; - -__serpent_enc_blk_4way: - /* input: - * arg_ctx(%esp): ctx, CTX - * arg_dst(%esp): dst - * arg_src(%esp): src - * arg_xor(%esp): bool, if true: xor output - */ - - pcmpeqd RNOT, RNOT; - - movl arg_ctx(%esp), CTX; - - movl arg_src(%esp), %eax; - read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); - - K(RA, RB, RC, RD, RE, 0); - S0(RA, RB, RC, RD, RE); LK(RC, RB, RD, RA, RE, 1); - S1(RC, RB, RD, RA, RE); LK(RE, RD, RA, RC, RB, 2); - S2(RE, RD, RA, RC, RB); LK(RB, RD, RE, RC, RA, 3); - S3(RB, RD, RE, RC, RA); LK(RC, RA, RD, RB, RE, 4); - S4(RC, RA, RD, RB, RE); LK(RA, RD, RB, RE, RC, 5); - S5(RA, RD, RB, RE, RC); LK(RC, RA, RD, RE, RB, 6); - S6(RC, RA, RD, RE, RB); LK(RD, RB, RA, RE, RC, 7); - S7(RD, RB, RA, RE, RC); LK(RC, RA, RE, RD, RB, 8); - S0(RC, RA, RE, RD, RB); LK(RE, RA, RD, RC, RB, 9); - S1(RE, RA, RD, RC, RB); LK(RB, RD, RC, RE, RA, 10); - S2(RB, RD, RC, RE, RA); LK(RA, RD, RB, RE, RC, 11); - S3(RA, RD, RB, RE, RC); LK(RE, RC, RD, RA, RB, 12); - S4(RE, RC, RD, RA, RB); LK(RC, RD, RA, RB, RE, 13); - S5(RC, RD, RA, RB, RE); LK(RE, RC, RD, RB, RA, 14); - S6(RE, RC, RD, RB, RA); LK(RD, RA, RC, RB, RE, 15); - S7(RD, RA, RC, RB, RE); LK(RE, RC, RB, RD, RA, 16); - S0(RE, RC, RB, RD, RA); LK(RB, RC, RD, RE, RA, 17); - S1(RB, RC, RD, RE, RA); LK(RA, RD, RE, RB, RC, 18); - S2(RA, RD, RE, RB, RC); LK(RC, RD, RA, RB, RE, 19); - S3(RC, RD, RA, RB, RE); LK(RB, RE, RD, RC, RA, 20); - S4(RB, RE, RD, RC, RA); LK(RE, RD, RC, RA, RB, 21); - S5(RE, RD, RC, RA, RB); LK(RB, RE, RD, RA, RC, 22); - S6(RB, RE, RD, RA, RC); LK(RD, RC, RE, RA, RB, 23); - S7(RD, RC, RE, RA, RB); LK(RB, RE, RA, RD, RC, 24); - S0(RB, RE, RA, RD, RC); LK(RA, RE, RD, RB, RC, 25); - S1(RA, RE, RD, RB, RC); LK(RC, RD, RB, RA, RE, 26); - S2(RC, RD, RB, RA, RE); LK(RE, RD, RC, RA, RB, 27); - S3(RE, RD, RC, RA, RB); LK(RA, RB, RD, RE, RC, 28); - S4(RA, RB, RD, RE, RC); LK(RB, RD, RE, RC, RA, 29); - S5(RB, RD, RE, RC, RA); LK(RA, RB, RD, RC, RE, 30); - S6(RA, RB, RD, RC, RE); LK(RD, RE, RB, RC, RA, 31); - S7(RD, RE, RB, RC, RA); K(RA, RB, RC, RD, RE, 32); - - movl arg_dst(%esp), %eax; - - cmpb $0, arg_xor(%esp); - jnz __enc_xor4; - - write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); - - ret; - -__enc_xor4: - xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); - - ret; - -.align 8 -.global serpent_dec_blk_4way -.type serpent_dec_blk_4way,@function; - -serpent_dec_blk_4way: - /* input: - * arg_ctx(%esp): ctx, CTX - * arg_dst(%esp): dst - * arg_src(%esp): src - */ - - pcmpeqd RNOT, RNOT; - - movl arg_ctx(%esp), CTX; - - movl arg_src(%esp), %eax; - read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); - - K(RA, RB, RC, RD, RE, 32); - SI7(RA, RB, RC, RD, RE); KL(RB, RD, RA, RE, RC, 31); - SI6(RB, RD, RA, RE, RC); KL(RA, RC, RE, RB, RD, 30); - SI5(RA, RC, RE, RB, RD); KL(RC, RD, RA, RE, RB, 29); - SI4(RC, RD, RA, RE, RB); KL(RC, RA, RB, RE, RD, 28); - SI3(RC, RA, RB, RE, RD); KL(RB, RC, RD, RE, RA, 27); - SI2(RB, RC, RD, RE, RA); KL(RC, RA, RE, RD, RB, 26); - SI1(RC, RA, RE, RD, RB); KL(RB, RA, RE, RD, RC, 25); - SI0(RB, RA, RE, RD, RC); KL(RE, RC, RA, RB, RD, 24); - SI7(RE, RC, RA, RB, RD); KL(RC, RB, RE, RD, RA, 23); - SI6(RC, RB, RE, RD, RA); KL(RE, RA, RD, RC, RB, 22); - SI5(RE, RA, RD, RC, RB); KL(RA, RB, RE, RD, RC, 21); - SI4(RA, RB, RE, RD, RC); KL(RA, RE, RC, RD, RB, 20); - SI3(RA, RE, RC, RD, RB); KL(RC, RA, RB, RD, RE, 19); - SI2(RC, RA, RB, RD, RE); KL(RA, RE, RD, RB, RC, 18); - SI1(RA, RE, RD, RB, RC); KL(RC, RE, RD, RB, RA, 17); - SI0(RC, 
RE, RD, RB, RA); KL(RD, RA, RE, RC, RB, 16); - SI7(RD, RA, RE, RC, RB); KL(RA, RC, RD, RB, RE, 15); - SI6(RA, RC, RD, RB, RE); KL(RD, RE, RB, RA, RC, 14); - SI5(RD, RE, RB, RA, RC); KL(RE, RC, RD, RB, RA, 13); - SI4(RE, RC, RD, RB, RA); KL(RE, RD, RA, RB, RC, 12); - SI3(RE, RD, RA, RB, RC); KL(RA, RE, RC, RB, RD, 11); - SI2(RA, RE, RC, RB, RD); KL(RE, RD, RB, RC, RA, 10); - SI1(RE, RD, RB, RC, RA); KL(RA, RD, RB, RC, RE, 9); - SI0(RA, RD, RB, RC, RE); KL(RB, RE, RD, RA, RC, 8); - SI7(RB, RE, RD, RA, RC); KL(RE, RA, RB, RC, RD, 7); - SI6(RE, RA, RB, RC, RD); KL(RB, RD, RC, RE, RA, 6); - SI5(RB, RD, RC, RE, RA); KL(RD, RA, RB, RC, RE, 5); - SI4(RD, RA, RB, RC, RE); KL(RD, RB, RE, RC, RA, 4); - SI3(RD, RB, RE, RC, RA); KL(RE, RD, RA, RC, RB, 3); - SI2(RE, RD, RA, RC, RB); KL(RD, RB, RC, RA, RE, 2); - SI1(RD, RB, RC, RA, RE); KL(RE, RB, RC, RA, RD, 1); - SI0(RE, RB, RC, RA, RD); K(RC, RD, RB, RE, RA, 0); - - movl arg_dst(%esp), %eax; - write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); - - ret; diff --git a/ANDROID_3.4.5/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/ANDROID_3.4.5/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S deleted file mode 100644 index 3ee1ff04..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S +++ /dev/null @@ -1,758 +0,0 @@ -/* - * Serpent Cipher 8-way parallel algorithm (x86_64/SSE2) - * - * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> - * - * Based on crypto/serpent.c by - * Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no> - * 2003 Herbert Valerio Riedel <hvr@gnu.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
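
[Note on the 4-way i586 code just shown, before the x86_64 variant continues below.] The LK and KL macros interleave Serpent's forward and inverse linear transform with the round-key XOR, with each pslld/psrld/por triple implementing a 32-bit rotate. In scalar form, the forward transform plus key addition is (a sketch, not the kernel's code):

	#include <stdint.h>

	#define ROTL32(v, n)	(((v) << (n)) | ((v) >> (32 - (n))))

	/* Scalar model of LK(x0, x1, x2, x3, x4, i): Serpent's linear
	 * transform followed by XOR with subkey i (here k[0..3]). */
	static void serpent_lt_and_key(uint32_t x[4], const uint32_t k[4])
	{
		x[0] = ROTL32(x[0], 13);
		x[2] = ROTL32(x[2], 3);
		x[1] ^= x[0] ^ x[2];
		x[3] ^= x[2] ^ (x[0] << 3);
		x[1] = ROTL32(x[1], 1);
		x[3] = ROTL32(x[3], 7);
		x[0] ^= x[1] ^ x[3];
		x[2] ^= x[3] ^ (x[1] << 7);
		x[0] = ROTL32(x[0], 5);
		x[2] = ROTL32(x[2], 22);

		x[0] ^= k[0];
		x[1] ^= k[1];
		x[2] ^= k[2];
		x[3] ^= k[3];
	}

A full encryption is 32 such rounds, cycling through S0..S7 four times, with the final round replacing the linear transform by a plain key XOR with subkey 32; that is exactly the unrolled S0..S7/LK chain ending in K(..., 32) in __serpent_enc_blk_4way above.
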
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - */ - -.file "serpent-sse2-x86_64-asm_64.S" -.text - -#define CTX %rdi - -/********************************************************************** - 8-way SSE2 serpent - **********************************************************************/ -#define RA1 %xmm0 -#define RB1 %xmm1 -#define RC1 %xmm2 -#define RD1 %xmm3 -#define RE1 %xmm4 - -#define RA2 %xmm5 -#define RB2 %xmm6 -#define RC2 %xmm7 -#define RD2 %xmm8 -#define RE2 %xmm9 - -#define RNOT %xmm10 - -#define RK0 %xmm11 -#define RK1 %xmm12 -#define RK2 %xmm13 -#define RK3 %xmm14 - -#define S0_1(x0, x1, x2, x3, x4) \ - movdqa x3, x4; \ - por x0, x3; \ - pxor x4, x0; \ - pxor x2, x4; \ - pxor RNOT, x4; \ - pxor x1, x3; \ - pand x0, x1; \ - pxor x4, x1; \ - pxor x0, x2; -#define S0_2(x0, x1, x2, x3, x4) \ - pxor x3, x0; \ - por x0, x4; \ - pxor x2, x0; \ - pand x1, x2; \ - pxor x2, x3; \ - pxor RNOT, x1; \ - pxor x4, x2; \ - pxor x2, x1; - -#define S1_1(x0, x1, x2, x3, x4) \ - movdqa x1, x4; \ - pxor x0, x1; \ - pxor x3, x0; \ - pxor RNOT, x3; \ - pand x1, x4; \ - por x1, x0; \ - pxor x2, x3; \ - pxor x3, x0; \ - pxor x3, x1; -#define S1_2(x0, x1, x2, x3, x4) \ - pxor x4, x3; \ - por x4, x1; \ - pxor x2, x4; \ - pand x0, x2; \ - pxor x1, x2; \ - por x0, x1; \ - pxor RNOT, x0; \ - pxor x2, x0; \ - pxor x1, x4; - -#define S2_1(x0, x1, x2, x3, x4) \ - pxor RNOT, x3; \ - pxor x0, x1; \ - movdqa x0, x4; \ - pand x2, x0; \ - pxor x3, x0; \ - por x4, x3; \ - pxor x1, x2; \ - pxor x1, x3; \ - pand x0, x1; -#define S2_2(x0, x1, x2, x3, x4) \ - pxor x2, x0; \ - pand x3, x2; \ - por x1, x3; \ - pxor RNOT, x0; \ - pxor x0, x3; \ - pxor x0, x4; \ - pxor x2, x0; \ - por x2, x1; - -#define S3_1(x0, x1, x2, x3, x4) \ - movdqa x1, x4; \ - pxor x3, x1; \ - por x0, x3; \ - pand x0, x4; \ - pxor x2, x0; \ - pxor x1, x2; \ - pand x3, x1; \ - pxor x3, x2; \ - por x4, x0; \ - pxor x3, x4; -#define S3_2(x0, x1, x2, x3, x4) \ - pxor x0, x1; \ - pand x3, x0; \ - pand x4, x3; \ - pxor x2, x3; \ - por x1, x4; \ - pand x1, x2; \ - pxor x3, x4; \ - pxor x3, x0; \ - pxor x2, x3; - -#define S4_1(x0, x1, x2, x3, x4) \ - movdqa x3, x4; \ - pand x0, x3; \ - pxor x4, x0; \ - pxor x2, x3; \ - por x4, x2; \ - pxor x1, x0; \ - pxor x3, x4; \ - por x0, x2; \ - pxor x1, x2; -#define S4_2(x0, x1, x2, x3, x4) \ - pand x0, x1; \ - pxor x4, x1; \ - pand x2, x4; \ - pxor x3, x2; \ - pxor x0, x4; \ - por x1, x3; \ - pxor RNOT, x1; \ - pxor x0, x3; - -#define S5_1(x0, x1, x2, x3, x4) \ - movdqa x1, x4; \ - por x0, x1; \ - pxor x1, x2; \ - pxor RNOT, x3; \ - pxor x0, x4; \ - pxor x2, x0; \ - pand x4, x1; \ - por x3, x4; \ - pxor x0, x4; -#define S5_2(x0, x1, x2, x3, x4) \ - pand x3, x0; \ - pxor x3, x1; \ - pxor x2, x3; \ - pxor x1, x0; \ - pand x4, x2; \ - pxor x2, x1; \ - pand x0, x2; \ - pxor x2, x3; - -#define S6_1(x0, x1, x2, x3, x4) \ - movdqa x1, x4; \ - pxor x0, x3; \ - pxor x2, x1; \ - pxor x0, x2; \ - pand x3, x0; \ - por x3, x1; \ - pxor RNOT, x4; \ - pxor x1, x0; \ - pxor x2, x1; -#define S6_2(x0, x1, x2, x3, x4) \ - pxor x4, x3; \ - pxor x0, x4; \ - pand x0, x2; \ - pxor x1, x4; \ - pxor x3, x2; \ - pand x1, x3; \ - pxor x0, x3; \ - pxor x2, x1; - -#define S7_1(x0, x1, x2, x3, x4) \ - pxor RNOT, x1; \ - movdqa x1, x4; \ - pxor RNOT, x0; \ - pand x2, x1; \ - pxor x3, x1; \ - por x4, x3; \ - pxor x2, x4; \ - pxor x3, x2; \ - pxor x0, x3; \ - por x1, x0; 
-#define S7_2(x0, x1, x2, x3, x4) \ - pand x0, x2; \ - pxor x4, x0; \ - pxor x3, x4; \ - pand x0, x3; \ - pxor x1, x4; \ - pxor x4, x2; \ - pxor x1, x3; \ - por x0, x4; \ - pxor x1, x4; - -#define SI0_1(x0, x1, x2, x3, x4) \ - movdqa x3, x4; \ - pxor x0, x1; \ - por x1, x3; \ - pxor x1, x4; \ - pxor RNOT, x0; \ - pxor x3, x2; \ - pxor x0, x3; \ - pand x1, x0; \ - pxor x2, x0; -#define SI0_2(x0, x1, x2, x3, x4) \ - pand x3, x2; \ - pxor x4, x3; \ - pxor x3, x2; \ - pxor x3, x1; \ - pand x0, x3; \ - pxor x0, x1; \ - pxor x2, x0; \ - pxor x3, x4; - -#define SI1_1(x0, x1, x2, x3, x4) \ - pxor x3, x1; \ - movdqa x0, x4; \ - pxor x2, x0; \ - pxor RNOT, x2; \ - por x1, x4; \ - pxor x3, x4; \ - pand x1, x3; \ - pxor x2, x1; \ - pand x4, x2; -#define SI1_2(x0, x1, x2, x3, x4) \ - pxor x1, x4; \ - por x3, x1; \ - pxor x0, x3; \ - pxor x0, x2; \ - por x4, x0; \ - pxor x4, x2; \ - pxor x0, x1; \ - pxor x1, x4; - -#define SI2_1(x0, x1, x2, x3, x4) \ - pxor x1, x2; \ - movdqa x3, x4; \ - pxor RNOT, x3; \ - por x2, x3; \ - pxor x4, x2; \ - pxor x0, x4; \ - pxor x1, x3; \ - por x2, x1; \ - pxor x0, x2; -#define SI2_2(x0, x1, x2, x3, x4) \ - pxor x4, x1; \ - por x3, x4; \ - pxor x3, x2; \ - pxor x2, x4; \ - pand x1, x2; \ - pxor x3, x2; \ - pxor x4, x3; \ - pxor x0, x4; - -#define SI3_1(x0, x1, x2, x3, x4) \ - pxor x1, x2; \ - movdqa x1, x4; \ - pand x2, x1; \ - pxor x0, x1; \ - por x4, x0; \ - pxor x3, x4; \ - pxor x3, x0; \ - por x1, x3; \ - pxor x2, x1; -#define SI3_2(x0, x1, x2, x3, x4) \ - pxor x3, x1; \ - pxor x2, x0; \ - pxor x3, x2; \ - pand x1, x3; \ - pxor x0, x1; \ - pand x2, x0; \ - pxor x3, x4; \ - pxor x0, x3; \ - pxor x1, x0; - -#define SI4_1(x0, x1, x2, x3, x4) \ - pxor x3, x2; \ - movdqa x0, x4; \ - pand x1, x0; \ - pxor x2, x0; \ - por x3, x2; \ - pxor RNOT, x4; \ - pxor x0, x1; \ - pxor x2, x0; \ - pand x4, x2; -#define SI4_2(x0, x1, x2, x3, x4) \ - pxor x0, x2; \ - por x4, x0; \ - pxor x3, x0; \ - pand x2, x3; \ - pxor x3, x4; \ - pxor x1, x3; \ - pand x0, x1; \ - pxor x1, x4; \ - pxor x3, x0; - -#define SI5_1(x0, x1, x2, x3, x4) \ - movdqa x1, x4; \ - por x2, x1; \ - pxor x4, x2; \ - pxor x3, x1; \ - pand x4, x3; \ - pxor x3, x2; \ - por x0, x3; \ - pxor RNOT, x0; \ - pxor x2, x3; \ - por x0, x2; -#define SI5_2(x0, x1, x2, x3, x4) \ - pxor x1, x4; \ - pxor x4, x2; \ - pand x0, x4; \ - pxor x1, x0; \ - pxor x3, x1; \ - pand x2, x0; \ - pxor x3, x2; \ - pxor x2, x0; \ - pxor x4, x2; \ - pxor x3, x4; - -#define SI6_1(x0, x1, x2, x3, x4) \ - pxor x2, x0; \ - movdqa x0, x4; \ - pand x3, x0; \ - pxor x3, x2; \ - pxor x2, x0; \ - pxor x1, x3; \ - por x4, x2; \ - pxor x3, x2; \ - pand x0, x3; -#define SI6_2(x0, x1, x2, x3, x4) \ - pxor RNOT, x0; \ - pxor x1, x3; \ - pand x2, x1; \ - pxor x0, x4; \ - pxor x4, x3; \ - pxor x2, x4; \ - pxor x1, x0; \ - pxor x0, x2; - -#define SI7_1(x0, x1, x2, x3, x4) \ - movdqa x3, x4; \ - pand x0, x3; \ - pxor x2, x0; \ - por x4, x2; \ - pxor x1, x4; \ - pxor RNOT, x0; \ - por x3, x1; \ - pxor x0, x4; \ - pand x2, x0; \ - pxor x1, x0; -#define SI7_2(x0, x1, x2, x3, x4) \ - pand x2, x1; \ - pxor x2, x3; \ - pxor x3, x4; \ - pand x3, x2; \ - por x0, x3; \ - pxor x4, x1; \ - pxor x4, x3; \ - pand x0, x4; \ - pxor x2, x4; - -#define get_key(i, j, t) \ - movd (4*(i)+(j))*4(CTX), t; \ - pshufd $0, t, t; - -#define K2(x0, x1, x2, x3, x4, i) \ - get_key(i, 0, RK0); \ - get_key(i, 1, RK1); \ - get_key(i, 2, RK2); \ - get_key(i, 3, RK3); \ - pxor RK0, x0 ## 1; \ - pxor RK1, x1 ## 1; \ - pxor RK2, x2 ## 1; \ - pxor RK3, x3 ## 1; \ - pxor RK0, x0 ## 2; \ - pxor RK1, x1 ## 
2; \ - pxor RK2, x2 ## 2; \ - pxor RK3, x3 ## 2; - -#define LK2(x0, x1, x2, x3, x4, i) \ - movdqa x0 ## 1, x4 ## 1; \ - pslld $13, x0 ## 1; \ - psrld $(32 - 13), x4 ## 1; \ - por x4 ## 1, x0 ## 1; \ - pxor x0 ## 1, x1 ## 1; \ - movdqa x2 ## 1, x4 ## 1; \ - pslld $3, x2 ## 1; \ - psrld $(32 - 3), x4 ## 1; \ - por x4 ## 1, x2 ## 1; \ - pxor x2 ## 1, x1 ## 1; \ - movdqa x0 ## 2, x4 ## 2; \ - pslld $13, x0 ## 2; \ - psrld $(32 - 13), x4 ## 2; \ - por x4 ## 2, x0 ## 2; \ - pxor x0 ## 2, x1 ## 2; \ - movdqa x2 ## 2, x4 ## 2; \ - pslld $3, x2 ## 2; \ - psrld $(32 - 3), x4 ## 2; \ - por x4 ## 2, x2 ## 2; \ - pxor x2 ## 2, x1 ## 2; \ - movdqa x1 ## 1, x4 ## 1; \ - pslld $1, x1 ## 1; \ - psrld $(32 - 1), x4 ## 1; \ - por x4 ## 1, x1 ## 1; \ - movdqa x0 ## 1, x4 ## 1; \ - pslld $3, x4 ## 1; \ - pxor x2 ## 1, x3 ## 1; \ - pxor x4 ## 1, x3 ## 1; \ - movdqa x3 ## 1, x4 ## 1; \ - get_key(i, 1, RK1); \ - movdqa x1 ## 2, x4 ## 2; \ - pslld $1, x1 ## 2; \ - psrld $(32 - 1), x4 ## 2; \ - por x4 ## 2, x1 ## 2; \ - movdqa x0 ## 2, x4 ## 2; \ - pslld $3, x4 ## 2; \ - pxor x2 ## 2, x3 ## 2; \ - pxor x4 ## 2, x3 ## 2; \ - movdqa x3 ## 2, x4 ## 2; \ - get_key(i, 3, RK3); \ - pslld $7, x3 ## 1; \ - psrld $(32 - 7), x4 ## 1; \ - por x4 ## 1, x3 ## 1; \ - movdqa x1 ## 1, x4 ## 1; \ - pslld $7, x4 ## 1; \ - pxor x1 ## 1, x0 ## 1; \ - pxor x3 ## 1, x0 ## 1; \ - pxor x3 ## 1, x2 ## 1; \ - pxor x4 ## 1, x2 ## 1; \ - get_key(i, 0, RK0); \ - pslld $7, x3 ## 2; \ - psrld $(32 - 7), x4 ## 2; \ - por x4 ## 2, x3 ## 2; \ - movdqa x1 ## 2, x4 ## 2; \ - pslld $7, x4 ## 2; \ - pxor x1 ## 2, x0 ## 2; \ - pxor x3 ## 2, x0 ## 2; \ - pxor x3 ## 2, x2 ## 2; \ - pxor x4 ## 2, x2 ## 2; \ - get_key(i, 2, RK2); \ - pxor RK1, x1 ## 1; \ - pxor RK3, x3 ## 1; \ - movdqa x0 ## 1, x4 ## 1; \ - pslld $5, x0 ## 1; \ - psrld $(32 - 5), x4 ## 1; \ - por x4 ## 1, x0 ## 1; \ - movdqa x2 ## 1, x4 ## 1; \ - pslld $22, x2 ## 1; \ - psrld $(32 - 22), x4 ## 1; \ - por x4 ## 1, x2 ## 1; \ - pxor RK0, x0 ## 1; \ - pxor RK2, x2 ## 1; \ - pxor RK1, x1 ## 2; \ - pxor RK3, x3 ## 2; \ - movdqa x0 ## 2, x4 ## 2; \ - pslld $5, x0 ## 2; \ - psrld $(32 - 5), x4 ## 2; \ - por x4 ## 2, x0 ## 2; \ - movdqa x2 ## 2, x4 ## 2; \ - pslld $22, x2 ## 2; \ - psrld $(32 - 22), x4 ## 2; \ - por x4 ## 2, x2 ## 2; \ - pxor RK0, x0 ## 2; \ - pxor RK2, x2 ## 2; - -#define KL2(x0, x1, x2, x3, x4, i) \ - pxor RK0, x0 ## 1; \ - pxor RK2, x2 ## 1; \ - movdqa x0 ## 1, x4 ## 1; \ - psrld $5, x0 ## 1; \ - pslld $(32 - 5), x4 ## 1; \ - por x4 ## 1, x0 ## 1; \ - pxor RK3, x3 ## 1; \ - pxor RK1, x1 ## 1; \ - movdqa x2 ## 1, x4 ## 1; \ - psrld $22, x2 ## 1; \ - pslld $(32 - 22), x4 ## 1; \ - por x4 ## 1, x2 ## 1; \ - pxor x3 ## 1, x2 ## 1; \ - pxor RK0, x0 ## 2; \ - pxor RK2, x2 ## 2; \ - movdqa x0 ## 2, x4 ## 2; \ - psrld $5, x0 ## 2; \ - pslld $(32 - 5), x4 ## 2; \ - por x4 ## 2, x0 ## 2; \ - pxor RK3, x3 ## 2; \ - pxor RK1, x1 ## 2; \ - movdqa x2 ## 2, x4 ## 2; \ - psrld $22, x2 ## 2; \ - pslld $(32 - 22), x4 ## 2; \ - por x4 ## 2, x2 ## 2; \ - pxor x3 ## 2, x2 ## 2; \ - pxor x3 ## 1, x0 ## 1; \ - movdqa x1 ## 1, x4 ## 1; \ - pslld $7, x4 ## 1; \ - pxor x1 ## 1, x0 ## 1; \ - pxor x4 ## 1, x2 ## 1; \ - movdqa x1 ## 1, x4 ## 1; \ - psrld $1, x1 ## 1; \ - pslld $(32 - 1), x4 ## 1; \ - por x4 ## 1, x1 ## 1; \ - pxor x3 ## 2, x0 ## 2; \ - movdqa x1 ## 2, x4 ## 2; \ - pslld $7, x4 ## 2; \ - pxor x1 ## 2, x0 ## 2; \ - pxor x4 ## 2, x2 ## 2; \ - movdqa x1 ## 2, x4 ## 2; \ - psrld $1, x1 ## 2; \ - pslld $(32 - 1), x4 ## 2; \ - por x4 ## 2, x1 ## 2; \ - movdqa x3 ## 1, x4 ## 1; \ - psrld $7, x3 ## 
1; \ - pslld $(32 - 7), x4 ## 1; \ - por x4 ## 1, x3 ## 1; \ - pxor x0 ## 1, x1 ## 1; \ - movdqa x0 ## 1, x4 ## 1; \ - pslld $3, x4 ## 1; \ - pxor x4 ## 1, x3 ## 1; \ - movdqa x0 ## 1, x4 ## 1; \ - movdqa x3 ## 2, x4 ## 2; \ - psrld $7, x3 ## 2; \ - pslld $(32 - 7), x4 ## 2; \ - por x4 ## 2, x3 ## 2; \ - pxor x0 ## 2, x1 ## 2; \ - movdqa x0 ## 2, x4 ## 2; \ - pslld $3, x4 ## 2; \ - pxor x4 ## 2, x3 ## 2; \ - movdqa x0 ## 2, x4 ## 2; \ - psrld $13, x0 ## 1; \ - pslld $(32 - 13), x4 ## 1; \ - por x4 ## 1, x0 ## 1; \ - pxor x2 ## 1, x1 ## 1; \ - pxor x2 ## 1, x3 ## 1; \ - movdqa x2 ## 1, x4 ## 1; \ - psrld $3, x2 ## 1; \ - pslld $(32 - 3), x4 ## 1; \ - por x4 ## 1, x2 ## 1; \ - psrld $13, x0 ## 2; \ - pslld $(32 - 13), x4 ## 2; \ - por x4 ## 2, x0 ## 2; \ - pxor x2 ## 2, x1 ## 2; \ - pxor x2 ## 2, x3 ## 2; \ - movdqa x2 ## 2, x4 ## 2; \ - psrld $3, x2 ## 2; \ - pslld $(32 - 3), x4 ## 2; \ - por x4 ## 2, x2 ## 2; - -#define S(SBOX, x0, x1, x2, x3, x4) \ - SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ - SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ - SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ - SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); - -#define SP(SBOX, x0, x1, x2, x3, x4, i) \ - get_key(i, 0, RK0); \ - SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ - get_key(i, 2, RK2); \ - SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ - get_key(i, 3, RK3); \ - SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ - get_key(i, 1, RK1); \ - SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ - -#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ - movdqa x0, t2; \ - punpckldq x1, x0; \ - punpckhdq x1, t2; \ - movdqa x2, t1; \ - punpckhdq x3, x2; \ - punpckldq x3, t1; \ - movdqa x0, x1; \ - punpcklqdq t1, x0; \ - punpckhqdq t1, x1; \ - movdqa t2, x3; \ - punpcklqdq x2, t2; \ - punpckhqdq x2, x3; \ - movdqa t2, x2; - -#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ - movdqu (0*4*4)(in), x0; \ - movdqu (1*4*4)(in), x1; \ - movdqu (2*4*4)(in), x2; \ - movdqu (3*4*4)(in), x3; \ - \ - transpose_4x4(x0, x1, x2, x3, t0, t1, t2) - -#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ - transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ - \ - movdqu x0, (0*4*4)(out); \ - movdqu x1, (1*4*4)(out); \ - movdqu x2, (2*4*4)(out); \ - movdqu x3, (3*4*4)(out); - -#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ - transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ - \ - movdqu (0*4*4)(out), t0; \ - pxor t0, x0; \ - movdqu x0, (0*4*4)(out); \ - movdqu (1*4*4)(out), t0; \ - pxor t0, x1; \ - movdqu x1, (1*4*4)(out); \ - movdqu (2*4*4)(out), t0; \ - pxor t0, x2; \ - movdqu x2, (2*4*4)(out); \ - movdqu (3*4*4)(out), t0; \ - pxor t0, x3; \ - movdqu x3, (3*4*4)(out); - -.align 8 -.global __serpent_enc_blk_8way -.type __serpent_enc_blk_8way,@function; - -__serpent_enc_blk_8way: - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - * %rcx: bool, if true: xor output - */ - - pcmpeqd RNOT, RNOT; - - leaq (4*4*4)(%rdx), %rax; - read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); - read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); - - K2(RA, RB, RC, RD, RE, 0); - S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); - S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); - S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); - S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); - S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); - S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); - S(S6, RC, RA, RD, RE, RB); LK2(RD, 
RB, RA, RE, RC, 7); - S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); - S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); - S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); - S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); - S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); - S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); - S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); - S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); - S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); - S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); - S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); - S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); - S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); - S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); - S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); - S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); - S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); - S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); - S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); - S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); - S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); - S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); - S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); - S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); - S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); - - leaq (4*4*4)(%rsi), %rax; - - testb %cl, %cl; - jnz __enc_xor8; - - write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); - write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); - - ret; - -__enc_xor8: - xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); - xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); - - ret; - -.align 8 -.global serpent_dec_blk_8way -.type serpent_dec_blk_8way,@function; - -serpent_dec_blk_8way: - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src - */ - - pcmpeqd RNOT, RNOT; - - leaq (4*4*4)(%rdx), %rax; - read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); - read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); - - K2(RA, RB, RC, RD, RE, 32); - SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); - SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); - SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); - SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); - SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); - SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); - SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); - SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); - SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); - SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); - SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); - SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); - SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); - SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); - SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); - SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); - SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); - SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); - SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); - SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); - SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); - SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, 
RB, RC, RA, 10); - SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); - SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); - SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); - SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); - SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); - SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); - SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); - SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); - SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); - S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); - - leaq (4*4*4)(%rsi), %rax; - write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2); - write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); - - ret; diff --git a/ANDROID_3.4.5/arch/x86/crypto/serpent_sse2_glue.c b/ANDROID_3.4.5/arch/x86/crypto/serpent_sse2_glue.c deleted file mode 100644 index 4b21be85..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/serpent_sse2_glue.c +++ /dev/null @@ -1,944 +0,0 @@ -/* - * Glue Code for SSE2 assembler versions of Serpent Cipher - * - * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> - * - * Glue code based on aesni-intel_glue.c by: - * Copyright (C) 2008, Intel Corp. - * Author: Huang Ying <ying.huang@intel.com> - * - * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: - * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> - * CTR part based on code (crypto/ctr.c) by: - * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - */ - -#include <linux/module.h> -#include <linux/hardirq.h> -#include <linux/types.h> -#include <linux/crypto.h> -#include <linux/err.h> -#include <crypto/algapi.h> -#include <crypto/serpent.h> -#include <crypto/cryptd.h> -#include <crypto/b128ops.h> -#include <crypto/ctr.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> -#include <asm/i387.h> -#include <asm/serpent.h> -#include <crypto/scatterwalk.h> -#include <linux/workqueue.h> -#include <linux/spinlock.h> - -struct async_serpent_ctx { - struct cryptd_ablkcipher *cryptd_tfm; -}; - -static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) -{ - if (fpu_enabled) - return true; - - /* SSE2 is only used when chunk to be processed is large enough, so - * do not enable FPU until it is necessary. 
- */ - if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS) - return false; - - kernel_fpu_begin(); - return true; -} - -static inline void serpent_fpu_end(bool fpu_enabled) -{ - if (fpu_enabled) - kernel_fpu_end(); -} - -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - bool enc) -{ - bool fpu_enabled = false; - struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = SERPENT_BLOCK_SIZE; - unsigned int nbytes; - int err; - - err = blkcipher_walk_virt(desc, walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; - - fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); - - /* Process multi-block batch */ - if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { - do { - if (enc) - serpent_enc_blk_xway(ctx, wdst, wsrc); - else - serpent_dec_blk_xway(ctx, wdst, wsrc); - - wsrc += bsize * SERPENT_PARALLEL_BLOCKS; - wdst += bsize * SERPENT_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; - } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - if (enc) - __serpent_encrypt(ctx, wdst, wsrc); - else - __serpent_decrypt(ctx, wdst, wsrc); - - wsrc += bsize; - wdst += bsize; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - err = blkcipher_walk_done(desc, walk, nbytes); - } - - serpent_fpu_end(fpu_enabled); - return err; -} - -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, true); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, false); -} - -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = SERPENT_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 *iv = (u128 *)walk->iv; - - do { - u128_xor(dst, src, iv); - __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); - return nbytes; -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} - -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = SERPENT_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; - u128 last_iv; - int i; - - /* Start of the last block. 
*/ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - /* Process multi-block batch */ - if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { - do { - nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1); - src -= SERPENT_PARALLEL_BLOCKS - 1; - dst -= SERPENT_PARALLEL_BLOCKS - 1; - - for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) - ivs[i] = src[i]; - - serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); - - for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) - u128_xor(dst + (i + 1), dst + (i + 1), ivs + i); - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - for (;;) { - __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - break; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } - -done: - u128_xor(dst, dst, (u128 *)walk->iv); - *(u128 *)walk->iv = last_iv; - - return nbytes; -} - -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - while ((nbytes = walk.nbytes)) { - fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - serpent_fpu_end(fpu_enabled); - return err; -} - -static inline void u128_to_be128(be128 *dst, const u128 *src) -{ - dst->a = cpu_to_be64(src->a); - dst->b = cpu_to_be64(src->b); -} - -static inline void be128_to_u128(u128 *dst, const be128 *src) -{ - dst->a = be64_to_cpu(src->a); - dst->b = be64_to_cpu(src->b); -} - -static inline void u128_inc(u128 *i) -{ - i->b++; - if (!i->b) - i->a++; -} - -static void ctr_crypt_final(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *ctrblk = walk->iv; - u8 keystream[SERPENT_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - __serpent_encrypt(ctx, keystream, ctrblk); - crypto_xor(keystream, src, nbytes); - memcpy(dst, keystream, nbytes); - - crypto_inc(ctrblk, SERPENT_BLOCK_SIZE); -} - -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - const unsigned int bsize = SERPENT_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 ctrblk; - be128 ctrblocks[SERPENT_PARALLEL_BLOCKS]; - int i; - - be128_to_u128(&ctrblk, (be128 *)walk->iv); - - /* Process multi-block batch */ - if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { - do { - /* create ctrblks for parallel encrypt */ - for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { - if (dst != src) - dst[i] = src[i]; - - u128_to_be128(&ctrblocks[i], &ctrblk); - u128_inc(&ctrblk); - } - - serpent_enc_blk_xway_xor(ctx, (u8 *)dst, - (u8 *)ctrblocks); - - src += SERPENT_PARALLEL_BLOCKS; - dst += SERPENT_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; - } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - if (dst != src) - *dst = *src; - - 
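
[Note on __cbc_decrypt above.] It walks from the last block backwards because CBC decryption is parallelizable but consumes its own input: P[i] = D(C[i]) ^ C[i-1], so an in-place batch decrypt destroys the C[i-1] each neighbouring block needs, hence the ivs[] snapshot taken before the 8-way call. A forward scalar model of the same bookkeeping (a sketch built on the helpers above, not the kernel's code):

	/* Scalar CBC decrypt that stays safe for dst == src by saving
	 * each ciphertext block before it can be overwritten. */
	static void cbc_decrypt_model(struct serpent_ctx *ctx, u128 *dst,
				      u128 *src, unsigned int nblocks,
				      u128 *iv)
	{
		u128 prev = *iv, cur;
		unsigned int i;

		for (i = 0; i < nblocks; i++) {
			cur = src[i];		/* snapshot C[i] first */
			__serpent_decrypt(ctx, (u8 *)&dst[i], (u8 *)&src[i]);
			u128_xor(&dst[i], &dst[i], &prev);
			prev = cur;		/* C[i] chains block i+1 */
		}
		*iv = prev;			/* next call chains from C[n-1] */
	}
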
u128_to_be128(&ctrblocks[0], &ctrblk); - u128_inc(&ctrblk); - - __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); - u128_xor(dst, dst, (u128 *)ctrblocks); - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - u128_to_be128((be128 *)walk->iv, &ctrblk); - return nbytes; -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) { - fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - serpent_fpu_end(fpu_enabled); - - if (walk.nbytes) { - ctr_crypt_final(desc, &walk); - err = blkcipher_walk_done(desc, &walk, 0); - } - - return err; -} - -struct crypt_priv { - struct serpent_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { - serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_encrypt(ctx->ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { - serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_decrypt(ctx->ctx, srcdst, srcdst); -} - -struct serpent_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct serpent_ctx serpent_ctx; -}; - -static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - - SERPENT_BLOCK_SIZE); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, key + keylen - - SERPENT_BLOCK_SIZE); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - 
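
[Note on the CTR counter handling above.] The counter lives in native u128 form and is only byte-swapped into the be128 block that actually gets encrypted, with the carry across the 64-bit halves handled by hand in u128_inc. A small worked example (a sketch; u128_inc, u128_to_be128 and __serpent_encrypt are the helpers defined or declared earlier in this file):

	static void ctr_carry_demo(struct serpent_ctx *ctx)
	{
		u128 ctr = { .a = 0, .b = ~0ULL };	/* low half about to wrap */
		be128 block;

		u128_to_be128(&block, &ctr);	/* big-endian counter block */
		__serpent_encrypt(ctx, (u8 *)&block, (u8 *)&block);	/* keystream */
		u128_inc(&ctr);			/* .b wraps to 0, carry: .a = 1 */
	}
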
.table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static void lrw_exit_tfm(struct crypto_tfm *tfm) -{ - struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} - -struct serpent_xts_ctx { - struct serpent_ctx tweak_ctx; - struct serpent_ctx crypt_ctx; -}; - -static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; - int err; - - /* key consists of keys of equal size concatenated, therefore - * the length must be even - */ - if (keylen % 2) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - - /* first half of xts-key is for crypt */ - err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); -} - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->crypt_ctx, - .fpu_enabled = false, - }; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = xts_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->crypt_ctx, - .fpu_enabled = false, - }; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = xts_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, - unsigned int key_len) -{ - struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; - int err; - - crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) - & CRYPTO_TFM_REQ_MASK); - err = crypto_ablkcipher_setkey(child, key, key_len); - crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) - & CRYPTO_TFM_RES_MASK); - return err; -} - -static int __ablk_encrypt(struct ablkcipher_request *req) -{ - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct blkcipher_desc desc; - - desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); - desc.info = req->info; - desc.flags = 0; - - return crypto_blkcipher_crt(desc.tfm)->encrypt( - &desc, req->dst, req->src, req->nbytes); -} - -static int 
ablk_encrypt(struct ablkcipher_request *req) -{ - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); - - if (!irq_fpu_usable()) { - struct ablkcipher_request *cryptd_req = - ablkcipher_request_ctx(req); - - memcpy(cryptd_req, req, sizeof(*req)); - ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); - - return crypto_ablkcipher_encrypt(cryptd_req); - } else { - return __ablk_encrypt(req); - } -} - -static int ablk_decrypt(struct ablkcipher_request *req) -{ - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); - - if (!irq_fpu_usable()) { - struct ablkcipher_request *cryptd_req = - ablkcipher_request_ctx(req); - - memcpy(cryptd_req, req, sizeof(*req)); - ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); - - return crypto_ablkcipher_decrypt(cryptd_req); - } else { - struct blkcipher_desc desc; - - desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); - desc.info = req->info; - desc.flags = 0; - - return crypto_blkcipher_crt(desc.tfm)->decrypt( - &desc, req->dst, req->src, req->nbytes); - } -} - -static void ablk_exit(struct crypto_tfm *tfm) -{ - struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); - - cryptd_free_ablkcipher(ctx->cryptd_tfm); -} - -static int ablk_init(struct crypto_tfm *tfm) -{ - struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); - struct cryptd_ablkcipher *cryptd_tfm; - char drv_name[CRYPTO_MAX_ALG_NAME]; - - snprintf(drv_name, sizeof(drv_name), "__driver-%s", - crypto_tfm_alg_driver_name(tfm)); - - cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - ctx->cryptd_tfm = cryptd_tfm; - tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + - crypto_ablkcipher_reqsize(&cryptd_tfm->base); - - return 0; -} - -static struct crypto_alg serpent_algs[10] = { { - .cra_name = "__ecb-serpent-sse2", - .cra_driver_name = "__driver-ecb-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-serpent-sse2", - .cra_driver_name = "__driver-cbc-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-serpent-sse2", - .cra_driver_name = "__driver-ctr-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = 
SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-serpent-sse2", - .cra_driver_name = "__driver-lrw-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list), - .cra_exit = lrw_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = lrw_serpent_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-serpent-sse2", - .cra_driver_name = "__driver-xts-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = xts_serpent_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(serpent)", - .cra_driver_name = "ecb-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list), - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(serpent)", - .cra_driver_name = "cbc-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list), - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(serpent)", - .cra_driver_name = "ctr-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list), - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(serpent)", - .cra_driver_name = "lrw-serpent-sse2", 
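/* Editorial annotation, not part of the deleted source: the
 * "__"-prefixed, priority-0 entries earlier in this array are internal
 * synchronous helpers. Wrappers like this one, registered at priority
 * 400, are what the crypto API actually selects; their handlers bounce
 * work to cryptd whenever the FPU is unusable in the current context. */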
- .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list), - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(serpent)", - .cra_driver_name = "xts-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list), - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -} }; - -static int __init serpent_sse2_init(void) -{ - if (!cpu_has_xmm2) { - printk(KERN_INFO "SSE2 instructions are not detected.\n"); - return -ENODEV; - } - - return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); -} - -static void __exit serpent_sse2_exit(void) -{ - crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); -} - -module_init(serpent_sse2_init); -module_exit(serpent_sse2_exit); - -MODULE_DESCRIPTION("Serpent Cipher Algorithm, SSE2 optimized"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("serpent"); diff --git a/ANDROID_3.4.5/arch/x86/crypto/sha1_ssse3_asm.S b/ANDROID_3.4.5/arch/x86/crypto/sha1_ssse3_asm.S deleted file mode 100644 index b2c2f57d..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/sha1_ssse3_asm.S +++ /dev/null @@ -1,558 +0,0 @@ -/* - * This is a SIMD SHA-1 implementation. It requires the Intel(R) Supplemental - * SSE3 instruction set extensions introduced in Intel Core Microarchitecture - * processors. CPUs supporting Intel(R) AVX extensions will get an additional - * boost. - * - * This work was inspired by the vectorized implementation of Dean Gaudet. - * Additional information on it can be found at: - * http://www.arctic.org/~dean/crypto/sha1.html - * - * It was improved upon with more efficient vectorization of the message - * scheduling. This implementation has also been optimized for all current and - * several future generations of Intel CPUs. - * - * See this article for more information about the implementation details: - * http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1/ - * - * Copyright (C) 2010, Intel Corp. - * Authors: Maxim Locktyukhin <maxim.locktyukhin@intel.com> - * Ronen Zohar <ronen.zohar@intel.com> - * - * Converted to AT&T syntax and adapted for inclusion in the Linux kernel: - * Author: Mathias Krause <minipli@googlemail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- */ - -#define CTX %rdi // arg1 -#define BUF %rsi // arg2 -#define CNT %rdx // arg3 - -#define REG_A %ecx -#define REG_B %esi -#define REG_C %edi -#define REG_D %ebp -#define REG_E %edx - -#define REG_T1 %eax -#define REG_T2 %ebx - -#define K_BASE %r8 -#define HASH_PTR %r9 -#define BUFFER_PTR %r10 -#define BUFFER_END %r11 - -#define W_TMP1 %xmm0 -#define W_TMP2 %xmm9 - -#define W0 %xmm1 -#define W4 %xmm2 -#define W8 %xmm3 -#define W12 %xmm4 -#define W16 %xmm5 -#define W20 %xmm6 -#define W24 %xmm7 -#define W28 %xmm8 - -#define XMM_SHUFB_BSWAP %xmm10 - -/* we keep window of 64 w[i]+K pre-calculated values in a circular buffer */ -#define WK(t) (((t) & 15) * 4)(%rsp) -#define W_PRECALC_AHEAD 16 - -/* - * This macro implements the SHA-1 function's body for single 64-byte block - * param: function's name - */ -.macro SHA1_VECTOR_ASM name - .global \name - .type \name, @function - .align 32 -\name: - push %rbx - push %rbp - push %r12 - - mov %rsp, %r12 - sub $64, %rsp # allocate workspace - and $~15, %rsp # align stack - - mov CTX, HASH_PTR - mov BUF, BUFFER_PTR - - shl $6, CNT # multiply by 64 - add BUF, CNT - mov CNT, BUFFER_END - - lea K_XMM_AR(%rip), K_BASE - xmm_mov BSWAP_SHUFB_CTL(%rip), XMM_SHUFB_BSWAP - - SHA1_PIPELINED_MAIN_BODY - - # cleanup workspace - mov $8, %ecx - mov %rsp, %rdi - xor %rax, %rax - rep stosq - - mov %r12, %rsp # deallocate workspace - - pop %r12 - pop %rbp - pop %rbx - ret - - .size \name, .-\name -.endm - -/* - * This macro implements 80 rounds of SHA-1 for one 64-byte block - */ -.macro SHA1_PIPELINED_MAIN_BODY - INIT_REGALLOC - - mov (HASH_PTR), A - mov 4(HASH_PTR), B - mov 8(HASH_PTR), C - mov 12(HASH_PTR), D - mov 16(HASH_PTR), E - - .set i, 0 - .rept W_PRECALC_AHEAD - W_PRECALC i - .set i, (i+1) - .endr - -.align 4 -1: - RR F1,A,B,C,D,E,0 - RR F1,D,E,A,B,C,2 - RR F1,B,C,D,E,A,4 - RR F1,E,A,B,C,D,6 - RR F1,C,D,E,A,B,8 - - RR F1,A,B,C,D,E,10 - RR F1,D,E,A,B,C,12 - RR F1,B,C,D,E,A,14 - RR F1,E,A,B,C,D,16 - RR F1,C,D,E,A,B,18 - - RR F2,A,B,C,D,E,20 - RR F2,D,E,A,B,C,22 - RR F2,B,C,D,E,A,24 - RR F2,E,A,B,C,D,26 - RR F2,C,D,E,A,B,28 - - RR F2,A,B,C,D,E,30 - RR F2,D,E,A,B,C,32 - RR F2,B,C,D,E,A,34 - RR F2,E,A,B,C,D,36 - RR F2,C,D,E,A,B,38 - - RR F3,A,B,C,D,E,40 - RR F3,D,E,A,B,C,42 - RR F3,B,C,D,E,A,44 - RR F3,E,A,B,C,D,46 - RR F3,C,D,E,A,B,48 - - RR F3,A,B,C,D,E,50 - RR F3,D,E,A,B,C,52 - RR F3,B,C,D,E,A,54 - RR F3,E,A,B,C,D,56 - RR F3,C,D,E,A,B,58 - - add $64, BUFFER_PTR # move to the next 64-byte block - cmp BUFFER_END, BUFFER_PTR # if the current is the last one use - cmovae K_BASE, BUFFER_PTR # dummy source to avoid buffer overrun - - RR F4,A,B,C,D,E,60 - RR F4,D,E,A,B,C,62 - RR F4,B,C,D,E,A,64 - RR F4,E,A,B,C,D,66 - RR F4,C,D,E,A,B,68 - - RR F4,A,B,C,D,E,70 - RR F4,D,E,A,B,C,72 - RR F4,B,C,D,E,A,74 - RR F4,E,A,B,C,D,76 - RR F4,C,D,E,A,B,78 - - UPDATE_HASH (HASH_PTR), A - UPDATE_HASH 4(HASH_PTR), B - UPDATE_HASH 8(HASH_PTR), C - UPDATE_HASH 12(HASH_PTR), D - UPDATE_HASH 16(HASH_PTR), E - - RESTORE_RENAMED_REGS - cmp K_BASE, BUFFER_PTR # K_BASE means, we reached the end - jne 1b -.endm - -.macro INIT_REGALLOC - .set A, REG_A - .set B, REG_B - .set C, REG_C - .set D, REG_D - .set E, REG_E - .set T1, REG_T1 - .set T2, REG_T2 -.endm - -.macro RESTORE_RENAMED_REGS - # order is important (REG_C is where it should be) - mov B, REG_B - mov D, REG_D - mov A, REG_A - mov E, REG_E -.endm - -.macro SWAP_REG_NAMES a, b - .set _T, \a - .set \a, \b - .set \b, _T -.endm - -.macro F1 b, c, d - mov \c, T1 - SWAP_REG_NAMES \c, T1 - xor \d, T1 - and \b, T1 - xor \d, T1 -.endm - 
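/* Editorial reference, not part of the deleted source: in C terms,
 * assuming 32-bit unsigned words, the F1-F4 macros compute the
 * standard SHA-1 round functions:
 *
 *   F1(b, c, d) = d ^ (b & (c ^ d))        // Ch, rounds 0-19
 *   F2(b, c, d) = b ^ c ^ d                // Parity, rounds 20-39
 *   F3(b, c, d) = (b & c) | (d & (b | c))  // Maj, rounds 40-59
 *   F4(b, c, d) = F2(b, c, d)              // Parity again, rounds 60-79
 *
 * F1 uses d ^ (b & (c ^ d)) rather than the textbook
 * (b & c) | (~b & d); the two are bitwise identical, but this form
 * needs one fewer instruction. */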
-.macro F2 b, c, d - mov \d, T1 - SWAP_REG_NAMES \d, T1 - xor \c, T1 - xor \b, T1 -.endm - -.macro F3 b, c ,d - mov \c, T1 - SWAP_REG_NAMES \c, T1 - mov \b, T2 - or \b, T1 - and \c, T2 - and \d, T1 - or T2, T1 -.endm - -.macro F4 b, c, d - F2 \b, \c, \d -.endm - -.macro UPDATE_HASH hash, val - add \hash, \val - mov \val, \hash -.endm - -/* - * RR does two rounds of SHA-1 back to back with W[] pre-calc - * t1 = F(b, c, d); e += w(i) - * e += t1; b <<= 30; d += w(i+1); - * t1 = F(a, b, c); - * d += t1; a <<= 5; - * e += a; - * t1 = e; a >>= 7; - * t1 <<= 5; - * d += t1; - */ -.macro RR F, a, b, c, d, e, round - add WK(\round), \e - \F \b, \c, \d # t1 = F(b, c, d); - W_PRECALC (\round + W_PRECALC_AHEAD) - rol $30, \b - add T1, \e - add WK(\round + 1), \d - - \F \a, \b, \c - W_PRECALC (\round + W_PRECALC_AHEAD + 1) - rol $5, \a - add \a, \e - add T1, \d - ror $7, \a # (a <<r 5) >>r 7) => a <<r 30) - - mov \e, T1 - SWAP_REG_NAMES \e, T1 - - rol $5, T1 - add T1, \d - - # write: \a, \b - # rotate: \a<=\d, \b<=\e, \c<=\a, \d<=\b, \e<=\c -.endm - -.macro W_PRECALC r - .set i, \r - - .if (i < 20) - .set K_XMM, 0 - .elseif (i < 40) - .set K_XMM, 16 - .elseif (i < 60) - .set K_XMM, 32 - .elseif (i < 80) - .set K_XMM, 48 - .endif - - .if ((i < 16) || ((i >= 80) && (i < (80 + W_PRECALC_AHEAD)))) - .set i, ((\r) % 80) # pre-compute for the next iteration - .if (i == 0) - W_PRECALC_RESET - .endif - W_PRECALC_00_15 - .elseif (i<32) - W_PRECALC_16_31 - .elseif (i < 80) // rounds 32-79 - W_PRECALC_32_79 - .endif -.endm - -.macro W_PRECALC_RESET - .set W, W0 - .set W_minus_04, W4 - .set W_minus_08, W8 - .set W_minus_12, W12 - .set W_minus_16, W16 - .set W_minus_20, W20 - .set W_minus_24, W24 - .set W_minus_28, W28 - .set W_minus_32, W -.endm - -.macro W_PRECALC_ROTATE - .set W_minus_32, W_minus_28 - .set W_minus_28, W_minus_24 - .set W_minus_24, W_minus_20 - .set W_minus_20, W_minus_16 - .set W_minus_16, W_minus_12 - .set W_minus_12, W_minus_08 - .set W_minus_08, W_minus_04 - .set W_minus_04, W - .set W, W_minus_32 -.endm - -.macro W_PRECALC_SSSE3 - -.macro W_PRECALC_00_15 - W_PRECALC_00_15_SSSE3 -.endm -.macro W_PRECALC_16_31 - W_PRECALC_16_31_SSSE3 -.endm -.macro W_PRECALC_32_79 - W_PRECALC_32_79_SSSE3 -.endm - -/* message scheduling pre-compute for rounds 0-15 */ -.macro W_PRECALC_00_15_SSSE3 - .if ((i & 3) == 0) - movdqu (i*4)(BUFFER_PTR), W_TMP1 - .elseif ((i & 3) == 1) - pshufb XMM_SHUFB_BSWAP, W_TMP1 - movdqa W_TMP1, W - .elseif ((i & 3) == 2) - paddd (K_BASE), W_TMP1 - .elseif ((i & 3) == 3) - movdqa W_TMP1, WK(i&~3) - W_PRECALC_ROTATE - .endif -.endm - -/* message scheduling pre-compute for rounds 16-31 - * - * - calculating last 32 w[i] values in 8 XMM registers - * - pre-calculate K+w[i] values and store to mem, for later load by ALU add - * instruction - * - * some "heavy-lifting" vectorization for rounds 16-31 due to w[i]->w[i-3] - * dependency, but improves for 32-79 - */ -.macro W_PRECALC_16_31_SSSE3 - # blended scheduling of vector and scalar instruction streams, one 4-wide - # vector iteration / 4 scalar rounds - .if ((i & 3) == 0) - movdqa W_minus_12, W - palignr $8, W_minus_16, W # w[i-14] - movdqa W_minus_04, W_TMP1 - psrldq $4, W_TMP1 # w[i-3] - pxor W_minus_08, W - .elseif ((i & 3) == 1) - pxor W_minus_16, W_TMP1 - pxor W_TMP1, W - movdqa W, W_TMP2 - movdqa W, W_TMP1 - pslldq $12, W_TMP2 - .elseif ((i & 3) == 2) - psrld $31, W - pslld $1, W_TMP1 - por W, W_TMP1 - movdqa W_TMP2, W - psrld $30, W_TMP2 - pslld $2, W - .elseif ((i & 3) == 3) - pxor W, W_TMP1 - pxor W_TMP2, W_TMP1 - movdqa 
W_TMP1, W - paddd K_XMM(K_BASE), W_TMP1 - movdqa W_TMP1, WK(i&~3) - W_PRECALC_ROTATE - .endif -.endm - -/* message scheduling pre-compute for rounds 32-79 - * - * in SHA-1 specification: w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1 - * instead we do equal: w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2 - * allows more efficient vectorization since w[i]=>w[i-3] dependency is broken - */ -.macro W_PRECALC_32_79_SSSE3 - .if ((i & 3) == 0) - movdqa W_minus_04, W_TMP1 - pxor W_minus_28, W # W is W_minus_32 before xor - palignr $8, W_minus_08, W_TMP1 - .elseif ((i & 3) == 1) - pxor W_minus_16, W - pxor W_TMP1, W - movdqa W, W_TMP1 - .elseif ((i & 3) == 2) - psrld $30, W - pslld $2, W_TMP1 - por W, W_TMP1 - .elseif ((i & 3) == 3) - movdqa W_TMP1, W - paddd K_XMM(K_BASE), W_TMP1 - movdqa W_TMP1, WK(i&~3) - W_PRECALC_ROTATE - .endif -.endm - -.endm // W_PRECALC_SSSE3 - - -#define K1 0x5a827999 -#define K2 0x6ed9eba1 -#define K3 0x8f1bbcdc -#define K4 0xca62c1d6 - -.section .rodata -.align 16 - -K_XMM_AR: - .long K1, K1, K1, K1 - .long K2, K2, K2, K2 - .long K3, K3, K3, K3 - .long K4, K4, K4, K4 - -BSWAP_SHUFB_CTL: - .long 0x00010203 - .long 0x04050607 - .long 0x08090a0b - .long 0x0c0d0e0f - - -.section .text - -W_PRECALC_SSSE3 -.macro xmm_mov a, b - movdqu \a,\b -.endm - -/* SSSE3 optimized implementation: - * extern "C" void sha1_transform_ssse3(u32 *digest, const char *data, u32 *ws, - * unsigned int rounds); - */ -SHA1_VECTOR_ASM sha1_transform_ssse3 - -#ifdef SHA1_ENABLE_AVX_SUPPORT - -.macro W_PRECALC_AVX - -.purgem W_PRECALC_00_15 -.macro W_PRECALC_00_15 - W_PRECALC_00_15_AVX -.endm -.purgem W_PRECALC_16_31 -.macro W_PRECALC_16_31 - W_PRECALC_16_31_AVX -.endm -.purgem W_PRECALC_32_79 -.macro W_PRECALC_32_79 - W_PRECALC_32_79_AVX -.endm - -.macro W_PRECALC_00_15_AVX - .if ((i & 3) == 0) - vmovdqu (i*4)(BUFFER_PTR), W_TMP1 - .elseif ((i & 3) == 1) - vpshufb XMM_SHUFB_BSWAP, W_TMP1, W - .elseif ((i & 3) == 2) - vpaddd (K_BASE), W, W_TMP1 - .elseif ((i & 3) == 3) - vmovdqa W_TMP1, WK(i&~3) - W_PRECALC_ROTATE - .endif -.endm - -.macro W_PRECALC_16_31_AVX - .if ((i & 3) == 0) - vpalignr $8, W_minus_16, W_minus_12, W # w[i-14] - vpsrldq $4, W_minus_04, W_TMP1 # w[i-3] - vpxor W_minus_08, W, W - vpxor W_minus_16, W_TMP1, W_TMP1 - .elseif ((i & 3) == 1) - vpxor W_TMP1, W, W - vpslldq $12, W, W_TMP2 - vpslld $1, W, W_TMP1 - .elseif ((i & 3) == 2) - vpsrld $31, W, W - vpor W, W_TMP1, W_TMP1 - vpslld $2, W_TMP2, W - vpsrld $30, W_TMP2, W_TMP2 - .elseif ((i & 3) == 3) - vpxor W, W_TMP1, W_TMP1 - vpxor W_TMP2, W_TMP1, W - vpaddd K_XMM(K_BASE), W, W_TMP1 - vmovdqu W_TMP1, WK(i&~3) - W_PRECALC_ROTATE - .endif -.endm - -.macro W_PRECALC_32_79_AVX - .if ((i & 3) == 0) - vpalignr $8, W_minus_08, W_minus_04, W_TMP1 - vpxor W_minus_28, W, W # W is W_minus_32 before xor - .elseif ((i & 3) == 1) - vpxor W_minus_16, W_TMP1, W_TMP1 - vpxor W_TMP1, W, W - .elseif ((i & 3) == 2) - vpslld $2, W, W_TMP1 - vpsrld $30, W, W - vpor W, W_TMP1, W - .elseif ((i & 3) == 3) - vpaddd K_XMM(K_BASE), W, W_TMP1 - vmovdqu W_TMP1, WK(i&~3) - W_PRECALC_ROTATE - .endif -.endm - -.endm // W_PRECALC_AVX - -W_PRECALC_AVX -.purgem xmm_mov -.macro xmm_mov a, b - vmovdqu \a,\b -.endm - - -/* AVX optimized implementation: - * extern "C" void sha1_transform_avx(u32 *digest, const char *data, u32 *ws, - * unsigned int rounds); - */ -SHA1_VECTOR_ASM sha1_transform_avx - -#endif diff --git a/ANDROID_3.4.5/arch/x86/crypto/sha1_ssse3_glue.c b/ANDROID_3.4.5/arch/x86/crypto/sha1_ssse3_glue.c deleted file mode 100644 index f916499d..00000000 
--- a/ANDROID_3.4.5/arch/x86/crypto/sha1_ssse3_glue.c +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Cryptographic API. - * - * Glue code for the SHA1 Secure Hash Algorithm assembler implementation using - * Supplemental SSE3 instructions. - * - * This file is based on sha1_generic.c - * - * Copyright (c) Alan Smithee. - * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> - * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> - * Copyright (c) Mathias Krause <minipli@googlemail.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/cryptohash.h> -#include <linux/types.h> -#include <crypto/sha.h> -#include <asm/byteorder.h> -#include <asm/i387.h> -#include <asm/xcr.h> -#include <asm/xsave.h> - - -asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, - unsigned int rounds); -#ifdef SHA1_ENABLE_AVX_SUPPORT -asmlinkage void sha1_transform_avx(u32 *digest, const char *data, - unsigned int rounds); -#endif - -static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int); - - -static int sha1_ssse3_init(struct shash_desc *desc) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - - *sctx = (struct sha1_state){ - .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, - }; - - return 0; -} - -static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data, - unsigned int len, unsigned int partial) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int done = 0; - - sctx->count += len; - - if (partial) { - done = SHA1_BLOCK_SIZE - partial; - memcpy(sctx->buffer + partial, data, done); - sha1_transform_asm(sctx->state, sctx->buffer, 1); - } - - if (len - done >= SHA1_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; - - sha1_transform_asm(sctx->state, data + done, rounds); - done += rounds * SHA1_BLOCK_SIZE; - } - - memcpy(sctx->buffer, data + done, len - done); - - return 0; -} - -static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; - int res; - - /* Handle the fast case right here */ - if (partial + len < SHA1_BLOCK_SIZE) { - sctx->count += len; - memcpy(sctx->buffer + partial, data, len); - - return 0; - } - - if (!irq_fpu_usable()) { - res = crypto_sha1_update(desc, data, len); - } else { - kernel_fpu_begin(); - res = __sha1_ssse3_update(desc, data, len, partial); - kernel_fpu_end(); - } - - return res; -} - - -/* Add padding and return the message digest. */ -static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be32 *dst = (__be32 *)out; - __be64 bits; - static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; - - bits = cpu_to_be64(sctx->count << 3); - - /* Pad out to 56 mod 64 and append length */ - index = sctx->count % SHA1_BLOCK_SIZE; - padlen = (index < 56) ? 
(56 - index) : ((SHA1_BLOCK_SIZE+56) - index); - if (!irq_fpu_usable()) { - crypto_sha1_update(desc, padding, padlen); - crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits)); - } else { - kernel_fpu_begin(); - /* We need to fill a whole block for __sha1_ssse3_update() */ - if (padlen <= 56) { - sctx->count += padlen; - memcpy(sctx->buffer + index, padding, padlen); - } else { - __sha1_ssse3_update(desc, padding, padlen, index); - } - __sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56); - kernel_fpu_end(); - } - - /* Store state in digest */ - for (i = 0; i < 5; i++) - dst[i] = cpu_to_be32(sctx->state[i]); - - /* Wipe context */ - memset(sctx, 0, sizeof(*sctx)); - - return 0; -} - -static int sha1_ssse3_export(struct shash_desc *desc, void *out) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - - memcpy(out, sctx, sizeof(*sctx)); - - return 0; -} - -static int sha1_ssse3_import(struct shash_desc *desc, const void *in) -{ - struct sha1_state *sctx = shash_desc_ctx(desc); - - memcpy(sctx, in, sizeof(*sctx)); - - return 0; -} - -static struct shash_alg alg = { - .digestsize = SHA1_DIGEST_SIZE, - .init = sha1_ssse3_init, - .update = sha1_ssse3_update, - .final = sha1_ssse3_final, - .export = sha1_ssse3_export, - .import = sha1_ssse3_import, - .descsize = sizeof(struct sha1_state), - .statesize = sizeof(struct sha1_state), - .base = { - .cra_name = "sha1", - .cra_driver_name= "sha1-ssse3", - .cra_priority = 150, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}; - -#ifdef SHA1_ENABLE_AVX_SUPPORT -static bool __init avx_usable(void) -{ - u64 xcr0; - - if (!cpu_has_avx || !cpu_has_osxsave) - return false; - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); - - return false; - } - - return true; -} -#endif - -static int __init sha1_ssse3_mod_init(void) -{ - /* test for SSSE3 first */ - if (cpu_has_ssse3) - sha1_transform_asm = sha1_transform_ssse3; - -#ifdef SHA1_ENABLE_AVX_SUPPORT - /* allow AVX to override SSSE3, it's a little faster */ - if (avx_usable()) - sha1_transform_asm = sha1_transform_avx; -#endif - - if (sha1_transform_asm) { - pr_info("Using %s optimized SHA-1 implementation\n", - sha1_transform_asm == sha1_transform_ssse3 ? "SSSE3" - : "AVX"); - return crypto_register_shash(&alg); - } - pr_info("Neither AVX nor SSSE3 is available/usable.\n"); - - return -ENODEV; -} - -static void __exit sha1_ssse3_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(sha1_ssse3_mod_init); -module_exit(sha1_ssse3_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated"); - -MODULE_ALIAS("sha1"); diff --git a/ANDROID_3.4.5/arch/x86/crypto/twofish-i586-asm_32.S b/ANDROID_3.4.5/arch/x86/crypto/twofish-i586-asm_32.S deleted file mode 100644 index 658af4bb..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/twofish-i586-asm_32.S +++ /dev/null @@ -1,335 +0,0 @@ -/*************************************************************************** -* Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> * -* * -* This program is free software; you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published by * -* the Free Software Foundation; either version 2 of the License, or * -* (at your option) any later version. 
* -* * -* This program is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with this program; if not, write to the * -* Free Software Foundation, Inc., * -* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * -***************************************************************************/ - -.file "twofish-i586-asm.S" -.text - -#include <asm/asm-offsets.h> - -/* return address at 0 */ - -#define in_blk 12 /* input byte array address parameter*/ -#define out_blk 8 /* output byte array address parameter*/ -#define ctx 4 /* Twofish context structure */ - -#define a_offset 0 -#define b_offset 4 -#define c_offset 8 -#define d_offset 12 - -/* Structure of the crypto context struct*/ - -#define s0 0 /* S0 Array 256 Words each */ -#define s1 1024 /* S1 Array */ -#define s2 2048 /* S2 Array */ -#define s3 3072 /* S3 Array */ -#define w 4096 /* 8 whitening keys (word) */ -#define k 4128 /* key 1-32 ( word ) */ - -/* define a few register aliases to allow macro substitution */ - -#define R0D %eax -#define R0B %al -#define R0H %ah - -#define R1D %ebx -#define R1B %bl -#define R1H %bh - -#define R2D %ecx -#define R2B %cl -#define R2H %ch - -#define R3D %edx -#define R3B %dl -#define R3H %dh - - -/* performs input whitening */ -#define input_whitening(src,context,offset)\ - xor w+offset(context), src; - -/* performs input whitening */ -#define output_whitening(src,context,offset)\ - xor w+16+offset(context), src; - -/* - * a input register containing a (rotated 16) - * b input register containing b - * c input register containing c - * d input register containing d (already rol $1) - * operations on a and b are interleaved to increase performance - */ -#define encrypt_round(a,b,c,d,round)\ - push d ## D;\ - movzx b ## B, %edi;\ - mov s1(%ebp,%edi,4),d ## D;\ - movzx a ## B, %edi;\ - mov s2(%ebp,%edi,4),%esi;\ - movzx b ## H, %edi;\ - ror $16, b ## D;\ - xor s2(%ebp,%edi,4),d ## D;\ - movzx a ## H, %edi;\ - ror $16, a ## D;\ - xor s3(%ebp,%edi,4),%esi;\ - movzx b ## B, %edi;\ - xor s3(%ebp,%edi,4),d ## D;\ - movzx a ## B, %edi;\ - xor (%ebp,%edi,4), %esi;\ - movzx b ## H, %edi;\ - ror $15, b ## D;\ - xor (%ebp,%edi,4), d ## D;\ - movzx a ## H, %edi;\ - xor s1(%ebp,%edi,4),%esi;\ - pop %edi;\ - add d ## D, %esi;\ - add %esi, d ## D;\ - add k+round(%ebp), %esi;\ - xor %esi, c ## D;\ - rol $15, c ## D;\ - add k+4+round(%ebp),d ## D;\ - xor %edi, d ## D; - -/* - * a input register containing a (rotated 16) - * b input register containing b - * c input register containing c - * d input register containing d (already rol $1) - * operations on a and b are interleaved to increase performance - * last round has different rotations for the output preparation - */ -#define encrypt_last_round(a,b,c,d,round)\ - push d ## D;\ - movzx b ## B, %edi;\ - mov s1(%ebp,%edi,4),d ## D;\ - movzx a ## B, %edi;\ - mov s2(%ebp,%edi,4),%esi;\ - movzx b ## H, %edi;\ - ror $16, b ## D;\ - xor s2(%ebp,%edi,4),d ## D;\ - movzx a ## H, %edi;\ - ror $16, a ## D;\ - xor s3(%ebp,%edi,4),%esi;\ - movzx b ## B, %edi;\ - xor s3(%ebp,%edi,4),d ## D;\ - movzx a ## B, %edi;\ - xor (%ebp,%edi,4), %esi;\ - movzx b ## H, %edi;\ - ror $16, b ## D;\ - xor (%ebp,%edi,4), d ## D;\ - movzx a ## H, %edi;\ - xor s1(%ebp,%edi,4),%esi;\ - pop %edi;\ - add d ## D, %esi;\ - add %esi, d ## 
D;\ - add k+round(%ebp), %esi;\ - xor %esi, c ## D;\ - ror $1, c ## D;\ - add k+4+round(%ebp),d ## D;\ - xor %edi, d ## D; - -/* - * a input register containing a - * b input register containing b (rotated 16) - * c input register containing c - * d input register containing d (already rol $1) - * operations on a and b are interleaved to increase performance - */ -#define decrypt_round(a,b,c,d,round)\ - push c ## D;\ - movzx a ## B, %edi;\ - mov (%ebp,%edi,4), c ## D;\ - movzx b ## B, %edi;\ - mov s3(%ebp,%edi,4),%esi;\ - movzx a ## H, %edi;\ - ror $16, a ## D;\ - xor s1(%ebp,%edi,4),c ## D;\ - movzx b ## H, %edi;\ - ror $16, b ## D;\ - xor (%ebp,%edi,4), %esi;\ - movzx a ## B, %edi;\ - xor s2(%ebp,%edi,4),c ## D;\ - movzx b ## B, %edi;\ - xor s1(%ebp,%edi,4),%esi;\ - movzx a ## H, %edi;\ - ror $15, a ## D;\ - xor s3(%ebp,%edi,4),c ## D;\ - movzx b ## H, %edi;\ - xor s2(%ebp,%edi,4),%esi;\ - pop %edi;\ - add %esi, c ## D;\ - add c ## D, %esi;\ - add k+round(%ebp), c ## D;\ - xor %edi, c ## D;\ - add k+4+round(%ebp),%esi;\ - xor %esi, d ## D;\ - rol $15, d ## D; - -/* - * a input register containing a - * b input register containing b (rotated 16) - * c input register containing c - * d input register containing d (already rol $1) - * operations on a and b are interleaved to increase performance - * last round has different rotations for the output preparation - */ -#define decrypt_last_round(a,b,c,d,round)\ - push c ## D;\ - movzx a ## B, %edi;\ - mov (%ebp,%edi,4), c ## D;\ - movzx b ## B, %edi;\ - mov s3(%ebp,%edi,4),%esi;\ - movzx a ## H, %edi;\ - ror $16, a ## D;\ - xor s1(%ebp,%edi,4),c ## D;\ - movzx b ## H, %edi;\ - ror $16, b ## D;\ - xor (%ebp,%edi,4), %esi;\ - movzx a ## B, %edi;\ - xor s2(%ebp,%edi,4),c ## D;\ - movzx b ## B, %edi;\ - xor s1(%ebp,%edi,4),%esi;\ - movzx a ## H, %edi;\ - ror $16, a ## D;\ - xor s3(%ebp,%edi,4),c ## D;\ - movzx b ## H, %edi;\ - xor s2(%ebp,%edi,4),%esi;\ - pop %edi;\ - add %esi, c ## D;\ - add c ## D, %esi;\ - add k+round(%ebp), c ## D;\ - xor %edi, c ## D;\ - add k+4+round(%ebp),%esi;\ - xor %esi, d ## D;\ - ror $1, d ## D; - -.align 4 -.global twofish_enc_blk -.global twofish_dec_blk - -twofish_enc_blk: - push %ebp /* save registers according to calling convention*/ - push %ebx - push %esi - push %edi - - mov ctx + 16(%esp), %ebp /* abuse the base pointer: set new base - * pointer to the ctx address */ - mov in_blk+16(%esp),%edi /* input address in edi */ - - mov (%edi), %eax - mov b_offset(%edi), %ebx - mov c_offset(%edi), %ecx - mov d_offset(%edi), %edx - input_whitening(%eax,%ebp,a_offset) - ror $16, %eax - input_whitening(%ebx,%ebp,b_offset) - input_whitening(%ecx,%ebp,c_offset) - input_whitening(%edx,%ebp,d_offset) - rol $1, %edx - - encrypt_round(R0,R1,R2,R3,0); - encrypt_round(R2,R3,R0,R1,8); - encrypt_round(R0,R1,R2,R3,2*8); - encrypt_round(R2,R3,R0,R1,3*8); - encrypt_round(R0,R1,R2,R3,4*8); - encrypt_round(R2,R3,R0,R1,5*8); - encrypt_round(R0,R1,R2,R3,6*8); - encrypt_round(R2,R3,R0,R1,7*8); - encrypt_round(R0,R1,R2,R3,8*8); - encrypt_round(R2,R3,R0,R1,9*8); - encrypt_round(R0,R1,R2,R3,10*8); - encrypt_round(R2,R3,R0,R1,11*8); - encrypt_round(R0,R1,R2,R3,12*8); - encrypt_round(R2,R3,R0,R1,13*8); - encrypt_round(R0,R1,R2,R3,14*8); - encrypt_last_round(R2,R3,R0,R1,15*8); - - output_whitening(%eax,%ebp,c_offset) - output_whitening(%ebx,%ebp,d_offset) - output_whitening(%ecx,%ebp,a_offset) - output_whitening(%edx,%ebp,b_offset) - mov out_blk+16(%esp),%edi; - mov %eax, c_offset(%edi) - mov %ebx, d_offset(%edi) - mov %ecx, (%edi) - mov %edx, 
b_offset(%edi) - - pop %edi - pop %esi - pop %ebx - pop %ebp - mov $1, %eax - ret - -twofish_dec_blk: - push %ebp /* save registers according to calling convention*/ - push %ebx - push %esi - push %edi - - - mov ctx + 16(%esp), %ebp /* abuse the base pointer: set new base - * pointer to the ctx address */ - mov in_blk+16(%esp),%edi /* input address in edi */ - - mov (%edi), %eax - mov b_offset(%edi), %ebx - mov c_offset(%edi), %ecx - mov d_offset(%edi), %edx - output_whitening(%eax,%ebp,a_offset) - output_whitening(%ebx,%ebp,b_offset) - ror $16, %ebx - output_whitening(%ecx,%ebp,c_offset) - output_whitening(%edx,%ebp,d_offset) - rol $1, %ecx - - decrypt_round(R0,R1,R2,R3,15*8); - decrypt_round(R2,R3,R0,R1,14*8); - decrypt_round(R0,R1,R2,R3,13*8); - decrypt_round(R2,R3,R0,R1,12*8); - decrypt_round(R0,R1,R2,R3,11*8); - decrypt_round(R2,R3,R0,R1,10*8); - decrypt_round(R0,R1,R2,R3,9*8); - decrypt_round(R2,R3,R0,R1,8*8); - decrypt_round(R0,R1,R2,R3,7*8); - decrypt_round(R2,R3,R0,R1,6*8); - decrypt_round(R0,R1,R2,R3,5*8); - decrypt_round(R2,R3,R0,R1,4*8); - decrypt_round(R0,R1,R2,R3,3*8); - decrypt_round(R2,R3,R0,R1,2*8); - decrypt_round(R0,R1,R2,R3,1*8); - decrypt_last_round(R2,R3,R0,R1,0); - - input_whitening(%eax,%ebp,c_offset) - input_whitening(%ebx,%ebp,d_offset) - input_whitening(%ecx,%ebp,a_offset) - input_whitening(%edx,%ebp,b_offset) - mov out_blk+16(%esp),%edi; - mov %eax, c_offset(%edi) - mov %ebx, d_offset(%edi) - mov %ecx, (%edi) - mov %edx, b_offset(%edi) - - pop %edi - pop %esi - pop %ebx - pop %ebp - mov $1, %eax - ret diff --git a/ANDROID_3.4.5/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/ANDROID_3.4.5/arch/x86/crypto/twofish-x86_64-asm_64-3way.S deleted file mode 100644 index 5b012a2c..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/twofish-x86_64-asm_64-3way.S +++ /dev/null @@ -1,316 +0,0 @@ -/* - * Twofish Cipher 3-way parallel algorithm (x86_64) - * - * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - */ - -.file "twofish-x86_64-asm-3way.S" -.text - -/* structure of crypto context */ -#define s0 0 -#define s1 1024 -#define s2 2048 -#define s3 3072 -#define w 4096 -#define k 4128 - -/********************************************************************** - 3-way twofish - **********************************************************************/ -#define CTX %rdi -#define RIO %rdx - -#define RAB0 %rax -#define RAB1 %rbx -#define RAB2 %rcx - -#define RAB0d %eax -#define RAB1d %ebx -#define RAB2d %ecx - -#define RAB0bh %ah -#define RAB1bh %bh -#define RAB2bh %ch - -#define RAB0bl %al -#define RAB1bl %bl -#define RAB2bl %cl - -#define RCD0 %r8 -#define RCD1 %r9 -#define RCD2 %r10 - -#define RCD0d %r8d -#define RCD1d %r9d -#define RCD2d %r10d - -#define RX0 %rbp -#define RX1 %r11 -#define RX2 %r12 - -#define RX0d %ebp -#define RX1d %r11d -#define RX2d %r12d - -#define RY0 %r13 -#define RY1 %r14 -#define RY2 %r15 - -#define RY0d %r13d -#define RY1d %r14d -#define RY2d %r15d - -#define RT0 %rdx -#define RT1 %rsi - -#define RT0d %edx -#define RT1d %esi - -#define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \ - movzbl ab ## bl, tmp2 ## d; \ - movzbl ab ## bh, tmp1 ## d; \ - rorq $(rot), ab; \ - op1##l T0(CTX, tmp2, 4), dst ## d; \ - op2##l T1(CTX, tmp1, 4), dst ## d; - -/* - * Combined G1 & G2 function. Reordered with help of rotates to have moves - * at begining. - */ -#define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \ - /* G1,1 && G2,1 */ \ - do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \ - do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \ - \ - do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \ - do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \ - \ - do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \ - do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \ - \ - /* G1,2 && G2,2 */ \ - do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \ - do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \ - xchgq cd ## 0, ab ## 0; \ - \ - do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \ - do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \ - xchgq cd ## 1, ab ## 1; \ - \ - do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \ - do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \ - xchgq cd ## 2, ab ## 2; - -#define enc_round_end(ab, x, y, n) \ - addl y ## d, x ## d; \ - addl x ## d, y ## d; \ - addl k+4*(2*(n))(CTX), x ## d; \ - xorl ab ## d, x ## d; \ - addl k+4*(2*(n)+1)(CTX), y ## d; \ - shrq $32, ab; \ - roll $1, ab ## d; \ - xorl y ## d, ab ## d; \ - shlq $32, ab; \ - rorl $1, x ## d; \ - orq x, ab; - -#define dec_round_end(ba, x, y, n) \ - addl y ## d, x ## d; \ - addl x ## d, y ## d; \ - addl k+4*(2*(n))(CTX), x ## d; \ - addl k+4*(2*(n)+1)(CTX), y ## d; \ - xorl ba ## d, y ## d; \ - shrq $32, ba; \ - roll $1, ba ## d; \ - xorl x ## d, ba ## d; \ - shlq $32, ba; \ - rorl $1, y ## d; \ - orq y, ba; - -#define encrypt_round3(ab, cd, n) \ - g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \ - \ - enc_round_end(ab ## 0, RX0, RY0, n); \ - enc_round_end(ab ## 1, RX1, RY1, n); \ - enc_round_end(ab ## 2, RX2, RY2, n); - -#define decrypt_round3(ba, dc, n) \ - 
g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \ - \ - dec_round_end(ba ## 0, RX0, RY0, n); \ - dec_round_end(ba ## 1, RX1, RY1, n); \ - dec_round_end(ba ## 2, RX2, RY2, n); - -#define encrypt_cycle3(ab, cd, n) \ - encrypt_round3(ab, cd, n*2); \ - encrypt_round3(ab, cd, (n*2)+1); - -#define decrypt_cycle3(ba, dc, n) \ - decrypt_round3(ba, dc, (n*2)+1); \ - decrypt_round3(ba, dc, (n*2)); - -#define inpack3(in, n, xy, m) \ - movq 4*(n)(in), xy ## 0; \ - xorq w+4*m(CTX), xy ## 0; \ - \ - movq 4*(4+(n))(in), xy ## 1; \ - xorq w+4*m(CTX), xy ## 1; \ - \ - movq 4*(8+(n))(in), xy ## 2; \ - xorq w+4*m(CTX), xy ## 2; - -#define outunpack3(op, out, n, xy, m) \ - xorq w+4*m(CTX), xy ## 0; \ - op ## q xy ## 0, 4*(n)(out); \ - \ - xorq w+4*m(CTX), xy ## 1; \ - op ## q xy ## 1, 4*(4+(n))(out); \ - \ - xorq w+4*m(CTX), xy ## 2; \ - op ## q xy ## 2, 4*(8+(n))(out); - -#define inpack_enc3() \ - inpack3(RIO, 0, RAB, 0); \ - inpack3(RIO, 2, RCD, 2); - -#define outunpack_enc3(op) \ - outunpack3(op, RIO, 2, RAB, 6); \ - outunpack3(op, RIO, 0, RCD, 4); - -#define inpack_dec3() \ - inpack3(RIO, 0, RAB, 4); \ - rorq $32, RAB0; \ - rorq $32, RAB1; \ - rorq $32, RAB2; \ - inpack3(RIO, 2, RCD, 6); \ - rorq $32, RCD0; \ - rorq $32, RCD1; \ - rorq $32, RCD2; - -#define outunpack_dec3() \ - rorq $32, RCD0; \ - rorq $32, RCD1; \ - rorq $32, RCD2; \ - outunpack3(mov, RIO, 0, RCD, 0); \ - rorq $32, RAB0; \ - rorq $32, RAB1; \ - rorq $32, RAB2; \ - outunpack3(mov, RIO, 2, RAB, 2); - -.align 8 -.global __twofish_enc_blk_3way -.type __twofish_enc_blk_3way,@function; - -__twofish_enc_blk_3way: - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src, RIO - * %rcx: bool, if true: xor output - */ - pushq %r15; - pushq %r14; - pushq %r13; - pushq %r12; - pushq %rbp; - pushq %rbx; - - pushq %rcx; /* bool xor */ - pushq %rsi; /* dst */ - - inpack_enc3(); - - encrypt_cycle3(RAB, RCD, 0); - encrypt_cycle3(RAB, RCD, 1); - encrypt_cycle3(RAB, RCD, 2); - encrypt_cycle3(RAB, RCD, 3); - encrypt_cycle3(RAB, RCD, 4); - encrypt_cycle3(RAB, RCD, 5); - encrypt_cycle3(RAB, RCD, 6); - encrypt_cycle3(RAB, RCD, 7); - - popq RIO; /* dst */ - popq %rbp; /* bool xor */ - - testb %bpl, %bpl; - jnz __enc_xor3; - - outunpack_enc3(mov); - - popq %rbx; - popq %rbp; - popq %r12; - popq %r13; - popq %r14; - popq %r15; - ret; - -__enc_xor3: - outunpack_enc3(xor); - - popq %rbx; - popq %rbp; - popq %r12; - popq %r13; - popq %r14; - popq %r15; - ret; - -.global twofish_dec_blk_3way -.type twofish_dec_blk_3way,@function; - -twofish_dec_blk_3way: - /* input: - * %rdi: ctx, CTX - * %rsi: dst - * %rdx: src, RIO - */ - pushq %r15; - pushq %r14; - pushq %r13; - pushq %r12; - pushq %rbp; - pushq %rbx; - - pushq %rsi; /* dst */ - - inpack_dec3(); - - decrypt_cycle3(RAB, RCD, 7); - decrypt_cycle3(RAB, RCD, 6); - decrypt_cycle3(RAB, RCD, 5); - decrypt_cycle3(RAB, RCD, 4); - decrypt_cycle3(RAB, RCD, 3); - decrypt_cycle3(RAB, RCD, 2); - decrypt_cycle3(RAB, RCD, 1); - decrypt_cycle3(RAB, RCD, 0); - - popq RIO; /* dst */ - - outunpack_dec3(); - - popq %rbx; - popq %rbp; - popq %r12; - popq %r13; - popq %r14; - popq %r15; - ret; - diff --git a/ANDROID_3.4.5/arch/x86/crypto/twofish-x86_64-asm_64.S b/ANDROID_3.4.5/arch/x86/crypto/twofish-x86_64-asm_64.S deleted file mode 100644 index 7bcf3fcc..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/twofish-x86_64-asm_64.S +++ /dev/null @@ -1,322 +0,0 @@ -/*************************************************************************** -* Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> * -* * -* This program is free 
software; you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published by * -* the Free Software Foundation; either version 2 of the License, or * -* (at your option) any later version. * -* * -* This program is distributed in the hope that it will be useful, * -* but WITHOUT ANY WARRANTY; without even the implied warranty of * -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * -* GNU General Public License for more details. * -* * -* You should have received a copy of the GNU General Public License * -* along with this program; if not, write to the * -* Free Software Foundation, Inc., * -* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * -***************************************************************************/ - -.file "twofish-x86_64-asm.S" -.text - -#include <asm/asm-offsets.h> - -#define a_offset 0 -#define b_offset 4 -#define c_offset 8 -#define d_offset 12 - -/* Structure of the crypto context struct*/ - -#define s0 0 /* S0 Array 256 Words each */ -#define s1 1024 /* S1 Array */ -#define s2 2048 /* S2 Array */ -#define s3 3072 /* S3 Array */ -#define w 4096 /* 8 whitening keys (word) */ -#define k 4128 /* key 1-32 ( word ) */ - -/* define a few register aliases to allow macro substitution */ - -#define R0 %rax -#define R0D %eax -#define R0B %al -#define R0H %ah - -#define R1 %rbx -#define R1D %ebx -#define R1B %bl -#define R1H %bh - -#define R2 %rcx -#define R2D %ecx -#define R2B %cl -#define R2H %ch - -#define R3 %rdx -#define R3D %edx -#define R3B %dl -#define R3H %dh - - -/* performs input whitening */ -#define input_whitening(src,context,offset)\ - xor w+offset(context), src; - -/* performs input whitening */ -#define output_whitening(src,context,offset)\ - xor w+16+offset(context), src; - - -/* - * a input register containing a (rotated 16) - * b input register containing b - * c input register containing c - * d input register containing d (already rol $1) - * operations on a and b are interleaved to increase performance - */ -#define encrypt_round(a,b,c,d,round)\ - movzx b ## B, %edi;\ - mov s1(%r11,%rdi,4),%r8d;\ - movzx a ## B, %edi;\ - mov s2(%r11,%rdi,4),%r9d;\ - movzx b ## H, %edi;\ - ror $16, b ## D;\ - xor s2(%r11,%rdi,4),%r8d;\ - movzx a ## H, %edi;\ - ror $16, a ## D;\ - xor s3(%r11,%rdi,4),%r9d;\ - movzx b ## B, %edi;\ - xor s3(%r11,%rdi,4),%r8d;\ - movzx a ## B, %edi;\ - xor (%r11,%rdi,4), %r9d;\ - movzx b ## H, %edi;\ - ror $15, b ## D;\ - xor (%r11,%rdi,4), %r8d;\ - movzx a ## H, %edi;\ - xor s1(%r11,%rdi,4),%r9d;\ - add %r8d, %r9d;\ - add %r9d, %r8d;\ - add k+round(%r11), %r9d;\ - xor %r9d, c ## D;\ - rol $15, c ## D;\ - add k+4+round(%r11),%r8d;\ - xor %r8d, d ## D; - -/* - * a input register containing a(rotated 16) - * b input register containing b - * c input register containing c - * d input register containing d (already rol $1) - * operations on a and b are interleaved to increase performance - * during the round a and b are prepared for the output whitening - */ -#define encrypt_last_round(a,b,c,d,round)\ - mov b ## D, %r10d;\ - shl $32, %r10;\ - movzx b ## B, %edi;\ - mov s1(%r11,%rdi,4),%r8d;\ - movzx a ## B, %edi;\ - mov s2(%r11,%rdi,4),%r9d;\ - movzx b ## H, %edi;\ - ror $16, b ## D;\ - xor s2(%r11,%rdi,4),%r8d;\ - movzx a ## H, %edi;\ - ror $16, a ## D;\ - xor s3(%r11,%rdi,4),%r9d;\ - movzx b ## B, %edi;\ - xor s3(%r11,%rdi,4),%r8d;\ - movzx a ## B, %edi;\ - xor (%r11,%rdi,4), %r9d;\ - xor a, %r10;\ - movzx b ## H, %edi;\ - xor (%r11,%rdi,4), %r8d;\ - movzx a ## H, %edi;\ - 
xor s1(%r11,%rdi,4),%r9d;\ - add %r8d, %r9d;\ - add %r9d, %r8d;\ - add k+round(%r11), %r9d;\ - xor %r9d, c ## D;\ - ror $1, c ## D;\ - add k+4+round(%r11),%r8d;\ - xor %r8d, d ## D - -/* - * a input register containing a - * b input register containing b (rotated 16) - * c input register containing c (already rol $1) - * d input register containing d - * operations on a and b are interleaved to increase performance - */ -#define decrypt_round(a,b,c,d,round)\ - movzx a ## B, %edi;\ - mov (%r11,%rdi,4), %r9d;\ - movzx b ## B, %edi;\ - mov s3(%r11,%rdi,4),%r8d;\ - movzx a ## H, %edi;\ - ror $16, a ## D;\ - xor s1(%r11,%rdi,4),%r9d;\ - movzx b ## H, %edi;\ - ror $16, b ## D;\ - xor (%r11,%rdi,4), %r8d;\ - movzx a ## B, %edi;\ - xor s2(%r11,%rdi,4),%r9d;\ - movzx b ## B, %edi;\ - xor s1(%r11,%rdi,4),%r8d;\ - movzx a ## H, %edi;\ - ror $15, a ## D;\ - xor s3(%r11,%rdi,4),%r9d;\ - movzx b ## H, %edi;\ - xor s2(%r11,%rdi,4),%r8d;\ - add %r8d, %r9d;\ - add %r9d, %r8d;\ - add k+round(%r11), %r9d;\ - xor %r9d, c ## D;\ - add k+4+round(%r11),%r8d;\ - xor %r8d, d ## D;\ - rol $15, d ## D; - -/* - * a input register containing a - * b input register containing b - * c input register containing c (already rol $1) - * d input register containing d - * operations on a and b are interleaved to increase performance - * during the round a and b are prepared for the output whitening - */ -#define decrypt_last_round(a,b,c,d,round)\ - movzx a ## B, %edi;\ - mov (%r11,%rdi,4), %r9d;\ - movzx b ## B, %edi;\ - mov s3(%r11,%rdi,4),%r8d;\ - movzx b ## H, %edi;\ - ror $16, b ## D;\ - xor (%r11,%rdi,4), %r8d;\ - movzx a ## H, %edi;\ - mov b ## D, %r10d;\ - shl $32, %r10;\ - xor a, %r10;\ - ror $16, a ## D;\ - xor s1(%r11,%rdi,4),%r9d;\ - movzx b ## B, %edi;\ - xor s1(%r11,%rdi,4),%r8d;\ - movzx a ## B, %edi;\ - xor s2(%r11,%rdi,4),%r9d;\ - movzx b ## H, %edi;\ - xor s2(%r11,%rdi,4),%r8d;\ - movzx a ## H, %edi;\ - xor s3(%r11,%rdi,4),%r9d;\ - add %r8d, %r9d;\ - add %r9d, %r8d;\ - add k+round(%r11), %r9d;\ - xor %r9d, c ## D;\ - add k+4+round(%r11),%r8d;\ - xor %r8d, d ## D;\ - ror $1, d ## D; - -.align 8 -.global twofish_enc_blk -.global twofish_dec_blk - -twofish_enc_blk: - pushq R1 - - /* %rdi contains the ctx address */ - /* %rsi contains the output address */ - /* %rdx contains the input address */ - /* ctx address is moved to free one non-rex register - as target for the 8bit high operations */ - mov %rdi, %r11 - - movq (R3), R1 - movq 8(R3), R3 - input_whitening(R1,%r11,a_offset) - input_whitening(R3,%r11,c_offset) - mov R1D, R0D - rol $16, R0D - shr $32, R1 - mov R3D, R2D - shr $32, R3 - rol $1, R3D - - encrypt_round(R0,R1,R2,R3,0); - encrypt_round(R2,R3,R0,R1,8); - encrypt_round(R0,R1,R2,R3,2*8); - encrypt_round(R2,R3,R0,R1,3*8); - encrypt_round(R0,R1,R2,R3,4*8); - encrypt_round(R2,R3,R0,R1,5*8); - encrypt_round(R0,R1,R2,R3,6*8); - encrypt_round(R2,R3,R0,R1,7*8); - encrypt_round(R0,R1,R2,R3,8*8); - encrypt_round(R2,R3,R0,R1,9*8); - encrypt_round(R0,R1,R2,R3,10*8); - encrypt_round(R2,R3,R0,R1,11*8); - encrypt_round(R0,R1,R2,R3,12*8); - encrypt_round(R2,R3,R0,R1,13*8); - encrypt_round(R0,R1,R2,R3,14*8); - encrypt_last_round(R2,R3,R0,R1,15*8); - - - output_whitening(%r10,%r11,a_offset) - movq %r10, (%rsi) - - shl $32, R1 - xor R0, R1 - - output_whitening(R1,%r11,c_offset) - movq R1, 8(%rsi) - - popq R1 - movq $1,%rax - ret - -twofish_dec_blk: - pushq R1 - - /* %rdi contains the ctx address */ - /* %rsi contains the output address */ - /* %rdx contains the input address */ - /* ctx address is moved to free one 
non-rex register - as target for the 8bit high operations */ - mov %rdi, %r11 - - movq (R3), R1 - movq 8(R3), R3 - output_whitening(R1,%r11,a_offset) - output_whitening(R3,%r11,c_offset) - mov R1D, R0D - shr $32, R1 - rol $16, R1D - mov R3D, R2D - shr $32, R3 - rol $1, R2D - - decrypt_round(R0,R1,R2,R3,15*8); - decrypt_round(R2,R3,R0,R1,14*8); - decrypt_round(R0,R1,R2,R3,13*8); - decrypt_round(R2,R3,R0,R1,12*8); - decrypt_round(R0,R1,R2,R3,11*8); - decrypt_round(R2,R3,R0,R1,10*8); - decrypt_round(R0,R1,R2,R3,9*8); - decrypt_round(R2,R3,R0,R1,8*8); - decrypt_round(R0,R1,R2,R3,7*8); - decrypt_round(R2,R3,R0,R1,6*8); - decrypt_round(R0,R1,R2,R3,5*8); - decrypt_round(R2,R3,R0,R1,4*8); - decrypt_round(R0,R1,R2,R3,3*8); - decrypt_round(R2,R3,R0,R1,2*8); - decrypt_round(R0,R1,R2,R3,1*8); - decrypt_last_round(R2,R3,R0,R1,0); - - input_whitening(%r10,%r11,a_offset) - movq %r10, (%rsi) - - shl $32, R1 - xor R0, R1 - - input_whitening(R1,%r11,c_offset) - movq R1, 8(%rsi) - - popq R1 - movq $1,%rax - ret diff --git a/ANDROID_3.4.5/arch/x86/crypto/twofish_glue.c b/ANDROID_3.4.5/arch/x86/crypto/twofish_glue.c deleted file mode 100644 index 359ae084..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/twofish_glue.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Glue Code for assembler optimized version of TWOFISH - * - * Originally Twofish for GPG - * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998 - * 256-bit key length added March 20, 1999 - * Some modifications to reduce the text size by Werner Koch, April, 1998 - * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com> - * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net> - * - * The original author has disclaimed all copyright interest in this - * code and thus put it in the public domain. The subsequent authors - * have put this under the GNU General Public License. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - * This code is a "clean room" implementation, written from the paper - * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, - * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available - * through http://www.counterpane.com/twofish.html - * - * For background information on multiplication in finite fields, used for - * the matrix operations in the key schedule, see the book _Contemporary - * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the - * Third Edition. 
- */ - -#include <crypto/twofish.h> -#include <linux/crypto.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/types.h> - -asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -EXPORT_SYMBOL_GPL(twofish_enc_blk); -asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -EXPORT_SYMBOL_GPL(twofish_dec_blk); - -static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_enc_blk(crypto_tfm_ctx(tfm), dst, src); -} - -static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_dec_blk(crypto_tfm_ctx(tfm), dst, src); -} - -static struct crypto_alg alg = { - .cra_name = "twofish", - .cra_driver_name = "twofish-asm", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = TF_MIN_KEY_SIZE, - .cia_max_keysize = TF_MAX_KEY_SIZE, - .cia_setkey = twofish_setkey, - .cia_encrypt = twofish_encrypt, - .cia_decrypt = twofish_decrypt - } - } -}; - -static int __init init(void) -{ - return crypto_register_alg(&alg); -} - -static void __exit fini(void) -{ - crypto_unregister_alg(&alg); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION ("Twofish Cipher Algorithm, asm optimized"); -MODULE_ALIAS("twofish"); -MODULE_ALIAS("twofish-asm"); diff --git a/ANDROID_3.4.5/arch/x86/crypto/twofish_glue_3way.c b/ANDROID_3.4.5/arch/x86/crypto/twofish_glue_3way.c deleted file mode 100644 index 922ab24c..00000000 --- a/ANDROID_3.4.5/arch/x86/crypto/twofish_glue_3way.c +++ /dev/null @@ -1,695 +0,0 @@ -/* - * Glue Code for 3-way parallel assembler optimized version of Twofish - * - * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> - * - * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: - * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> - * CTR part based on code (crypto/ctr.c) by: - * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - */ - -#include <asm/processor.h> -#include <linux/crypto.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/types.h> -#include <crypto/algapi.h> -#include <crypto/twofish.h> -#include <crypto/b128ops.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> - -/* regular block cipher functions from twofish_x86_64 module */ -asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); - -/* 3-way parallel cipher functions */ -asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); - -static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src) -{ - __twofish_enc_blk_3way(ctx, dst, src, false); -} - -static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src) -{ - __twofish_enc_blk_3way(ctx, dst, src, true); -} - -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - void (*fn)(struct twofish_ctx *, u8 *, const u8 *), - void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *)) -{ - struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = TF_BLOCK_SIZE; - unsigned int nbytes; - int err; - - err = blkcipher_walk_virt(desc, walk); - - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; - - /* Process three block batch */ - if (nbytes >= bsize * 3) { - do { - fn_3way(ctx, wdst, wsrc); - - wsrc += bsize * 3; - wdst += bsize * 3; - nbytes -= bsize * 3; - } while (nbytes >= bsize * 3); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - fn(ctx, wdst, wsrc); - - wsrc += bsize; - wdst += bsize; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - err = blkcipher_walk_done(desc, walk, nbytes); - } - - return err; -} - -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, twofish_enc_blk, twofish_enc_blk_3way); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way); -} - -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = TF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 *iv = (u128 *)walk->iv; - - do { - u128_xor(dst, src, iv); - twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); - return nbytes; -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - 
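/* each ciphertext block feeds the next XOR, so CBC encryption is - * inherently serial and cannot use the 3-way batch */ -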
blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} - -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = TF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 ivs[3 - 1]; - u128 last_iv; - - /* Start of the last block. */ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; - - last_iv = *src; - - /* Process three block batch */ - if (nbytes >= bsize * 3) { - do { - nbytes -= bsize * (3 - 1); - src -= 3 - 1; - dst -= 3 - 1; - - ivs[0] = src[0]; - ivs[1] = src[1]; - - twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); - - u128_xor(dst + 1, dst + 1, ivs + 0); - u128_xor(dst + 2, dst + 2, ivs + 1); - - nbytes -= bsize; - if (nbytes < bsize) - goto done; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } while (nbytes >= bsize * 3); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - for (;;) { - twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src); - - nbytes -= bsize; - if (nbytes < bsize) - break; - - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; - } - -done: - u128_xor(dst, dst, (u128 *)walk->iv); - *(u128 *)walk->iv = last_iv; - - return nbytes; -} - -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} - -static inline void u128_to_be128(be128 *dst, const u128 *src) -{ - dst->a = cpu_to_be64(src->a); - dst->b = cpu_to_be64(src->b); -} - -static inline void be128_to_u128(u128 *dst, const be128 *src) -{ - dst->a = be64_to_cpu(src->a); - dst->b = be64_to_cpu(src->b); -} - -static inline void u128_inc(u128 *i) -{ - i->b++; - if (!i->b) - i->a++; -} - -static void ctr_crypt_final(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *ctrblk = walk->iv; - u8 keystream[TF_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - twofish_enc_blk(ctx, keystream, ctrblk); - crypto_xor(keystream, src, nbytes); - memcpy(dst, keystream, nbytes); - - crypto_inc(ctrblk, TF_BLOCK_SIZE); -} - -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int bsize = TF_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 ctrblk; - be128 ctrblocks[3]; - - be128_to_u128(&ctrblk, (be128 *)walk->iv); - - /* Process three block batch */ - if (nbytes >= bsize * 3) { - do { - if (dst != src) { - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - } - - /* create ctrblks for parallel encrypt */ - u128_to_be128(&ctrblocks[0], &ctrblk); - u128_inc(&ctrblk); - u128_to_be128(&ctrblocks[1], &ctrblk); - u128_inc(&ctrblk); - u128_to_be128(&ctrblocks[2], &ctrblk); - u128_inc(&ctrblk); - - 
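/* one 3-way call encrypts all three counter blocks; the xor variant - * XORs the keystream straight into dst */ -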
twofish_enc_blk_xor_3way(ctx, (u8 *)dst, - (u8 *)ctrblocks); - - src += 3; - dst += 3; - nbytes -= bsize * 3; - } while (nbytes >= bsize * 3); - - if (nbytes < bsize) - goto done; - } - - /* Handle leftovers */ - do { - if (dst != src) - *dst = *src; - - u128_to_be128(&ctrblocks[0], &ctrblk); - u128_inc(&ctrblk); - - twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); - u128_xor(dst, dst, (u128 *)ctrblocks); - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - -done: - u128_to_be128((be128 *)walk->iv, &ctrblk); - return nbytes; -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE); - - while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) { - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - if (walk.nbytes) { - ctr_crypt_final(desc, &walk); - err = blkcipher_walk_done(desc, &walk, 0); - } - - return err; -} - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = TF_BLOCK_SIZE; - struct twofish_ctx *ctx = priv; - int i; - - if (nbytes == 3 * bsize) { - twofish_enc_blk_3way(ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_enc_blk(ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = TF_BLOCK_SIZE; - struct twofish_ctx *ctx = priv; - int i; - - if (nbytes == 3 * bsize) { - twofish_dec_blk_3way(ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_dec_blk(ctx, srcdst, srcdst); -} - -struct twofish_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct twofish_ctx twofish_ctx; -}; - -static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE, - &tfm->crt_flags); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[3]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &ctx->twofish_ctx, - .crypt_fn = encrypt_callback, - }; - - return lrw_crypt(desc, dst, src, nbytes, &req); -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[3]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &ctx->twofish_ctx, - .crypt_fn = decrypt_callback, - }; - - return lrw_crypt(desc, dst, src, nbytes, &req); -} - -static void lrw_exit_tfm(struct crypto_tfm *tfm) -{ - struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} - -struct twofish_xts_ctx { - struct twofish_ctx tweak_ctx; - struct twofish_ctx crypt_ctx; -}; - -static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct twofish_xts_ctx *ctx = 
crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; - int err; - - /* key consists of keys of equal size concatenated, therefore - * the length must be even - */ - if (keylen % 2) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - - /* first half of xts-key is for crypt */ - err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, - flags); -} - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[3]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), - .crypt_ctx = &ctx->crypt_ctx, - .crypt_fn = encrypt_callback, - }; - - return xts_crypt(desc, dst, src, nbytes, &req); -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[3]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), - .crypt_ctx = &ctx->crypt_ctx, - .crypt_fn = decrypt_callback, - }; - - return xts_crypt(desc, dst, src, nbytes, &req); -} - -static struct crypto_alg tf_algs[5] = { { - .cra_name = "ecb(twofish)", - .cra_driver_name = "ecb-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[0].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = twofish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(twofish)", - .cra_driver_name = "cbc-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[1].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = twofish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(twofish)", - .cra_driver_name = "ctr-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[2].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = twofish_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "lrw(twofish)", - .cra_driver_name = "lrw-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = 
LIST_HEAD_INIT(tf_algs[3].cra_list), - .cra_exit = lrw_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, - .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = lrw_twofish_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "xts(twofish)", - .cra_driver_name = "xts-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[4].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE * 2, - .max_keysize = TF_MAX_KEY_SIZE * 2, - .ivsize = TF_BLOCK_SIZE, - .setkey = xts_twofish_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -} }; - -static bool is_blacklisted_cpu(void) -{ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) - return false; - - if (boot_cpu_data.x86 == 0x06 && - (boot_cpu_data.x86_model == 0x1c || - boot_cpu_data.x86_model == 0x26 || - boot_cpu_data.x86_model == 0x36)) { - /* - * On Atom, twofish-3way is slower than the original assembler - * implementation. Twofish-3way trades off some performance in - * storing blocks in 64bit registers to allow three blocks to - * be processed in parallel. Parallel operation then allows gaining - * more performance than was traded off, on out-of-order CPUs. - * However, Atom does not benefit from this parallelism and - * should be blacklisted. - */ - return true; - } - - if (boot_cpu_data.x86 == 0x0f) { - /* - * On Pentium 4, twofish-3way is slower than the original assembler - * implementation because of the excessive use of 64bit rotates and - * left-shifts (which are really slow on P4) needed to store and - * handle a 128bit block in two 64bit registers. - */ - return true; - } - - return false; -} - -static int force; -module_param(force, int, 0); -MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); - -static int __init init(void) -{ - if (!force && is_blacklisted_cpu()) { - printk(KERN_INFO - "twofish-x86_64-3way: performance on this CPU " - "would be suboptimal: disabling " - "twofish-x86_64-3way.\n"); - return -ENODEV; - } - - return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs)); -} - -static void __exit fini(void) -{ - crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs)); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); -MODULE_ALIAS("twofish"); -MODULE_ALIAS("twofish-asm");
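
The ecb_crypt() helper in the deleted twofish_glue_3way.c captures the batching pattern used throughout the 3-way module: drain blocks three at a time through the parallel routine, then finish the tail one block at a time. A minimal sketch of that loop structure in portable C, with hypothetical fn/fn_3way callbacks standing in for the assembler routines:

#include <stddef.h>
#include <stdint.h>

#define TF_BLOCK_SIZE 16 /* Twofish always uses 128-bit blocks */

typedef void (*blk_fn)(void *ctx, uint8_t *dst, const uint8_t *src);

/* Sketch of the ecb_crypt() batching: while at least three blocks
 * remain, use the 3-way routine; mop up the rest a block at a time. */
static void ecb_walk(void *ctx, uint8_t *dst, const uint8_t *src,
                     size_t nbytes, blk_fn fn, blk_fn fn_3way)
{
        while (nbytes >= 3 * TF_BLOCK_SIZE) {
                fn_3way(ctx, dst, src);
                src += 3 * TF_BLOCK_SIZE;
                dst += 3 * TF_BLOCK_SIZE;
                nbytes -= 3 * TF_BLOCK_SIZE;
        }
        while (nbytes >= TF_BLOCK_SIZE) {
                fn(ctx, dst, src);
                src += TF_BLOCK_SIZE;
                dst += TF_BLOCK_SIZE;
                nbytes -= TF_BLOCK_SIZE;
        }
}

The kernel version additionally processes the data one scatterlist chunk at a time, which is why the real loop nests inside blkcipher_walk_virt()/blkcipher_walk_done().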
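
__cbc_decrypt() walks the data from the last block backward so that dst and src may alias: each ciphertext block is still intact when a later block needs it as a chaining value. A single-block-at-a-time model of the same idea, reusing TF_BLOCK_SIZE and blk_fn from the previous sketch, with dec as a hypothetical block-decrypt callback:

/* Model of the reverse walk in __cbc_decrypt(): going backward lets
 * the buffer be decrypted in place, because block i-1 is still
 * ciphertext when block i XORs it back out. */
static void cbc_decrypt_inplace(void *ctx, uint8_t *buf, size_t nblocks,
                                const uint8_t iv[TF_BLOCK_SIZE], blk_fn dec)
{
        for (size_t i = nblocks; i-- > 0; ) {
                uint8_t *blk = buf + i * TF_BLOCK_SIZE;
                const uint8_t *chain = i ? blk - TF_BLOCK_SIZE : iv;
                int j;

                dec(ctx, blk, blk);                /* decrypt in place */
                for (j = 0; j < TF_BLOCK_SIZE; j++)
                        blk[j] ^= chain[j];        /* undo the chaining */
        }
}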
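
The CTR path keeps the counter in native byte order as two 64-bit halves and only converts to big-endian when building each counter block; u128_inc() is a plain 128-bit increment with carry. A self-contained demonstration of the carry behaviour:

#include <stdint.h>
#include <stdio.h>

struct u128 { uint64_t a, b; }; /* a: high 64 bits, b: low 64 bits */

/* Same logic as u128_inc(): bump the low half and, if it wrapped to
 * zero, carry into the high half. */
static void u128_inc(struct u128 *i)
{
        i->b++;
        if (!i->b)
                i->a++;
}

int main(void)
{
        struct u128 ctr = { 0, UINT64_MAX }; /* low half about to wrap */

        u128_inc(&ctr);
        printf("a=%llu b=%llu\n", /* prints a=1 b=0: carry propagated */
               (unsigned long long)ctr.a, (unsigned long long)ctr.b);
        return 0;
}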
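
xts_twofish_setkey() enforces that the supplied key is two equal-size keys concatenated, expanding the first half for the data cipher and the second half for the tweak cipher. A hedged sketch of that split under simplified assumptions, with a hypothetical expand_key() standing in for __twofish_setkey() and a toy cipher_ctx in place of struct twofish_ctx:

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct cipher_ctx { uint8_t sched[64]; }; /* hypothetical key schedule */

struct xts_ctx {
        struct cipher_ctx crypt_ctx; /* keyed by the first half */
        struct cipher_ctx tweak_ctx; /* keyed by the second half */
};

/* hypothetical stand-in for __twofish_setkey(); a real key expansion
 * would build the S-boxes and round subkeys here */
static int expand_key(struct cipher_ctx *ctx, const uint8_t *key, size_t len)
{
        if (len > sizeof(ctx->sched))
                return -EINVAL;
        memset(ctx->sched, 0, sizeof(ctx->sched));
        memcpy(ctx->sched, key, len);
        return 0;
}

static int xts_setkey(struct xts_ctx *ctx, const uint8_t *key, size_t keylen)
{
        int err;

        /* an XTS key is two keys of equal size concatenated, so the
         * total length must be even */
        if (keylen % 2)
                return -EINVAL;

        err = expand_key(&ctx->crypt_ctx, key, keylen / 2);
        if (err)
                return err;
        return expand_key(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
}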