From 871480933a1c28f8a9fed4c4d34d06c439a7a422 Mon Sep 17 00:00:00 2001
From: Srikant Patnaik
Date: Sun, 11 Jan 2015 12:28:04 +0530
Subject: Moved, renamed, and deleted files

The original directory structure was scattered and unorganized. These
changes reorganize it to match the kernel source tree structure.

---
arch/x86/Kbuild | 18 +
arch/x86/Kconfig | 2234 +++
arch/x86/Kconfig.cpu | 506 ++
arch/x86/Kconfig.debug | 302 +
arch/x86/Makefile | 224 +
arch/x86/Makefile.um | 60 +
arch/x86/Makefile_32.cpu | 71 +
arch/x86/boot/Makefile | 194 +
arch/x86/boot/a20.c | 165 +
arch/x86/boot/apm.c | 75 +
arch/x86/boot/bioscall.S | 82 +
arch/x86/boot/bitops.h | 43 +
arch/x86/boot/boot.h | 369 ++
arch/x86/boot/cmdline.c | 158 +
arch/x86/boot/code16gcc.h | 15 +
arch/x86/boot/compressed/Makefile | 76 +
arch/x86/boot/compressed/cmdline.c | 21 +
arch/x86/boot/compressed/early_serial_console.c | 5 +
arch/x86/boot/compressed/eboot.c | 1022 ++++
arch/x86/boot/compressed/eboot.h | 61 +
arch/x86/boot/compressed/efi_stub_32.S | 86 +
arch/x86/boot/compressed/efi_stub_64.S | 1 +
arch/x86/boot/compressed/head_32.S | 225 +
arch/x86/boot/compressed/head_64.S | 371 ++
arch/x86/boot/compressed/misc.c | 379 ++
arch/x86/boot/compressed/misc.h | 39 +
arch/x86/boot/compressed/mkpiggy.c | 95 +
arch/x86/boot/compressed/string.c | 11 +
arch/x86/boot/compressed/vmlinux.lds.S | 74 +
arch/x86/boot/copy.S | 87 +
arch/x86/boot/cpu.c | 85 +
arch/x86/boot/cpucheck.c | 252 +
arch/x86/boot/ctype.h | 21 +
arch/x86/boot/early_serial_console.c | 151 +
arch/x86/boot/edd.c | 181 +
arch/x86/boot/header.S | 478 ++
arch/x86/boot/install.sh | 59 +
arch/x86/boot/main.c | 178 +
arch/x86/boot/mca.c | 38 +
arch/x86/boot/memory.c | 136 +
arch/x86/boot/mkcpustr.c | 49 +
arch/x86/boot/mtools.conf.in | 17 +
arch/x86/boot/pm.c | 126 +
arch/x86/boot/pmjump.S | 77 +
arch/x86/boot/printf.c | 309 +
arch/x86/boot/regs.c | 29 +
arch/x86/boot/setup.ld | 64 +
arch/x86/boot/string.c | 148 +
arch/x86/boot/tools/build.c | 266 +
arch/x86/boot/tty.c | 139 +
arch/x86/boot/version.c | 21 +
arch/x86/boot/vesa.h | 72 +
arch/x86/boot/video-bios.c | 128 +
arch/x86/boot/video-mode.c | 173 +
arch/x86/boot/video-vesa.c | 280 +
arch/x86/boot/video-vga.c | 288 +
arch/x86/boot/video.c | 341 ++
arch/x86/boot/video.h | 121 +
arch/x86/configs/i386_defconfig | 319 +
arch/x86/configs/x86_64_defconfig | 317 +
arch/x86/crypto/Makefile | 45 +
arch/x86/crypto/aes-i586-asm_32.S | 367 ++
arch/x86/crypto/aes-x86_64-asm_64.S | 188 +
arch/x86/crypto/aes_glue.c | 71 +
arch/x86/crypto/aesni-intel_asm.S | 2618 ++++
arch/x86/crypto/aesni-intel_glue.c | 1389 +++++
arch/x86/crypto/blowfish-x86_64-asm_64.S | 390 ++
arch/x86/crypto/blowfish_glue.c | 489 ++
arch/x86/crypto/camellia-x86_64-asm_64.S | 520 ++
arch/x86/crypto/camellia_glue.c | 1952 ++++++
arch/x86/crypto/crc32c-intel.c | 203 +
arch/x86/crypto/fpu.c | 161 +
arch/x86/crypto/ghash-clmulni-intel_asm.S | 157 +
arch/x86/crypto/ghash-clmulni-intel_glue.c | 338 ++
arch/x86/crypto/salsa20-i586-asm_32.S | 1114 ++++
arch/x86/crypto/salsa20-x86_64-asm_64.S | 920 +++
arch/x86/crypto/salsa20_glue.c | 129 +
arch/x86/crypto/serpent-sse2-i586-asm_32.S | 635 ++
arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | 758 +++
arch/x86/crypto/serpent_sse2_glue.c | 944 +++
arch/x86/crypto/sha1_ssse3_asm.S | 558 ++
arch/x86/crypto/sha1_ssse3_glue.c | 240 +
arch/x86/crypto/twofish-i586-asm_32.S | 335 ++
arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 316 +
arch/x86/crypto/twofish-x86_64-asm_64.S | 322 +
arch/x86/crypto/twofish_glue.c | 101 +
arch/x86/crypto/twofish_glue_3way.c | 695 +++
arch/x86/ia32/Makefile | 14 +
arch/x86/ia32/audit.c | 42 +
arch/x86/ia32/ia32_aout.c | 514 ++
arch/x86/ia32/ia32_signal.c | 562 ++
arch/x86/ia32/ia32entry.S | 493 ++
arch/x86/ia32/ipc32.c | 54 +
arch/x86/ia32/nosyscall.c | 7 +
arch/x86/ia32/sys_ia32.c | 516 ++
arch/x86/ia32/syscall_ia32.c | 25 +
arch/x86/include/asm/Kbuild | 28 +
arch/x86/include/asm/a.out-core.h | 65 +
arch/x86/include/asm/a.out.h | 20 +
arch/x86/include/asm/acpi.h | 193 +
arch/x86/include/asm/aes.h | 11 +
arch/x86/include/asm/agp.h | 31 +
arch/x86/include/asm/alternative-asm.h | 26 +
arch/x86/include/asm/alternative.h | 195 +
arch/x86/include/asm/amd_nb.h | 72 +
arch/x86/include/asm/apb_timer.h | 49 +
arch/x86/include/asm/apic.h | 636 ++
arch/x86/include/asm/apic_flat_64.h | 7 +
arch/x86/include/asm/apicdef.h | 445 ++
arch/x86/include/asm/apm.h | 73 +
arch/x86/include/asm/arch_hweight.h | 61 +
arch/x86/include/asm/archrandom.h | 75 +
arch/x86/include/asm/asm-offsets.h | 1 +
arch/x86/include/asm/asm.h | 58 +
arch/x86/include/asm/atomic.h | 317 +
arch/x86/include/asm/atomic64_32.h | 316 +
arch/x86/include/asm/atomic64_64.h | 243 +
arch/x86/include/asm/auxvec.h | 19 +
arch/x86/include/asm/barrier.h | 116 +
arch/x86/include/asm/bios_ebda.h | 60 +
arch/x86/include/asm/bitops.h | 514 ++
arch/x86/include/asm/bitsperlong.h | 13 +
arch/x86/include/asm/boot.h | 47 +
arch/x86/include/asm/bootparam.h | 136 +
arch/x86/include/asm/bug.h | 43 +
arch/x86/include/asm/bugs.h | 12 +
arch/x86/include/asm/byteorder.h | 6 +
arch/x86/include/asm/cache.h | 23 +
arch/x86/include/asm/cacheflush.h | 166 +
arch/x86/include/asm/calgary.h | 70 +
arch/x86/include/asm/calling.h | 196 +
arch/x86/include/asm/ce4100.h | 6 +
arch/x86/include/asm/checksum.h | 5 +
arch/x86/include/asm/checksum_32.h | 190 +
arch/x86/include/asm/checksum_64.h | 191 +
arch/x86/include/asm/clocksource.h | 18 +
arch/x86/include/asm/cmpxchg.h | 233 +
arch/x86/include/asm/cmpxchg_32.h | 171 +
arch/x86/include/asm/cmpxchg_64.h | 25 +
arch/x86/include/asm/compat.h | 252 +
arch/x86/include/asm/cpu.h | 37 +
arch/x86/include/asm/cpu_device_id.h | 13 +
arch/x86/include/asm/cpufeature.h | 400 ++
arch/x86/include/asm/cpumask.h | 14 +
arch/x86/include/asm/cputime.h | 1 +
arch/x86/include/asm/current.h | 21 +
arch/x86/include/asm/debugreg.h | 195 +
arch/x86/include/asm/delay.h | 8 +
arch/x86/include/asm/desc.h | 407 ++
arch/x86/include/asm/desc_defs.h | 101 +
arch/x86/include/asm/device.h | 19 +
arch/x86/include/asm/div64.h | 66 +
arch/x86/include/asm/dma-mapping.h | 163 +
arch/x86/include/asm/dma.h | 317 +
arch/x86/include/asm/dmi.h | 19 +
arch/x86/include/asm/dwarf2.h | 146 +
arch/x86/include/asm/e820.h | 149 +
arch/x86/include/asm/edac.h | 18 +
arch/x86/include/asm/efi.h | 115 +
arch/x86/include/asm/elf.h | 372 ++
arch/x86/include/asm/emergency-restart.h | 20 +
arch/x86/include/asm/entry_arch.h | 56 +
arch/x86/include/asm/errno.h | 1 +
arch/x86/include/asm/exec.h | 1 +
arch/x86/include/asm/fb.h | 17 +
arch/x86/include/asm/fcntl.h | 1 +
arch/x86/include/asm/fixmap.h | 238 +
arch/x86/include/asm/floppy.h | 281 +
arch/x86/include/asm/fpu-internal.h | 520 ++
arch/x86/include/asm/frame.h | 26 +
arch/x86/include/asm/ftrace.h | 57 +
arch/x86/include/asm/futex.h | 141 +
arch/x86/include/asm/gart.h | 113 +
arch/x86/include/asm/genapic.h | 1 +
arch/x86/include/asm/geode.h | 36 +
arch/x86/include/asm/gpio.h | 53 +
arch/x86/include/asm/hardirq.h | 55 +
arch/x86/include/asm/highmem.h | 77 +
arch/x86/include/asm/hpet.h | 118 +
arch/x86/include/asm/hugetlb.h | 93 +
arch/x86/include/asm/hw_breakpoint.h | 76 +
arch/x86/include/asm/hw_irq.h | 182 +
arch/x86/include/asm/hypertransport.h | 45 +
arch/x86/include/asm/hyperv.h | 194 +
arch/x86/include/asm/hypervisor.h | 62 +
arch/x86/include/asm/i387.h | 79 +
arch/x86/include/asm/i8259.h | 70 +
arch/x86/include/asm/ia32.h | 163 +
arch/x86/include/asm/ia32_unistd.h | 11 +
arch/x86/include/asm/idle.h | 15 +
arch/x86/include/asm/inat.h | 221 +
arch/x86/include/asm/inat_types.h | 29 +
arch/x86/include/asm/init.h | 20 +
arch/x86/include/asm/insn.h | 199 +
arch/x86/include/asm/inst.h | 240 +
arch/x86/include/asm/intel_scu_ipc.h | 73 +
arch/x86/include/asm/io.h | 348 ++
arch/x86/include/asm/io_apic.h | 217 +
arch/x86/include/asm/ioctl.h | 1 +
arch/x86/include/asm/ioctls.h | 1 +
arch/x86/include/asm/iomap.h | 41 +
arch/x86/include/asm/iommu.h | 13 +
arch/x86/include/asm/iommu_table.h | 100 +
arch/x86/include/asm/ipcbuf.h | 1 +
arch/x86/include/asm/ipi.h | 162 +
arch/x86/include/asm/irq.h | 44 +
arch/x86/include/asm/irq_regs.h | 31 +
arch/x86/include/asm/irq_remapping.h | 45 +
arch/x86/include/asm/irq_vectors.h | 172 +
arch/x86/include/asm/irqflags.h | 209 +
arch/x86/include/asm/ist.h | 34 +
arch/x86/include/asm/jump_label.h | 42 +
arch/x86/include/asm/kdebug.h | 40 +
arch/x86/include/asm/kexec.h | 168 +
arch/x86/include/asm/kgdb.h | 89 +
arch/x86/include/asm/kmap_types.h | 12 +
arch/x86/include/asm/kmemcheck.h | 42 +
arch/x86/include/asm/kprobes.h | 117 +
arch/x86/include/asm/kvm.h | 328 +
arch/x86/include/asm/kvm_emulate.h | 395 ++
arch/x86/include/asm/kvm_host.h | 970 +++
arch/x86/include/asm/kvm_para.h | 216 +
arch/x86/include/asm/ldt.h | 40 +
arch/x86/include/asm/lguest.h | 99 +
arch/x86/include/asm/lguest_hcall.h | 76 +
arch/x86/include/asm/linkage.h | 61 +
arch/x86/include/asm/local.h | 197 +
arch/x86/include/asm/local64.h | 1 +
arch/x86/include/asm/mach_timer.h | 48 +
arch/x86/include/asm/mach_traps.h | 43 +
arch/x86/include/asm/math_emu.h | 18 +
arch/x86/include/asm/mc146818rtc.h | 103 +
arch/x86/include/asm/mca.h | 43 +
arch/x86/include/asm/mca_dma.h | 201 +
arch/x86/include/asm/mce.h | 252 +
arch/x86/include/asm/microcode.h | 66 +
arch/x86/include/asm/mman.h | 8 +
arch/x86/include/asm/mmconfig.h | 12 +
arch/x86/include/asm/mmu.h | 32 +
arch/x86/include/asm/mmu_context.h | 93 +
arch/x86/include/asm/mmx.h | 14 +
arch/x86/include/asm/mmzone.h | 5 +
arch/x86/include/asm/mmzone_32.h | 70 +
arch/x86/include/asm/mmzone_64.h | 17 +
arch/x86/include/asm/module.h | 66 +
arch/x86/include/asm/mpspec.h | 175 +
arch/x86/include/asm/mpspec_def.h | 174 +
arch/x86/include/asm/mrst-vrtc.h | 9 +
arch/x86/include/asm/mrst.h | 81 +
arch/x86/include/asm/msgbuf.h | 1 +
arch/x86/include/asm/mshyperv.h | 14 +
arch/x86/include/asm/msidef.h | 56 +
arch/x86/include/asm/msr-index.h | 472 ++
arch/x86/include/asm/msr.h | 319 +
arch/x86/include/asm/mtrr.h | 213 +
arch/x86/include/asm/mutex.h | 5 +
arch/x86/include/asm/mutex_32.h | 125 +
arch/x86/include/asm/mutex_64.h | 100 +
arch/x86/include/asm/mwait.h | 15 +
arch/x86/include/asm/nmi.h | 47 +
arch/x86/include/asm/nops.h | 142 +
arch/x86/include/asm/numa.h | 85 +
arch/x86/include/asm/numa_32.h | 12 +
arch/x86/include/asm/numa_64.h | 6 +
arch/x86/include/asm/numachip/numachip_csr.h | 167 +
arch/x86/include/asm/numaq.h | 171 +
arch/x86/include/asm/olpc.h | 151 +
arch/x86/include/asm/olpc_ofw.h | 37 +
arch/x86/include/asm/page.h | 71 +
arch/x86/include/asm/page_32.h | 51 +
arch/x86/include/asm/page_32_types.h | 57 +
arch/x86/include/asm/page_64.h | 6 +
arch/x86/include/asm/page_64_types.h | 75 +
arch/x86/include/asm/page_types.h | 61 +
arch/x86/include/asm/param.h | 1 +
arch/x86/include/asm/paravirt.h | 1051 ++++
arch/x86/include/asm/paravirt_types.h | 703 +++
arch/x86/include/asm/parport.h | 10 +
arch/x86/include/asm/pat.h | 27 +
arch/x86/include/asm/pci-direct.h | 21 +
arch/x86/include/asm/pci-functions.h | 19 +
arch/x86/include/asm/pci.h | 174 +
arch/x86/include/asm/pci_64.h | 27 +
arch/x86/include/asm/pci_x86.h | 199 +
arch/x86/include/asm/percpu.h | 589 ++
arch/x86/include/asm/perf_event.h | 252 +
arch/x86/include/asm/perf_event_p4.h | 876 +++
arch/x86/include/asm/pgalloc.h | 139 +
arch/x86/include/asm/pgtable-2level.h | 111 +
arch/x86/include/asm/pgtable-2level_types.h | 37 +
arch/x86/include/asm/pgtable-3level.h | 196 +
arch/x86/include/asm/pgtable-3level_types.h | 48 +
arch/x86/include/asm/pgtable.h | 770 +++
arch/x86/include/asm/pgtable_32.h | 87 +
arch/x86/include/asm/pgtable_32_types.h | 55 +
arch/x86/include/asm/pgtable_64.h | 187 +
arch/x86/include/asm/pgtable_64_types.h | 63 +
arch/x86/include/asm/pgtable_types.h | 340 ++
arch/x86/include/asm/poll.h | 1 +
arch/x86/include/asm/posix_types.h | 15 +
arch/x86/include/asm/posix_types_32.h | 28 +
arch/x86/include/asm/posix_types_64.h | 19 +
arch/x86/include/asm/posix_types_x32.h | 19 +
arch/x86/include/asm/prctl.h | 9 +
arch/x86/include/asm/probe_roms.h | 8 +
arch/x86/include/asm/processor-cyrix.h | 38 +
arch/x86/include/asm/processor-flags.h | 103 +
arch/x86/include/asm/processor.h | 987 +++
arch/x86/include/asm/prom.h | 48 +
arch/x86/include/asm/proto.h | 26 +
arch/x86/include/asm/ptrace-abi.h | 87 +
arch/x86/include/asm/ptrace.h | 306 +
arch/x86/include/asm/pvclock-abi.h | 44 +
arch/x86/include/asm/pvclock.h | 59 +
arch/x86/include/asm/reboot.h | 29 +
arch/x86/include/asm/reboot_fixups.h | 6 +
arch/x86/include/asm/required-features.h | 90 +
arch/x86/include/asm/resource.h | 1 +
arch/x86/include/asm/resume-trace.h | 21 +
arch/x86/include/asm/rio.h | 63 +
arch/x86/include/asm/rtc.h | 1 +
arch/x86/include/asm/rwlock.h | 49 +
arch/x86/include/asm/rwsem.h | 211 +
arch/x86/include/asm/scatterlist.h | 8 +
arch/x86/include/asm/seccomp.h | 5 +
arch/x86/include/asm/seccomp_32.h | 11 +
arch/x86/include/asm/seccomp_64.h | 17 +
arch/x86/include/asm/sections.h | 14 +
arch/x86/include/asm/segment.h | 272 +
arch/x86/include/asm/sembuf.h | 24 +
arch/x86/include/asm/serial.h | 29 +
arch/x86/include/asm/serpent.h | 63 +
arch/x86/include/asm/setup.h | 128 +
arch/x86/include/asm/setup_arch.h | 3 +
arch/x86/include/asm/shmbuf.h | 1 +
arch/x86/include/asm/shmparam.h | 6 +
arch/x86/include/asm/sigcontext.h | 293 +
arch/x86/include/asm/sigcontext32.h | 77 +
arch/x86/include/asm/sigframe.h | 83 +
arch/x86/include/asm/sighandling.h | 24 +
arch/x86/include/asm/siginfo.h | 16 +
arch/x86/include/asm/signal.h | 264 +
arch/x86/include/asm/smp.h | 235 +
arch/x86/include/asm/smpboot_hooks.h | 68 +
arch/x86/include/asm/socket.h | 1 +
arch/x86/include/asm/sockios.h | 1 +
arch/x86/include/asm/sparsemem.h | 34 +
arch/x86/include/asm/special_insns.h | 199 +
arch/x86/include/asm/spinlock.h | 243 +
arch/x86/include/asm/spinlock_types.h | 33 +
arch/x86/include/asm/stackprotector.h | 121 +
arch/x86/include/asm/stacktrace.h | 118 +
arch/x86/include/asm/stat.h | 114 +
arch/x86/include/asm/statfs.h | 12 +
arch/x86/include/asm/string.h | 5 +
arch/x86/include/asm/string_32.h | 342 ++
arch/x86/include/asm/string_64.h | 68 +
arch/x86/include/asm/suspend.h | 5 +
arch/x86/include/asm/suspend_32.h | 27 +
arch/x86/include/asm/suspend_64.h | 49 +
arch/x86/include/asm/svm.h | 352 ++
arch/x86/include/asm/swab.h | 61 +
arch/x86/include/asm/swiotlb.h | 32 +
arch/x86/include/asm/switch_to.h | 129 +
arch/x86/include/asm/sync_bitops.h | 130 +
arch/x86/include/asm/sys_ia32.h | 89 +
arch/x86/include/asm/syscall.h | 217 +
arch/x86/include/asm/syscalls.h | 69 +
arch/x86/include/asm/tce.h | 48 +
arch/x86/include/asm/termbits.h | 1 +
arch/x86/include/asm/termios.h | 1 +
arch/x86/include/asm/thread_info.h | 289 +
arch/x86/include/asm/time.h | 12 +
arch/x86/include/asm/timer.h | 79 +
arch/x86/include/asm/timex.h | 12 +
arch/x86/include/asm/tlb.h | 11 +
arch/x86/include/asm/tlbflush.h | 175 +
arch/x86/include/asm/topology.h | 196 +
arch/x86/include/asm/trampoline.h | 39 +
arch/x86/include/asm/traps.h | 117 +
arch/x86/include/asm/tsc.h | 67 +
arch/x86/include/asm/types.h | 6 +
arch/x86/include/asm/uaccess.h | 581 ++
arch/x86/include/asm/uaccess_32.h | 236 +
arch/x86/include/asm/uaccess_64.h | 249 +
arch/x86/include/asm/ucontext.h | 12 +
arch/x86/include/asm/unaligned.h | 14 +
arch/x86/include/asm/unistd.h | 73 +
arch/x86/include/asm/user.h | 63 +
arch/x86/include/asm/user32.h | 70 +
arch/x86/include/asm/user_32.h | 131 +
arch/x86/include/asm/user_64.h | 137 +
arch/x86/include/asm/uv/bios.h | 114 +
arch/x86/include/asm/uv/uv.h | 34 +
arch/x86/include/asm/uv/uv_bau.h | 775 +++
arch/x86/include/asm/uv/uv_hub.h | 609 ++
arch/x86/include/asm/uv/uv_irq.h | 38 +
arch/x86/include/asm/uv/uv_mmrs.h | 2077 +++++++
arch/x86/include/asm/vdso.h | 33 +
arch/x86/include/asm/vga.h | 20 +
arch/x86/include/asm/vgtod.h | 30 +
arch/x86/include/asm/virtext.h | 131 +
arch/x86/include/asm/visws/cobalt.h | 127 +
arch/x86/include/asm/visws/lithium.h | 53 +
arch/x86/include/asm/visws/piix4.h | 107 +
arch/x86/include/asm/visws/sgivw.h | 5 +
arch/x86/include/asm/vm86.h | 208 +
arch/x86/include/asm/vmx.h | 484 ++
arch/x86/include/asm/vsyscall.h | 38 +
arch/x86/include/asm/vvar.h | 50 +
arch/x86/include/asm/word-at-a-time.h | 79 +
arch/x86/include/asm/x2apic.h | 67 +
arch/x86/include/asm/x86_init.h | 199 +
arch/x86/include/asm/xcr.h | 49 +
arch/x86/include/asm/xen/events.h | 18 +
arch/x86/include/asm/xen/hypercall.h | 602 ++
arch/x86/include/asm/xen/hypervisor.h | 75 +
arch/x86/include/asm/xen/interface.h | 177 +
arch/x86/include/asm/xen/interface_32.h | 102 +
arch/x86/include/asm/xen/interface_64.h | 148 +
arch/x86/include/asm/xen/page.h | 213 +
arch/x86/include/asm/xen/pci.h | 82 +
arch/x86/include/asm/xen/swiotlb-xen.h | 14 +
arch/x86/include/asm/xen/trace_types.h | 18 +
arch/x86/include/asm/xor.h | 10 +
arch/x86/include/asm/xor_32.h | 888 +++
arch/x86/include/asm/xor_64.h | 361 ++
arch/x86/include/asm/xsave.h | 150 +
arch/x86/kernel/Makefile | 115 +
arch/x86/kernel/acpi/Makefile | 14 +
arch/x86/kernel/acpi/boot.c | 1702 ++++++
arch/x86/kernel/acpi/cstate.c | 204 +
arch/x86/kernel/acpi/realmode/Makefile | 59 +
arch/x86/kernel/acpi/realmode/bioscall.S | 1 +
arch/x86/kernel/acpi/realmode/copy.S | 1 +
arch/x86/kernel/acpi/realmode/regs.c | 1 +
arch/x86/kernel/acpi/realmode/video-bios.c | 1 +
arch/x86/kernel/acpi/realmode/video-mode.c | 1 +
arch/x86/kernel/acpi/realmode/video-vesa.c | 1 +
arch/x86/kernel/acpi/realmode/video-vga.c | 1 +
arch/x86/kernel/acpi/realmode/wakemain.c | 81 +
arch/x86/kernel/acpi/realmode/wakeup.S | 170 +
arch/x86/kernel/acpi/realmode/wakeup.h | 48 +
arch/x86/kernel/acpi/realmode/wakeup.lds.S | 62 +
arch/x86/kernel/acpi/sleep.c | 137 +
arch/x86/kernel/acpi/sleep.h | 19 +
arch/x86/kernel/acpi/wakeup_32.S | 98 +
arch/x86/kernel/acpi/wakeup_64.S | 122 +
arch/x86/kernel/acpi/wakeup_rm.S | 12 +
arch/x86/kernel/alternative.c | 742 +++
arch/x86/kernel/amd_gart_64.c | 899 +++
arch/x86/kernel/amd_nb.c | 282 +
arch/x86/kernel/apb_timer.c | 431 ++
arch/x86/kernel/aperture_64.c | 523 ++
arch/x86/kernel/apic/Makefile | 27 +
arch/x86/kernel/apic/apic.c | 2475 ++++++++
arch/x86/kernel/apic/apic_flat_64.c | 398 ++
arch/x86/kernel/apic/apic_noop.c | 192 +
arch/x86/kernel/apic/apic_numachip.c | 304 +
arch/x86/kernel/apic/bigsmp_32.c | 279 +
arch/x86/kernel/apic/es7000_32.c | 757 +++
arch/x86/kernel/apic/hw_nmi.c | 90 +
arch/x86/kernel/apic/io_apic.c | 4130 +++++++++++++
arch/x86/kernel/apic/ipi.c | 167 +
arch/x86/kernel/apic/numaq_32.c | 542 ++
arch/x86/kernel/apic/probe_32.c | 271 +
arch/x86/kernel/apic/probe_64.c | 79 +
arch/x86/kernel/apic/summit_32.c | 557 ++
arch/x86/kernel/apic/x2apic_cluster.c | 269 +
arch/x86/kernel/apic/x2apic_phys.c | 181 +
arch/x86/kernel/apic/x2apic_uv_x.c | 895 +++
arch/x86/kernel/apm_32.c | 2451 ++++++++
arch/x86/kernel/asm-offsets.c | 72 +
arch/x86/kernel/asm-offsets_32.c | 87 +
arch/x86/kernel/asm-offsets_64.c | 89 +
arch/x86/kernel/audit_64.c | 81 +
arch/x86/kernel/bootflag.c | 101 +
arch/x86/kernel/check.c | 159 +
arch/x86/kernel/cpu/Makefile | 50 +
arch/x86/kernel/cpu/amd.c | 802 +++
arch/x86/kernel/cpu/bugs.c | 174 +
arch/x86/kernel/cpu/bugs_64.c | 33 +
arch/x86/kernel/cpu/centaur.c | 500 ++
arch/x86/kernel/cpu/common.c | 1318 ++++
arch/x86/kernel/cpu/cpu.h | 37 +
arch/x86/kernel/cpu/cyrix.c | 461 ++
arch/x86/kernel/cpu/hypervisor.c | 78 +
arch/x86/kernel/cpu/intel.c | 555 ++
arch/x86/kernel/cpu/intel_cacheinfo.c | 1216 ++++
arch/x86/kernel/cpu/match.c | 91 +
arch/x86/kernel/cpu/mcheck/Makefile | 11 +
arch/x86/kernel/cpu/mcheck/mce-apei.c | 149 +
arch/x86/kernel/cpu/mcheck/mce-inject.c | 248 +
arch/x86/kernel/cpu/mcheck/mce-internal.h | 53 +
arch/x86/kernel/cpu/mcheck/mce-severity.c | 285 +
arch/x86/kernel/cpu/mcheck/mce.c | 2364 ++++++++
arch/x86/kernel/cpu/mcheck/mce_amd.c | 775 +++
arch/x86/kernel/cpu/mcheck/mce_intel.c | 229 +
arch/x86/kernel/cpu/mcheck/p5.c | 67 +
arch/x86/kernel/cpu/mcheck/therm_throt.c | 508 ++
arch/x86/kernel/cpu/mcheck/threshold.c | 29 +
arch/x86/kernel/cpu/mcheck/winchip.c | 39 +
arch/x86/kernel/cpu/mkcapflags.pl | 32 +
arch/x86/kernel/cpu/mshyperv.c | 79 +
arch/x86/kernel/cpu/mtrr/Makefile | 3 +
arch/x86/kernel/cpu/mtrr/amd.c | 124 +
arch/x86/kernel/cpu/mtrr/centaur.c | 126 +
arch/x86/kernel/cpu/mtrr/cleanup.c | 980 +++
arch/x86/kernel/cpu/mtrr/cyrix.c | 282 +
arch/x86/kernel/cpu/mtrr/generic.c | 846 +++
arch/x86/kernel/cpu/mtrr/if.c | 451 ++
arch/x86/kernel/cpu/mtrr/main.c | 764 +++
arch/x86/kernel/cpu/mtrr/mtrr.h | 78 +
arch/x86/kernel/cpu/perf_event.c | 1886 ++++++
arch/x86/kernel/cpu/perf_event.h | 605 ++
arch/x86/kernel/cpu/perf_event_amd.c | 686 +++
arch/x86/kernel/cpu/perf_event_amd_ibs.c | 301 +
arch/x86/kernel/cpu/perf_event_intel.c | 1886 ++++++
arch/x86/kernel/cpu/perf_event_intel_ds.c | 725 +++
arch/x86/kernel/cpu/perf_event_intel_lbr.c | 704 +++
arch/x86/kernel/cpu/perf_event_p4.c | 1345 +++++
arch/x86/kernel/cpu/perf_event_p6.c | 162 +
arch/x86/kernel/cpu/perfctr-watchdog.c | 156 +
arch/x86/kernel/cpu/powerflags.c | 21 +
arch/x86/kernel/cpu/proc.c | 167 +
arch/x86/kernel/cpu/rdrand.c | 73 +
arch/x86/kernel/cpu/scattered.c | 71 +
arch/x86/kernel/cpu/sched.c | 55 +
arch/x86/kernel/cpu/topology.c | 99 +
arch/x86/kernel/cpu/transmeta.c | 109 +
arch/x86/kernel/cpu/umc.c | 26 +
arch/x86/kernel/cpu/vmware.c | 134 +
arch/x86/kernel/cpuid.c | 240 +
arch/x86/kernel/crash.c | 106 +
arch/x86/kernel/crash_dump_32.c | 95 +
arch/x86/kernel/crash_dump_64.c | 49 +
arch/x86/kernel/devicetree.c | 388 ++
arch/x86/kernel/doublefault_32.c | 69 +
arch/x86/kernel/dumpstack.c | 329 +
arch/x86/kernel/dumpstack_32.c | 141 +
arch/x86/kernel/dumpstack_64.c | 307 +
arch/x86/kernel/e820.c | 1111 ++++
arch/x86/kernel/early-quirks.c | 292 +
arch/x86/kernel/early_printk.c | 259 +
arch/x86/kernel/entry_32.S | 1417 +++++
arch/x86/kernel/entry_64.S | 1757 ++++++
arch/x86/kernel/ftrace.c | 455 ++
arch/x86/kernel/head.c | 56 +
arch/x86/kernel/head32.c | 68 +
arch/x86/kernel/head64.c | 124 +
arch/x86/kernel/head_32.S | 731 +++
arch/x86/kernel/head_64.S | 427 ++
arch/x86/kernel/hpet.c | 1209 ++++
arch/x86/kernel/hw_breakpoint.c | 524 ++
arch/x86/kernel/i386_ksyms_32.c | 38 +
arch/x86/kernel/i387.c | 830 +++
arch/x86/kernel/i8237.c | 55 +
arch/x86/kernel/i8253.c | 43 +
arch/x86/kernel/i8259.c | 401 +
arch/x86/kernel/init_task.c | 42 +
arch/x86/kernel/io_delay.c | 131 +
arch/x86/kernel/ioport.c | 113 +
arch/x86/kernel/irq.c | 333 ++
arch/x86/kernel/irq_32.c | 201 +
arch/x86/kernel/irq_64.c | 110 +
arch/x86/kernel/irq_work.c | 30 +
arch/x86/kernel/irqinit.c | 327 +
arch/x86/kernel/jump_label.c | 59 +
arch/x86/kernel/kdebugfs.c | 212 +
arch/x86/kernel/kgdb.c | 813 +++
arch/x86/kernel/kprobes-common.h | 102 +
arch/x86/kernel/kprobes-opt.c | 512 ++
arch/x86/kernel/kprobes.c | 1063 ++++
arch/x86/kernel/kvm.c | 445 ++
arch/x86/kernel/kvmclock.c | 220 +
arch/x86/kernel/ldt.c | 267 +
arch/x86/kernel/machine_kexec_32.c | 272 +
arch/x86/kernel/machine_kexec_64.c | 356 ++
arch/x86/kernel/mca_32.c | 476 ++
arch/x86/kernel/microcode_amd.c | 395 ++
arch/x86/kernel/microcode_core.c | 605 ++
arch/x86/kernel/microcode_intel.c | 470 ++
arch/x86/kernel/mmconf-fam10h_64.c | 237 +
arch/x86/kernel/module.c | 205 +
arch/x86/kernel/mpparse.c | 921 +++
arch/x86/kernel/msr.c | 296 +
arch/x86/kernel/nmi.c | 539 ++
arch/x86/kernel/nmi_selftest.c | 181 +
arch/x86/kernel/paravirt-spinlocks.c | 28 +
arch/x86/kernel/paravirt.c | 490 ++
arch/x86/kernel/paravirt_patch_32.c | 61 +
arch/x86/kernel/paravirt_patch_64.c | 75 +
arch/x86/kernel/pci-calgary_64.c | 1600 +++++
arch/x86/kernel/pci-dma.c | 273 +
arch/x86/kernel/pci-iommu_table.c | 79 +
arch/x86/kernel/pci-nommu.c | 106 +
arch/x86/kernel/pci-swiotlb.c | 109 +
arch/x86/kernel/pcspeaker.c | 13 +
arch/x86/kernel/probe_roms.c | 268 +
arch/x86/kernel/process.c | 745 +++
arch/x86/kernel/process_32.c | 336 ++
arch/x86/kernel/process_64.c | 556 ++
arch/x86/kernel/ptrace.c | 1524 +++++
arch/x86/kernel/pvclock.c | 158 +
arch/x86/kernel/quirks.c | 569 ++
arch/x86/kernel/reboot.c | 851 +++
arch/x86/kernel/reboot_32.S | 135 +
arch/x86/kernel/reboot_fixups_32.c | 102 +
arch/x86/kernel/relocate_kernel_32.S | 277 +
arch/x86/kernel/relocate_kernel_64.S | 268 +
arch/x86/kernel/resource.c | 48 +
arch/x86/kernel/rtc.c | 256 +
arch/x86/kernel/setup.c | 1060 ++++
arch/x86/kernel/setup_percpu.c | 287 +
arch/x86/kernel/signal.c | 959 +++
arch/x86/kernel/smp.c | 312 +
arch/x86/kernel/smpboot.c | 1429 +++++
arch/x86/kernel/stacktrace.c | 146 +
arch/x86/kernel/step.c | 216 +
arch/x86/kernel/sys_i386_32.c | 40 +
arch/x86/kernel/sys_x86_64.c | 283 +
arch/x86/kernel/syscall_32.c | 25 +
arch/x86/kernel/syscall_64.c | 33 +
arch/x86/kernel/tboot.c | 454 ++
arch/x86/kernel/tce_64.c | 190 +
arch/x86/kernel/test_nx.c | 175 +
arch/x86/kernel/test_rodata.c | 86 +
arch/x86/kernel/time.c | 101 +
arch/x86/kernel/tls.c | 217 +
arch/x86/kernel/tls.h | 21 +
arch/x86/kernel/topology.c | 83 +
arch/x86/kernel/trampoline.c | 42 +
arch/x86/kernel/trampoline_32.S | 83 +
arch/x86/kernel/trampoline_64.S | 171 +
arch/x86/kernel/traps.c | 733 +++
arch/x86/kernel/tsc.c | 1026 ++++
arch/x86/kernel/tsc_sync.c | 217 +
arch/x86/kernel/verify_cpu.S | 139 +
arch/x86/kernel/vm86_32.c | 849 +++
arch/x86/kernel/vmlinux.lds.S | 376 ++
arch/x86/kernel/vsmp_64.c | 160 +
arch/x86/kernel/vsyscall_64.c | 357 ++
arch/x86/kernel/vsyscall_emu_64.S | 37 +
arch/x86/kernel/vsyscall_trace.h | 29 +
arch/x86/kernel/x8664_ksyms_64.c | 60 +
arch/x86/kernel/x86_init.c | 121 +
arch/x86/kernel/xsave.c | 473 ++
arch/x86/kvm/Kconfig | 87 +
arch/x86/kvm/Makefile | 21 +
arch/x86/kvm/cpuid.c | 670 +++
arch/x86/kvm/cpuid.h | 54 +
arch/x86/kvm/emulate.c | 4360 ++++++++++++++
arch/x86/kvm/i8254.c | 765 +++
arch/x86/kvm/i8254.h | 58 +
arch/x86/kvm/i8259.c | 664 +++
arch/x86/kvm/irq.c | 96 +
arch/x86/kvm/irq.h | 106 +
arch/x86/kvm/kvm_cache_regs.h | 102 +
arch/x86/kvm/kvm_timer.h | 18 +
arch/x86/kvm/lapic.c | 1387 +++++
arch/x86/kvm/lapic.h | 63 +
arch/x86/kvm/mmu.c | 4027 +++++++++++++
arch/x86/kvm/mmu.h | 104 +
arch/x86/kvm/mmu_audit.c | 303 +
arch/x86/kvm/mmutrace.h | 254 +
arch/x86/kvm/paging_tmpl.h | 837 +++
arch/x86/kvm/pmu.c | 537 ++
arch/x86/kvm/svm.c | 4336 ++++++++++++++
arch/x86/kvm/timer.c | 47 +
arch/x86/kvm/trace.h | 830 +++
arch/x86/kvm/tss.h | 59 +
arch/x86/kvm/vmx.c | 7272 +++++++++++++++++++++++
arch/x86/kvm/x86.c | 6607 ++++++++++++++++++++
arch/x86/kvm/x86.h | 127 +
arch/x86/lguest/Kconfig | 15 +
arch/x86/lguest/Makefile | 2 +
arch/x86/lguest/boot.c | 1450 +++++
arch/x86/lguest/i386_head.S | 196 +
arch/x86/lib/Makefile | 47 +
arch/x86/lib/atomic64_32.c | 4 +
arch/x86/lib/atomic64_386_32.S | 194 +
arch/x86/lib/atomic64_cx8_32.S | 215 +
arch/x86/lib/cache-smp.c | 19 +
arch/x86/lib/checksum_32.S | 525 ++
arch/x86/lib/clear_page_64.S | 73 +
arch/x86/lib/cmpxchg.c | 54 +
arch/x86/lib/cmpxchg16b_emu.S | 65 +
arch/x86/lib/cmpxchg8b_emu.S | 57 +
arch/x86/lib/copy_page_64.S | 112 +
arch/x86/lib/copy_user_64.S | 302 +
arch/x86/lib/copy_user_nocache_64.S | 137 +
arch/x86/lib/csum-copy_64.S | 249 +
arch/x86/lib/csum-partial_64.c | 148 +
arch/x86/lib/csum-wrappers_64.c | 150 +
arch/x86/lib/delay.c | 140 +
arch/x86/lib/getuser.S | 104 +
arch/x86/lib/inat.c | 97 +
arch/x86/lib/insn.c | 576 ++
arch/x86/lib/iomap_copy_64.S | 30 +
arch/x86/lib/memcpy_32.c | 208 +
arch/x86/lib/memcpy_64.S | 206 +
arch/x86/lib/memmove_64.S | 223 +
arch/x86/lib/memset_64.S | 154 +
arch/x86/lib/mmx_32.c | 377 ++
arch/x86/lib/msr-reg-export.c | 5 +
arch/x86/lib/msr-reg.S | 102 +
arch/x86/lib/msr-smp.c | 204 +
arch/x86/lib/msr.c | 23 +
arch/x86/lib/putuser.S | 97 +
arch/x86/lib/rwlock.S | 44 +
arch/x86/lib/rwsem.S | 136 +
arch/x86/lib/string_32.c | 235 +
arch/x86/lib/strstr_32.c | 31 +
arch/x86/lib/thunk_32.S | 29 +
arch/x86/lib/thunk_64.S | 45 +
arch/x86/lib/usercopy.c | 146 +
arch/x86/lib/usercopy_32.c | 804 +++
arch/x86/lib/usercopy_64.c | 134 +
arch/x86/lib/x86-opcode-map.txt | 955 +++
arch/x86/math-emu/Makefile | 29 +
arch/x86/math-emu/README | 427 ++
arch/x86/math-emu/control_w.h | 45 +
arch/x86/math-emu/div_Xsig.S | 365 ++
arch/x86/math-emu/div_small.S | 47 +
arch/x86/math-emu/errors.c | 689 +++
arch/x86/math-emu/exception.h | 50 +
arch/x86/math-emu/fpu_arith.c | 152 +
arch/x86/math-emu/fpu_asm.h | 31 +
arch/x86/math-emu/fpu_aux.c | 196 +
arch/x86/math-emu/fpu_emu.h | 217 +
arch/x86/math-emu/fpu_entry.c | 768 +++
arch/x86/math-emu/fpu_etc.c | 130 +
arch/x86/math-emu/fpu_proto.h | 144 +
arch/x86/math-emu/fpu_system.h | 86 +
arch/x86/math-emu/fpu_tags.c | 115 +
arch/x86/math-emu/fpu_trig.c | 1643 +++++
arch/x86/math-emu/get_address.c | 399 ++
arch/x86/math-emu/load_store.c | 282 +
arch/x86/math-emu/mul_Xsig.S | 176 +
arch/x86/math-emu/poly.h | 114 +
arch/x86/math-emu/poly_2xm1.c | 145 +
arch/x86/math-emu/poly_atan.c | 208 +
arch/x86/math-emu/poly_l2.c | 244 +
arch/x86/math-emu/poly_sin.c | 378 ++
arch/x86/math-emu/poly_tan.c | 212 +
arch/x86/math-emu/polynom_Xsig.S | 135 +
arch/x86/math-emu/reg_add_sub.c | 333 ++
arch/x86/math-emu/reg_compare.c | 350 ++
arch/x86/math-emu/reg_constant.c | 117 +
arch/x86/math-emu/reg_constant.h | 25 +
arch/x86/math-emu/reg_convert.c | 46 +
arch/x86/math-emu/reg_divide.c | 182 +
arch/x86/math-emu/reg_ld_str.c | 1219 ++++
arch/x86/math-emu/reg_mul.c | 115 +
arch/x86/math-emu/reg_norm.S | 147 +
arch/x86/math-emu/reg_round.S | 708 +++
arch/x86/math-emu/reg_u_add.S | 167 +
arch/x86/math-emu/reg_u_div.S | 471 ++
arch/x86/math-emu/reg_u_mul.S | 148 +
arch/x86/math-emu/reg_u_sub.S | 272 +
arch/x86/math-emu/round_Xsig.S | 141 +
arch/x86/math-emu/shr_Xsig.S | 87 +
arch/x86/math-emu/status_w.h | 67 +
arch/x86/math-emu/version.h | 12 +
arch/x86/math-emu/wm_shrx.S | 204 +
arch/x86/math-emu/wm_sqrt.S | 470 ++
arch/x86/mm/Makefile | 30 +
arch/x86/mm/amdtopology.c | 197 +
arch/x86/mm/dump_pagetables.c | 375 ++
arch/x86/mm/extable.c | 37 +
arch/x86/mm/fault.c | 1211 ++++
arch/x86/mm/gup.c | 393 ++
arch/x86/mm/highmem_32.c | 141 +
arch/x86/mm/hugetlbpage.c | 443 ++
arch/x86/mm/init.c | 416 ++
arch/x86/mm/init_32.c | 959 +++
arch/x86/mm/init_64.c | 988 +++
arch/x86/mm/iomap_32.c | 119 +
arch/x86/mm/ioremap.c | 628 ++
arch/x86/mm/kmemcheck/Makefile | 1 +
arch/x86/mm/kmemcheck/error.c | 227 +
arch/x86/mm/kmemcheck/error.h | 15 +
arch/x86/mm/kmemcheck/kmemcheck.c | 653 ++
arch/x86/mm/kmemcheck/opcode.c | 106 +
arch/x86/mm/kmemcheck/opcode.h | 9 +
arch/x86/mm/kmemcheck/pte.c | 22 +
arch/x86/mm/kmemcheck/pte.h | 10 +
arch/x86/mm/kmemcheck/selftest.c | 70 +
arch/x86/mm/kmemcheck/selftest.h | 6 +
arch/x86/mm/kmemcheck/shadow.c | 173 +
arch/x86/mm/kmemcheck/shadow.h | 18 +
arch/x86/mm/kmmio.c | 590 ++
arch/x86/mm/memtest.c | 124 +
arch/x86/mm/mmap.c | 124 +
arch/x86/mm/mmio-mod.c | 480 ++
arch/x86/mm/numa.c | 834 +++
arch/x86/mm/numa_32.c | 265 +
arch/x86/mm/numa_64.c | 25 +
arch/x86/mm/numa_emulation.c | 498 ++
arch/x86/mm/numa_internal.h | 39 +
arch/x86/mm/pageattr-test.c | 261 +
arch/x86/mm/pageattr.c | 1377 +++++
arch/x86/mm/pat.c | 828 +++
arch/x86/mm/pat_internal.h | 46 +
arch/x86/mm/pat_rbtree.c | 253 +
arch/x86/mm/pf_in.c | 532 ++
arch/x86/mm/pf_in.h | 39 +
arch/x86/mm/pgtable.c | 447 ++
arch/x86/mm/pgtable_32.c | 133 +
arch/x86/mm/physaddr.c | 70 +
arch/x86/mm/physaddr.h | 10 +
arch/x86/mm/setup_nx.c | 60 +
arch/x86/mm/srat.c | 193 +
arch/x86/mm/testmmiotrace.c | 140 +
arch/x86/mm/tlb.c | 332 ++
arch/x86/net/Makefile | 4 +
arch/x86/net/bpf_jit.S | 200 +
arch/x86/net/bpf_jit_comp.c | 665 +++
arch/x86/oprofile/Makefile | 11 +
arch/x86/oprofile/backtrace.c | 127 +
arch/x86/oprofile/init.c | 38 +
arch/x86/oprofile/nmi_int.c | 791 +++
arch/x86/oprofile/op_counter.h | 30 +
arch/x86/oprofile/op_model_amd.c | 543 ++
arch/x86/oprofile/op_model_p4.c | 723 +++
arch/x86/oprofile/op_model_ppro.c | 245 +
arch/x86/oprofile/op_x86_model.h | 90 +
arch/x86/pci/Makefile | 28 +
arch/x86/pci/acpi.c | 489 ++
arch/x86/pci/amd_bus.c | 421 ++
arch/x86/pci/broadcom_bus.c | 120 +
arch/x86/pci/bus_numa.c | 106 +
arch/x86/pci/bus_numa.h | 25 +
arch/x86/pci/ce4100.c | 318 +
arch/x86/pci/common.c | 740 +++
arch/x86/pci/direct.c | 315 +
arch/x86/pci/early.c | 111 +
arch/x86/pci/fixup.c | 521 +
arch/x86/pci/i386.c | 387 ++
arch/x86/pci/init.c | 44 +
arch/x86/pci/irq.c | 1260 ++++
arch/x86/pci/legacy.c | 74 +
arch/x86/pci/mmconfig-shared.c | 667 +++
arch/x86/pci/mmconfig_32.c | 134 +
arch/x86/pci/mmconfig_64.c | 131 +
arch/x86/pci/mrst.c | 303 +
arch/x86/pci/numaq_32.c | 165 +
arch/x86/pci/olpc.c | 315 +
arch/x86/pci/pcbios.c | 455 ++
arch/x86/pci/visws.c | 92 +
arch/x86/pci/xen.c | 573 ++
arch/x86/platform/Makefile | 11 +
arch/x86/platform/ce4100/Makefile | 1 +
arch/x86/platform/ce4100/ce4100.c | 146 +
arch/x86/platform/ce4100/falconfalls.dts | 433 ++
arch/x86/platform/efi/Makefile | 1 +
arch/x86/platform/efi/efi.c | 946 +++
arch/x86/platform/efi/efi_32.c | 69 +
arch/x86/platform/efi/efi_64.c | 99 +
arch/x86/platform/efi/efi_stub_32.S | 123 +
arch/x86/platform/efi/efi_stub_64.S | 116 +
arch/x86/platform/geode/Makefile | 3 +
arch/x86/platform/geode/alix.c | 200 +
arch/x86/platform/geode/geos.c | 128 +
arch/x86/platform/geode/net5501.c | 154 +
arch/x86/platform/iris/Makefile | 1 +
arch/x86/platform/iris/iris.c | 91 +
arch/x86/platform/mrst/Makefile | 3 +
arch/x86/platform/mrst/early_printk_mrst.c | 327 +
arch/x86/platform/mrst/mrst.c | 1053 ++++
arch/x86/platform/mrst/vrtc.c | 169 +
arch/x86/platform/olpc/Makefile | 5 +
arch/x86/platform/olpc/olpc-xo1-pm.c | 216 +
arch/x86/platform/olpc/olpc-xo1-rtc.c | 81 +
arch/x86/platform/olpc/olpc-xo1-sci.c | 614 ++
arch/x86/platform/olpc/olpc-xo15-sci.c | 238 +
arch/x86/platform/olpc/olpc.c | 476 ++
arch/x86/platform/olpc/olpc_dt.c | 304 +
arch/x86/platform/olpc/olpc_ofw.c | 117 +
arch/x86/platform/olpc/xo1-wakeup.S | 124 +
arch/x86/platform/scx200/Makefile | 2 +
arch/x86/platform/scx200/scx200_32.c | 129 +
arch/x86/platform/sfi/Makefile | 1 +
arch/x86/platform/sfi/sfi.c | 109 +
arch/x86/platform/uv/Makefile | 1 +
arch/x86/platform/uv/bios_uv.c | 216 +
arch/x86/platform/uv/tlb_uv.c | 2122 +++++++
arch/x86/platform/uv/uv_irq.c | 285 +
arch/x86/platform/uv/uv_sysfs.c | 76 +
arch/x86/platform/uv/uv_time.c | 425 ++
arch/x86/platform/visws/Makefile | 1 +
arch/x86/platform/visws/visws_quirks.c | 608 ++
arch/x86/power/Makefile | 7 +
arch/x86/power/cpu.c | 239 +
arch/x86/power/hibernate_32.c | 176 +
arch/x86/power/hibernate_64.c | 171 +
arch/x86/power/hibernate_asm_32.S | 80 +
arch/x86/power/hibernate_asm_64.S | 147 +
arch/x86/syscalls/Makefile | 55 +
arch/x86/syscalls/syscall_32.tbl | 357 ++
arch/x86/syscalls/syscall_64.tbl | 353 ++
arch/x86/syscalls/syscallhdr.sh | 27 +
arch/x86/syscalls/syscalltbl.sh | 15 +
arch/x86/tools/Makefile | 42 +
arch/x86/tools/chkobjdump.awk | 33 +
arch/x86/tools/distill.awk | 47 +
arch/x86/tools/gen-insn-attr-x86.awk | 381 ++
arch/x86/tools/insn_sanity.c | 275 +
arch/x86/tools/relocs.c | 818 +++
arch/x86/tools/test_get_len.c | 173 +
arch/x86/um/Kconfig | 59 +
arch/x86/um/Makefile | 46 +
arch/x86/um/asm/apic.h | 4 +
arch/x86/um/asm/arch_hweight.h | 6 +
arch/x86/um/asm/archparam.h | 20 +
arch/x86/um/asm/barrier.h | 75 +
arch/x86/um/asm/checksum.h | 10 +
arch/x86/um/asm/checksum_32.h | 201 +
arch/x86/um/asm/checksum_64.h | 144 +
arch/x86/um/asm/desc.h | 16 +
arch/x86/um/asm/elf.h | 221 +
arch/x86/um/asm/irq_vectors.h | 10 +
arch/x86/um/asm/mm_context.h | 72 +
arch/x86/um/asm/module.h | 23 +
arch/x86/um/asm/processor.h | 32 +
arch/x86/um/asm/processor_32.h | 56 +
arch/x86/um/asm/processor_64.h | 35 +
arch/x86/um/asm/ptrace.h | 5 +
arch/x86/um/asm/ptrace_32.h | 51 +
arch/x86/um/asm/ptrace_64.h | 72 +
arch/x86/um/asm/required-features.h | 9 +
arch/x86/um/asm/segment.h | 10 +
arch/x86/um/asm/vm-flags.h | 25 +
arch/x86/um/bug.c | 21 +
arch/x86/um/bugs_32.c | 74 +
arch/x86/um/bugs_64.c | 15 +
arch/x86/um/checksum_32.S | 458 ++
arch/x86/um/delay.c | 60 +
arch/x86/um/elfcore.c | 83 +
arch/x86/um/fault.c | 28 +
arch/x86/um/ksyms.c | 13 +
arch/x86/um/ldt.c | 502 ++
arch/x86/um/mem_32.c | 54 +
arch/x86/um/mem_64.c | 26 +
arch/x86/um/os-Linux/Makefile | 13 +
arch/x86/um/os-Linux/mcontext.c | 31 +
arch/x86/um/os-Linux/prctl.c | 12 +
arch/x86/um/os-Linux/registers.c | 113 +
arch/x86/um/os-Linux/task_size.c | 150 +
arch/x86/um/os-Linux/tls.c | 67 +
arch/x86/um/ptrace_32.c | 273 +
arch/x86/um/ptrace_64.c | 271 +
arch/x86/um/ptrace_user.c | 21 +
arch/x86/um/setjmp_32.S | 58 +
arch/x86/um/setjmp_64.S | 54 +
arch/x86/um/shared/sysdep/archsetjmp.h | 5 +
arch/x86/um/shared/sysdep/archsetjmp_32.h | 22 +
arch/x86/um/shared/sysdep/archsetjmp_64.h | 24 +
arch/x86/um/shared/sysdep/faultinfo.h | 5 +
arch/x86/um/shared/sysdep/faultinfo_32.h | 35 +
arch/x86/um/shared/sysdep/faultinfo_64.h | 35 +
arch/x86/um/shared/sysdep/kernel-offsets.h | 21 +
arch/x86/um/shared/sysdep/mcontext.h | 31 +
arch/x86/um/shared/sysdep/ptrace.h | 15 +
arch/x86/um/shared/sysdep/ptrace_32.h | 114 +
arch/x86/um/shared/sysdep/ptrace_64.h | 157 +
arch/x86/um/shared/sysdep/ptrace_user.h | 27 +
arch/x86/um/shared/sysdep/skas_ptrace.h | 22 +
arch/x86/um/shared/sysdep/stub.h | 14 +
arch/x86/um/shared/sysdep/stub_32.h | 93 +
arch/x86/um/shared/sysdep/stub_64.h | 99 +
arch/x86/um/shared/sysdep/syscalls.h | 5 +
arch/x86/um/shared/sysdep/syscalls_32.h | 20 +
arch/x86/um/shared/sysdep/syscalls_64.h | 32 +
arch/x86/um/shared/sysdep/tls.h | 39 +
arch/x86/um/signal.c | 624 ++
arch/x86/um/stub_32.S | 51 +
arch/x86/um/stub_64.S | 66 +
arch/x86/um/stub_segv.c | 19 +
arch/x86/um/sys_call_table_32.c | 55 +
arch/x86/um/sys_call_table_64.c | 60 +
arch/x86/um/syscalls_32.c | 66 +
arch/x86/um/syscalls_64.c | 102 +
arch/x86/um/sysrq_32.c | 101 +
arch/x86/um/sysrq_64.c | 41 +
arch/x86/um/tls_32.c | 396 ++
arch/x86/um/tls_64.c | 17 +
arch/x86/um/user-offsets.c | 97 +
arch/x86/um/vdso/Makefile | 90 +
arch/x86/um/vdso/checkundef.sh | 10 +
arch/x86/um/vdso/um_vdso.c | 71 +
arch/x86/um/vdso/vdso-layout.lds.S | 64 +
arch/x86/um/vdso/vdso-note.S | 12 +
arch/x86/um/vdso/vdso.S | 10 +
arch/x86/um/vdso/vdso.lds.S | 32 +
arch/x86/um/vdso/vma.c | 73 +
arch/x86/vdso/Makefile | 199 +
arch/x86/vdso/checkundef.sh | 10 +
arch/x86/vdso/vclock_gettime.c | 217 +
arch/x86/vdso/vdso-layout.lds.S | 64 +
arch/x86/vdso/vdso-note.S | 12 +
arch/x86/vdso/vdso.S | 22 +
arch/x86/vdso/vdso.lds.S | 32 +
arch/x86/vdso/vdso32-setup.c | 435 ++
arch/x86/vdso/vdso32.S | 22 +
arch/x86/vdso/vdso32/int80.S | 56 +
arch/x86/vdso/vdso32/note.S | 44 +
arch/x86/vdso/vdso32/sigreturn.S | 144 +
arch/x86/vdso/vdso32/syscall.S | 77 +
arch/x86/vdso/vdso32/sysenter.S | 116 +
arch/x86/vdso/vdso32/vdso32.lds.S | 37 +
arch/x86/vdso/vdsox32.S | 22 +
arch/x86/vdso/vdsox32.lds.S | 28 +
arch/x86/vdso/vgetcpu.c | 35 +
arch/x86/vdso/vma.c | 206 +
arch/x86/video/Makefile | 1 +
arch/x86/video/fbdev.c | 32 +
arch/x86/xen/Kconfig | 52 +
arch/x86/xen/Makefile | 24 +
arch/x86/xen/debugfs.c | 125 +
arch/x86/xen/debugfs.h | 10 +
arch/x86/xen/enlighten.c | 1557 +++++
arch/x86/xen/grant-table.c | 127 +
arch/x86/xen/irq.c | 133 +
arch/x86/xen/mmu.c | 2371 ++++++++
arch/x86/xen/mmu.h | 26 +
arch/x86/xen/multicalls.c | 208 +
arch/x86/xen/multicalls.h | 68 +
arch/x86/xen/p2m.c | 949 +++
arch/x86/xen/pci-swiotlb-xen.c | 67 +
arch/x86/xen/platform-pci-unplug.c | 143 +
arch/x86/xen/setup.c | 427 ++
arch/x86/xen/smp.c | 592 ++
arch/x86/xen/spinlock.c | 454 ++
arch/x86/xen/suspend.c | 80 +
arch/x86/xen/time.c | 525 ++
arch/x86/xen/trace.c | 62 +
arch/x86/xen/vdso.h | 4 +
arch/x86/xen/vga.c | 67 +
arch/x86/xen/xen-asm.S | 142 +
arch/x86/xen/xen-asm.h | 12 +
arch/x86/xen/xen-asm_32.S | 230 +
arch/x86/xen/xen-asm_64.S | 159 +
arch/x86/xen/xen-head.S | 55 +
arch/x86/xen/xen-ops.h | 123 +

1045 files changed, 270479 insertions(+)

create mode 100644 arch/x86/Kbuild
create mode 100644 arch/x86/Kconfig
create mode 100644 arch/x86/Kconfig.cpu
create mode 100644 arch/x86/Kconfig.debug
create mode 100644 arch/x86/Makefile
create mode 100644 arch/x86/Makefile.um
create mode 100644 arch/x86/Makefile_32.cpu
create mode 100644 arch/x86/boot/Makefile
create mode 100644 arch/x86/boot/a20.c
create mode 100644 arch/x86/boot/apm.c
create mode 100644 arch/x86/boot/bioscall.S
create mode 100644 arch/x86/boot/bitops.h
create mode 100644 arch/x86/boot/boot.h
create mode 100644 arch/x86/boot/cmdline.c
create mode 100644 arch/x86/boot/code16gcc.h
create mode 100644 arch/x86/boot/compressed/Makefile
create mode 100644 arch/x86/boot/compressed/cmdline.c
create mode 100644 arch/x86/boot/compressed/early_serial_console.c
create mode 100644 arch/x86/boot/compressed/eboot.c
create mode 100644 arch/x86/boot/compressed/eboot.h
create mode 100644 arch/x86/boot/compressed/efi_stub_32.S
create mode 100644 arch/x86/boot/compressed/efi_stub_64.S
create mode 100644 arch/x86/boot/compressed/head_32.S
create mode 100644 arch/x86/boot/compressed/head_64.S
create mode 100644 arch/x86/boot/compressed/misc.c
create mode 100644 arch/x86/boot/compressed/misc.h
create mode 100644 arch/x86/boot/compressed/mkpiggy.c
create mode 100644 arch/x86/boot/compressed/string.c
create mode 100644 arch/x86/boot/compressed/vmlinux.lds.S
create mode 100644 arch/x86/boot/copy.S
create mode 100644 arch/x86/boot/cpu.c
create mode 100644 arch/x86/boot/cpucheck.c
create mode 100644 arch/x86/boot/ctype.h
create mode 100644 arch/x86/boot/early_serial_console.c
create mode 100644 arch/x86/boot/edd.c
create mode 100644 arch/x86/boot/header.S
create mode 100644 arch/x86/boot/install.sh
create mode 100644 arch/x86/boot/main.c
create mode 100644 arch/x86/boot/mca.c
create mode 100644 arch/x86/boot/memory.c
create mode 100644 arch/x86/boot/mkcpustr.c
create mode 100644 arch/x86/boot/mtools.conf.in
create mode 100644 arch/x86/boot/pm.c
create mode 100644 arch/x86/boot/pmjump.S
create mode 100644 arch/x86/boot/printf.c
create mode 100644 arch/x86/boot/regs.c
create mode 100644 arch/x86/boot/setup.ld
create mode 100644 arch/x86/boot/string.c
create mode 100644 arch/x86/boot/tools/build.c
create mode 100644 arch/x86/boot/tty.c
create mode 100644 arch/x86/boot/version.c
create mode 100644 arch/x86/boot/vesa.h
create mode 100644 arch/x86/boot/video-bios.c
create mode 100644 arch/x86/boot/video-mode.c
create mode 100644 arch/x86/boot/video-vesa.c
create mode 100644 arch/x86/boot/video-vga.c
create mode 100644 arch/x86/boot/video.c
create mode 100644 arch/x86/boot/video.h
create mode 100644 arch/x86/configs/i386_defconfig
create mode 100644 arch/x86/configs/x86_64_defconfig
create mode 100644 arch/x86/crypto/Makefile
create mode 100644 arch/x86/crypto/aes-i586-asm_32.S
create mode 100644 arch/x86/crypto/aes-x86_64-asm_64.S
create mode 100644 arch/x86/crypto/aes_glue.c
create mode 100644 arch/x86/crypto/aesni-intel_asm.S
create mode 100644 arch/x86/crypto/aesni-intel_glue.c
create mode 100644 arch/x86/crypto/blowfish-x86_64-asm_64.S
create mode 100644 arch/x86/crypto/blowfish_glue.c
create mode 100644 arch/x86/crypto/camellia-x86_64-asm_64.S
create mode 100644 arch/x86/crypto/camellia_glue.c
create mode 100644 arch/x86/crypto/crc32c-intel.c
create mode 100644 arch/x86/crypto/fpu.c
create mode 100644 arch/x86/crypto/ghash-clmulni-intel_asm.S
create mode 100644 arch/x86/crypto/ghash-clmulni-intel_glue.c
create mode 100644 arch/x86/crypto/salsa20-i586-asm_32.S
create mode 100644 arch/x86/crypto/salsa20-x86_64-asm_64.S
create mode 100644 arch/x86/crypto/salsa20_glue.c
create mode 100644 arch/x86/crypto/serpent-sse2-i586-asm_32.S
create mode 100644 arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
create mode 100644 arch/x86/crypto/serpent_sse2_glue.c
create mode 100644 arch/x86/crypto/sha1_ssse3_asm.S
create mode 100644 arch/x86/crypto/sha1_ssse3_glue.c
create mode 100644 arch/x86/crypto/twofish-i586-asm_32.S
create mode 100644 arch/x86/crypto/twofish-x86_64-asm_64-3way.S
create mode 100644 arch/x86/crypto/twofish-x86_64-asm_64.S
create mode 100644 arch/x86/crypto/twofish_glue.c
create mode 100644 arch/x86/crypto/twofish_glue_3way.c
create mode 100644 arch/x86/ia32/Makefile
create mode 100644 arch/x86/ia32/audit.c
create mode 100644 arch/x86/ia32/ia32_aout.c
create mode 100644 arch/x86/ia32/ia32_signal.c
create mode 100644 arch/x86/ia32/ia32entry.S
create mode 100644 arch/x86/ia32/ipc32.c
create mode 100644 arch/x86/ia32/nosyscall.c
create mode 100644 arch/x86/ia32/sys_ia32.c
create mode 100644 arch/x86/ia32/syscall_ia32.c
create mode 100644 arch/x86/include/asm/Kbuild
create mode 100644 arch/x86/include/asm/a.out-core.h
create mode 100644 arch/x86/include/asm/a.out.h
create mode 100644 arch/x86/include/asm/acpi.h
create mode 100644 arch/x86/include/asm/aes.h
create mode 100644 arch/x86/include/asm/agp.h
create mode 100644 arch/x86/include/asm/alternative-asm.h
create mode 100644 arch/x86/include/asm/alternative.h
create mode 100644 arch/x86/include/asm/amd_nb.h
create mode 100644 arch/x86/include/asm/apb_timer.h
create mode 100644 arch/x86/include/asm/apic.h
create mode 100644 arch/x86/include/asm/apic_flat_64.h
create mode 100644 arch/x86/include/asm/apicdef.h
create mode 100644 arch/x86/include/asm/apm.h
create mode 100644 arch/x86/include/asm/arch_hweight.h
create mode 100644 arch/x86/include/asm/archrandom.h
create mode 100644 arch/x86/include/asm/asm-offsets.h
create mode 100644 arch/x86/include/asm/asm.h
create mode 100644 arch/x86/include/asm/atomic.h
create mode 100644 arch/x86/include/asm/atomic64_32.h
create mode 100644 arch/x86/include/asm/atomic64_64.h
create mode 100644 arch/x86/include/asm/auxvec.h
create mode 100644 arch/x86/include/asm/barrier.h
create mode 100644 arch/x86/include/asm/bios_ebda.h
create mode 100644 arch/x86/include/asm/bitops.h
create mode 100644 arch/x86/include/asm/bitsperlong.h
create mode 100644 arch/x86/include/asm/boot.h
create mode 100644 arch/x86/include/asm/bootparam.h
create mode 100644 arch/x86/include/asm/bug.h
create mode 100644 arch/x86/include/asm/bugs.h
create mode 100644 arch/x86/include/asm/byteorder.h
create mode 100644 arch/x86/include/asm/cache.h
create mode 100644 arch/x86/include/asm/cacheflush.h
create mode 100644 arch/x86/include/asm/calgary.h
create mode 100644 arch/x86/include/asm/calling.h
create mode 100644 arch/x86/include/asm/ce4100.h
create mode 100644 arch/x86/include/asm/checksum.h
create mode 100644 arch/x86/include/asm/checksum_32.h
create mode 100644 arch/x86/include/asm/checksum_64.h
create mode 100644 arch/x86/include/asm/clocksource.h
create mode 100644 arch/x86/include/asm/cmpxchg.h
create mode 100644 arch/x86/include/asm/cmpxchg_32.h
create mode 100644 arch/x86/include/asm/cmpxchg_64.h
create mode 100644 arch/x86/include/asm/compat.h
create mode 100644 arch/x86/include/asm/cpu.h
create mode 100644 arch/x86/include/asm/cpu_device_id.h
create mode 100644 arch/x86/include/asm/cpufeature.h
create mode 100644 arch/x86/include/asm/cpumask.h
create mode 100644 arch/x86/include/asm/cputime.h
create mode 100644 arch/x86/include/asm/current.h
create mode 100644 arch/x86/include/asm/debugreg.h
create mode 100644 arch/x86/include/asm/delay.h
create mode 100644 arch/x86/include/asm/desc.h
create mode 100644 arch/x86/include/asm/desc_defs.h
create mode 100644 arch/x86/include/asm/device.h
create mode 100644 arch/x86/include/asm/div64.h
create mode 100644 arch/x86/include/asm/dma-mapping.h
create mode 100644 arch/x86/include/asm/dma.h
create mode 100644 arch/x86/include/asm/dmi.h
create mode 100644 arch/x86/include/asm/dwarf2.h
create mode 100644 arch/x86/include/asm/e820.h
create mode 100644 arch/x86/include/asm/edac.h
create mode 100644 arch/x86/include/asm/efi.h
create mode 100644 arch/x86/include/asm/elf.h
create mode 100644 arch/x86/include/asm/emergency-restart.h
create mode 100644 arch/x86/include/asm/entry_arch.h
create mode 100644 arch/x86/include/asm/errno.h
create mode 100644 arch/x86/include/asm/exec.h
create mode 100644 arch/x86/include/asm/fb.h
create mode 100644 arch/x86/include/asm/fcntl.h
create mode 100644 arch/x86/include/asm/fixmap.h
create mode 100644 arch/x86/include/asm/floppy.h
create mode 100644 arch/x86/include/asm/fpu-internal.h
create mode 100644 arch/x86/include/asm/frame.h
create mode 100644 arch/x86/include/asm/ftrace.h
create mode 100644 arch/x86/include/asm/futex.h
create mode 100644 arch/x86/include/asm/gart.h
create mode 100644 arch/x86/include/asm/genapic.h
create mode 100644 arch/x86/include/asm/geode.h
create mode 100644 arch/x86/include/asm/gpio.h
create mode 100644 arch/x86/include/asm/hardirq.h
create mode 100644 arch/x86/include/asm/highmem.h
create mode 100644 arch/x86/include/asm/hpet.h
create mode 100644 arch/x86/include/asm/hugetlb.h
create mode 100644 arch/x86/include/asm/hw_breakpoint.h
create mode 100644 arch/x86/include/asm/hw_irq.h
create mode 100644 arch/x86/include/asm/hypertransport.h
create mode 100644 arch/x86/include/asm/hyperv.h
create mode 100644 arch/x86/include/asm/hypervisor.h
create mode 100644 arch/x86/include/asm/i387.h
create mode 100644 arch/x86/include/asm/i8259.h
create mode 100644 arch/x86/include/asm/ia32.h
create mode 100644 arch/x86/include/asm/ia32_unistd.h
create mode 100644 arch/x86/include/asm/idle.h
create mode 100644 arch/x86/include/asm/inat.h
create mode 100644 arch/x86/include/asm/inat_types.h
create mode 100644 arch/x86/include/asm/init.h
create mode 100644 arch/x86/include/asm/insn.h
create mode 100644 arch/x86/include/asm/inst.h
create mode 100644 arch/x86/include/asm/intel_scu_ipc.h
create mode 100644 arch/x86/include/asm/io.h
create mode 100644 arch/x86/include/asm/io_apic.h
create mode 100644 arch/x86/include/asm/ioctl.h
create mode 100644 arch/x86/include/asm/ioctls.h
create mode 100644 arch/x86/include/asm/iomap.h
create mode 100644 arch/x86/include/asm/iommu.h
create mode 100644 arch/x86/include/asm/iommu_table.h
create mode 100644 arch/x86/include/asm/ipcbuf.h
create mode 100644 arch/x86/include/asm/ipi.h
create mode 100644 arch/x86/include/asm/irq.h
create mode 100644 arch/x86/include/asm/irq_regs.h
create mode 100644 arch/x86/include/asm/irq_remapping.h
create mode 100644 arch/x86/include/asm/irq_vectors.h
create mode 100644 arch/x86/include/asm/irqflags.h
create mode 100644 arch/x86/include/asm/ist.h
create mode 100644 arch/x86/include/asm/jump_label.h
create mode 100644 arch/x86/include/asm/kdebug.h
create mode 100644 arch/x86/include/asm/kexec.h
create mode 100644 arch/x86/include/asm/kgdb.h
create mode 100644 arch/x86/include/asm/kmap_types.h
create mode 100644 arch/x86/include/asm/kmemcheck.h
create mode 100644 arch/x86/include/asm/kprobes.h
create mode 100644 arch/x86/include/asm/kvm.h
create mode 100644 arch/x86/include/asm/kvm_emulate.h
create mode 100644 arch/x86/include/asm/kvm_host.h
create mode 100644 arch/x86/include/asm/kvm_para.h
create mode 100644 arch/x86/include/asm/ldt.h
create mode 100644 arch/x86/include/asm/lguest.h
create mode 100644 arch/x86/include/asm/lguest_hcall.h
create mode 100644 arch/x86/include/asm/linkage.h
create mode 100644 arch/x86/include/asm/local.h
create mode 100644 arch/x86/include/asm/local64.h
create mode 100644 arch/x86/include/asm/mach_timer.h
create mode 100644 arch/x86/include/asm/mach_traps.h
create mode 100644 arch/x86/include/asm/math_emu.h
create mode 100644 arch/x86/include/asm/mc146818rtc.h
create mode 100644 arch/x86/include/asm/mca.h
create mode 100644 arch/x86/include/asm/mca_dma.h
create mode 100644 arch/x86/include/asm/mce.h
create mode 100644 arch/x86/include/asm/microcode.h
create mode 100644 arch/x86/include/asm/mman.h
create mode 100644 arch/x86/include/asm/mmconfig.h
create mode 100644 arch/x86/include/asm/mmu.h
create mode 100644 arch/x86/include/asm/mmu_context.h
create mode 100644 arch/x86/include/asm/mmx.h
create mode 100644 arch/x86/include/asm/mmzone.h
create mode 100644 arch/x86/include/asm/mmzone_32.h
create mode 100644 arch/x86/include/asm/mmzone_64.h
create mode 100644 arch/x86/include/asm/module.h
create mode 100644 arch/x86/include/asm/mpspec.h
create mode 100644 arch/x86/include/asm/mpspec_def.h
create mode 100644 arch/x86/include/asm/mrst-vrtc.h
create mode 100644 arch/x86/include/asm/mrst.h
create mode 100644 arch/x86/include/asm/msgbuf.h
create mode 100644 arch/x86/include/asm/mshyperv.h
create mode 100644 arch/x86/include/asm/msidef.h
create mode 100644 arch/x86/include/asm/msr-index.h
create mode 100644 arch/x86/include/asm/msr.h
create mode 100644 arch/x86/include/asm/mtrr.h
create mode 100644 arch/x86/include/asm/mutex.h
create mode 100644 arch/x86/include/asm/mutex_32.h
create mode 100644 arch/x86/include/asm/mutex_64.h
create mode 100644 arch/x86/include/asm/mwait.h
create mode 100644 arch/x86/include/asm/nmi.h
create mode 100644 arch/x86/include/asm/nops.h
create mode 100644 arch/x86/include/asm/numa.h
create mode 100644 arch/x86/include/asm/numa_32.h
create mode 100644 arch/x86/include/asm/numa_64.h
create mode 100644 arch/x86/include/asm/numachip/numachip_csr.h
create mode 100644 arch/x86/include/asm/numaq.h
create mode 100644 arch/x86/include/asm/olpc.h
create mode 100644 arch/x86/include/asm/olpc_ofw.h
create mode 100644 arch/x86/include/asm/page.h
create mode 100644 arch/x86/include/asm/page_32.h
create mode 100644 arch/x86/include/asm/page_32_types.h
create mode 100644 arch/x86/include/asm/page_64.h
create mode 100644 arch/x86/include/asm/page_64_types.h
create mode 100644 arch/x86/include/asm/page_types.h
create mode 100644 arch/x86/include/asm/param.h
create mode 100644 arch/x86/include/asm/paravirt.h
create mode 100644 arch/x86/include/asm/paravirt_types.h
create mode 100644 arch/x86/include/asm/parport.h
create mode 100644 arch/x86/include/asm/pat.h
create mode 100644 arch/x86/include/asm/pci-direct.h
create mode 100644 arch/x86/include/asm/pci-functions.h
create mode 100644 arch/x86/include/asm/pci.h
create mode 100644 arch/x86/include/asm/pci_64.h
create mode 100644 arch/x86/include/asm/pci_x86.h
create mode 100644 arch/x86/include/asm/percpu.h
create mode 100644 arch/x86/include/asm/perf_event.h
create mode 100644 arch/x86/include/asm/perf_event_p4.h
create mode 100644 arch/x86/include/asm/pgalloc.h
create mode 100644 arch/x86/include/asm/pgtable-2level.h
create mode 100644 arch/x86/include/asm/pgtable-2level_types.h
create mode 100644 arch/x86/include/asm/pgtable-3level.h
create mode 100644 arch/x86/include/asm/pgtable-3level_types.h
create mode 100644 arch/x86/include/asm/pgtable.h
create mode 100644 arch/x86/include/asm/pgtable_32.h
create mode 100644 arch/x86/include/asm/pgtable_32_types.h
create mode 100644 arch/x86/include/asm/pgtable_64.h
create mode 100644 arch/x86/include/asm/pgtable_64_types.h
create mode 100644 arch/x86/include/asm/pgtable_types.h
create mode 100644 arch/x86/include/asm/poll.h
create mode 100644 arch/x86/include/asm/posix_types.h
create mode 100644 arch/x86/include/asm/posix_types_32.h
create mode 100644 arch/x86/include/asm/posix_types_64.h
create mode 100644 arch/x86/include/asm/posix_types_x32.h
create mode 100644 arch/x86/include/asm/prctl.h
create mode 100644 arch/x86/include/asm/probe_roms.h
create mode 100644 arch/x86/include/asm/processor-cyrix.h
create mode 100644 arch/x86/include/asm/processor-flags.h
create mode 100644 arch/x86/include/asm/processor.h
create mode 100644 arch/x86/include/asm/prom.h
create mode 100644 arch/x86/include/asm/proto.h
create mode 100644 arch/x86/include/asm/ptrace-abi.h
create mode 100644 arch/x86/include/asm/ptrace.h
create mode 100644 arch/x86/include/asm/pvclock-abi.h
create mode 100644 arch/x86/include/asm/pvclock.h
create mode 100644 arch/x86/include/asm/reboot.h
create mode 100644 arch/x86/include/asm/reboot_fixups.h
create mode 100644 arch/x86/include/asm/required-features.h
create mode 100644 arch/x86/include/asm/resource.h
create mode 100644 arch/x86/include/asm/resume-trace.h
create mode 100644 arch/x86/include/asm/rio.h
create mode 100644 arch/x86/include/asm/rtc.h
create mode 100644 arch/x86/include/asm/rwlock.h
create mode 100644 arch/x86/include/asm/rwsem.h
create mode 100644 arch/x86/include/asm/scatterlist.h
create mode 100644 arch/x86/include/asm/seccomp.h
create mode 100644 arch/x86/include/asm/seccomp_32.h
create mode 100644 arch/x86/include/asm/seccomp_64.h
create mode 100644 arch/x86/include/asm/sections.h
create mode 100644 arch/x86/include/asm/segment.h
create mode 100644 arch/x86/include/asm/sembuf.h
create mode 100644 arch/x86/include/asm/serial.h
create mode 100644 arch/x86/include/asm/serpent.h
create mode 100644 arch/x86/include/asm/setup.h
create mode 100644 arch/x86/include/asm/setup_arch.h
create mode 100644 arch/x86/include/asm/shmbuf.h
create mode 100644 arch/x86/include/asm/shmparam.h
create mode 100644 arch/x86/include/asm/sigcontext.h
create mode 100644 arch/x86/include/asm/sigcontext32.h
create mode 100644 arch/x86/include/asm/sigframe.h
create mode 100644 arch/x86/include/asm/sighandling.h
create mode 100644 arch/x86/include/asm/siginfo.h
create mode 100644 arch/x86/include/asm/signal.h
create mode 100644 arch/x86/include/asm/smp.h
create mode 100644 arch/x86/include/asm/smpboot_hooks.h
create mode 100644 arch/x86/include/asm/socket.h
create mode 100644 arch/x86/include/asm/sockios.h
create mode 100644 arch/x86/include/asm/sparsemem.h
create mode 100644 arch/x86/include/asm/special_insns.h
create mode 100644 arch/x86/include/asm/spinlock.h
create mode 100644 arch/x86/include/asm/spinlock_types.h
create mode 100644 arch/x86/include/asm/stackprotector.h
create mode 100644 arch/x86/include/asm/stacktrace.h
create mode 100644 arch/x86/include/asm/stat.h
create mode 100644 arch/x86/include/asm/statfs.h
create mode 100644 arch/x86/include/asm/string.h
create mode 100644 arch/x86/include/asm/string_32.h
create mode 100644 arch/x86/include/asm/string_64.h
create mode 100644 arch/x86/include/asm/suspend.h
create mode 100644 arch/x86/include/asm/suspend_32.h
create mode 100644 arch/x86/include/asm/suspend_64.h
create mode 100644 arch/x86/include/asm/svm.h
create mode 100644 arch/x86/include/asm/swab.h
create mode 100644 arch/x86/include/asm/swiotlb.h
create mode 100644 arch/x86/include/asm/switch_to.h
create mode 100644 arch/x86/include/asm/sync_bitops.h
create mode 100644 arch/x86/include/asm/sys_ia32.h
create mode 100644 arch/x86/include/asm/syscall.h
create mode 100644 arch/x86/include/asm/syscalls.h
create mode 100644 arch/x86/include/asm/tce.h
create mode 100644 arch/x86/include/asm/termbits.h
create mode 100644 arch/x86/include/asm/termios.h
create mode 100644 arch/x86/include/asm/thread_info.h
create mode 100644 arch/x86/include/asm/time.h
create mode 100644 arch/x86/include/asm/timer.h
create mode 100644 arch/x86/include/asm/timex.h
create mode 100644 arch/x86/include/asm/tlb.h
create mode 100644 arch/x86/include/asm/tlbflush.h
create mode 100644 arch/x86/include/asm/topology.h
create mode 100644 arch/x86/include/asm/trampoline.h
create mode 100644 arch/x86/include/asm/traps.h
create mode 100644 arch/x86/include/asm/tsc.h
create mode 100644 arch/x86/include/asm/types.h
create mode 100644 arch/x86/include/asm/uaccess.h
create mode 100644 arch/x86/include/asm/uaccess_32.h
create mode 100644 arch/x86/include/asm/uaccess_64.h
create mode 100644 arch/x86/include/asm/ucontext.h
create mode 100644 arch/x86/include/asm/unaligned.h
create mode 100644 arch/x86/include/asm/unistd.h
create mode 100644 arch/x86/include/asm/user.h
create mode 100644 arch/x86/include/asm/user32.h
create mode 100644 arch/x86/include/asm/user_32.h
create mode 100644 arch/x86/include/asm/user_64.h
create mode 100644 arch/x86/include/asm/uv/bios.h
create mode 100644 arch/x86/include/asm/uv/uv.h
create mode 100644 arch/x86/include/asm/uv/uv_bau.h
create mode 100644 arch/x86/include/asm/uv/uv_hub.h
create mode 100644 arch/x86/include/asm/uv/uv_irq.h
create mode 100644 arch/x86/include/asm/uv/uv_mmrs.h
create mode 100644 arch/x86/include/asm/vdso.h
create mode 100644 arch/x86/include/asm/vga.h
create mode 100644 arch/x86/include/asm/vgtod.h
create mode 100644 arch/x86/include/asm/virtext.h
create mode 100644 arch/x86/include/asm/visws/cobalt.h
create mode 100644 arch/x86/include/asm/visws/lithium.h
create mode 100644 arch/x86/include/asm/visws/piix4.h
create mode 100644 arch/x86/include/asm/visws/sgivw.h
create mode 100644 arch/x86/include/asm/vm86.h
create mode 100644 arch/x86/include/asm/vmx.h
create mode 100644 arch/x86/include/asm/vsyscall.h
create mode 100644 arch/x86/include/asm/vvar.h
create mode 100644 arch/x86/include/asm/word-at-a-time.h
create mode 100644 arch/x86/include/asm/x2apic.h
create mode 100644 arch/x86/include/asm/x86_init.h
create mode 100644 arch/x86/include/asm/xcr.h
create mode 100644 arch/x86/include/asm/xen/events.h
create mode 100644 arch/x86/include/asm/xen/hypercall.h
arch/x86/include/asm/xen/hypercall.h create mode 100644 arch/x86/include/asm/xen/hypervisor.h create mode 100644 arch/x86/include/asm/xen/interface.h create mode 100644 arch/x86/include/asm/xen/interface_32.h create mode 100644 arch/x86/include/asm/xen/interface_64.h create mode 100644 arch/x86/include/asm/xen/page.h create mode 100644 arch/x86/include/asm/xen/pci.h create mode 100644 arch/x86/include/asm/xen/swiotlb-xen.h create mode 100644 arch/x86/include/asm/xen/trace_types.h create mode 100644 arch/x86/include/asm/xor.h create mode 100644 arch/x86/include/asm/xor_32.h create mode 100644 arch/x86/include/asm/xor_64.h create mode 100644 arch/x86/include/asm/xsave.h create mode 100644 arch/x86/kernel/Makefile create mode 100644 arch/x86/kernel/acpi/Makefile create mode 100644 arch/x86/kernel/acpi/boot.c create mode 100644 arch/x86/kernel/acpi/cstate.c create mode 100644 arch/x86/kernel/acpi/realmode/Makefile create mode 100644 arch/x86/kernel/acpi/realmode/bioscall.S create mode 100644 arch/x86/kernel/acpi/realmode/copy.S create mode 100644 arch/x86/kernel/acpi/realmode/regs.c create mode 100644 arch/x86/kernel/acpi/realmode/video-bios.c create mode 100644 arch/x86/kernel/acpi/realmode/video-mode.c create mode 100644 arch/x86/kernel/acpi/realmode/video-vesa.c create mode 100644 arch/x86/kernel/acpi/realmode/video-vga.c create mode 100644 arch/x86/kernel/acpi/realmode/wakemain.c create mode 100644 arch/x86/kernel/acpi/realmode/wakeup.S create mode 100644 arch/x86/kernel/acpi/realmode/wakeup.h create mode 100644 arch/x86/kernel/acpi/realmode/wakeup.lds.S create mode 100644 arch/x86/kernel/acpi/sleep.c create mode 100644 arch/x86/kernel/acpi/sleep.h create mode 100644 arch/x86/kernel/acpi/wakeup_32.S create mode 100644 arch/x86/kernel/acpi/wakeup_64.S create mode 100644 arch/x86/kernel/acpi/wakeup_rm.S create mode 100644 arch/x86/kernel/alternative.c create mode 100644 arch/x86/kernel/amd_gart_64.c create mode 100644 arch/x86/kernel/amd_nb.c create mode 100644 arch/x86/kernel/apb_timer.c create mode 100644 arch/x86/kernel/aperture_64.c create mode 100644 arch/x86/kernel/apic/Makefile create mode 100644 arch/x86/kernel/apic/apic.c create mode 100644 arch/x86/kernel/apic/apic_flat_64.c create mode 100644 arch/x86/kernel/apic/apic_noop.c create mode 100644 arch/x86/kernel/apic/apic_numachip.c create mode 100644 arch/x86/kernel/apic/bigsmp_32.c create mode 100644 arch/x86/kernel/apic/es7000_32.c create mode 100644 arch/x86/kernel/apic/hw_nmi.c create mode 100644 arch/x86/kernel/apic/io_apic.c create mode 100644 arch/x86/kernel/apic/ipi.c create mode 100644 arch/x86/kernel/apic/numaq_32.c create mode 100644 arch/x86/kernel/apic/probe_32.c create mode 100644 arch/x86/kernel/apic/probe_64.c create mode 100644 arch/x86/kernel/apic/summit_32.c create mode 100644 arch/x86/kernel/apic/x2apic_cluster.c create mode 100644 arch/x86/kernel/apic/x2apic_phys.c create mode 100644 arch/x86/kernel/apic/x2apic_uv_x.c create mode 100644 arch/x86/kernel/apm_32.c create mode 100644 arch/x86/kernel/asm-offsets.c create mode 100644 arch/x86/kernel/asm-offsets_32.c create mode 100644 arch/x86/kernel/asm-offsets_64.c create mode 100644 arch/x86/kernel/audit_64.c create mode 100644 arch/x86/kernel/bootflag.c create mode 100644 arch/x86/kernel/check.c create mode 100644 arch/x86/kernel/cpu/Makefile create mode 100644 arch/x86/kernel/cpu/amd.c create mode 100644 arch/x86/kernel/cpu/bugs.c create mode 100644 arch/x86/kernel/cpu/bugs_64.c create mode 100644 arch/x86/kernel/cpu/centaur.c create mode 100644 
arch/x86/kernel/cpu/common.c create mode 100644 arch/x86/kernel/cpu/cpu.h create mode 100644 arch/x86/kernel/cpu/cyrix.c create mode 100644 arch/x86/kernel/cpu/hypervisor.c create mode 100644 arch/x86/kernel/cpu/intel.c create mode 100644 arch/x86/kernel/cpu/intel_cacheinfo.c create mode 100644 arch/x86/kernel/cpu/match.c create mode 100644 arch/x86/kernel/cpu/mcheck/Makefile create mode 100644 arch/x86/kernel/cpu/mcheck/mce-apei.c create mode 100644 arch/x86/kernel/cpu/mcheck/mce-inject.c create mode 100644 arch/x86/kernel/cpu/mcheck/mce-internal.h create mode 100644 arch/x86/kernel/cpu/mcheck/mce-severity.c create mode 100644 arch/x86/kernel/cpu/mcheck/mce.c create mode 100644 arch/x86/kernel/cpu/mcheck/mce_amd.c create mode 100644 arch/x86/kernel/cpu/mcheck/mce_intel.c create mode 100644 arch/x86/kernel/cpu/mcheck/p5.c create mode 100644 arch/x86/kernel/cpu/mcheck/therm_throt.c create mode 100644 arch/x86/kernel/cpu/mcheck/threshold.c create mode 100644 arch/x86/kernel/cpu/mcheck/winchip.c create mode 100644 arch/x86/kernel/cpu/mkcapflags.pl create mode 100644 arch/x86/kernel/cpu/mshyperv.c create mode 100644 arch/x86/kernel/cpu/mtrr/Makefile create mode 100644 arch/x86/kernel/cpu/mtrr/amd.c create mode 100644 arch/x86/kernel/cpu/mtrr/centaur.c create mode 100644 arch/x86/kernel/cpu/mtrr/cleanup.c create mode 100644 arch/x86/kernel/cpu/mtrr/cyrix.c create mode 100644 arch/x86/kernel/cpu/mtrr/generic.c create mode 100644 arch/x86/kernel/cpu/mtrr/if.c create mode 100644 arch/x86/kernel/cpu/mtrr/main.c create mode 100644 arch/x86/kernel/cpu/mtrr/mtrr.h create mode 100644 arch/x86/kernel/cpu/perf_event.c create mode 100644 arch/x86/kernel/cpu/perf_event.h create mode 100644 arch/x86/kernel/cpu/perf_event_amd.c create mode 100644 arch/x86/kernel/cpu/perf_event_amd_ibs.c create mode 100644 arch/x86/kernel/cpu/perf_event_intel.c create mode 100644 arch/x86/kernel/cpu/perf_event_intel_ds.c create mode 100644 arch/x86/kernel/cpu/perf_event_intel_lbr.c create mode 100644 arch/x86/kernel/cpu/perf_event_p4.c create mode 100644 arch/x86/kernel/cpu/perf_event_p6.c create mode 100644 arch/x86/kernel/cpu/perfctr-watchdog.c create mode 100644 arch/x86/kernel/cpu/powerflags.c create mode 100644 arch/x86/kernel/cpu/proc.c create mode 100644 arch/x86/kernel/cpu/rdrand.c create mode 100644 arch/x86/kernel/cpu/scattered.c create mode 100644 arch/x86/kernel/cpu/sched.c create mode 100644 arch/x86/kernel/cpu/topology.c create mode 100644 arch/x86/kernel/cpu/transmeta.c create mode 100644 arch/x86/kernel/cpu/umc.c create mode 100644 arch/x86/kernel/cpu/vmware.c create mode 100644 arch/x86/kernel/cpuid.c create mode 100644 arch/x86/kernel/crash.c create mode 100644 arch/x86/kernel/crash_dump_32.c create mode 100644 arch/x86/kernel/crash_dump_64.c create mode 100644 arch/x86/kernel/devicetree.c create mode 100644 arch/x86/kernel/doublefault_32.c create mode 100644 arch/x86/kernel/dumpstack.c create mode 100644 arch/x86/kernel/dumpstack_32.c create mode 100644 arch/x86/kernel/dumpstack_64.c create mode 100644 arch/x86/kernel/e820.c create mode 100644 arch/x86/kernel/early-quirks.c create mode 100644 arch/x86/kernel/early_printk.c create mode 100644 arch/x86/kernel/entry_32.S create mode 100644 arch/x86/kernel/entry_64.S create mode 100644 arch/x86/kernel/ftrace.c create mode 100644 arch/x86/kernel/head.c create mode 100644 arch/x86/kernel/head32.c create mode 100644 arch/x86/kernel/head64.c create mode 100644 arch/x86/kernel/head_32.S create mode 100644 arch/x86/kernel/head_64.S create mode 100644 
arch/x86/kernel/hpet.c create mode 100644 arch/x86/kernel/hw_breakpoint.c create mode 100644 arch/x86/kernel/i386_ksyms_32.c create mode 100644 arch/x86/kernel/i387.c create mode 100644 arch/x86/kernel/i8237.c create mode 100644 arch/x86/kernel/i8253.c create mode 100644 arch/x86/kernel/i8259.c create mode 100644 arch/x86/kernel/init_task.c create mode 100644 arch/x86/kernel/io_delay.c create mode 100644 arch/x86/kernel/ioport.c create mode 100644 arch/x86/kernel/irq.c create mode 100644 arch/x86/kernel/irq_32.c create mode 100644 arch/x86/kernel/irq_64.c create mode 100644 arch/x86/kernel/irq_work.c create mode 100644 arch/x86/kernel/irqinit.c create mode 100644 arch/x86/kernel/jump_label.c create mode 100644 arch/x86/kernel/kdebugfs.c create mode 100644 arch/x86/kernel/kgdb.c create mode 100644 arch/x86/kernel/kprobes-common.h create mode 100644 arch/x86/kernel/kprobes-opt.c create mode 100644 arch/x86/kernel/kprobes.c create mode 100644 arch/x86/kernel/kvm.c create mode 100644 arch/x86/kernel/kvmclock.c create mode 100644 arch/x86/kernel/ldt.c create mode 100644 arch/x86/kernel/machine_kexec_32.c create mode 100644 arch/x86/kernel/machine_kexec_64.c create mode 100644 arch/x86/kernel/mca_32.c create mode 100644 arch/x86/kernel/microcode_amd.c create mode 100644 arch/x86/kernel/microcode_core.c create mode 100644 arch/x86/kernel/microcode_intel.c create mode 100644 arch/x86/kernel/mmconf-fam10h_64.c create mode 100644 arch/x86/kernel/module.c create mode 100644 arch/x86/kernel/mpparse.c create mode 100644 arch/x86/kernel/msr.c create mode 100644 arch/x86/kernel/nmi.c create mode 100644 arch/x86/kernel/nmi_selftest.c create mode 100644 arch/x86/kernel/paravirt-spinlocks.c create mode 100644 arch/x86/kernel/paravirt.c create mode 100644 arch/x86/kernel/paravirt_patch_32.c create mode 100644 arch/x86/kernel/paravirt_patch_64.c create mode 100644 arch/x86/kernel/pci-calgary_64.c create mode 100644 arch/x86/kernel/pci-dma.c create mode 100644 arch/x86/kernel/pci-iommu_table.c create mode 100644 arch/x86/kernel/pci-nommu.c create mode 100644 arch/x86/kernel/pci-swiotlb.c create mode 100644 arch/x86/kernel/pcspeaker.c create mode 100644 arch/x86/kernel/probe_roms.c create mode 100644 arch/x86/kernel/process.c create mode 100644 arch/x86/kernel/process_32.c create mode 100644 arch/x86/kernel/process_64.c create mode 100644 arch/x86/kernel/ptrace.c create mode 100644 arch/x86/kernel/pvclock.c create mode 100644 arch/x86/kernel/quirks.c create mode 100644 arch/x86/kernel/reboot.c create mode 100644 arch/x86/kernel/reboot_32.S create mode 100644 arch/x86/kernel/reboot_fixups_32.c create mode 100644 arch/x86/kernel/relocate_kernel_32.S create mode 100644 arch/x86/kernel/relocate_kernel_64.S create mode 100644 arch/x86/kernel/resource.c create mode 100644 arch/x86/kernel/rtc.c create mode 100644 arch/x86/kernel/setup.c create mode 100644 arch/x86/kernel/setup_percpu.c create mode 100644 arch/x86/kernel/signal.c create mode 100644 arch/x86/kernel/smp.c create mode 100644 arch/x86/kernel/smpboot.c create mode 100644 arch/x86/kernel/stacktrace.c create mode 100644 arch/x86/kernel/step.c create mode 100644 arch/x86/kernel/sys_i386_32.c create mode 100644 arch/x86/kernel/sys_x86_64.c create mode 100644 arch/x86/kernel/syscall_32.c create mode 100644 arch/x86/kernel/syscall_64.c create mode 100644 arch/x86/kernel/tboot.c create mode 100644 arch/x86/kernel/tce_64.c create mode 100644 arch/x86/kernel/test_nx.c create mode 100644 arch/x86/kernel/test_rodata.c create mode 100644 arch/x86/kernel/time.c create 
mode 100644 arch/x86/kernel/tls.c create mode 100644 arch/x86/kernel/tls.h create mode 100644 arch/x86/kernel/topology.c create mode 100644 arch/x86/kernel/trampoline.c create mode 100644 arch/x86/kernel/trampoline_32.S create mode 100644 arch/x86/kernel/trampoline_64.S create mode 100644 arch/x86/kernel/traps.c create mode 100644 arch/x86/kernel/tsc.c create mode 100644 arch/x86/kernel/tsc_sync.c create mode 100644 arch/x86/kernel/verify_cpu.S create mode 100644 arch/x86/kernel/vm86_32.c create mode 100644 arch/x86/kernel/vmlinux.lds.S create mode 100644 arch/x86/kernel/vsmp_64.c create mode 100644 arch/x86/kernel/vsyscall_64.c create mode 100644 arch/x86/kernel/vsyscall_emu_64.S create mode 100644 arch/x86/kernel/vsyscall_trace.h create mode 100644 arch/x86/kernel/x8664_ksyms_64.c create mode 100644 arch/x86/kernel/x86_init.c create mode 100644 arch/x86/kernel/xsave.c create mode 100644 arch/x86/kvm/Kconfig create mode 100644 arch/x86/kvm/Makefile create mode 100644 arch/x86/kvm/cpuid.c create mode 100644 arch/x86/kvm/cpuid.h create mode 100644 arch/x86/kvm/emulate.c create mode 100644 arch/x86/kvm/i8254.c create mode 100644 arch/x86/kvm/i8254.h create mode 100644 arch/x86/kvm/i8259.c create mode 100644 arch/x86/kvm/irq.c create mode 100644 arch/x86/kvm/irq.h create mode 100644 arch/x86/kvm/kvm_cache_regs.h create mode 100644 arch/x86/kvm/kvm_timer.h create mode 100644 arch/x86/kvm/lapic.c create mode 100644 arch/x86/kvm/lapic.h create mode 100644 arch/x86/kvm/mmu.c create mode 100644 arch/x86/kvm/mmu.h create mode 100644 arch/x86/kvm/mmu_audit.c create mode 100644 arch/x86/kvm/mmutrace.h create mode 100644 arch/x86/kvm/paging_tmpl.h create mode 100644 arch/x86/kvm/pmu.c create mode 100644 arch/x86/kvm/svm.c create mode 100644 arch/x86/kvm/timer.c create mode 100644 arch/x86/kvm/trace.h create mode 100644 arch/x86/kvm/tss.h create mode 100644 arch/x86/kvm/vmx.c create mode 100644 arch/x86/kvm/x86.c create mode 100644 arch/x86/kvm/x86.h create mode 100644 arch/x86/lguest/Kconfig create mode 100644 arch/x86/lguest/Makefile create mode 100644 arch/x86/lguest/boot.c create mode 100644 arch/x86/lguest/i386_head.S create mode 100644 arch/x86/lib/Makefile create mode 100644 arch/x86/lib/atomic64_32.c create mode 100644 arch/x86/lib/atomic64_386_32.S create mode 100644 arch/x86/lib/atomic64_cx8_32.S create mode 100644 arch/x86/lib/cache-smp.c create mode 100644 arch/x86/lib/checksum_32.S create mode 100644 arch/x86/lib/clear_page_64.S create mode 100644 arch/x86/lib/cmpxchg.c create mode 100644 arch/x86/lib/cmpxchg16b_emu.S create mode 100644 arch/x86/lib/cmpxchg8b_emu.S create mode 100644 arch/x86/lib/copy_page_64.S create mode 100644 arch/x86/lib/copy_user_64.S create mode 100644 arch/x86/lib/copy_user_nocache_64.S create mode 100644 arch/x86/lib/csum-copy_64.S create mode 100644 arch/x86/lib/csum-partial_64.c create mode 100644 arch/x86/lib/csum-wrappers_64.c create mode 100644 arch/x86/lib/delay.c create mode 100644 arch/x86/lib/getuser.S create mode 100644 arch/x86/lib/inat.c create mode 100644 arch/x86/lib/insn.c create mode 100644 arch/x86/lib/iomap_copy_64.S create mode 100644 arch/x86/lib/memcpy_32.c create mode 100644 arch/x86/lib/memcpy_64.S create mode 100644 arch/x86/lib/memmove_64.S create mode 100644 arch/x86/lib/memset_64.S create mode 100644 arch/x86/lib/mmx_32.c create mode 100644 arch/x86/lib/msr-reg-export.c create mode 100644 arch/x86/lib/msr-reg.S create mode 100644 arch/x86/lib/msr-smp.c create mode 100644 arch/x86/lib/msr.c create mode 100644 arch/x86/lib/putuser.S create 
mode 100644 arch/x86/lib/rwlock.S create mode 100644 arch/x86/lib/rwsem.S create mode 100644 arch/x86/lib/string_32.c create mode 100644 arch/x86/lib/strstr_32.c create mode 100644 arch/x86/lib/thunk_32.S create mode 100644 arch/x86/lib/thunk_64.S create mode 100644 arch/x86/lib/usercopy.c create mode 100644 arch/x86/lib/usercopy_32.c create mode 100644 arch/x86/lib/usercopy_64.c create mode 100644 arch/x86/lib/x86-opcode-map.txt create mode 100644 arch/x86/math-emu/Makefile create mode 100644 arch/x86/math-emu/README create mode 100644 arch/x86/math-emu/control_w.h create mode 100644 arch/x86/math-emu/div_Xsig.S create mode 100644 arch/x86/math-emu/div_small.S create mode 100644 arch/x86/math-emu/errors.c create mode 100644 arch/x86/math-emu/exception.h create mode 100644 arch/x86/math-emu/fpu_arith.c create mode 100644 arch/x86/math-emu/fpu_asm.h create mode 100644 arch/x86/math-emu/fpu_aux.c create mode 100644 arch/x86/math-emu/fpu_emu.h create mode 100644 arch/x86/math-emu/fpu_entry.c create mode 100644 arch/x86/math-emu/fpu_etc.c create mode 100644 arch/x86/math-emu/fpu_proto.h create mode 100644 arch/x86/math-emu/fpu_system.h create mode 100644 arch/x86/math-emu/fpu_tags.c create mode 100644 arch/x86/math-emu/fpu_trig.c create mode 100644 arch/x86/math-emu/get_address.c create mode 100644 arch/x86/math-emu/load_store.c create mode 100644 arch/x86/math-emu/mul_Xsig.S create mode 100644 arch/x86/math-emu/poly.h create mode 100644 arch/x86/math-emu/poly_2xm1.c create mode 100644 arch/x86/math-emu/poly_atan.c create mode 100644 arch/x86/math-emu/poly_l2.c create mode 100644 arch/x86/math-emu/poly_sin.c create mode 100644 arch/x86/math-emu/poly_tan.c create mode 100644 arch/x86/math-emu/polynom_Xsig.S create mode 100644 arch/x86/math-emu/reg_add_sub.c create mode 100644 arch/x86/math-emu/reg_compare.c create mode 100644 arch/x86/math-emu/reg_constant.c create mode 100644 arch/x86/math-emu/reg_constant.h create mode 100644 arch/x86/math-emu/reg_convert.c create mode 100644 arch/x86/math-emu/reg_divide.c create mode 100644 arch/x86/math-emu/reg_ld_str.c create mode 100644 arch/x86/math-emu/reg_mul.c create mode 100644 arch/x86/math-emu/reg_norm.S create mode 100644 arch/x86/math-emu/reg_round.S create mode 100644 arch/x86/math-emu/reg_u_add.S create mode 100644 arch/x86/math-emu/reg_u_div.S create mode 100644 arch/x86/math-emu/reg_u_mul.S create mode 100644 arch/x86/math-emu/reg_u_sub.S create mode 100644 arch/x86/math-emu/round_Xsig.S create mode 100644 arch/x86/math-emu/shr_Xsig.S create mode 100644 arch/x86/math-emu/status_w.h create mode 100644 arch/x86/math-emu/version.h create mode 100644 arch/x86/math-emu/wm_shrx.S create mode 100644 arch/x86/math-emu/wm_sqrt.S create mode 100644 arch/x86/mm/Makefile create mode 100644 arch/x86/mm/amdtopology.c create mode 100644 arch/x86/mm/dump_pagetables.c create mode 100644 arch/x86/mm/extable.c create mode 100644 arch/x86/mm/fault.c create mode 100644 arch/x86/mm/gup.c create mode 100644 arch/x86/mm/highmem_32.c create mode 100644 arch/x86/mm/hugetlbpage.c create mode 100644 arch/x86/mm/init.c create mode 100644 arch/x86/mm/init_32.c create mode 100644 arch/x86/mm/init_64.c create mode 100644 arch/x86/mm/iomap_32.c create mode 100644 arch/x86/mm/ioremap.c create mode 100644 arch/x86/mm/kmemcheck/Makefile create mode 100644 arch/x86/mm/kmemcheck/error.c create mode 100644 arch/x86/mm/kmemcheck/error.h create mode 100644 arch/x86/mm/kmemcheck/kmemcheck.c create mode 100644 arch/x86/mm/kmemcheck/opcode.c create mode 100644 
arch/x86/mm/kmemcheck/opcode.h create mode 100644 arch/x86/mm/kmemcheck/pte.c create mode 100644 arch/x86/mm/kmemcheck/pte.h create mode 100644 arch/x86/mm/kmemcheck/selftest.c create mode 100644 arch/x86/mm/kmemcheck/selftest.h create mode 100644 arch/x86/mm/kmemcheck/shadow.c create mode 100644 arch/x86/mm/kmemcheck/shadow.h create mode 100644 arch/x86/mm/kmmio.c create mode 100644 arch/x86/mm/memtest.c create mode 100644 arch/x86/mm/mmap.c create mode 100644 arch/x86/mm/mmio-mod.c create mode 100644 arch/x86/mm/numa.c create mode 100644 arch/x86/mm/numa_32.c create mode 100644 arch/x86/mm/numa_64.c create mode 100644 arch/x86/mm/numa_emulation.c create mode 100644 arch/x86/mm/numa_internal.h create mode 100644 arch/x86/mm/pageattr-test.c create mode 100644 arch/x86/mm/pageattr.c create mode 100644 arch/x86/mm/pat.c create mode 100644 arch/x86/mm/pat_internal.h create mode 100644 arch/x86/mm/pat_rbtree.c create mode 100644 arch/x86/mm/pf_in.c create mode 100644 arch/x86/mm/pf_in.h create mode 100644 arch/x86/mm/pgtable.c create mode 100644 arch/x86/mm/pgtable_32.c create mode 100644 arch/x86/mm/physaddr.c create mode 100644 arch/x86/mm/physaddr.h create mode 100644 arch/x86/mm/setup_nx.c create mode 100644 arch/x86/mm/srat.c create mode 100644 arch/x86/mm/testmmiotrace.c create mode 100644 arch/x86/mm/tlb.c create mode 100644 arch/x86/net/Makefile create mode 100644 arch/x86/net/bpf_jit.S create mode 100644 arch/x86/net/bpf_jit_comp.c create mode 100644 arch/x86/oprofile/Makefile create mode 100644 arch/x86/oprofile/backtrace.c create mode 100644 arch/x86/oprofile/init.c create mode 100644 arch/x86/oprofile/nmi_int.c create mode 100644 arch/x86/oprofile/op_counter.h create mode 100644 arch/x86/oprofile/op_model_amd.c create mode 100644 arch/x86/oprofile/op_model_p4.c create mode 100644 arch/x86/oprofile/op_model_ppro.c create mode 100644 arch/x86/oprofile/op_x86_model.h create mode 100644 arch/x86/pci/Makefile create mode 100644 arch/x86/pci/acpi.c create mode 100644 arch/x86/pci/amd_bus.c create mode 100644 arch/x86/pci/broadcom_bus.c create mode 100644 arch/x86/pci/bus_numa.c create mode 100644 arch/x86/pci/bus_numa.h create mode 100644 arch/x86/pci/ce4100.c create mode 100644 arch/x86/pci/common.c create mode 100644 arch/x86/pci/direct.c create mode 100644 arch/x86/pci/early.c create mode 100644 arch/x86/pci/fixup.c create mode 100644 arch/x86/pci/i386.c create mode 100644 arch/x86/pci/init.c create mode 100644 arch/x86/pci/irq.c create mode 100644 arch/x86/pci/legacy.c create mode 100644 arch/x86/pci/mmconfig-shared.c create mode 100644 arch/x86/pci/mmconfig_32.c create mode 100644 arch/x86/pci/mmconfig_64.c create mode 100644 arch/x86/pci/mrst.c create mode 100644 arch/x86/pci/numaq_32.c create mode 100644 arch/x86/pci/olpc.c create mode 100644 arch/x86/pci/pcbios.c create mode 100644 arch/x86/pci/visws.c create mode 100644 arch/x86/pci/xen.c create mode 100644 arch/x86/platform/Makefile create mode 100644 arch/x86/platform/ce4100/Makefile create mode 100644 arch/x86/platform/ce4100/ce4100.c create mode 100644 arch/x86/platform/ce4100/falconfalls.dts create mode 100644 arch/x86/platform/efi/Makefile create mode 100644 arch/x86/platform/efi/efi.c create mode 100644 arch/x86/platform/efi/efi_32.c create mode 100644 arch/x86/platform/efi/efi_64.c create mode 100644 arch/x86/platform/efi/efi_stub_32.S create mode 100644 arch/x86/platform/efi/efi_stub_64.S create mode 100644 arch/x86/platform/geode/Makefile create mode 100644 arch/x86/platform/geode/alix.c create mode 100644 
arch/x86/platform/geode/geos.c create mode 100644 arch/x86/platform/geode/net5501.c create mode 100644 arch/x86/platform/iris/Makefile create mode 100644 arch/x86/platform/iris/iris.c create mode 100644 arch/x86/platform/mrst/Makefile create mode 100644 arch/x86/platform/mrst/early_printk_mrst.c create mode 100644 arch/x86/platform/mrst/mrst.c create mode 100644 arch/x86/platform/mrst/vrtc.c create mode 100644 arch/x86/platform/olpc/Makefile create mode 100644 arch/x86/platform/olpc/olpc-xo1-pm.c create mode 100644 arch/x86/platform/olpc/olpc-xo1-rtc.c create mode 100644 arch/x86/platform/olpc/olpc-xo1-sci.c create mode 100644 arch/x86/platform/olpc/olpc-xo15-sci.c create mode 100644 arch/x86/platform/olpc/olpc.c create mode 100644 arch/x86/platform/olpc/olpc_dt.c create mode 100644 arch/x86/platform/olpc/olpc_ofw.c create mode 100644 arch/x86/platform/olpc/xo1-wakeup.S create mode 100644 arch/x86/platform/scx200/Makefile create mode 100644 arch/x86/platform/scx200/scx200_32.c create mode 100644 arch/x86/platform/sfi/Makefile create mode 100644 arch/x86/platform/sfi/sfi.c create mode 100644 arch/x86/platform/uv/Makefile create mode 100644 arch/x86/platform/uv/bios_uv.c create mode 100644 arch/x86/platform/uv/tlb_uv.c create mode 100644 arch/x86/platform/uv/uv_irq.c create mode 100644 arch/x86/platform/uv/uv_sysfs.c create mode 100644 arch/x86/platform/uv/uv_time.c create mode 100644 arch/x86/platform/visws/Makefile create mode 100644 arch/x86/platform/visws/visws_quirks.c create mode 100644 arch/x86/power/Makefile create mode 100644 arch/x86/power/cpu.c create mode 100644 arch/x86/power/hibernate_32.c create mode 100644 arch/x86/power/hibernate_64.c create mode 100644 arch/x86/power/hibernate_asm_32.S create mode 100644 arch/x86/power/hibernate_asm_64.S create mode 100644 arch/x86/syscalls/Makefile create mode 100644 arch/x86/syscalls/syscall_32.tbl create mode 100644 arch/x86/syscalls/syscall_64.tbl create mode 100644 arch/x86/syscalls/syscallhdr.sh create mode 100644 arch/x86/syscalls/syscalltbl.sh create mode 100644 arch/x86/tools/Makefile create mode 100644 arch/x86/tools/chkobjdump.awk create mode 100644 arch/x86/tools/distill.awk create mode 100644 arch/x86/tools/gen-insn-attr-x86.awk create mode 100644 arch/x86/tools/insn_sanity.c create mode 100644 arch/x86/tools/relocs.c create mode 100644 arch/x86/tools/test_get_len.c create mode 100644 arch/x86/um/Kconfig create mode 100644 arch/x86/um/Makefile create mode 100644 arch/x86/um/asm/apic.h create mode 100644 arch/x86/um/asm/arch_hweight.h create mode 100644 arch/x86/um/asm/archparam.h create mode 100644 arch/x86/um/asm/barrier.h create mode 100644 arch/x86/um/asm/checksum.h create mode 100644 arch/x86/um/asm/checksum_32.h create mode 100644 arch/x86/um/asm/checksum_64.h create mode 100644 arch/x86/um/asm/desc.h create mode 100644 arch/x86/um/asm/elf.h create mode 100644 arch/x86/um/asm/irq_vectors.h create mode 100644 arch/x86/um/asm/mm_context.h create mode 100644 arch/x86/um/asm/module.h create mode 100644 arch/x86/um/asm/processor.h create mode 100644 arch/x86/um/asm/processor_32.h create mode 100644 arch/x86/um/asm/processor_64.h create mode 100644 arch/x86/um/asm/ptrace.h create mode 100644 arch/x86/um/asm/ptrace_32.h create mode 100644 arch/x86/um/asm/ptrace_64.h create mode 100644 arch/x86/um/asm/required-features.h create mode 100644 arch/x86/um/asm/segment.h create mode 100644 arch/x86/um/asm/vm-flags.h create mode 100644 arch/x86/um/bug.c create mode 100644 arch/x86/um/bugs_32.c create mode 100644 arch/x86/um/bugs_64.c 
create mode 100644 arch/x86/um/checksum_32.S create mode 100644 arch/x86/um/delay.c create mode 100644 arch/x86/um/elfcore.c create mode 100644 arch/x86/um/fault.c create mode 100644 arch/x86/um/ksyms.c create mode 100644 arch/x86/um/ldt.c create mode 100644 arch/x86/um/mem_32.c create mode 100644 arch/x86/um/mem_64.c create mode 100644 arch/x86/um/os-Linux/Makefile create mode 100644 arch/x86/um/os-Linux/mcontext.c create mode 100644 arch/x86/um/os-Linux/prctl.c create mode 100644 arch/x86/um/os-Linux/registers.c create mode 100644 arch/x86/um/os-Linux/task_size.c create mode 100644 arch/x86/um/os-Linux/tls.c create mode 100644 arch/x86/um/ptrace_32.c create mode 100644 arch/x86/um/ptrace_64.c create mode 100644 arch/x86/um/ptrace_user.c create mode 100644 arch/x86/um/setjmp_32.S create mode 100644 arch/x86/um/setjmp_64.S create mode 100644 arch/x86/um/shared/sysdep/archsetjmp.h create mode 100644 arch/x86/um/shared/sysdep/archsetjmp_32.h create mode 100644 arch/x86/um/shared/sysdep/archsetjmp_64.h create mode 100644 arch/x86/um/shared/sysdep/faultinfo.h create mode 100644 arch/x86/um/shared/sysdep/faultinfo_32.h create mode 100644 arch/x86/um/shared/sysdep/faultinfo_64.h create mode 100644 arch/x86/um/shared/sysdep/kernel-offsets.h create mode 100644 arch/x86/um/shared/sysdep/mcontext.h create mode 100644 arch/x86/um/shared/sysdep/ptrace.h create mode 100644 arch/x86/um/shared/sysdep/ptrace_32.h create mode 100644 arch/x86/um/shared/sysdep/ptrace_64.h create mode 100644 arch/x86/um/shared/sysdep/ptrace_user.h create mode 100644 arch/x86/um/shared/sysdep/skas_ptrace.h create mode 100644 arch/x86/um/shared/sysdep/stub.h create mode 100644 arch/x86/um/shared/sysdep/stub_32.h create mode 100644 arch/x86/um/shared/sysdep/stub_64.h create mode 100644 arch/x86/um/shared/sysdep/syscalls.h create mode 100644 arch/x86/um/shared/sysdep/syscalls_32.h create mode 100644 arch/x86/um/shared/sysdep/syscalls_64.h create mode 100644 arch/x86/um/shared/sysdep/tls.h create mode 100644 arch/x86/um/signal.c create mode 100644 arch/x86/um/stub_32.S create mode 100644 arch/x86/um/stub_64.S create mode 100644 arch/x86/um/stub_segv.c create mode 100644 arch/x86/um/sys_call_table_32.c create mode 100644 arch/x86/um/sys_call_table_64.c create mode 100644 arch/x86/um/syscalls_32.c create mode 100644 arch/x86/um/syscalls_64.c create mode 100644 arch/x86/um/sysrq_32.c create mode 100644 arch/x86/um/sysrq_64.c create mode 100644 arch/x86/um/tls_32.c create mode 100644 arch/x86/um/tls_64.c create mode 100644 arch/x86/um/user-offsets.c create mode 100644 arch/x86/um/vdso/Makefile create mode 100644 arch/x86/um/vdso/checkundef.sh create mode 100644 arch/x86/um/vdso/um_vdso.c create mode 100644 arch/x86/um/vdso/vdso-layout.lds.S create mode 100644 arch/x86/um/vdso/vdso-note.S create mode 100644 arch/x86/um/vdso/vdso.S create mode 100644 arch/x86/um/vdso/vdso.lds.S create mode 100644 arch/x86/um/vdso/vma.c create mode 100644 arch/x86/vdso/Makefile create mode 100755 arch/x86/vdso/checkundef.sh create mode 100644 arch/x86/vdso/vclock_gettime.c create mode 100644 arch/x86/vdso/vdso-layout.lds.S create mode 100644 arch/x86/vdso/vdso-note.S create mode 100644 arch/x86/vdso/vdso.S create mode 100644 arch/x86/vdso/vdso.lds.S create mode 100644 arch/x86/vdso/vdso32-setup.c create mode 100644 arch/x86/vdso/vdso32.S create mode 100644 arch/x86/vdso/vdso32/int80.S create mode 100644 arch/x86/vdso/vdso32/note.S create mode 100644 arch/x86/vdso/vdso32/sigreturn.S create mode 100644 arch/x86/vdso/vdso32/syscall.S create mode 100644 
arch/x86/vdso/vdso32/sysenter.S
 create mode 100644 arch/x86/vdso/vdso32/vdso32.lds.S
 create mode 100644 arch/x86/vdso/vdsox32.S
 create mode 100644 arch/x86/vdso/vdsox32.lds.S
 create mode 100644 arch/x86/vdso/vgetcpu.c
 create mode 100644 arch/x86/vdso/vma.c
 create mode 100644 arch/x86/video/Makefile
 create mode 100644 arch/x86/video/fbdev.c
 create mode 100644 arch/x86/xen/Kconfig
 create mode 100644 arch/x86/xen/Makefile
 create mode 100644 arch/x86/xen/debugfs.c
 create mode 100644 arch/x86/xen/debugfs.h
 create mode 100644 arch/x86/xen/enlighten.c
 create mode 100644 arch/x86/xen/grant-table.c
 create mode 100644 arch/x86/xen/irq.c
 create mode 100644 arch/x86/xen/mmu.c
 create mode 100644 arch/x86/xen/mmu.h
 create mode 100644 arch/x86/xen/multicalls.c
 create mode 100644 arch/x86/xen/multicalls.h
 create mode 100644 arch/x86/xen/p2m.c
 create mode 100644 arch/x86/xen/pci-swiotlb-xen.c
 create mode 100644 arch/x86/xen/platform-pci-unplug.c
 create mode 100644 arch/x86/xen/setup.c
 create mode 100644 arch/x86/xen/smp.c
 create mode 100644 arch/x86/xen/spinlock.c
 create mode 100644 arch/x86/xen/suspend.c
 create mode 100644 arch/x86/xen/time.c
 create mode 100644 arch/x86/xen/trace.c
 create mode 100644 arch/x86/xen/vdso.h
 create mode 100644 arch/x86/xen/vga.c
 create mode 100644 arch/x86/xen/xen-asm.S
 create mode 100644 arch/x86/xen/xen-asm.h
 create mode 100644 arch/x86/xen/xen-asm_32.S
 create mode 100644 arch/x86/xen/xen-asm_64.S
 create mode 100644 arch/x86/xen/xen-head.S
 create mode 100644 arch/x86/xen/xen-ops.h

diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
new file mode 100644
index 00000000..0e9dec6c
--- /dev/null
+++ b/arch/x86/Kbuild
@@ -0,0 +1,18 @@
+
+obj-$(CONFIG_KVM) += kvm/
+
+# Xen paravirtualization support
+obj-$(CONFIG_XEN) += xen/
+
+# lguest paravirtualization support
+obj-$(CONFIG_LGUEST_GUEST) += lguest/
+
+obj-y += kernel/
+obj-y += mm/
+
+obj-y += crypto/
+obj-y += vdso/
+obj-$(CONFIG_IA32_EMULATION) += ia32/
+
+obj-y += platform/
+obj-y += net/

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
new file mode 100644
index 00000000..c9866b0b
--- /dev/null
+++ b/arch/x86/Kconfig
@@ -0,0 +1,2234 @@
+# Select 32 or 64 bit
+config 64BIT
+	bool "64-bit kernel" if ARCH = "x86"
+	default ARCH = "x86_64"
+	---help---
+	  Say yes to build a 64-bit kernel - formerly known as x86_64
+	  Say no to build a 32-bit kernel - formerly known as i386
+
+config X86_32
+	def_bool !64BIT
+	select CLKSRC_I8253
+
+config X86_64
+	def_bool 64BIT
+
+### Arch settings
+config X86
+	def_bool y
+	select HAVE_AOUT if X86_32
+	select HAVE_UNSTABLE_SCHED_CLOCK
+	select HAVE_IDE
+	select HAVE_OPROFILE
+	select HAVE_PCSPKR_PLATFORM
+	select HAVE_PERF_EVENTS
+	select HAVE_IRQ_WORK
+	select HAVE_IOREMAP_PROT
+	select HAVE_KPROBES
+	select HAVE_MEMBLOCK
+	select HAVE_MEMBLOCK_NODE_MAP
+	select ARCH_DISCARD_MEMBLOCK
+	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select ARCH_WANT_FRAME_POINTERS
+	select HAVE_DMA_ATTRS
+	select HAVE_KRETPROBES
+	select HAVE_OPTPROBES
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_C_RECORDMCOUNT
+	select HAVE_DYNAMIC_FTRACE
+	select HAVE_FUNCTION_TRACER
+	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FUNCTION_GRAPH_FP_TEST
+	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+	select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
+	select HAVE_SYSCALL_TRACEPOINTS
+	select HAVE_KVM
+	select HAVE_ARCH_KGDB
+	select HAVE_ARCH_TRACEHOOK
+	select HAVE_GENERIC_DMA_COHERENT if X86_32
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS
+	select USER_STACKTRACE_SUPPORT
+	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_DMA_API_DEBUG
+	select HAVE_KERNEL_GZIP
+	select HAVE_KERNEL_BZIP2
+	select HAVE_KERNEL_LZMA
+	select HAVE_KERNEL_XZ
+	select HAVE_KERNEL_LZO
+	select HAVE_HW_BREAKPOINT
+	select HAVE_MIXED_BREAKPOINTS_REGS
+	select PERF_EVENTS
+	select HAVE_PERF_EVENTS_NMI
+	select ANON_INODES
+	select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
+	select HAVE_CMPXCHG_LOCAL if !M386
+	select HAVE_CMPXCHG_DOUBLE
+	select HAVE_ARCH_KMEMCHECK
+	select HAVE_USER_RETURN_NOTIFIER
+	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
+	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_TEXT_POKE_SMP
+	select HAVE_GENERIC_HARDIRQS
+	select SPARSE_IRQ
+	select GENERIC_FIND_FIRST_BIT
+	select GENERIC_IRQ_PROBE
+	select GENERIC_PENDING_IRQ if SMP
+	select GENERIC_IRQ_SHOW
+	select GENERIC_CLOCKEVENTS_MIN_ADJUST
+	select IRQ_FORCED_THREADING
+	select USE_GENERIC_SMP_HELPERS if SMP
+	select HAVE_BPF_JIT if (X86_64 && NET)
+	select CLKEVT_I8253
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select GENERIC_IOMAP
+	select DCACHE_WORD_ACCESS
+
+config INSTRUCTION_DECODER
+	def_bool (KPROBES || PERF_EVENTS)
+
+config OUTPUT_FORMAT
+	string
+	default "elf32-i386" if X86_32
+	default "elf64-x86-64" if X86_64
+
+config ARCH_DEFCONFIG
+	string
+	default "arch/x86/configs/i386_defconfig" if X86_32
+	default "arch/x86/configs/x86_64_defconfig" if X86_64
+
+config GENERIC_CMOS_UPDATE
+	def_bool y
+
+config CLOCKSOURCE_WATCHDOG
+	def_bool y
+
+config GENERIC_CLOCKEVENTS
+	def_bool y
+
+config ARCH_CLOCKSOURCE_DATA
+	def_bool y
+	depends on X86_64
+
+config GENERIC_CLOCKEVENTS_BROADCAST
+	def_bool y
+	depends on X86_64 || (X86_32 && X86_LOCAL_APIC)
+
+config LOCKDEP_SUPPORT
+	def_bool y
+
+config STACKTRACE_SUPPORT
+	def_bool y
+
+config HAVE_LATENCYTOP_SUPPORT
+	def_bool y
+
+config MMU
+	def_bool y
+
+config SBUS
+	bool
+
+config NEED_DMA_MAP_STATE
+	def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG)
+
+config NEED_SG_DMA_LENGTH
+	def_bool y
+
+config GENERIC_ISA_DMA
+	def_bool ISA_DMA_API
+
+config GENERIC_BUG
+	def_bool y
+	depends on BUG
+	select GENERIC_BUG_RELATIVE_POINTERS if X86_64
+
+config GENERIC_BUG_RELATIVE_POINTERS
+	bool
+
+config GENERIC_HWEIGHT
+	def_bool y
+
+config GENERIC_GPIO
+	bool
+
+config ARCH_MAY_HAVE_PC_FDC
+	def_bool ISA_DMA_API
+
+config RWSEM_GENERIC_SPINLOCK
+	def_bool !X86_XADD
+
+config RWSEM_XCHGADD_ALGORITHM
+	def_bool X86_XADD
+
+config ARCH_HAS_CPU_IDLE_WAIT
+	def_bool y
+
+config GENERIC_CALIBRATE_DELAY
+	def_bool y
+
+config GENERIC_TIME_VSYSCALL
+	bool
+	default X86_64
+
+config ARCH_HAS_CPU_RELAX
+	def_bool y
+
+config ARCH_HAS_DEFAULT_IDLE
+	def_bool y
+
+config ARCH_HAS_CACHE_LINE_SIZE
+	def_bool y
+
+config ARCH_HAS_CPU_AUTOPROBE
+	def_bool y
+
+config HAVE_SETUP_PER_CPU_AREA
+	def_bool y
+
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+	def_bool y
+
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+	def_bool y
+
+config ARCH_HIBERNATION_POSSIBLE
+	def_bool y
+
+config ARCH_SUSPEND_POSSIBLE
+	def_bool y
+
+config ZONE_DMA32
+	bool
+	default X86_64
+
+config AUDIT_ARCH
+	bool
+	default X86_64
+
+config ARCH_SUPPORTS_OPTIMIZED_INLINING
+	def_bool y
+
+config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	def_bool y
+
+config HAVE_INTEL_TXT
+	def_bool y
+	depends on EXPERIMENTAL && INTEL_IOMMU && ACPI
+
+config X86_32_SMP
+	def_bool y
+	depends on X86_32 && SMP
+
+config X86_64_SMP
+	def_bool y
+	depends on X86_64 && SMP
+
+config X86_HT
+	def_bool y
+	depends on SMP
+
+config X86_32_LAZY_GS
+	def_bool y
+	depends on X86_32 && !CC_STACKPROTECTOR
+
+config ARCH_HWEIGHT_CFLAGS
+	string
+	default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
+	default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
+
+config KTIME_SCALAR
+	def_bool X86_32
+
+config ARCH_CPU_PROBE_RELEASE
+	def_bool y
+	depends on HOTPLUG_CPU
+
+source "init/Kconfig"
+source "kernel/Kconfig.freezer"
+
+menu "Processor type and features"
+
+config ZONE_DMA
+	bool "DMA memory allocation support" if EXPERT
+	default y
+	help
+	  DMA memory allocation support allows devices with less than 32-bit
+	  addressing to allocate within the first 16MB of address space.
+	  Disable if no such devices will be used.
+
+	  If unsure, say Y.
+
+source "kernel/time/Kconfig"
+
+config SMP
+	bool "Symmetric multi-processing support"
+	---help---
+	  This enables support for systems with more than one CPU. If you have
+	  a system with only one CPU, like most personal computers, say N. If
+	  you have a system with more than one CPU, say Y.
+
+	  If you say N here, the kernel will run on single and multiprocessor
+	  machines, but will use only one CPU of a multiprocessor machine. If
+	  you say Y here, the kernel will run on many, but not all,
+	  singleprocessor machines. On a singleprocessor machine, the kernel
+	  will run faster if you say N here.
+
+	  Note that if you say Y here and choose architecture "586" or
+	  "Pentium" under "Processor family", the kernel will not work on 486
+	  architectures. Similarly, multiprocessor kernels for the "PPro"
+	  architecture may not work on all Pentium based boards.
+
+	  People using multiprocessor machines who say Y here should also say
+	  Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
+	  Management" code will be disabled if you say Y here.
+
+	  See also ,
+	  and the SMP-HOWTO available at
+	  .
+
+	  If you don't know what to do here, say N.
+
+config X86_X2APIC
+	bool "Support x2apic"
+	depends on X86_LOCAL_APIC && X86_64 && IRQ_REMAP
+	---help---
+	  This enables x2apic support on CPUs that have this feature.
+
+	  This allows 32-bit APIC IDs (so it can support very large systems),
+	  and accesses the local APIC via MSRs, not via MMIO.
+
+	  If you don't know what to do here, say N.
+
+config X86_MPPARSE
+	bool "Enable MPS table" if ACPI
+	default y
+	depends on X86_LOCAL_APIC
+	---help---
+	  For old SMP systems that do not have proper ACPI support. On newer
+	  systems (especially those with 64-bit CPUs) that do have ACPI
+	  support, the MADT and DSDT will override it.
+
+config X86_BIGSMP
+	bool "Support for big SMP systems with more than 8 CPUs"
+	depends on X86_32 && SMP
+	---help---
+	  This option is needed for systems that have more than 8 CPUs.
+
+if X86_32
+config X86_EXTENDED_PLATFORM
+	bool "Support for extended (non-PC) x86 platforms"
+	default y
+	---help---
+	  If you disable this option then the kernel will only support
+	  standard PC platforms. (which covers the vast majority of
+	  systems out there.)
+
+	  If you enable this option then you'll be able to select support
+	  for the following (non-PC) 32 bit x86 platforms:
+		AMD Elan
+		NUMAQ (IBM/Sequent)
+		RDC R-321x SoC
+		SGI 320/540 (Visual Workstation)
+		Summit/EXA (IBM x440)
+		Unisys ES7000 IA32 series
+		Moorestown MID devices
+
+	  If you have one of these systems, or if you want to build a
+	  generic distribution kernel, say Y here - otherwise say N.
+endif
+
+if X86_64
+config X86_EXTENDED_PLATFORM
+	bool "Support for extended (non-PC) x86 platforms"
+	default y
+	---help---
+	  If you disable this option then the kernel will only support
+	  standard PC platforms. (which covers the vast majority of
+	  systems out there.)
+
+	  If you enable this option then you'll be able to select support
+	  for the following (non-PC) 64 bit x86 platforms:
+		Numascale NumaChip
+		ScaleMP vSMP
+		SGI Ultraviolet
+
+	  If you have one of these systems, or if you want to build a
+	  generic distribution kernel, say Y here - otherwise say N.
+endif
+# This is an alphabetically sorted list of 64 bit extended platforms
+# Please maintain the alphabetic order if and when there are additions
+config X86_NUMACHIP
+	bool "Numascale NumaChip"
+	depends on X86_64
+	depends on X86_EXTENDED_PLATFORM
+	depends on NUMA
+	depends on SMP
+	depends on X86_X2APIC
+	---help---
+	  Adds support for Numascale NumaChip large-SMP systems. Needed to
+	  enable more than ~168 cores.
+	  If you don't have one of these, you should say N here.
+
+config X86_VSMP
+	bool "ScaleMP vSMP"
+	select PARAVIRT_GUEST
+	select PARAVIRT
+	depends on X86_64 && PCI
+	depends on X86_EXTENDED_PLATFORM
+	---help---
+	  Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is
+	  supposed to run on these EM64T-based machines. Only choose this option
+	  if you have one of these machines.
+
+config X86_UV
+	bool "SGI Ultraviolet"
+	depends on X86_64
+	depends on X86_EXTENDED_PLATFORM
+	depends on NUMA
+	depends on X86_X2APIC
+	---help---
+	  This option is needed in order to support SGI Ultraviolet systems.
+	  If you don't have one of these, you should say N here.
+
+# Following is an alphabetically sorted list of 32 bit extended platforms
+# Please maintain the alphabetic order if and when there are additions
+
+config X86_INTEL_CE
+	bool "CE4100 TV platform"
+	depends on PCI
+	depends on PCI_GODIRECT
+	depends on X86_32
+	depends on X86_EXTENDED_PLATFORM
+	select X86_REBOOTFIXUPS
+	select OF
+	select OF_EARLY_FLATTREE
+	select IRQ_DOMAIN
+	---help---
+	  Select for the Intel CE media processor (CE4100) SoC.
+	  This option compiles in support for the CE4100 SoC for set-top
+	  boxes and media devices.
+
+config X86_WANT_INTEL_MID
+	bool "Intel MID platform support"
+	depends on X86_32
+	depends on X86_EXTENDED_PLATFORM
+	---help---
+	  Select to build a kernel capable of supporting Intel MID platform
+	  systems which do not have the PCI legacy interfaces (Moorestown,
+	  Medfield). If you are building for a PC class system say N here.
+
+if X86_WANT_INTEL_MID
+
+config X86_INTEL_MID
+	bool
+
+config X86_MDFLD
+	bool "Medfield MID platform"
+	depends on PCI
+	depends on PCI_GOANY
+	depends on X86_IO_APIC
+	select X86_INTEL_MID
+	select SFI
+	select DW_APB_TIMER
+	select APB_TIMER
+	select I2C
+	select SPI
+	select INTEL_SCU_IPC
+	select X86_PLATFORM_DEVICES
+	select MFD_INTEL_MSIC
+	---help---
+	  Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin
+	  Internet Device (MID) platform.
+	  Unlike standard x86 PCs, Medfield does not have many legacy devices
+	  nor standard legacy replacement devices/features. e.g. Medfield does
+	  not contain i8259, i8254, HPET, legacy BIOS, most of the I/O ports.
+
+endif
+
+config X86_RDC321X
+	bool "RDC R-321x SoC"
+	depends on X86_32
+	depends on X86_EXTENDED_PLATFORM
+	select M486
+	select X86_REBOOTFIXUPS
+	---help---
+	  This option is needed for RDC R-321x system-on-chip, also known
+	  as R-8610-(G).
+	  If you don't have one of these chips, you should say N here.
+
+config X86_32_NON_STANDARD
+	bool "Support non-standard 32-bit SMP architectures"
+	depends on X86_32 && SMP
+	depends on X86_EXTENDED_PLATFORM
+	---help---
+	  This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default
+	  subarchitectures. It is intended for a generic binary kernel.
+	  If you select them all, the kernel will probe them one by one and
+	  will fall back to the default.
+
+# Alphabetically sorted list of Non standard 32 bit platforms
+
+config X86_NUMAQ
+	bool "NUMAQ (IBM/Sequent)"
+	depends on X86_32_NON_STANDARD
+	depends on PCI
+	select NUMA
+	select X86_MPPARSE
+	---help---
+	  This option is used for getting Linux to run on a NUMAQ (IBM/Sequent)
+	  NUMA multiquad box. This changes the way that processors are
+	  bootstrapped, and uses Clustered Logical APIC addressing mode instead
+	  of Flat Logical. You will need a new lynxer.elf file to flash your
+	  firmware with - send email to .
+
+config X86_SUPPORTS_MEMORY_FAILURE
+	def_bool y
+	# MCE code calls memory_failure():
+	depends on X86_MCE
+	# On 32-bit this adds too big of NODES_SHIFT and we run out of page flags:
+	depends on !X86_NUMAQ
+	# On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH:
+	depends on X86_64 || !SPARSEMEM
+	select ARCH_SUPPORTS_MEMORY_FAILURE
+
+config X86_VISWS
+	bool "SGI 320/540 (Visual Workstation)"
+	depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT
+	depends on X86_32_NON_STANDARD
+	---help---
+	  The SGI Visual Workstation series is an IA32-based workstation
+	  based on SGI systems chips with some legacy PC hardware attached.
+
+	  Say Y here to create a kernel to run on the SGI 320 or 540.
+
+	  A kernel compiled for the Visual Workstation will run on general
+	  PCs as well. See for details.
+
+config X86_SUMMIT
+	bool "Summit/EXA (IBM x440)"
+	depends on X86_32_NON_STANDARD
+	---help---
+	  This option is needed for IBM systems that use the Summit/EXA chipset.
+	  In particular, it is needed for the x440.
+
+config X86_ES7000
+	bool "Unisys ES7000 IA32 series"
+	depends on X86_32_NON_STANDARD && X86_BIGSMP
+	---help---
+	  Support for Unisys ES7000 systems. Say 'Y' here if this kernel is
+	  supposed to run on an IA32-based Unisys ES7000 system.
+
+config X86_32_IRIS
+	tristate "Eurobraille/Iris poweroff module"
+	depends on X86_32
+	---help---
+	  The Iris machines from EuroBraille do not have APM or ACPI support
+	  to shut themselves down properly. A special I/O sequence is
+	  needed to do so, which is what this module does at
+	  kernel shutdown.
+
+	  This is only for Iris machines from EuroBraille.
+
+	  If unused, say N.
+
+config SCHED_OMIT_FRAME_POINTER
+	def_bool y
+	prompt "Single-depth WCHAN output"
+	depends on X86
+	---help---
+	  Calculate simpler /proc//wchan values. If this option
+	  is disabled then wchan values will recurse back to the
+	  caller function. This provides more accurate wchan values,
+	  at the expense of slightly more scheduling overhead.
+
+	  If in doubt, say "Y".
+
+menuconfig PARAVIRT_GUEST
+	bool "Paravirtualized guest support"
+	---help---
+	  Say Y here to get to see options related to running Linux under
+	  various hypervisors. This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if PARAVIRT_GUEST
+
+config PARAVIRT_TIME_ACCOUNTING
+	bool "Paravirtual steal time accounting"
+	select PARAVIRT
+	default n
+	---help---
+	  Select this option to enable fine granularity task steal time
+	  accounting. Time spent executing other tasks in parallel with
+	  the current vCPU is discounted from the vCPU power. To account for
+	  that, there can be a small performance impact.
+
+	  If in doubt, say N here.
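
Background for reviewers of PARAVIRT_TIME_ACCOUNTING: the steal time being accounted here is the same quantity the kernel reports in the eighth numeric field of the "cpu" line of /proc/stat. A minimal userspace sketch that reads it back (illustrative only, not part of this patch; error handling trimmed):

    #include <stdio.h>

    /* Print aggregate steal time: the 8th field of the "cpu" line in
     * /proc/stat, reported in USER_HZ ticks (typically 1/100 s). */
    int main(void)
    {
        unsigned long long user, nice, sys, idle, iowait, irq, sirq, steal;
        FILE *f = fopen("/proc/stat", "r");
        if (!f)
            return 1;
        if (fscanf(f, "cpu %llu %llu %llu %llu %llu %llu %llu %llu",
                   &user, &nice, &sys, &idle, &iowait, &irq, &sirq,
                   &steal) == 8)
            printf("steal: %llu ticks\n", steal);
        fclose(f);
        return 0;
    }

On a guest where the hypervisor exposes steal time, this counter grows whenever the physical CPU runs someone else's vCPU.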
+
+source "arch/x86/xen/Kconfig"
+
+config KVM_CLOCK
+	bool "KVM paravirtualized clock"
+	select PARAVIRT
+	select PARAVIRT_CLOCK
+	---help---
+	  Turning on this option will allow you to run a paravirtualized clock
+	  when running over the KVM hypervisor. Instead of relying on a PIT
+	  (or probably other) emulation by the underlying device model, the host
+	  provides the guest with timing infrastructure such as time of day, and
+	  system time.
+
+config KVM_GUEST
+	bool "KVM Guest support"
+	select PARAVIRT
+	---help---
+	  This option enables various optimizations for running under the KVM
+	  hypervisor.
+
+source "arch/x86/lguest/Kconfig"
+
+config PARAVIRT
+	bool "Enable paravirtualization code"
+	---help---
+	  This changes the kernel so it can modify itself when it is run
+	  under a hypervisor, potentially improving performance significantly
+	  over full virtualization. However, when run without a hypervisor
+	  the kernel is theoretically slower and slightly larger.
+
+config PARAVIRT_SPINLOCKS
+	bool "Paravirtualization layer for spinlocks"
+	depends on PARAVIRT && SMP && EXPERIMENTAL
+	---help---
+	  Paravirtualized spinlocks allow a pvops backend to replace the
+	  spinlock implementation with something virtualization-friendly
+	  (for example, block the virtual CPU rather than spinning).
+
+	  Unfortunately the downside is an up to 5% performance hit on
+	  native kernels, with various workloads.
+
+	  If you are unsure how to answer this question, answer N.
+
+config PARAVIRT_CLOCK
+	bool
+
+endif
+
+config PARAVIRT_DEBUG
+	bool "paravirt-ops debugging"
+	depends on PARAVIRT && DEBUG_KERNEL
+	---help---
+	  Enable to debug paravirt_ops internals. Specifically, BUG if
+	  a paravirt_op is missing when it is called.
+
+config NO_BOOTMEM
+	def_bool y
+
+config MEMTEST
+	bool "Memtest"
+	---help---
+	  This option adds a kernel parameter 'memtest', which allows memtest
+	  to be set.
+	  memtest=0 means disabled -- the default
+	  memtest=1 means do 1 test pattern;
+	  ...
+	  memtest=4 means do 4 test patterns.
+	  If you are unsure how to answer this question, answer N.
+
+config X86_SUMMIT_NUMA
+	def_bool y
+	depends on X86_32 && NUMA && X86_32_NON_STANDARD
+
+config X86_CYCLONE_TIMER
+	def_bool y
+	depends on X86_SUMMIT
+
+source "arch/x86/Kconfig.cpu"
+
+config HPET_TIMER
+	def_bool X86_64
+	prompt "HPET Timer Support" if X86_32
+	---help---
+	  Use the IA-PC HPET (High Precision Event Timer) to manage
+	  time in preference to the PIT and RTC, if a HPET is
+	  present.
+	  HPET is the next generation timer replacing legacy 8254s.
+	  The HPET provides a stable time base on SMP
+	  systems, unlike the TSC, but it is more expensive to access,
+	  as it is off-chip. You can find the HPET spec at
+	  .
+
+	  You can safely choose Y here. However, HPET will only be
+	  activated if the platform and the BIOS support this feature.
+	  Otherwise the 8254 will be used for timing services.
+
+	  Choose N to continue using the legacy 8254 timer.
+
+config HPET_EMULATE_RTC
+	def_bool y
+	depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
+
+config APB_TIMER
+	def_bool y if X86_INTEL_MID
+	prompt "Intel MID APB Timer Support" if X86_INTEL_MID
+	select DW_APB_TIMER
+	depends on X86_INTEL_MID && SFI
+	help
+	  The APB timer is the replacement for the 8254 and HPET on x86 MID
+	  platforms. The APBT provides a stable time base on SMP
+	  systems, unlike the TSC, but it is more expensive to access,
+	  as it is off-chip. APB timers are always running regardless of CPU
+	  C-states; they are used as per-CPU clockevent devices when possible.
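
A note for reviewers on verifying which of the clocksources touched above (kvm-clock, HPET, TSC, APB timer) the kernel actually selected at runtime: the active and registered clocksources are exported through sysfs. A small sketch (illustrative only, not part of this patch):

    #include <stdio.h>

    /* Print the clocksource the kernel selected, plus the candidates it
     * registered (e.g. tsc, hpet, kvm-clock, acpi_pm). */
    static void show(const char *path)
    {
        char buf[128];
        FILE *f = fopen(path, "r");
        if (f && fgets(buf, sizeof(buf), f))
            printf("%s: %s", path, buf);
        if (f)
            fclose(f);
    }

    int main(void)
    {
        show("/sys/devices/system/clocksource/clocksource0/current_clocksource");
        show("/sys/devices/system/clocksource/clocksource0/available_clocksource");
        return 0;
    }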
+
+# Mark as expert because too many people got it wrong.
+# The code disables itself when not needed.
+config DMI
+	default y
+	bool "Enable DMI scanning" if EXPERT
+	---help---
+	  Enables scanning of DMI to identify machine quirks. Say Y
+	  here unless you have verified that your setup is not
+	  affected by entries in the DMI blacklist. Required by PNP
+	  BIOS code.
+
+config GART_IOMMU
+	bool "GART IOMMU support" if EXPERT
+	default y
+	select SWIOTLB
+	depends on X86_64 && PCI && AMD_NB
+	---help---
+	  Support for full DMA access of devices with 32-bit memory access only
+	  on systems with more than 3GB. This is usually needed for USB,
+	  sound, many IDE/SATA chipsets and some other devices.
+	  Provides a driver for the AMD Athlon64/Opteron/Turion/Sempron GART
+	  based hardware IOMMU and a software bounce buffer based IOMMU used
+	  on Intel systems and as fallback.
+	  The code is only active when needed (enough memory and limited
+	  device) unless CONFIG_IOMMU_DEBUG or iommu=force is specified
+	  too.
+
+config CALGARY_IOMMU
+	bool "IBM Calgary IOMMU support"
+	select SWIOTLB
+	depends on X86_64 && PCI && EXPERIMENTAL
+	---help---
+	  Support for hardware IOMMUs in IBM's xSeries x366 and x460
+	  systems. Needed to run systems with more than 3GB of memory
+	  properly with 32-bit PCI devices that do not support DAC
+	  (Double Address Cycle). Calgary also supports bus level
+	  isolation, where all DMAs pass through the IOMMU. This
+	  prevents them from going anywhere except their intended
+	  destination. This catches hard-to-find kernel bugs and
+	  mis-behaving drivers and devices that do not use the DMA-API
+	  properly to set up their DMA buffers. The IOMMU can be
+	  turned off at boot time with the iommu=off parameter.
+	  Normally the kernel will make the right choice by itself.
+	  If unsure, say Y.
+
+config CALGARY_IOMMU_ENABLED_BY_DEFAULT
+	def_bool y
+	prompt "Should Calgary be enabled by default?"
+	depends on CALGARY_IOMMU
+	---help---
+	  Should Calgary be enabled by default? If you choose 'y', Calgary
+	  will be used (if it exists). If you choose 'n', Calgary will not be
+	  used even if it exists. If you choose 'n' and would like to use
+	  Calgary anyway, pass 'iommu=calgary' on the kernel command line.
+	  If unsure, say Y.
+
+# need this always selected by IOMMU for the VIA workaround
+config SWIOTLB
+	def_bool y if X86_64
+	---help---
+	  Support for software bounce buffers used on x86-64 systems
+	  which don't have a hardware IOMMU (e.g. the current generation
+	  of Intel's x86-64 CPUs). With this, PCI devices that can only
+	  access 32 bits of memory can be used on systems with more than
+	  3 GB of memory. If unsure, say Y.
+
+config IOMMU_HELPER
+	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
+
+config MAXSMP
+	bool "Enable Maximum number of SMP Processors and NUMA Nodes"
+	depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
+	select CPUMASK_OFFSTACK
+	---help---
+	  Enable the maximum number of CPUs and NUMA nodes for this architecture.
+	  If unsure, say N.
+
+config NR_CPUS
+	int "Maximum number of CPUs" if SMP && !MAXSMP
+	range 2 8 if SMP && X86_32 && !X86_BIGSMP
+	range 2 512 if SMP && !MAXSMP
+	default "1" if !SMP
+	default "4096" if MAXSMP
+	default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000)
+	default "8" if SMP
+	---help---
+	  This allows you to specify the maximum number of CPUs which this
+	  kernel will support. The maximum supported value is 512 and the
+	  minimum value which makes sense is 2.
+
+	  This is purely to save memory - each supported CPU adds
+	  approximately eight kilobytes to the kernel image.
+
+config SCHED_SMT
+	bool "SMT (Hyperthreading) scheduler support"
+	depends on X86_HT
+	---help---
+	  SMT scheduler support improves the CPU scheduler's decision making
+	  when dealing with Intel Pentium 4 chips with HyperThreading at a
+	  cost of slightly increased overhead in some places. If unsure say
+	  N here.
+
+config SCHED_MC
+	def_bool y
+	prompt "Multi-core scheduler support"
+	depends on X86_HT
+	---help---
+	  Multi-core scheduler support improves the CPU scheduler's decision
+	  making when dealing with multi-core CPU chips at a cost of slightly
+	  increased overhead in some places. If unsure say N here.
+
+config IRQ_TIME_ACCOUNTING
+	bool "Fine granularity task level IRQ time accounting"
+	default n
+	---help---
+	  Select this option to enable fine granularity task IRQ time
+	  accounting. This is done by reading a timestamp on each
+	  transition between softirq and hardirq state, so there can be a
+	  small performance impact.
+
+	  If in doubt, say N here.
+
+source "kernel/Kconfig.preempt"
+
+config X86_UP_APIC
+	bool "Local APIC support on uniprocessors"
+	depends on X86_32 && !SMP && !X86_32_NON_STANDARD
+	---help---
+	  A local APIC (Advanced Programmable Interrupt Controller) is an
+	  integrated interrupt controller in the CPU. If you have a single-CPU
+	  system which has a processor with a local APIC, you can say Y here to
+	  enable and use it. If you say Y here even though your machine doesn't
+	  have a local APIC, then the kernel will still run with no slowdown at
+	  all. The local APIC supports CPU-generated self-interrupts (timer,
+	  performance counters), and the NMI watchdog which detects hard
+	  lockups.
+
+config X86_UP_IOAPIC
+	bool "IO-APIC support on uniprocessors"
+	depends on X86_UP_APIC
+	---help---
+	  An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an
+	  SMP-capable replacement for PC-style interrupt controllers. Most
+	  SMP systems and many recent uniprocessor systems have one.
+
+	  If you have a single-CPU system with an IO-APIC, you can say Y here
+	  to use it. If you say Y here even though your machine doesn't have
+	  an IO-APIC, then the kernel will still run with no slowdown at all.
+
+config X86_LOCAL_APIC
+	def_bool y
+	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
+
+config X86_IO_APIC
+	def_bool y
+	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
+
+config X86_VISWS_APIC
+	def_bool y
+	depends on X86_32 && X86_VISWS
+
+config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
+	bool "Reroute for broken boot IRQs"
+	depends on X86_IO_APIC
+	---help---
+	  This option enables a workaround that fixes a source of
+	  spurious interrupts. This is recommended when threaded
+	  interrupt handling is used on systems where the generation of
+	  superfluous "boot interrupts" cannot be disabled.
+
+	  Some chipsets generate a legacy INTx "boot IRQ" when the IRQ
+	  entry in the chipset's IO-APIC is masked (as, e.g. the RT
+	  kernel does during interrupt handling). On chipsets where this
+	  boot IRQ generation cannot be disabled, this workaround keeps
+	  the original IRQ line masked so that only the equivalent "boot
+	  IRQ" is delivered to the CPUs. The workaround also tells the
+	  kernel to set up the IRQ handler on the boot IRQ line. In this
+	  way only one interrupt is delivered to the kernel. Otherwise
+	  the spurious second interrupt may cause the kernel to bring
+	  down (vital) interrupt lines.
+
+	  Only affects "broken" chipsets.
+	  Interrupt sharing may be increased on these systems.
+
+config X86_MCE
+	bool "Machine Check / overheating reporting"
+	---help---
+	  Machine Check support allows the processor to notify the
+	  kernel if it detects a problem (e.g. overheating, data corruption).
+	  The action the kernel takes depends on the severity of the problem,
+	  ranging from warning messages to halting the machine.
+
+config X86_MCE_INTEL
+	def_bool y
+	prompt "Intel MCE features"
+	depends on X86_MCE && X86_LOCAL_APIC
+	---help---
+	  Additional support for Intel-specific MCE features such as
+	  the thermal monitor.
+
+config X86_MCE_AMD
+	def_bool y
+	prompt "AMD MCE features"
+	depends on X86_MCE && X86_LOCAL_APIC
+	---help---
+	  Additional support for AMD-specific MCE features such as
+	  the DRAM Error Threshold.
+
+config X86_ANCIENT_MCE
+	bool "Support for old Pentium 5 / WinChip machine checks"
+	depends on X86_32 && X86_MCE
+	---help---
+	  Include support for machine check handling on old Pentium 5 or WinChip
+	  systems. These typically need to be enabled explicitly on the command
+	  line.
+
+config X86_MCE_THRESHOLD
+	depends on X86_MCE_AMD || X86_MCE_INTEL
+	def_bool y
+
+config X86_MCE_INJECT
+	depends on X86_MCE
+	tristate "Machine check injector support"
+	---help---
+	  Provide support for injecting machine checks for testing purposes.
+	  If you don't know what a machine check is and you don't do kernel
+	  QA it is safe to say n.
+
+config X86_THERMAL_VECTOR
+	def_bool y
+	depends on X86_MCE_INTEL
+
+config VM86
+	bool "Enable VM86 support" if EXPERT
+	default y
+	depends on X86_32
+	---help---
+	  This option is required by programs like DOSEMU to run 16-bit legacy
+	  code on X86 processors. It also may be needed by software like
+	  XFree86 to initialize some video cards via BIOS. Disabling this
+	  option saves about 6k.
+
+config TOSHIBA
+	tristate "Toshiba Laptop support"
+	depends on X86_32
+	---help---
+	  This adds a driver to safely access the System Management Mode of
+	  the CPU on Toshiba portables with a genuine Toshiba BIOS. It does
+	  not work on models with a Phoenix BIOS. The System Management Mode
+	  is used to set the BIOS and power saving options on Toshiba portables.
+
+	  For information on utilities to make use of this driver see the
+	  Toshiba Linux utilities web site at:
+	  .
+
+	  Say Y if you intend to run this kernel on a Toshiba portable.
+	  Say N otherwise.
+
+config I8K
+	tristate "Dell laptop support"
+	select HWMON
+	---help---
+	  This adds a driver to safely access the System Management Mode
+	  of the CPU on the Dell Inspiron 8000. The System Management Mode
+	  is used to read CPU temperature and cooling fan status and to
+	  control the fans on the I8K portables.
+
+	  This driver has been tested only on the Inspiron 8000 but it may
+	  also work with other Dell laptops. You can force loading on other
+	  models by passing the parameter `force=1' to the module. Use at
+	  your own risk.
+
+	  For information on utilities to make use of this driver see the
+	  I8K Linux utilities web site at:
+
+
+	  Say Y if you intend to run this kernel on a Dell Inspiron 8000.
+	  Say N otherwise.
+
+config X86_REBOOTFIXUPS
+	bool "Enable X86 board specific fixups for reboot"
+	depends on X86_32
+	---help---
+	  This enables chipset and/or board specific fixups to be done
+	  in order to get reboot to work correctly. This is only needed on
+	  some combinations of hardware and BIOS. The symptom, for which
+	  this config is intended, is when reboot ends with a stalled/hung
+	  system.
+
+	  Currently, the only fixup is for the Geode machines using
+	  CS5530A and CS5536 chipsets and the RDC R-321x SoC.
+
+	  Say Y if you want to enable the fixup. Currently, it's safe to
+	  enable this option even if you don't need it.
+	  Say N otherwise.
+
+config MICROCODE
+	tristate "/dev/cpu/microcode - microcode support"
+	select FW_LOADER
+	---help---
+	  If you say Y here, you will be able to update the microcode on
+	  certain Intel and AMD processors. The Intel support is for the
+	  IA32 family, e.g. Pentium Pro, Pentium II, Pentium III,
+	  Pentium 4, Xeon etc. The AMD support is for family 0x10 and
+	  0x11 processors, e.g. Opteron, Phenom and Turion 64 Ultra.
+	  You will obviously need the actual microcode binary data itself
+	  which is not shipped with the Linux kernel.
+
+	  This option selects the general module only; you need to select
+	  at least one vendor-specific module as well.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called microcode.
+
+config MICROCODE_INTEL
+	bool "Intel microcode patch loading support"
+	depends on MICROCODE
+	default MICROCODE
+	select FW_LOADER
+	---help---
+	  This option enables microcode patch loading support for Intel
+	  processors.
+
+	  For the latest news and information on obtaining all the required
+	  Intel ingredients for this driver, check:
+	  .
+
+config MICROCODE_AMD
+	bool "AMD microcode patch loading support"
+	depends on MICROCODE
+	select FW_LOADER
+	---help---
+	  If you select this option, microcode patch loading support for AMD
+	  processors will be enabled.
+
+config MICROCODE_OLD_INTERFACE
+	def_bool y
+	depends on MICROCODE
+
+config X86_MSR
+	tristate "/dev/cpu/*/msr - Model-specific register support"
+	---help---
+	  This device gives privileged processes access to the x86
+	  Model-Specific Registers (MSRs). It is a character device with
+	  major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr.
+	  MSR accesses are directed to a specific CPU on multi-processor
+	  systems.
+
+config X86_CPUID
+	tristate "/dev/cpu/*/cpuid - CPU information support"
+	---help---
+	  This device gives processes access to the x86 CPUID instruction to
+	  be executed on a specific processor. It is a character device
+	  with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
+	  /dev/cpu/31/cpuid.
+
+choice
+	prompt "High Memory Support"
+	default HIGHMEM64G if X86_NUMAQ
+	default HIGHMEM4G
+	depends on X86_32
+
+config NOHIGHMEM
+	bool "off"
+	depends on !X86_NUMAQ
+	---help---
+	  Linux can use up to 64 Gigabytes of physical memory on x86 systems.
+	  However, the address space of 32-bit x86 processors is only 4
+	  Gigabytes large. That means that, if you have a large amount of
+	  physical memory, not all of it can be "permanently mapped" by the
+	  kernel. The physical memory that's not permanently mapped is called
+	  "high memory".
+
+	  If you are compiling a kernel which will never run on a machine with
+	  more than 1 Gigabyte total physical RAM, answer "off" here (default
+	  choice and suitable for most users). This will result in a "3GB/1GB"
+	  split: 3GB are mapped so that each process sees a 3GB virtual memory
+	  space and the remaining part of the 4GB virtual memory space is used
+	  by the kernel to permanently map as much physical memory as
+	  possible.
+
+	  If the machine has between 1 and 4 Gigabytes physical RAM, then
+	  answer "4GB" here.
+
+	  If more than 4 Gigabytes is used then answer "64GB" here. This
+	  selection turns Intel PAE (Physical Address Extension) mode on.
+	  PAE implements 3-level paging on IA32 processors.
PAE is fully + supported by Linux, PAE mode is implemented on all recent Intel + processors (Pentium Pro and better). NOTE: If you say "64GB" here, + then the kernel will not boot on CPUs that don't support PAE! + + The actual amount of total physical memory will either be + auto detected or can be forced by using a kernel command line option + such as "mem=256M". (Try "man bootparam" or see the documentation of + your boot loader (lilo or loadlin) about how to pass options to the + kernel at boot time.) + + If unsure, say "off". + +config HIGHMEM4G + bool "4GB" + depends on !X86_NUMAQ + ---help--- + Select this if you have a 32-bit processor and between 1 and 4 + gigabytes of physical RAM. + +config HIGHMEM64G + bool "64GB" + depends on !M386 && !M486 + select X86_PAE + ---help--- + Select this if you have a 32-bit processor and more than 4 + gigabytes of physical RAM. + +endchoice + +choice + depends on EXPERIMENTAL + prompt "Memory split" if EXPERT + default VMSPLIT_3G + depends on X86_32 + ---help--- + Select the desired split between kernel and user memory. + + If the address range available to the kernel is less than the + physical memory installed, the remaining memory will be available + as "high memory". Accessing high memory is a little more costly + than low memory, as it needs to be mapped into the kernel first. + Note that increasing the kernel address space limits the range + available to user programs, making the address space there + tighter. Selecting anything other than the default 3G/1G split + will also likely make your kernel incompatible with binary-only + kernel modules. + + If you are not absolutely sure what you are doing, leave this + option alone! + + config VMSPLIT_3G + bool "3G/1G user/kernel split" + config VMSPLIT_3G_OPT + depends on !X86_PAE + bool "3G/1G user/kernel split (for full 1G low memory)" + config VMSPLIT_2G + bool "2G/2G user/kernel split" + config VMSPLIT_2G_OPT + depends on !X86_PAE + bool "2G/2G user/kernel split (for full 2G low memory)" + config VMSPLIT_1G + bool "1G/3G user/kernel split" +endchoice + +config PAGE_OFFSET + hex + default 0xB0000000 if VMSPLIT_3G_OPT + default 0x80000000 if VMSPLIT_2G + default 0x78000000 if VMSPLIT_2G_OPT + default 0x40000000 if VMSPLIT_1G + default 0xC0000000 + depends on X86_32 + +config HIGHMEM + def_bool y + depends on X86_32 && (HIGHMEM64G || HIGHMEM4G) + +config X86_PAE + bool "PAE (Physical Address Extension) Support" + depends on X86_32 && !HIGHMEM4G + ---help--- + PAE is required for NX support, and furthermore enables + larger swapspace support for non-overcommit purposes. It + has the cost of more pagetable lookup overhead, and also + consumes more pagetable space per process. + +config ARCH_PHYS_ADDR_T_64BIT + def_bool X86_64 || X86_PAE + +config ARCH_DMA_ADDR_T_64BIT + def_bool X86_64 || HIGHMEM64G + +config DIRECT_GBPAGES + bool "Enable 1GB pages for kernel pagetables" if EXPERT + default y + depends on X86_64 + ---help--- + Allow the kernel linear mapping to use 1GB pages on CPUs that + support it. This can improve the kernel's performance a tiny bit by + reducing TLB pressure. If in doubt, say "Y". + +# Common NUMA Features +config NUMA + bool "Numa Memory Allocation and Scheduler Support" + depends on SMP + depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) + default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) + ---help--- + Enable NUMA (Non Uniform Memory Access) support. 
+ + The kernel will try to allocate memory used by a CPU on the + local memory controller of the CPU and add some more + NUMA awareness to the kernel. + + For 64-bit this is recommended if the system is Intel Core i7 + (or later), AMD Opteron, or EM64T NUMA. + + For 32-bit this is only needed on (rare) 32-bit-only platforms + that support NUMA topologies, such as NUMAQ / Summit, or if you + boot a 32-bit kernel on a 64-bit NUMA platform. + + Otherwise, you should say N. + +comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" + depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) + +config AMD_NUMA + def_bool y + prompt "Old style AMD Opteron NUMA detection" + depends on X86_64 && NUMA && PCI + ---help--- + Enable AMD NUMA node topology detection. You should say Y here if + you have a multi processor AMD system. This uses an old method to + read the NUMA configuration directly from the builtin Northbridge + of Opteron. It is recommended to use X86_64_ACPI_NUMA instead, + which also takes priority if both are compiled in. + +config X86_64_ACPI_NUMA + def_bool y + prompt "ACPI NUMA detection" + depends on X86_64 && NUMA && ACPI && PCI + select ACPI_NUMA + ---help--- + Enable ACPI SRAT based node topology detection. + +# Some NUMA nodes have memory ranges that span +# other nodes. Even though a pfn is valid and +# between a node's start and end pfns, it may not +# reside on that node. See memmap_init_zone() +# for details. +config NODES_SPAN_OTHER_NODES + def_bool y + depends on X86_64_ACPI_NUMA + +config NUMA_EMU + bool "NUMA emulation" + depends on NUMA + ---help--- + Enable NUMA emulation. A flat machine will be split + into virtual nodes when booted with "numa=fake=N", where N is the + number of nodes. This is only useful for debugging. + +config NODES_SHIFT + int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP + range 1 10 + default "10" if MAXSMP + default "6" if X86_64 + default "4" if X86_NUMAQ + default "3" + depends on NEED_MULTIPLE_NODES + ---help--- + Specify the maximum number of NUMA Nodes available on the target + system. Increases memory reserved to accommodate various tables. + +config HAVE_ARCH_BOOTMEM + def_bool y + depends on X86_32 && NUMA + +config HAVE_ARCH_ALLOC_REMAP + def_bool y + depends on X86_32 && NUMA + +config ARCH_HAVE_MEMORY_PRESENT + def_bool y + depends on X86_32 && DISCONTIGMEM + +config NEED_NODE_MEMMAP_SIZE + def_bool y + depends on X86_32 && (DISCONTIGMEM || SPARSEMEM) + +config ARCH_FLATMEM_ENABLE + def_bool y + depends on X86_32 && !NUMA + +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + depends on NUMA && X86_32 + +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y + depends on NUMA && X86_32 + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD + select SPARSEMEM_STATIC if X86_32 + select SPARSEMEM_VMEMMAP_ENABLE if X86_64 + +config ARCH_SPARSEMEM_DEFAULT + def_bool y + depends on X86_64 + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + depends on ARCH_SPARSEMEM_ENABLE + +config ARCH_MEMORY_PROBE + def_bool X86_64 + depends on MEMORY_HOTPLUG + +config ARCH_PROC_KCORE_TEXT + def_bool y + depends on X86_64 && PROC_KCORE + +config ILLEGAL_POINTER_VALUE + hex + default 0 if X86_32 + default 0xdead000000000000 if X86_64 + +source "mm/Kconfig" + +config HIGHPTE + bool "Allocate 3rd-level pagetables from highmem" + depends on HIGHMEM + ---help--- + The VM uses one page table entry for each page of physical memory. 
+	  For systems with a lot of RAM, this can be wasteful of precious
+	  low memory. Setting this option will put user-space page table
+	  entries in high memory.
+
+config X86_CHECK_BIOS_CORRUPTION
+	bool "Check for low memory corruption"
+	---help---
+	  Periodically check for memory corruption in low memory, which
+	  is suspected to be caused by BIOS. Even when enabled in the
+	  configuration, it is disabled at runtime. Enable it by
+	  setting "memory_corruption_check=1" on the kernel command
+	  line. By default it scans the low 64k of memory every 60
+	  seconds; see the memory_corruption_check_size and
+	  memory_corruption_check_period parameters in
+	  Documentation/kernel-parameters.txt to adjust this.
+
+	  When enabled with the default parameters, this option has
+	  almost no overhead, as it reserves a relatively small amount
+	  of memory and scans it infrequently. It both detects corruption
+	  and prevents it from affecting the running system.
+
+	  It is, however, intended as a diagnostic tool; if repeatable
+	  BIOS-originated corruption always affects the same memory,
+	  you can use memmap= to prevent the kernel from using that
+	  memory.
+
+config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
+	bool "Set the default setting of memory_corruption_check"
+	depends on X86_CHECK_BIOS_CORRUPTION
+	default y
+	---help---
+	  Set whether the default state of memory_corruption_check is
+	  on or off.
+
+config X86_RESERVE_LOW
+	int "Amount of low memory, in kilobytes, to reserve for the BIOS"
+	default 64
+	range 4 640
+	---help---
+	  Specify the amount of low memory to reserve for the BIOS.
+
+	  The first page contains BIOS data structures that the kernel
+	  must not use, so that page must always be reserved.
+
+	  By default we reserve the first 64K of physical RAM, as a
+	  number of BIOSes are known to corrupt that memory range
+	  during events such as suspend/resume or monitor cable
+	  insertion, so it must not be used by the kernel.
+
+	  You can set this to 4 if you are absolutely sure that you
+	  trust the BIOS to get all its memory reservations and usages
+	  right. If you know your BIOS has problems beyond the
+	  default 64K area, you can set this to 640 to avoid using the
+	  entire low memory range.
+
+	  If you have doubts about the BIOS (e.g. suspend/resume does
+	  not work or there are kernel crashes after certain hardware
+	  hotplug events) then you might want to enable
+	  X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check
+	  typical corruption patterns.
+
+	  Leave this at the default value of 64 if you are unsure.
+
+config MATH_EMULATION
+	bool
+	prompt "Math emulation" if X86_32
+	---help---
+	  Linux can emulate a math coprocessor (used for floating point
+	  operations) if you don't have one. 486DX and Pentium processors have
+	  a math coprocessor built in, 486SX and 386 do not, unless you added
+	  a 487DX or 387, respectively. (The messages during boot time can
+	  give you some hints here ["man dmesg"].) Everyone needs either a
+	  coprocessor or this emulation.
+
+	  If you don't have a math coprocessor, you need to say Y here; if you
+	  say Y here even though you have a coprocessor, the coprocessor will
+	  be used nevertheless. (This behavior can be changed with the kernel
+	  command line option "no387", which comes in handy if your coprocessor
+	  is broken. Try "man bootparam" or see the documentation of your boot
+	  loader (lilo or loadlin) about how to pass options to the kernel at
+	  boot time.) This means that it is a good idea to say Y here if you
+	  intend to use this kernel on different machines.
+
+	  More information about the internals of the Linux math coprocessor
+	  emulation can be found in .
+
+	  If you are not sure, say Y; apart from resulting in a 66 KB bigger
+	  kernel, it won't hurt.
+
+config MTRR
+	def_bool y
+	prompt "MTRR (Memory Type Range Register) support" if EXPERT
+	---help---
+	  On Intel P6 family processors (Pentium Pro, Pentium II and later)
+	  the Memory Type Range Registers (MTRRs) may be used to control
+	  processor access to memory ranges. This is most useful if you have
+	  a video (VGA) card on a PCI or AGP bus. Enabling write-combining
+	  allows bus write transfers to be combined into a larger transfer
+	  before bursting over the PCI/AGP bus. This can increase performance
+	  of image write operations 2.5 times or more. Saying Y here creates a
+	  /proc/mtrr file which may be used to manipulate your processor's
+	  MTRRs. Typically the X server should use this.
+
+	  This code has a reasonably generic interface so that similar
+	  control registers on other processors can be easily supported
+	  as well:
+
+	  The Cyrix 6x86, 6x86MX and M II processors have Address Range
+	  Registers (ARRs) which provide a similar functionality to MTRRs. For
+	  these, the ARRs are used to emulate the MTRRs.
+	  The AMD K6-2 (stepping 8 and above) and K6-3 processors have two
+	  MTRRs. The Centaur C6 (WinChip) has 8 MCRs, allowing
+	  write-combining. All of these processors are supported by this code
+	  and it makes sense to say Y here if you have one of them.
+
+	  Saying Y here also fixes a problem with buggy SMP BIOSes which only
+	  set the MTRRs for the boot CPU and not for the secondary CPUs. This
+	  can lead to all sorts of problems, so it's good to say Y here.
+
+	  You can safely say Y even if your machine doesn't have MTRRs; you'll
+	  just add about 9 KB to your kernel.
+
+	  See for more information.
+
+config MTRR_SANITIZER
+	def_bool y
+	prompt "MTRR cleanup support"
+	depends on MTRR
+	---help---
+	  Convert MTRR layout from continuous to discrete, so X drivers can
+	  add writeback entries.
+
+	  Can be disabled with disable_mtrr_cleanup on the kernel command line.
+	  The largest mtrr entry size for a continuous block can be set with
+	  mtrr_chunk_size.
+
+	  If unsure, say Y.
+
+config MTRR_SANITIZER_ENABLE_DEFAULT
+	int "MTRR cleanup enable value (0-1)"
+	range 0 1
+	default "0"
+	depends on MTRR_SANITIZER
+	---help---
+	  Default value for the MTRR cleanup enable flag
+	  (0 = disabled, 1 = enabled).
+
+config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
+	int "MTRR cleanup spare reg num (0-7)"
+	range 0 7
+	default "1"
+	depends on MTRR_SANITIZER
+	---help---
+	  Default number of spare MTRR entries kept for the cleanup; it can
+	  be changed via mtrr_spare_reg_nr=N on the kernel command line.
+
+config X86_PAT
+	def_bool y
+	prompt "x86 PAT support" if EXPERT
+	depends on MTRR
+	---help---
+	  Use PAT attributes to set up page-level cache control.
+
+	  PATs are the modern equivalents of MTRRs and are much more
+	  flexible than MTRRs.
+
+	  Say N here if you see bootup problems (boot crash, boot hang,
+	  spontaneous reboots) or a non-working video driver.
+
+	  If unsure, say Y.
+
+config ARCH_USES_PG_UNCACHED
+	def_bool y
+	depends on X86_PAT
+
+config ARCH_RANDOM
+	def_bool y
+	prompt "x86 architectural random number generator" if EXPERT
+	---help---
+	  Enable the x86 architectural RDRAND instruction
+	  (Intel Bull Mountain technology) to generate random numbers.
+	  If supported, this is a high bandwidth, cryptographically
+	  secure hardware random number generator.
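+
+# As an aside for testers (not part of the configuration itself): a
+# minimal user-space sketch of driving RDRAND directly, assuming a
+# 64-bit build on an RDRAND-capable CPU and GCC-style inline asm. A
+# clear carry flag means "no entropy available yet", so callers
+# conventionally retry a bounded number of times:
+#
+#	#include <stdio.h>
+#	#include <stdint.h>
+#
+#	static int rdrand64(uint64_t *out)	/* 1 = success, 0 = retry */
+#	{
+#		unsigned char ok;
+#
+#		/* faults with SIGILL on CPUs without RDRAND */
+#		asm volatile("rdrand %0; setc %1" : "=r"(*out), "=qm"(ok));
+#		return ok;
+#	}
+#
+#	int main(void)
+#	{
+#		uint64_t v;
+#		int i;
+#
+#		for (i = 0; i < 10; i++)	/* bounded retry loop */
+#			if (rdrand64(&v)) {
+#				printf("%016llx\n", (unsigned long long)v);
+#				return 0;
+#			}
+#		return 1;			/* no entropy delivered */
+#	}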
+
+config EFI
+	bool "EFI runtime service support"
+	depends on ACPI
+	---help---
+	  This enables the kernel to use EFI runtime services that are
+	  available (such as the EFI variable services).
+
+	  This option is only useful on systems that have EFI firmware.
+	  In addition, you should use the latest ELILO loader available
+	  at in order to take advantage
+	  of EFI runtime services. However, even with this option, the
+	  resultant kernel should continue to boot on existing non-EFI
+	  platforms.
+
+config EFI_STUB
+	bool "EFI stub support"
+	depends on EFI
+	---help---
+	  This kernel feature allows a bzImage to be loaded directly
+	  by EFI firmware without the use of a bootloader.
+
+config SECCOMP
+	def_bool y
+	prompt "Enable seccomp to safely compute untrusted bytecode"
+	---help---
+	  This kernel feature is useful for number crunching applications
+	  that may need to compute untrusted bytecode during their
+	  execution. By using pipes or other transports made available to
+	  the process as file descriptors supporting the read/write
+	  syscalls, it's possible to isolate those applications in
+	  their own address space using seccomp. Once seccomp is
+	  enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
+	  and the task is only allowed to execute a few safe syscalls
+	  defined by each seccomp mode.
+
+	  If unsure, say Y. Only embedded systems should say N here.
+
+config CC_STACKPROTECTOR
+	bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
+	---help---
+	  This option turns on the -fstack-protector GCC feature. This
+	  feature puts, at the beginning of functions, a canary value on
+	  the stack just before the return address, and validates
+	  the value just before actually returning. Stack based buffer
+	  overflows (that need to overwrite this return address) now also
+	  overwrite the canary, which gets detected and the attack is then
+	  neutralized via a kernel panic.
+
+	  This feature requires gcc version 4.2 or above, or a distribution
+	  gcc with the feature backported. Older versions are automatically
+	  detected and for those versions, this configuration option is
+	  ignored (and a warning is printed during bootup).
+
+source kernel/Kconfig.hz
+
+config KEXEC
+	bool "kexec system call"
+	---help---
+	  kexec is a system call that implements the ability to shut down your
+	  current kernel, and to start another kernel. It is like a reboot
+	  but it is independent of the system firmware. And like a reboot
+	  you can start any kernel with it, not just Linux.
+
+	  The name comes from the similarity to the exec system call.
+
+	  It is an ongoing process to be certain the hardware in a machine
+	  is properly shut down, so do not be surprised if this code does not
+	  initially work for you. It may help to enable device hotplugging
+	  support. As of this writing the exact hardware interface is
+	  strongly in flux, so no good recommendation can be made.
+
+config CRASH_DUMP
+	bool "kernel crash dumps"
+	depends on X86_64 || (X86_32 && HIGHMEM)
+	---help---
+	  Generate a crash dump after being started by kexec.
+	  This should normally only be set in special crash dump kernels
+	  which are loaded in the main kernel with kexec-tools into
+	  a specially reserved region and then later executed after
+	  a crash by kdump/kexec. The crash dump kernel must be compiled
+	  to a memory address not used by the main kernel or BIOS using
+	  PHYSICAL_START, or it must be built as a relocatable image
+	  (CONFIG_RELOCATABLE=y).
+	  For more details see Documentation/kdump/kdump.txt
+
+config KEXEC_JUMP
+	bool "kexec jump (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	depends on KEXEC && HIBERNATION
+	---help---
+	  Jump between the original kernel and the kexeced kernel and invoke
+	  code in physical address mode via KEXEC.
+
+config PHYSICAL_START
+	hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP)
+	default "0x1000000"
+	---help---
+	  This gives the physical address where the kernel is loaded.
+
+	  If the kernel is not relocatable (CONFIG_RELOCATABLE=n) then the
+	  bzImage will decompress itself to the physical address above and
+	  run from there. Otherwise, the bzImage will run from the address
+	  where it has been loaded by the boot loader and will ignore the
+	  physical address above.
+
+	  In normal kdump cases one does not have to set/change this option
+	  as the bzImage can now be compiled as a completely relocatable image
+	  (CONFIG_RELOCATABLE=y) and be used to load and run from a different
+	  address. This option is mainly useful for the folks who don't want
+	  to use a bzImage for capturing the crash dump and want to use a
+	  vmlinux instead. Since vmlinux is not relocatable, such a kernel
+	  needs to be compiled specifically to run from a specific memory area
+	  (normally a reserved region), and this option comes in handy.
+
+	  So if you are using a bzImage for capturing the crash dump,
+	  leave the value here unchanged at 0x1000000 and set
+	  CONFIG_RELOCATABLE=y. Otherwise, if you plan to use a vmlinux
+	  for capturing the crash dump, change this value to the start of
+	  the reserved region. In other words, it can be set based on
+	  the "X" value as specified in the "crashkernel=YM@XM"
+	  command line boot parameter passed to the panicked
+	  kernel. Please take a look at Documentation/kdump/kdump.txt
+	  for more details about crash dumps.
+
+	  Usage of a bzImage for capturing the crash dump is recommended as
+	  one does not have to build two kernels: the same kernel can be used
+	  as both the production kernel and the capture kernel. This option
+	  should have gone away once relocatable bzImage support was
+	  introduced, but it remains because some users continue to use
+	  vmlinux for dump capture. It should go away down the line.
+
+	  Don't change this unless you know what you are doing.
+
+config RELOCATABLE
+	bool "Build a relocatable kernel"
+	default y
+	---help---
+	  This builds a kernel image that retains relocation information
+	  so it can be loaded someplace besides the default 1MB.
+	  The relocations tend to make the kernel binary about 10% larger,
+	  but are discarded at runtime.
+
+	  One use is for the kexec on panic case where the recovery kernel
+	  must live at a different physical address than the primary
+	  kernel.
+
+	  Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address
+	  it has been loaded at and the compile time physical address
+	  (CONFIG_PHYSICAL_START) is ignored.
+
+# Relocation on x86-32 needs some additional build support
+config X86_NEED_RELOCS
+	def_bool y
+	depends on X86_32 && RELOCATABLE
+
+config PHYSICAL_ALIGN
+	hex "Alignment value to which kernel should be aligned" if X86_32
+	default "0x1000000"
+	range 0x2000 0x1000000
+	---help---
+	  This value sets the alignment restriction on the physical address
+	  at which the kernel is loaded and run. The kernel is compiled for
+	  an address which meets this alignment restriction.
+
+	  If the bootloader loads the kernel at a non-aligned address and
+	  CONFIG_RELOCATABLE is set, the kernel will move itself to the
+	  nearest address aligned to this value and run from there.
+
+	  If the bootloader loads the kernel at a non-aligned address and
+	  CONFIG_RELOCATABLE is not set, the kernel will ignore the run-time
+	  load address and decompress itself to the address it has been
+	  compiled for and run from there. The address for which the kernel
+	  is compiled already meets this alignment restriction. Hence the
+	  end result is that the kernel runs from a physical address meeting
+	  this alignment restriction.
+
+	  Don't change this unless you know what you are doing.
+
+config HOTPLUG_CPU
+	bool "Support for hot-pluggable CPUs"
+	depends on SMP && HOTPLUG
+	---help---
+	  Say Y here to allow turning CPUs off and on. CPUs can be
+	  controlled through /sys/devices/system/cpu.
+	  (Note: power management support will enable this option
+	  automatically on SMP systems.)
+	  Say N if you want to disable CPU hotplug.
+
+config COMPAT_VDSO
+	def_bool y
+	prompt "Compat VDSO support"
+	depends on X86_32 || IA32_EMULATION
+	---help---
+	  Map the 32-bit VDSO to the predictable old-style address too.
+
+	  Say N here if you are running a sufficiently recent glibc
+	  version (2.3.3 or later), to remove the high-mapped
+	  VDSO mapping and to exclusively use the randomized VDSO.
+
+	  If unsure, say Y.
+
+config CMDLINE_BOOL
+	bool "Built-in kernel command line"
+	---help---
+	  Allow for specifying boot arguments to the kernel at
+	  build time. On some systems (e.g. embedded ones), it is
+	  necessary or convenient to provide some or all of the
+	  kernel boot arguments with the kernel itself (that is,
+	  to not rely on the boot loader to provide them.)
+
+	  To compile command line arguments into the kernel,
+	  set this option to 'Y', then fill in the
+	  boot arguments in CONFIG_CMDLINE.
+
+	  Systems with fully functional boot loaders (i.e. non-embedded)
+	  should leave this option set to 'N'.
+
+config CMDLINE
+	string "Built-in kernel command string"
+	depends on CMDLINE_BOOL
+	default ""
+	---help---
+	  Enter arguments here that should be compiled into the kernel
+	  image and used at boot time. If the boot loader provides a
+	  command line at boot time, it is appended to this string to
+	  form the full kernel command line, when the system boots.
+
+	  However, you can use the CONFIG_CMDLINE_OVERRIDE option to
+	  change this behavior.
+
+	  In most cases, the command line (whether built-in or provided
+	  by the boot loader) should specify the device for the root
+	  file system.
+
+config CMDLINE_OVERRIDE
+	bool "Built-in command line overrides boot loader arguments"
+	depends on CMDLINE_BOOL
+	---help---
+	  Set this option to 'Y' to have the kernel ignore the boot loader
+	  command line, and use ONLY the built-in command line.
+
+	  This is used to work around broken boot loaders. This should
+	  be set to 'N' under normal conditions.
+
+endmenu
+
+config ARCH_ENABLE_MEMORY_HOTPLUG
+	def_bool y
+	depends on X86_64 || (X86_32 && HIGHMEM)
+
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+	def_bool y
+	depends on MEMORY_HOTPLUG
+
+config USE_PERCPU_NUMA_NODE_ID
+	def_bool y
+	depends on NUMA
+
+menu "Power management and ACPI options"
+
+config ARCH_HIBERNATION_HEADER
+	def_bool y
+	depends on X86_64 && HIBERNATION
+
+source "kernel/power/Kconfig"
+
+source "drivers/acpi/Kconfig"
+
+source "drivers/sfi/Kconfig"
+
+config X86_APM_BOOT
+	def_bool y
+	depends on APM
+
+menuconfig APM
+	tristate "APM (Advanced Power Management) BIOS support"
+	depends on X86_32 && PM_SLEEP
+	---help---
+	  APM is a BIOS specification for saving power using several different
+	  techniques. This is mostly useful for battery-powered laptops with
+	  APM-compliant BIOSes.
If you say Y here, the system time will be + reset after a RESUME operation, the /proc/apm device will provide + battery status information, and user-space programs will receive + notification of APM "events" (e.g. battery status change). + + If you select "Y" here, you can disable actual use of the APM + BIOS by passing the "apm=off" option to the kernel at boot time. + + Note that the APM support is almost completely disabled for + machines with more than one CPU. + + In order to use APM, you will need supporting software. For location + and more information, read + and the Battery Powered Linux mini-HOWTO, available from + . + + This driver does not spin down disk drives (see the hdparm(8) + manpage ("man 8 hdparm") for that), and it doesn't turn off + VESA-compliant "green" monitors. + + This driver does not support the TI 4000M TravelMate and the ACER + 486/DX4/75 because they don't have compliant BIOSes. Many "green" + desktop machines also don't have compliant BIOSes, and this driver + may cause those machines to panic during the boot phase. + + Generally, if you don't have a battery in your machine, there isn't + much point in using this driver and you should say N. If you get + random kernel OOPSes or reboots that don't seem to be related to + anything, try disabling/enabling this option (or disabling/enabling + APM in your BIOS). + + Some other things you should try when experiencing seemingly random, + "weird" problems: + + 1) make sure that you have enough swap space and that it is + enabled. + 2) pass the "no-hlt" option to the kernel + 3) switch on floating point emulation in the kernel and pass + the "no387" option to the kernel + 4) pass the "floppy=nodma" option to the kernel + 5) pass the "mem=4M" option to the kernel (thereby disabling + all but the first 4 MB of RAM) + 6) make sure that the CPU is not over clocked. + 7) read the sig11 FAQ at + 8) disable the cache from your BIOS settings + 9) install a fan for the video card or exchange video RAM + 10) install a better fan for the CPU + 11) exchange RAM chips + 12) exchange the motherboard. + + To compile this driver as a module, choose M here: the + module will be called apm. + +if APM + +config APM_IGNORE_USER_SUSPEND + bool "Ignore USER SUSPEND" + ---help--- + This option will ignore USER SUSPEND requests. On machines with a + compliant APM BIOS, you want to say N. However, on the NEC Versa M + series notebooks, it is necessary to say Y because of a BIOS bug. + +config APM_DO_ENABLE + bool "Enable PM at boot time" + ---help--- + Enable APM features at boot time. From page 36 of the APM BIOS + specification: "When disabled, the APM BIOS does not automatically + power manage devices, enter the Standby State, enter the Suspend + State, or take power saving steps in response to CPU Idle calls." + This driver will make CPU Idle calls when Linux is idle (unless this + feature is turned off -- see "Do CPU IDLE calls", below). This + should always save battery power, but more complicated APM features + will be dependent on your BIOS implementation. You may need to turn + this option off if your computer hangs at boot time when using APM + support, or if it beeps continuously instead of suspending. Turn + this off if you have a NEC UltraLite Versa 33/C or a Toshiba + T400CDT. This is off by default since most machines do fine without + this feature. + +config APM_CPU_IDLE + bool "Make CPU Idle calls when idle" + ---help--- + Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. 
+	  On some machines, this can activate improved power savings, such as
+	  a slowed CPU clock rate, when the machine is idle. These idle calls
+	  are made after the idle loop has run for some length of time (e.g.,
+	  333 ms). On some machines, this will cause a hang at boot time or
+	  whenever the CPU becomes idle. (On machines with more than one CPU,
+	  this option does nothing.)
+
+config APM_DISPLAY_BLANK
+	bool "Enable console blanking using APM"
+	---help---
+	  Enable console blanking using the APM. Some laptops can use this to
+	  turn off the LCD backlight when the screen blanker of the Linux
+	  virtual console blanks the screen. Note that this is only used by
+	  the virtual console screen blanker, and won't turn off the backlight
+	  when using the X Window system. This also doesn't have anything to
+	  do with your VESA-compliant power-saving monitor. Further, this
+	  option doesn't work for all laptops -- it might not turn off your
+	  backlight at all, or it might print a lot of errors to the console,
+	  especially if you are using gpm.
+
+config APM_ALLOW_INTS
+	bool "Allow interrupts during APM BIOS calls"
+	---help---
+	  Normally we disable external interrupts while we are making calls to
+	  the APM BIOS as a measure to lessen the effects of a badly behaving
+	  BIOS implementation. The BIOS should reenable interrupts if it
+	  needs to. Unfortunately, some BIOSes do not -- especially those in
+	  many of the newer IBM Thinkpads. If you experience hangs when you
+	  suspend, try setting this to Y. Otherwise, say N.
+
+endif # APM
+
+source "drivers/cpufreq/Kconfig"
+
+source "drivers/cpuidle/Kconfig"
+
+source "drivers/idle/Kconfig"
+
+endmenu
+
+
+menu "Bus options (PCI etc.)"
+
+config PCI
+	bool "PCI support"
+	default y
+	select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC)
+	---help---
+	  Find out whether you have a PCI motherboard. PCI is the name of a
+	  bus system, i.e. the way the CPU talks to the other stuff inside
+	  your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
+	  VESA. If you have PCI, say Y, otherwise N.
+
+choice
+	prompt "PCI access mode"
+	depends on X86_32 && PCI
+	default PCI_GOANY
+	---help---
+	  On PCI systems, the BIOS can be used to detect the PCI devices and
+	  determine their configuration. However, some old PCI motherboards
+	  have BIOS bugs and may crash if this is done. Also, some embedded
+	  PCI-based systems don't have any BIOS at all. Linux can also try to
+	  detect the PCI hardware directly without using the BIOS.
+
+	  With this option, you can specify how Linux should detect the
+	  PCI devices. If you choose "BIOS", the BIOS will be used,
+	  if you choose "Direct", the BIOS won't be used, and if you
+	  choose "MMConfig", then PCI Express MMCONFIG will be used.
+	  If you choose "Any", the kernel will try MMCONFIG, then the
+	  direct access method, and fall back to the BIOS if that doesn't
+	  work. If unsure, go with the default, which is "Any".
+
+config PCI_GOBIOS
+	bool "BIOS"
+
+config PCI_GOMMCONFIG
+	bool "MMConfig"
+
+config PCI_GODIRECT
+	bool "Direct"
+
+config PCI_GOOLPC
+	bool "OLPC XO-1"
+	depends on OLPC
+
+config PCI_GOANY
+	bool "Any"
+
+endchoice
+
+config PCI_BIOS
+	def_bool y
+	depends on X86_32 && PCI && (PCI_GOBIOS || PCI_GOANY)
+
+# x86-64 doesn't support PCI BIOS access from long mode so always go direct.
+config PCI_DIRECT
+	def_bool y
+	depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC || PCI_GOMMCONFIG))
+
+config PCI_MMCONFIG
+	def_bool y
+	depends on X86_32 && PCI && (ACPI || SFI) && (PCI_GOMMCONFIG || PCI_GOANY)
+
+config PCI_OLPC
+	def_bool y
+	depends on PCI && OLPC && (PCI_GOOLPC || PCI_GOANY)
+
+config PCI_XEN
+	def_bool y
+	depends on PCI && XEN
+	select SWIOTLB_XEN
+
+config PCI_DOMAINS
+	def_bool y
+	depends on PCI
+
+config PCI_MMCONFIG
+	bool "Support mmconfig PCI config space access"
+	depends on X86_64 && PCI && ACPI
+
+config PCI_CNB20LE_QUIRK
+	bool "Read CNB20LE Host Bridge Windows" if EXPERT
+	default n
+	depends on PCI && EXPERIMENTAL
+	help
+	  Read the PCI windows out of the CNB20LE host bridge. This allows
+	  PCI hotplug to work on systems with the CNB20LE chipset which do
+	  not have ACPI.
+
+	  There's no public spec for this chipset, and this functionality
+	  is known to be incomplete.
+
+	  You should say N unless you know you need this.
+
+source "drivers/pci/pcie/Kconfig"
+
+source "drivers/pci/Kconfig"
+
+# x86_64 has no ISA slots, but can have ISA-style DMA.
+config ISA_DMA_API
+	bool "ISA-style DMA support" if (X86_64 && EXPERT)
+	default y
+	help
+	  Enables ISA-style DMA support for devices requiring such controllers.
+	  If unsure, say Y.
+
+if X86_32
+
+config ISA
+	bool "ISA support"
+	---help---
+	  Find out whether you have ISA slots on your motherboard. ISA is the
+	  name of a bus system, i.e. the way the CPU talks to the other stuff
+	  inside your box. Other bus systems are PCI, EISA, MicroChannel
+	  (MCA) or VESA. ISA is an older system, now being displaced by PCI;
+	  newer boards don't support it. If you have ISA, say Y, otherwise N.
+
+config EISA
+	bool "EISA support"
+	depends on ISA
+	---help---
+	  The Extended Industry Standard Architecture (EISA) bus was
+	  developed as an open alternative to the IBM MicroChannel bus.
+
+	  The EISA bus provided some of the features of the IBM MicroChannel
+	  bus while maintaining backward compatibility with cards made for
+	  the older ISA bus. The EISA bus saw limited use between 1988 and
+	  1995 when it was made obsolete by the PCI bus.
+
+	  Say Y here if you are building a kernel for an EISA-based machine.
+
+	  Otherwise, say N.
+
+source "drivers/eisa/Kconfig"
+
+config MCA
+	bool "MCA support"
+	---help---
+	  MicroChannel Architecture is found in some IBM PS/2 machines and
+	  laptops. It is a bus system similar to PCI or ISA. See
+	  (and especially the web page given
+	  there) before attempting to build an MCA bus kernel.
+
+source "drivers/mca/Kconfig"
+
+config SCx200
+	tristate "NatSemi SCx200 support"
+	---help---
+	  This provides basic support for National Semiconductor's
+	  (now AMD's) Geode processors. The driver probes for the
+	  PCI-IDs of several on-chip devices, so it's a good dependency
+	  for other scx200_* drivers.
+
+	  If compiled as a module, the driver is named scx200.
+
+config SCx200HR_TIMER
+	tristate "NatSemi SCx200 27MHz High-Resolution Timer Support"
+	depends on SCx200
+	default y
+	---help---
+	  This driver provides a clocksource built upon the on-chip
+	  27MHz high-resolution timer. It's also a workaround for
+	  NSC Geode SC-1100's buggy TSC, which loses time when the
+	  processor goes idle (as is done by the scheduler). The
+	  other workaround is the idle=poll boot option.
+
+config OLPC
+	bool "One Laptop Per Child support"
+	depends on !X86_PAE
+	select GPIOLIB
+	select OF
+	select OF_PROMTREE
+	select IRQ_DOMAIN
+	---help---
+	  Add support for detecting the unique features of the OLPC
+	  XO hardware.
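+
+# Whichever access mode the PCI options earlier in this menu select, the
+# result is visible to user space through sysfs; a minimal sketch that
+# reads a device's vendor/device IDs out of its config space (the
+# 0000:00:00.0 path is only an example address):
+#
+#	#include <stdio.h>
+#	#include <stdint.h>
+#
+#	int main(void)
+#	{
+#		uint8_t cfg[4];
+#		FILE *f = fopen("/sys/bus/pci/devices/0000:00:00.0/config", "rb");
+#
+#		if (!f || fread(cfg, 1, 4, f) != 4)
+#			return 1;
+#		/* bytes 0-1: vendor ID, bytes 2-3: device ID (little endian) */
+#		printf("vendor %02x%02x device %02x%02x\n",
+#		       cfg[1], cfg[0], cfg[3], cfg[2]);
+#		fclose(f);
+#		return 0;
+#	}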
+ +config OLPC_XO1_PM + bool "OLPC XO-1 Power Management" + depends on OLPC && MFD_CS5535 && PM_SLEEP + select MFD_CORE + ---help--- + Add support for poweroff and suspend of the OLPC XO-1 laptop. + +config OLPC_XO1_RTC + bool "OLPC XO-1 Real Time Clock" + depends on OLPC_XO1_PM && RTC_DRV_CMOS + ---help--- + Add support for the XO-1 real time clock, which can be used as a + programmable wakeup source. + +config OLPC_XO1_SCI + bool "OLPC XO-1 SCI extras" + depends on OLPC && OLPC_XO1_PM + select POWER_SUPPLY + select GPIO_CS5535 + select MFD_CORE + ---help--- + Add support for SCI-based features of the OLPC XO-1 laptop: + - EC-driven system wakeups + - Power button + - Ebook switch + - Lid switch + - AC adapter status updates + - Battery status updates + +config OLPC_XO15_SCI + bool "OLPC XO-1.5 SCI extras" + depends on OLPC && ACPI + select POWER_SUPPLY + ---help--- + Add support for SCI-based features of the OLPC XO-1.5 laptop: + - EC-driven system wakeups + - AC adapter status updates + - Battery status updates + +config ALIX + bool "PCEngines ALIX System Support (LED setup)" + select GPIOLIB + ---help--- + This option enables system support for the PCEngines ALIX. + At present this just sets up LEDs for GPIO control on + ALIX2/3/6 boards. However, other system specific setup should + get added here. + + Note: You must still enable the drivers for GPIO and LED support + (GPIO_CS5535 & LEDS_GPIO) to actually use the LEDs + + Note: You have to set alix.force=1 for boards with Award BIOS. + +config NET5501 + bool "Soekris Engineering net5501 System Support (LEDS, GPIO, etc)" + select GPIOLIB + ---help--- + This option enables system support for the Soekris Engineering net5501. + +config GEOS + bool "Traverse Technologies GEOS System Support (LEDS, GPIO, etc)" + select GPIOLIB + depends on DMI + ---help--- + This option enables system support for the Traverse Technologies GEOS. + +endif # X86_32 + +config AMD_NB + def_bool y + depends on CPU_SUP_AMD && PCI + +source "drivers/pcmcia/Kconfig" + +source "drivers/pci/hotplug/Kconfig" + +config RAPIDIO + bool "RapidIO support" + depends on PCI + default n + help + If you say Y here, the kernel will include drivers and + infrastructure code to support RapidIO interconnect devices. + +source "drivers/rapidio/Kconfig" + +endmenu + + +menu "Executable file formats / Emulations" + +source "fs/Kconfig.binfmt" + +config IA32_EMULATION + bool "IA32 Emulation" + depends on X86_64 + select COMPAT_BINFMT_ELF + ---help--- + Include code to run legacy 32-bit programs under a + 64-bit kernel. You should likely turn this on, unless you're + 100% sure that you don't have any 32-bit programs left. + +config IA32_AOUT + tristate "IA32 a.out support" + depends on IA32_EMULATION + ---help--- + Support old a.out binaries in the 32bit emulation. + +config X86_X32 + bool "x32 ABI for 64-bit mode (EXPERIMENTAL)" + depends on X86_64 && IA32_EMULATION && EXPERIMENTAL + ---help--- + Include code to run binaries for the x32 native 32-bit ABI + for 64-bit processors. An x32 process gets access to the + full 64-bit register file and wide data path while leaving + pointers at 32 bits for smaller memory footprint. + + You will need a recent binutils (2.22 or later) with + elf32_x86_64 support enabled to compile a kernel with this + option set. 
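+
+# A quick way to see the x32 object model described above from user
+# space, assuming a toolchain with x32 support (gcc -mx32): pointers and
+# longs are 32-bit even though the code runs with the full 64-bit
+# register file.
+#
+#	#include <stdio.h>
+#
+#	int main(void)
+#	{
+#		/* prints "4 4" when built with -mx32, "8 8" with -m64 */
+#		printf("%zu %zu\n", sizeof(void *), sizeof(long));
+#		return 0;
+#	}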
+
+config COMPAT
+	def_bool y
+	depends on IA32_EMULATION || X86_X32
+	select ARCH_WANT_OLD_COMPAT_IPC
+
+config COMPAT_FOR_U64_ALIGNMENT
+	def_bool COMPAT
+	depends on X86_64
+
+config SYSVIPC_COMPAT
+	def_bool y
+	depends on COMPAT && SYSVIPC
+
+config KEYS_COMPAT
+	bool
+	depends on COMPAT && KEYS
+	default y
+
+endmenu
+
+
+config HAVE_ATOMIC_IOMAP
+	def_bool y
+	depends on X86_32
+
+config HAVE_TEXT_POKE_SMP
+	bool
+	select STOP_MACHINE if SMP
+
+source "net/Kconfig"
+
+source "drivers/Kconfig"
+
+source "drivers/firmware/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/x86/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "arch/x86/kvm/Kconfig"
+
+source "lib/Kconfig"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
new file mode 100644
index 00000000..706e12e9
--- /dev/null
+++ b/arch/x86/Kconfig.cpu
@@ -0,0 +1,506 @@
+# Options for CPU selection and the optimizations that depend on it
+choice
+	prompt "Processor family"
+	default M686 if X86_32
+	default GENERIC_CPU if X86_64
+
+config M386
+	bool "386"
+	depends on X86_32 && !UML
+	---help---
+	  This is the processor type of your CPU. This information is used for
+	  optimizing purposes. In order to compile a kernel that can run on
+	  all x86 CPU types (albeit not optimally fast), you can specify
+	  "386" here.
+
+	  The kernel will not necessarily run on earlier architectures than
+	  the one you have chosen, e.g. a Pentium optimized kernel will run on
+	  a PPro, but not necessarily on an i486.
+
+	  Here are the settings recommended for greatest speed:
+	  - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI
+	    486DLC/DLC2, and UMC 486SX-S. Only "386" kernels will run on a 386
+	    class machine.
+	  - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
+	    SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
+	  - "586" for generic Pentium CPUs lacking the TSC
+	    (time stamp counter) register.
+	  - "Pentium-Classic" for the Intel Pentium.
+	  - "Pentium-MMX" for the Intel Pentium MMX.
+	  - "Pentium-Pro" for the Intel Pentium Pro.
+	  - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron.
+	  - "Pentium-III" for the Intel Pentium III or Coppermine Celeron.
+	  - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron.
+	  - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D).
+	  - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird).
+	  - "Crusoe" for the Transmeta Crusoe series.
+	  - "Efficeon" for the Transmeta Efficeon series.
+	  - "Winchip-C6" for original IDT Winchip.
+	  - "Winchip-2" for IDT Winchips with 3dNow! capabilities.
+	  - "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
+	  - "Geode GX/LX" for AMD Geode GX and LX processors.
+	  - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
+	  - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above).
+	  - "VIA C7" for VIA C7.
+
+	  If you don't know what to do, choose "386".
+
+config M486
+	bool "486"
+	depends on X86_32
+	---help---
+	  Select this for a 486 series processor, either Intel or one of the
+	  compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX,
+	  DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or
+	  U5S.
+
+config M586
+	bool "586/K5/5x86/6x86/6x86MX"
+	depends on X86_32
+	---help---
+	  Select this for a 586 or 686 series processor such as the AMD K5,
+	  the Cyrix 5x86, 6x86 and 6x86MX. This choice does not
+	  assume the RDTSC (Read Time Stamp Counter) instruction.
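+
+# The TSC distinction drawn above is easy to probe from user space; a
+# minimal sketch using GCC-style inline asm (a TSC-less part takes an
+# illegal-instruction fault instead of returning a count):
+#
+#	#include <stdio.h>
+#	#include <stdint.h>
+#
+#	static uint64_t rdtsc(void)
+#	{
+#		uint32_t lo, hi;
+#
+#		asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+#		return ((uint64_t)hi << 32) | lo;
+#	}
+#
+#	int main(void)
+#	{
+#		printf("tsc = %llu\n", (unsigned long long)rdtsc());
+#		return 0;
+#	}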
+
+config M586TSC
+	bool "Pentium-Classic"
+	depends on X86_32
+	---help---
+	  Select this for a Pentium Classic processor with the RDTSC (Read
+	  Time Stamp Counter) instruction for benchmarking.
+
+config M586MMX
+	bool "Pentium-MMX"
+	depends on X86_32
+	---help---
+	  Select this for a Pentium with the MMX graphics/multimedia
+	  extended instructions.
+
+config M686
+	bool "Pentium-Pro"
+	depends on X86_32
+	---help---
+	  Select this for Intel Pentium Pro chips. This enables the use of
+	  Pentium Pro extended instructions, and disables the init-time guard
+	  against the f00f bug found in earlier Pentiums.
+
+config MPENTIUMII
+	bool "Pentium-II/Celeron(pre-Coppermine)"
+	depends on X86_32
+	---help---
+	  Select this for Intel chips based on the Pentium-II and
+	  pre-Coppermine Celeron core. This option enables an unaligned
+	  copy optimization, compiles the kernel with optimization flags
+	  tailored for the chip, and applies any applicable Pentium Pro
+	  optimizations.
+
+config MPENTIUMIII
+	bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon"
+	depends on X86_32
+	---help---
+	  Select this for Intel chips based on the Pentium-III and
+	  Celeron-Coppermine core. This option enables use of some
+	  extended prefetch instructions in addition to the Pentium II
+	  extensions.
+
+config MPENTIUMM
+	bool "Pentium M"
+	depends on X86_32
+	---help---
+	  Select this for Intel Pentium M (not Pentium-4 M)
+	  notebook chips.
+
+config MPENTIUM4
+	bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon"
+	depends on X86_32
+	---help---
+	  Select this for Intel Pentium 4 chips. This includes the
+	  Pentium 4, Pentium D, P4-based Celeron and Xeon, and
+	  Pentium-4 M (not Pentium M) chips. This option enables compile
+	  flags optimized for the chip, uses the correct cache line size, and
+	  applies any applicable optimizations.
+
+	  CPUIDs: F[0-6][1-A] (shown in /proc/cpuinfo as "cpu family : 15")
+
+	  Select this for:
+	  Pentiums (Pentium 4, Pentium D, Celeron, Celeron D) corename:
+	  -Willamette
+	  -Northwood
+	  -Mobile Pentium 4
+	  -Mobile Pentium 4 M
+	  -Extreme Edition (Gallatin)
+	  -Prescott
+	  -Prescott 2M
+	  -Cedar Mill
+	  -Presler
+	  -Smithfield
+	  Xeons (Intel Xeon, Xeon MP, Xeon LV, Xeon MV) corename:
+	  -Foster
+	  -Prestonia
+	  -Gallatin
+	  -Nocona
+	  -Irwindale
+	  -Cranford
+	  -Potomac
+	  -Paxville
+	  -Dempsey
+
+config MK6
+	bool "K6/K6-II/K6-III"
+	depends on X86_32
+	---help---
+	  Select this for an AMD K6-family processor. Enables use of
+	  some extended instructions, and passes appropriate optimization
+	  flags to GCC.
+
+config MK7
+	bool "Athlon/Duron/K7"
+	depends on X86_32
+	---help---
+	  Select this for an AMD Athlon K7-family processor. Enables use of
+	  some extended instructions, and passes appropriate optimization
+	  flags to GCC.
+
+config MK8
+	bool "Opteron/Athlon64/Hammer/K8"
+	---help---
+	  Select this for an AMD Opteron or Athlon64 Hammer-family processor.
+	  Enables use of some extended instructions, and passes appropriate
+	  optimization flags to GCC.
+
+config MCRUSOE
+	bool "Crusoe"
+	depends on X86_32
+	---help---
+	  Select this for a Transmeta Crusoe processor. Treats the processor
+	  like a 586 with TSC, and sets some GCC optimization flags (like a
+	  Pentium Pro with no alignment requirements).
+
+config MEFFICEON
+	bool "Efficeon"
+	depends on X86_32
+	---help---
+	  Select this for a Transmeta Efficeon processor.
+
+config MWINCHIPC6
+	bool "Winchip-C6"
+	depends on X86_32
+	---help---
+	  Select this for an IDT Winchip C6 chip.
+	  Linux and GCC treat this chip as a 586TSC with some extended
+	  instructions and alignment requirements.
+
+config MWINCHIP3D
+	bool "Winchip-2/Winchip-2A/Winchip-3"
+	depends on X86_32
+	---help---
+	  Select this for an IDT Winchip-2, 2A or 3. Linux and GCC
+	  treat this chip as a 586TSC with some extended instructions
+	  and alignment requirements. Also enables out-of-order memory
+	  stores for this CPU, which can increase performance of some
+	  operations.
+
+config MELAN
+	bool "AMD Elan"
+	depends on X86_32
+	---help---
+	  Select this for an AMD Elan processor.
+
+	  Do not use this option for K6/Athlon/Opteron processors!
+
+config MGEODEGX1
+	bool "GeodeGX1"
+	depends on X86_32
+	---help---
+	  Select this for a Geode GX1 (Cyrix MediaGX) chip.
+
+config MGEODE_LX
+	bool "Geode GX/LX"
+	depends on X86_32
+	---help---
+	  Select this for AMD Geode GX and LX processors.
+
+config MCYRIXIII
+	bool "CyrixIII/VIA-C3"
+	depends on X86_32
+	---help---
+	  Select this for a Cyrix III or C3 chip. Presently Linux and GCC
+	  treat this chip as a generic 586. Whilst the CPU is 686 class,
+	  it lacks the cmov extension which gcc assumes is present when
+	  generating 686 code.
+	  Note that Nehemiah (Model 9) and above will not boot with this
+	  kernel due to them lacking the 3DNow! instructions used in earlier
+	  incarnations of the CPU.
+
+config MVIAC3_2
+	bool "VIA C3-2 (Nehemiah)"
+	depends on X86_32
+	---help---
+	  Select this for a VIA C3 "Nehemiah". Selecting this enables usage
+	  of SSE and tells gcc to treat the CPU as a 686.
+	  Note, this kernel will not boot on older (pre model 9) C3s.
+
+config MVIAC7
+	bool "VIA C7"
+	depends on X86_32
+	---help---
+	  Select this for a VIA C7. Selecting this uses the correct cache
+	  shift and tells gcc to treat the CPU as a 686.
+
+config MPSC
+	bool "Intel P4 / older Netburst based Xeon"
+	depends on X86_64
+	---help---
+	  Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey
+	  Xeon CPUs with the Intel 64-bit extensions, which are compatible
+	  with x86-64.
+	  Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the
+	  Netburst core and shouldn't use this option. You can distinguish them
+	  using the cpu family field
+	  in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
+
+config MCORE2
+	bool "Core 2/newer Xeon"
+	---help---
+	  Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
+	  53xx) CPUs. You can distinguish newer from older Xeons by the CPU
+	  family in /proc/cpuinfo. Newer ones have 6 and older ones 15
+	  (not a typo).
+
+config MATOM
+	bool "Intel Atom"
+	---help---
+	  Select this for the Intel Atom platform. Intel Atom CPUs have an
+	  in-order pipelining architecture and thus can benefit from
+	  accordingly optimized code. Use a recent GCC with specific Atom
+	  support in order to fully benefit from selecting this option.
+
+config GENERIC_CPU
+	bool "Generic-x86-64"
+	depends on X86_64
+	---help---
+	  Generic x86-64 CPU.
+	  Runs equally well on all x86-64 CPUs.
+
+endchoice
+
+config X86_GENERIC
+	bool "Generic x86 support"
+	depends on X86_32
+	---help---
+	  Instead of just including optimizations for the selected
+	  x86 variant (e.g. PII, Crusoe or Athlon), include some more
+	  generic optimizations as well. This will make the kernel
+	  perform better on x86 CPUs other than that selected.
+
+	  This is really intended for distributors who need more
+	  generic optimizations.
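+
+# Several entries above say to check the "cpu family" field when picking
+# a processor type; a minimal sketch that pulls it out of /proc/cpuinfo:
+#
+#	#include <stdio.h>
+#	#include <string.h>
+#
+#	int main(void)
+#	{
+#		char line[256];
+#		FILE *f = fopen("/proc/cpuinfo", "r");
+#
+#		if (!f)
+#			return 1;
+#		while (fgets(line, sizeof(line), f))
+#			if (!strncmp(line, "cpu family", 10)) {
+#				fputs(line, stdout);	/* e.g. "cpu family : 6" */
+#				break;
+#			}
+#		fclose(f);
+#		return 0;
+#	}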
+ +# +# Define implied options from the CPU selection here +config X86_INTERNODE_CACHE_SHIFT + int + default "12" if X86_VSMP + default X86_L1_CACHE_SHIFT + +config X86_CMPXCHG + def_bool X86_64 || (X86_32 && !M386) + +config X86_L1_CACHE_SHIFT + int + default "7" if MPENTIUM4 || MPSC + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU + default "4" if MELAN || M486 || M386 || MGEODEGX1 + default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX + +config X86_XADD + def_bool y + depends on X86_64 || !M386 + +config X86_PPRO_FENCE + bool "PentiumPro memory ordering errata workaround" + depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 + ---help--- + Old PentiumPro multiprocessor systems had errata that could cause + memory operations to violate the x86 ordering standard in rare cases. + Enabling this option will attempt to work around some (but not all) + occurrences of this problem, at the cost of much heavier spinlock and + memory barrier operations. + + If unsure, say n here. Even distro kernels should think twice before + enabling this: there are few systems, and an unlikely bug. + +config X86_F00F_BUG + def_bool y + depends on M586MMX || M586TSC || M586 || M486 || M386 + +config X86_INVD_BUG + def_bool y + depends on M486 || M386 + +config X86_WP_WORKS_OK + def_bool y + depends on !M386 + +config X86_INVLPG + def_bool y + depends on X86_32 && !M386 + +config X86_BSWAP + def_bool y + depends on X86_32 && !M386 + +config X86_POPAD_OK + def_bool y + depends on X86_32 && !M386 + +config X86_ALIGNMENT_16 + def_bool y + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 + +config X86_INTEL_USERCOPY + def_bool y + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 + +config X86_USE_PPRO_CHECKSUM + def_bool y + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM + +config X86_USE_3DNOW + def_bool y + depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML + +config X86_OOSTORE + def_bool y + depends on (MWINCHIP3D || MWINCHIPC6) && MTRR + +# +# P6_NOPs are a relatively minor optimization that require a family >= +# 6 processor, except that it is broken on certain VIA chips. +# Furthermore, AMD chips prefer a totally different sequence of NOPs +# (which work on all CPUs). In addition, it looks like Virtual PC +# does not understand them. +# +# As a result, disallow these if we're not compiling for X86_64 (these +# NOPs do work on all x86-64 capable chips); the list of processors in +# the right-hand clause are the cores that benefit from this optimization. 
+# +config X86_P6_NOP + def_bool y + depends on X86_64 + depends on (MCORE2 || MPENTIUM4 || MPSC) + +config X86_TSC + def_bool y + depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64 + +config X86_CMPXCHG64 + def_bool y + depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM + +# this should be set for all -march=.. options where the compiler +# generates cmov. +config X86_CMOV + def_bool y + depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) + +config X86_MINIMUM_CPU_FAMILY + int + default "64" if X86_64 + default "6" if X86_32 && X86_P6_NOP + default "5" if X86_32 && X86_CMPXCHG64 + default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) + default "3" + +config X86_DEBUGCTLMSR + def_bool y + depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) && !UML + +menuconfig PROCESSOR_SELECT + bool "Supported processor vendors" if EXPERT + ---help--- + This lets you choose what x86 vendor support code your kernel + will include. + +config CPU_SUP_INTEL + default y + bool "Support Intel processors" if PROCESSOR_SELECT + ---help--- + This enables detection, tunings and quirks for Intel processors + + You need this enabled if you want your kernel to run on an + Intel CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. Disabling it on an Intel + CPU might render the kernel unbootable. + + If unsure, say N. + +config CPU_SUP_CYRIX_32 + default y + bool "Support Cyrix processors" if PROCESSOR_SELECT + depends on M386 || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT) + ---help--- + This enables detection, tunings and quirks for Cyrix processors + + You need this enabled if you want your kernel to run on a + Cyrix CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. Disabling it on a Cyrix + CPU might render the kernel unbootable. + + If unsure, say N. + +config CPU_SUP_AMD + default y + bool "Support AMD processors" if PROCESSOR_SELECT + ---help--- + This enables detection, tunings and quirks for AMD processors + + You need this enabled if you want your kernel to run on an + AMD CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. Disabling it on an AMD + CPU might render the kernel unbootable. + + If unsure, say N. + +config CPU_SUP_CENTAUR + default y + bool "Support Centaur processors" if PROCESSOR_SELECT + ---help--- + This enables detection, tunings and quirks for Centaur processors + + You need this enabled if you want your kernel to run on a + Centaur CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. Disabling it on a Centaur + CPU might render the kernel unbootable. + + If unsure, say N. + +config CPU_SUP_TRANSMETA_32 + default y + bool "Support Transmeta processors" if PROCESSOR_SELECT + depends on !64BIT + ---help--- + This enables detection, tunings and quirks for Transmeta processors + + You need this enabled if you want your kernel to run on a + Transmeta CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. 
Disabling it on a Transmeta
+	  CPU might render the kernel unbootable.
+
+	  If unsure, say N.
+
+config CPU_SUP_UMC_32
+	default y
+	bool "Support UMC processors" if PROCESSOR_SELECT
+	depends on M386 || M486 || (EXPERT && !64BIT)
+	---help---
+	  This enables detection, tunings and quirks for UMC processors
+
+	  You need this enabled if you want your kernel to run on a
+	  UMC CPU. Disabling this option on other types of CPUs
+	  makes the kernel a tiny bit smaller. Disabling it on a UMC
+	  CPU might render the kernel unbootable.
+
+	  If unsure, say N.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
new file mode 100644
index 00000000..e46c2147
--- /dev/null
+++ b/arch/x86/Kconfig.debug
@@ -0,0 +1,302 @@
+menu "Kernel hacking"
+
+config TRACE_IRQFLAGS_SUPPORT
+	def_bool y
+
+source "lib/Kconfig.debug"
+
+config STRICT_DEVMEM
+	bool "Filter access to /dev/mem"
+	---help---
+	  If this option is disabled, you allow userspace (root) access to all
+	  of memory, including kernel and userspace memory. Accidental
+	  access to this is obviously disastrous, but specific access can
+	  be used by people debugging the kernel. Note that with PAT support
+	  enabled, even in this case there are restrictions on /dev/mem
+	  use due to the cache aliasing requirements.
+
+	  If this option is switched on, the /dev/mem file only allows
+	  userspace access to PCI space and the BIOS code and data regions.
+	  This is sufficient for dosemu and X and all common users of
+	  /dev/mem.
+
+	  If in doubt, say Y.
+
+config X86_VERBOSE_BOOTUP
+	bool "Enable verbose x86 bootup info messages"
+	default y
+	---help---
+	  Enables the informational output from the decompression stage
+	  (e.g. bzImage) of the boot. If you disable this you will still
+	  see errors. Disable this if you want silent bootup.
+
+config EARLY_PRINTK
+	bool "Early printk" if EXPERT
+	default y
+	---help---
+	  Write kernel log output directly into the VGA buffer or to a serial
+	  port.
+
+	  This is useful for kernel debugging when your machine crashes very
+	  early before the console code is initialized. For normal operation
+	  it is not recommended because it looks ugly and doesn't cooperate
+	  with klogd/syslogd or the X server. You should normally say N here,
+	  unless you want to debug such a crash.
+
+config EARLY_PRINTK_INTEL_MID
+	bool "Early printk for Intel MID platform support"
+	depends on EARLY_PRINTK && X86_INTEL_MID
+
+config EARLY_PRINTK_DBGP
+	bool "Early printk via EHCI debug port"
+	depends on EARLY_PRINTK && PCI
+	---help---
+	  Write kernel log output directly into the EHCI debug port.
+
+	  This is useful for kernel debugging when your machine crashes very
+	  early before the console code is initialized. For normal operation
+	  it is not recommended because it looks ugly and doesn't cooperate
+	  with klogd/syslogd or the X server. You should normally say N here,
+	  unless you want to debug such a crash. You need a USB debug device.
+
+config DEBUG_STACKOVERFLOW
+	bool "Check for stack overflows"
+	depends on DEBUG_KERNEL
+	---help---
+	  Say Y here if you want to check for overflows of the kernel, IRQ
+	  and exception stacks. This option will print detailed messages
+	  about the stacks when free stack space drops below a certain
+	  limit.
+	  If in doubt, say "N".
+
+config X86_PTDUMP
+	bool "Export kernel pagetable layout to userspace via debugfs"
+	depends on DEBUG_KERNEL
+	select DEBUG_FS
+	---help---
+	  Say Y here if you want to show the kernel pagetable layout in a
+	  debugfs file.
This information is only useful for kernel developers + who are working in architecture specific areas of the kernel. + It is probably not a good idea to enable this feature in a production + kernel. + If in doubt, say "N" + +config DEBUG_RODATA + bool "Write protect kernel read-only data structures" + default y + depends on DEBUG_KERNEL + ---help--- + Mark the kernel read-only data as write-protected in the pagetables, + in order to catch accidental (and incorrect) writes to such const + data. This is recommended so that we can catch kernel bugs sooner. + If in doubt, say "Y". + +config DEBUG_RODATA_TEST + bool "Testcase for the DEBUG_RODATA feature" + depends on DEBUG_RODATA + default y + ---help--- + This option enables a testcase for the DEBUG_RODATA + feature as well as for the change_page_attr() infrastructure. + If in doubt, say "N" + +config DEBUG_SET_MODULE_RONX + bool "Set loadable kernel module data as NX and text as RO" + depends on MODULES + ---help--- + This option helps catch unintended modifications to loadable + kernel module's text and read-only data. It also prevents execution + of module data. Such protection may interfere with run-time code + patching and dynamic kernel tracing - and they might also protect + against certain classes of kernel exploits. + If in doubt, say "N". + +config DEBUG_NX_TEST + tristate "Testcase for the NX non-executable stack feature" + depends on DEBUG_KERNEL && m + ---help--- + This option enables a testcase for the CPU NX capability + and the software setup of this feature. + If in doubt, say "N" + +config DOUBLEFAULT + default y + bool "Enable doublefault exception handler" if EXPERT + depends on X86_32 + ---help--- + This option allows trapping of rare doublefault exceptions that + would otherwise cause a system to silently reboot. Disabling this + option saves about 4k and might cause you much additional grey + hair. + +config IOMMU_DEBUG + bool "Enable IOMMU debugging" + depends on GART_IOMMU && DEBUG_KERNEL + depends on X86_64 + ---help--- + Force the IOMMU to on even when you have less than 4GB of + memory and add debugging code. On overflow always panic. And + allow to enable IOMMU leak tracing. Can be disabled at boot + time with iommu=noforce. This will also enable scatter gather + list merging. Currently not recommended for production + code. When you use it make sure you have a big enough + IOMMU/AGP aperture. Most of the options enabled by this can + be set more finegrained using the iommu= command line + options. See Documentation/x86/x86_64/boot-options.txt for more + details. + +config IOMMU_STRESS + bool "Enable IOMMU stress-test mode" + ---help--- + This option disables various optimizations in IOMMU related + code to do real stress testing of the IOMMU code. This option + will cause a performance drop and should only be enabled for + testing. + +config IOMMU_LEAK + bool "IOMMU leak tracing" + depends on IOMMU_DEBUG && DMA_API_DEBUG + ---help--- + Add a simple leak tracer to the IOMMU code. This is useful when you + are debugging a buggy device driver that leaks IOMMU mappings. + +config HAVE_MMIOTRACE_SUPPORT + def_bool y + +config X86_DECODER_SELFTEST + bool "x86 instruction decoder selftest" + depends on DEBUG_KERNEL && KPROBES + ---help--- + Perform x86 instruction decoder selftests at build time. + This option is useful for checking the sanity of x86 instruction + decoder code. + If unsure, say "N". 
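To make the DEBUG_RODATA option above concrete, the sketch below (illustrative only, not part of the patch; the identifiers are made up) shows the class of bug it catches. With CONFIG_DEBUG_RODATA=y the kernel's .rodata pages are mapped read-only, so a stray write through a pointer into const data faults immediately instead of silently corrupting the kernel image; the DEBUG_RODATA_TEST option runs an in-kernel testcase along these lines against the change_page_attr() infrastructure.

	/* Illustrative sketch only -- not from this patch. */
	static const int rodata_canary = 42;	/* placed in .rodata */

	static void corrupt_rodata(void)
	{
		int *p = (int *)&rodata_canary;	/* casting away const: a bug */

		*p = 0;	/* faults if .rodata is write-protected,
			   silently corrupts the image otherwise */
	}
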
+ +# +# IO delay types: +# + +config IO_DELAY_TYPE_0X80 + int + default "0" + +config IO_DELAY_TYPE_0XED + int + default "1" + +config IO_DELAY_TYPE_UDELAY + int + default "2" + +config IO_DELAY_TYPE_NONE + int + default "3" + +choice + prompt "IO delay type" + default IO_DELAY_0X80 + +config IO_DELAY_0X80 + bool "port 0x80 based port-IO delay [recommended]" + ---help--- + This is the traditional Linux IO delay used for in/out_p. + It is the most tested hence safest selection here. + +config IO_DELAY_0XED + bool "port 0xed based port-IO delay" + ---help--- + Use port 0xed as the IO delay. This frees up port 0x80 which is + often used as a hardware-debug port. + +config IO_DELAY_UDELAY + bool "udelay based port-IO delay" + ---help--- + Use udelay(2) as the IO delay method. This provides the delay + while not having any side-effect on the IO port space. + +config IO_DELAY_NONE + bool "no port-IO delay" + ---help--- + No port-IO delay. Will break on old boxes that require port-IO + delay for certain operations. Should work on most new machines. + +endchoice + +if IO_DELAY_0X80 +config DEFAULT_IO_DELAY_TYPE + int + default IO_DELAY_TYPE_0X80 +endif + +if IO_DELAY_0XED +config DEFAULT_IO_DELAY_TYPE + int + default IO_DELAY_TYPE_0XED +endif + +if IO_DELAY_UDELAY +config DEFAULT_IO_DELAY_TYPE + int + default IO_DELAY_TYPE_UDELAY +endif + +if IO_DELAY_NONE +config DEFAULT_IO_DELAY_TYPE + int + default IO_DELAY_TYPE_NONE +endif + +config DEBUG_BOOT_PARAMS + bool "Debug boot parameters" + depends on DEBUG_KERNEL + depends on DEBUG_FS + ---help--- + This option will cause struct boot_params to be exported via debugfs. + +config CPA_DEBUG + bool "CPA self-test code" + depends on DEBUG_KERNEL + ---help--- + Do change_page_attr() self-tests every 30 seconds. + +config OPTIMIZE_INLINING + bool "Allow gcc to uninline functions marked 'inline'" + ---help--- + This option determines if the kernel forces gcc to inline the functions + developers have marked 'inline'. Doing so takes away freedom from gcc to + do what it thinks is best, which is desirable for the gcc 3.x series of + compilers. The gcc 4.x series have a rewritten inlining algorithm and + enabling this option will generate a smaller kernel there. Hopefully + this algorithm is so good that allowing gcc 4.x and above to make the + decision will become the default in the future. Until then this option + is there to test gcc for this. + + If unsure, say N. + +config DEBUG_STRICT_USER_COPY_CHECKS + bool "Strict copy size checks" + depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING + ---help--- + Enabling this option turns a certain set of sanity checks for user + copy operations into compile time failures. + + The copy_from_user() etc checks are there to help test if there + are sufficient security checks on the length argument of + the copy operation, by having gcc prove that the argument is + within bounds. + + If unsure, or if you run an older (pre 4.4) gcc, say N. + +config DEBUG_NMI_SELFTEST + bool "NMI Selftest" + depends on DEBUG_KERNEL && X86_LOCAL_APIC + ---help--- + Enabling this option turns on a quick NMI selftest to verify + that the NMI behaves correctly. + + This might help diagnose strange hangs that rely on NMI to + function properly. + + If unsure, say N. 
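A note on the IO delay choice above: all four variants implement the same tiny pause between accesses to legacy I/O ports, which slow ISA-era devices require. The default port-0x80 method boils down to a single OUT instruction to an unused port, as in this sketch (illustrative only; the function name is made up, but the boot code's own io_delay() helper in boot.h, later in this patch, uses the same instruction):

	/* Illustrative sketch of the IO_DELAY_0X80 technique. */
	static inline void io_delay_0x80(void)
	{
		/* An OUT to unused port 0x80 takes roughly 1us on the ISA bus. */
		asm volatile("outb %%al, $0x80" : : "a" (0));
	}
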
+ +endmenu diff --git a/arch/x86/Makefile b/arch/x86/Makefile new file mode 100644 index 00000000..b1c611e6 --- /dev/null +++ b/arch/x86/Makefile @@ -0,0 +1,224 @@ +# Unified Makefile for i386 and x86_64 + +# select defconfig based on actual architecture +ifeq ($(ARCH),x86) + KBUILD_DEFCONFIG := i386_defconfig +else + KBUILD_DEFCONFIG := $(ARCH)_defconfig +endif + +# BITS is used as extension for files which are available in a 32 bit +# and a 64 bit version to simplify shared Makefiles. +# e.g.: obj-y += foo_$(BITS).o +export BITS + +ifeq ($(CONFIG_X86_32),y) + BITS := 32 + UTS_MACHINE := i386 + CHECKFLAGS += -D__i386__ + + biarch := $(call cc-option,-m32) + KBUILD_AFLAGS += $(biarch) + KBUILD_CFLAGS += $(biarch) + + ifdef CONFIG_RELOCATABLE + LDFLAGS_vmlinux := --emit-relocs + endif + + KBUILD_CFLAGS += -msoft-float -mregparm=3 -freg-struct-return + + # prevent gcc from keeping the stack 16 byte aligned + KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) + + # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use + # a lot more stack due to the lack of sharing of stacklots: + KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0400, \ + $(call cc-option,-fno-unit-at-a-time)) + + # CPU-specific tuning. Anything which can be shared with UML should go here. + include $(srctree)/arch/x86/Makefile_32.cpu + KBUILD_CFLAGS += $(cflags-y) + + # temporary until string.h is fixed + KBUILD_CFLAGS += -ffreestanding +else + BITS := 64 + UTS_MACHINE := x86_64 + CHECKFLAGS += -D__x86_64__ -m64 + + KBUILD_AFLAGS += -m64 + KBUILD_CFLAGS += -m64 + + # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) + cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) + cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) + + cflags-$(CONFIG_MCORE2) += \ + $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) + cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \ + $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) + cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) + KBUILD_CFLAGS += $(cflags-y) + + KBUILD_CFLAGS += -mno-red-zone + KBUILD_CFLAGS += -mcmodel=kernel + + # -funit-at-a-time shrinks the kernel .text considerably + # unfortunately it makes reading oopses harder. 
+	KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
+
+	# this works around some issues with generating unwind tables in older gccs
+	# newer gccs do it by default
+	KBUILD_CFLAGS += -maccumulate-outgoing-args
+endif
+
+ifdef CONFIG_CC_STACKPROTECTOR
+	cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
+	ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y)
+		stackp-y := -fstack-protector
+		KBUILD_CFLAGS += $(stackp-y)
+	else
+		$(warning stack protector enabled but no compiler support)
+	endif
+endif
+
+ifdef CONFIG_X86_X32
+	x32_ld_ok := $(call try-run,\
+			/bin/echo -e '1: .quad 1b' | \
+			$(CC) $(KBUILD_AFLAGS) -c -xassembler -o "$$TMP" - && \
+			$(OBJCOPY) -O elf32-x86-64 "$$TMP" "$$TMPO" && \
+			$(LD) -m elf32_x86_64 "$$TMPO" -o "$$TMP",y,n)
+	ifeq ($(x32_ld_ok),y)
+		CONFIG_X86_X32_ABI := y
+		KBUILD_AFLAGS += -DCONFIG_X86_X32_ABI
+		KBUILD_CFLAGS += -DCONFIG_X86_X32_ABI
+	else
+		$(warning CONFIG_X86_X32 enabled but no binutils support)
+	endif
+endif
+export CONFIG_X86_X32_ABI
+
+# Don't unroll struct assignments with kmemcheck enabled
+ifeq ($(CONFIG_KMEMCHECK),y)
+	KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy)
+endif
+
+# The stack pointer is addressed differently on 32-bit and 64-bit x86
+sp-$(CONFIG_X86_32) := esp
+sp-$(CONFIG_X86_64) := rsp
+
+# does binutils support CFI?
+cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1)
+# is .cfi_signal_frame supported too?
+cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1)
+cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
+
+# does binutils support specific instructions?
+asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
+
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
+
+LDFLAGS := -m elf_$(UTS_MACHINE)
+
+# Speed up the build
+KBUILD_CFLAGS += -pipe
+# Workaround for a gcc prerelease that unfortunately was shipped in a suse release
+KBUILD_CFLAGS += -Wno-sign-compare
+#
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+# prevent gcc from generating any FP code by mistake
+KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
+KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
+
+KBUILD_CFLAGS += $(mflags-y)
+KBUILD_AFLAGS += $(mflags-y)
+
+archscripts:
+	$(Q)$(MAKE) $(build)=arch/x86/tools relocs
+
+###
+# Syscall table generation
+
+archheaders:
+	$(Q)$(MAKE) $(build)=arch/x86/syscalls all
+
+###
+# Kernel objects
+
+head-y := arch/x86/kernel/head_$(BITS).o
+head-y += arch/x86/kernel/head$(BITS).o
+head-y += arch/x86/kernel/head.o
+head-y += arch/x86/kernel/init_task.o
+
+libs-y += arch/x86/lib/
+
+# See arch/x86/Kbuild for content of core part of the kernel
+core-y += arch/x86/
+
+# drivers-y are linked after core-y
+drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
+drivers-$(CONFIG_PCI) += arch/x86/pci/
+
+# must be linked after kernel/
+drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
+
+# suspend and hibernation support
+drivers-$(CONFIG_PM) += arch/x86/power/
+
+drivers-$(CONFIG_FB) += arch/x86/video/
+
+####
+# boot loader support.
# Several targets are kept for legacy purposes
+
+boot := arch/x86/boot
+
+BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage
+
+PHONY += bzImage $(BOOT_TARGETS)
+
+# Default kernel to build
+all: bzImage
+
+# KBUILD_IMAGE specifies the target image being built
+KBUILD_IMAGE := $(boot)/bzImage
+
+bzImage: vmlinux
+ifeq ($(CONFIG_X86_DECODER_SELFTEST),y)
+	$(Q)$(MAKE) $(build)=arch/x86/tools posttest
+endif
+	$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
+	$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
+	$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
+
+$(BOOT_TARGETS): vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $@
+
+PHONY += install
+install:
+	$(Q)$(MAKE) $(build)=$(boot) $@
+
+PHONY += vdso_install
+vdso_install:
+	$(Q)$(MAKE) $(build)=arch/x86/vdso $@
+
+archclean:
+	$(Q)rm -rf $(objtree)/arch/i386
+	$(Q)rm -rf $(objtree)/arch/x86_64
+	$(Q)$(MAKE) $(clean)=$(boot)
+	$(Q)$(MAKE) $(clean)=arch/x86/tools
+
+define archhelp
+  echo  '* bzImage      - Compressed kernel image (arch/x86/boot/bzImage)'
+  echo  '  install      - Install kernel using'
+  echo  '                  (your) ~/bin/$(INSTALLKERNEL) or'
+  echo  '                  (distribution) /sbin/$(INSTALLKERNEL) or'
+  echo  '                  install to $$(INSTALL_PATH) and run lilo'
+  echo  '  fdimage      - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
+  echo  '  fdimage144   - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
+  echo  '  fdimage288   - Create 2.8MB boot floppy image (arch/x86/boot/fdimage)'
+  echo  '  isoimage     - Create a boot CD-ROM image (arch/x86/boot/image.iso)'
+  echo  '                  bzdisk/fdimage*/isoimage also accept:'
+  echo  '                  FDARGS="..."  arguments for the booted kernel'
+  echo  '                  FDINITRD=file initrd for the booted kernel'
endef
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
new file mode 100644
index 00000000..36b62bc5
--- /dev/null
+++ b/arch/x86/Makefile.um
@@ -0,0 +1,60 @@
+core-y += arch/x86/crypto/
+
+ifeq ($(CONFIG_X86_32),y)
+START := 0x8048000
+
+LDFLAGS += -m elf_i386
+ELF_ARCH := i386
+ELF_FORMAT := elf32-i386
+CHECKFLAGS += -D__i386__
+
+KBUILD_CFLAGS += $(call cc-option,-m32)
+KBUILD_AFLAGS += $(call cc-option,-m32)
+LINK-y += $(call cc-option,-m32)
+
+export LDFLAGS
+
+LDS_EXTRA := -Ui386
+export LDS_EXTRA
+
+# First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y.
+include $(srctree)/arch/x86/Makefile_32.cpu
+
+# prevent gcc from keeping the stack 16 byte aligned. Taken from i386.
+cflags-y += $(call cc-option,-mpreferred-stack-boundary=2)
+
+# Prevent sprintf in nfsd from being converted to strcpy and resulting in
+# an unresolved reference.
+cflags-y += -ffreestanding
+
+# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
+# a lot more stack due to the lack of sharing of stacklots. Also, gcc
+# 4.3.0 needs -funit-at-a-time for extern inline functions.
+KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then \
+			echo $(call cc-option,-fno-unit-at-a-time); \
+			else echo $(call cc-option,-funit-at-a-time); fi ;)
+
+KBUILD_CFLAGS += $(cflags-y)
+
+else
+
+START := 0x60000000
+
+KBUILD_CFLAGS += -fno-builtin -m64
+
+CHECKFLAGS += -m64 -D__x86_64__
+KBUILD_AFLAGS += -m64
+LDFLAGS += -m elf_x86_64
+KBUILD_CPPFLAGS += -m64
+
+ELF_ARCH := i386:x86-64
+ELF_FORMAT := elf64-x86-64
+
+# Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example.
+
+LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64
+LINK-y += -m64
+
+# Do unit-at-a-time unconditionally on x86_64, following the host
+KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
+endif
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
new file mode 100644
index 00000000..86cee7b7
--- /dev/null
+++ b/arch/x86/Makefile_32.cpu
@@ -0,0 +1,71 @@
+# CPU tuning section - shared with UML.
+# Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML.
+
+#-mtune exists since gcc 3.4
+HAS_MTUNE := $(call cc-option-yn, -mtune=i386)
+ifeq ($(HAS_MTUNE),y)
+tune = $(call cc-option,-mtune=$(1),$(2))
+else
+tune = $(call cc-option,-mcpu=$(1),$(2))
+endif
+
+align := $(cc-option-align)
+cflags-$(CONFIG_M386) += -march=i386
+cflags-$(CONFIG_M486) += -march=i486
+cflags-$(CONFIG_M586) += -march=i586
+cflags-$(CONFIG_M586TSC) += -march=i586
+cflags-$(CONFIG_M586MMX) += -march=pentium-mmx
+cflags-$(CONFIG_M686) += -march=i686
+cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call tune,pentium2)
+cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call tune,pentium3)
+cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call tune,pentium3)
+cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call tune,pentium4)
+cflags-$(CONFIG_MK6) += -march=k6
+# Please note that patches that add -march=athlon-xp and friends are pointless.
+# They make zero difference whatsoever to performance at this time.
+cflags-$(CONFIG_MK7) += -march=athlon
+cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon)
+cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
+cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
+cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
+cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586)
+cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
+cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
+cflags-$(CONFIG_MVIAC7) += -march=i686
+cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
+cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
+	$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
+
+# AMD Elan support
+cflags-$(CONFIG_MELAN) += -march=i486
+
+# Geode GX1 support
+cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx
+cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx)
+# add at the end to override any tuning options from earlier
+# cpu entries
+cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686))
+
+# Work around the pentium-mmx code generator madness of gcc4.4.x which
+# does stack alignment by generating horrible code _before_ the mcount
+# prologue (push %ebp, mov %esp, %ebp) which breaks the function graph
+# tracer assumptions. For i686, generic, core2 this is set by the
+# compiler anyway
+ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y)
+ADD_ACCUMULATE_OUTGOING_ARGS := y
+endif
+
+# Work around a bug in early gcc implementations of asm goto that
+# caused gcc to mess up the push and pop of the stack in some
+# uses of asm goto.
+ifeq ($(CONFIG_JUMP_LABEL), y) +ADD_ACCUMULATE_OUTGOING_ARGS := y +endif + +cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args) + +# Bug fix for binutils: this option is required in order to keep +# binutils from generating NOPL instructions against our will. +ifneq ($(CONFIG_X86_P6_NOP),y) +cflags-y += $(call cc-option,-Wa$(comma)-mtune=generic32,) +endif diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile new file mode 100644 index 00000000..5a747dd8 --- /dev/null +++ b/arch/x86/boot/Makefile @@ -0,0 +1,194 @@ +# +# arch/x86/boot/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# Changed by many, many contributors over the years. +# + +# If you want to preset the SVGA mode, uncomment the next line and +# set SVGA_MODE to whatever number you want. +# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. +# The number is the same as you would ordinarily press at bootup. + +SVGA_MODE := -DSVGA_MODE=NORMAL_VGA + +targets := vmlinux.bin setup.bin setup.elf bzImage +targets += fdimage fdimage144 fdimage288 image.iso mtools.conf +subdir- := compressed + +setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o +setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o +setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o +setup-y += video-mode.o version.o +setup-$(CONFIG_X86_APM_BOOT) += apm.o + +# The link order of the video-*.o modules can matter. In particular, +# video-vga.o *must* be listed first, followed by video-vesa.o. +# Hardware-specific drivers should follow in the order they should be +# probed, and video-bios.o should typically be last. +setup-y += video-vga.o +setup-y += video-vesa.o +setup-y += video-bios.o + +targets += $(setup-y) +hostprogs-y := mkcpustr tools/build + +HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(LINUXINCLUDE) \ + -D__EXPORTED_HEADERS__ + +$(obj)/cpu.o: $(obj)/cpustr.h + +quiet_cmd_cpustr = CPUSTR $@ + cmd_cpustr = $(obj)/mkcpustr > $@ +targets += cpustr.h +$(obj)/cpustr.h: $(obj)/mkcpustr FORCE + $(call if_changed,cpustr) + +# --------------------------------------------------------------------------- + +# How to compile the 16-bit code. Note we always compile for -march=i386, +# that way we can complain to the user if the CPU is insufficient. 
+KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \ + -DDISABLE_BRANCH_PROFILING \ + -Wall -Wstrict-prototypes \ + -march=i386 -mregparm=3 \ + -include $(srctree)/$(src)/code16gcc.h \ + -fno-strict-aliasing -fomit-frame-pointer \ + $(call cc-option, -ffreestanding) \ + $(call cc-option, -fno-toplevel-reorder,\ + $(call cc-option, -fno-unit-at-a-time)) \ + $(call cc-option, -fno-stack-protector) \ + $(call cc-option, -mpreferred-stack-boundary=2) +KBUILD_CFLAGS += $(call cc-option, -m32) +KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ +GCOV_PROFILE := n + +$(obj)/bzImage: asflags-y := $(SVGA_MODE) + +quiet_cmd_image = BUILD $@ +cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin > $@ + +$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE + $(call if_changed,image) + @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' + +OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S +$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE + $(call if_changed,objcopy) + +SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) + +sed-voffset := -e 's/^\([0-9a-fA-F]*\) . \(_text\|_end\)$$/\#define VO_\2 0x\1/p' + +quiet_cmd_voffset = VOFFSET $@ + cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ + +targets += voffset.h +$(obj)/voffset.h: vmlinux FORCE + $(call if_changed,voffset) + +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p' + +quiet_cmd_zoffset = ZOFFSET $@ + cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ + +targets += zoffset.h +$(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE + $(call if_changed,zoffset) + + +AFLAGS_header.o += -I$(obj) +$(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h + +LDFLAGS_setup.elf := -T +$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE + $(call if_changed,ld) + +OBJCOPYFLAGS_setup.bin := -O binary +$(obj)/setup.bin: $(obj)/setup.elf FORCE + $(call if_changed,objcopy) + +$(obj)/compressed/vmlinux: FORCE + $(Q)$(MAKE) $(build)=$(obj)/compressed $@ + +# Set this if you want to pass append arguments to the +# bzdisk/fdimage/isoimage kernel +FDARGS = +# Set this if you want an initrd included with the +# bzdisk/fdimage/isoimage kernel +FDINITRD = + +image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,) + +$(obj)/mtools.conf: $(src)/mtools.conf.in + sed -e 's|@OBJ@|$(obj)|g' < $< > $@ + +# This requires write access to /dev/fd0 +bzdisk: $(obj)/bzImage $(obj)/mtools.conf + MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync + syslinux /dev/fd0 ; sync + echo '$(image_cmdline)' | \ + MTOOLSRC=$(src)/mtools.conf mcopy - a:syslinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \ + fi + MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage a:linux ; sync + +# These require being root or having syslinux 2.02 or higher installed +fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf + dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440 + MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync + syslinux $(obj)/fdimage ; sync + echo '$(image_cmdline)' | \ + MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \ + fi + MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage v:linux ; sync + +fdimage288: $(obj)/bzImage $(obj)/mtools.conf + dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880 + MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync + syslinux $(obj)/fdimage ; sync + echo '$(image_cmdline)' | \ + 
MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \ + fi + MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage w:linux ; sync + +isoimage: $(obj)/bzImage + -rm -rf $(obj)/isoimage + mkdir $(obj)/isoimage + for i in lib lib64 share end ; do \ + if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \ + cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \ + break ; \ + fi ; \ + if [ $$i = end ] ; then exit 1 ; fi ; \ + done + cp $(obj)/bzImage $(obj)/isoimage/linux + echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \ + fi + mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \ + -no-emul-boot -boot-load-size 4 -boot-info-table \ + $(obj)/isoimage + isohybrid $(obj)/image.iso 2>/dev/null || true + rm -rf $(obj)/isoimage + +bzlilo: $(obj)/bzImage + if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi + if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi + cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz + cp System.map $(INSTALL_PATH)/ + if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi + +install: + sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ + System.map "$(INSTALL_PATH)" diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c new file mode 100644 index 00000000..64a31a6d --- /dev/null +++ b/arch/x86/boot/a20.c @@ -0,0 +1,165 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007-2008 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * Enable A20 gate (return -1 on failure) + */ + +#include "boot.h" + +#define MAX_8042_LOOPS 100000 +#define MAX_8042_FF 32 + +static int empty_8042(void) +{ + u8 status; + int loops = MAX_8042_LOOPS; + int ffs = MAX_8042_FF; + + while (loops--) { + io_delay(); + + status = inb(0x64); + if (status == 0xff) { + /* FF is a plausible, but very unlikely status */ + if (!--ffs) + return -1; /* Assume no KBC present */ + } + if (status & 1) { + /* Read and discard input data */ + io_delay(); + (void)inb(0x60); + } else if (!(status & 2)) { + /* Buffers empty, finished! */ + return 0; + } + } + + return -1; +} + +/* Returns nonzero if the A20 line is enabled. The memory address + used as a test is the int $0x80 vector, which should be safe. */ + +#define A20_TEST_ADDR (4*0x80) +#define A20_TEST_SHORT 32 +#define A20_TEST_LONG 2097152 /* 2^21 */ + +static int a20_test(int loops) +{ + int ok = 0; + int saved, ctr; + + set_fs(0x0000); + set_gs(0xffff); + + saved = ctr = rdfs32(A20_TEST_ADDR); + + while (loops--) { + wrfs32(++ctr, A20_TEST_ADDR); + io_delay(); /* Serialize and make delay constant */ + ok = rdgs32(A20_TEST_ADDR+0x10) ^ ctr; + if (ok) + break; + } + + wrfs32(saved, A20_TEST_ADDR); + return ok; +} + +/* Quick test to see if A20 is already enabled */ +static int a20_test_short(void) +{ + return a20_test(A20_TEST_SHORT); +} + +/* Longer test that actually waits for A20 to come on line; this + is useful when dealing with the KBC or other slow external circuitry. 
*/ +static int a20_test_long(void) +{ + return a20_test(A20_TEST_LONG); +} + +static void enable_a20_bios(void) +{ + struct biosregs ireg; + + initregs(&ireg); + ireg.ax = 0x2401; + intcall(0x15, &ireg, NULL); +} + +static void enable_a20_kbc(void) +{ + empty_8042(); + + outb(0xd1, 0x64); /* Command write */ + empty_8042(); + + outb(0xdf, 0x60); /* A20 on */ + empty_8042(); + + outb(0xff, 0x64); /* Null command, but UHCI wants it */ + empty_8042(); +} + +static void enable_a20_fast(void) +{ + u8 port_a; + + port_a = inb(0x92); /* Configuration port A */ + port_a |= 0x02; /* Enable A20 */ + port_a &= ~0x01; /* Do not reset machine */ + outb(port_a, 0x92); +} + +/* + * Actual routine to enable A20; return 0 on ok, -1 on failure + */ + +#define A20_ENABLE_LOOPS 255 /* Number of times to try */ + +int enable_a20(void) +{ + int loops = A20_ENABLE_LOOPS; + int kbc_err; + + while (loops--) { + /* First, check to see if A20 is already enabled + (legacy free, etc.) */ + if (a20_test_short()) + return 0; + + /* Next, try the BIOS (INT 0x15, AX=0x2401) */ + enable_a20_bios(); + if (a20_test_short()) + return 0; + + /* Try enabling A20 through the keyboard controller */ + kbc_err = empty_8042(); + + if (a20_test_short()) + return 0; /* BIOS worked, but with delayed reaction */ + + if (!kbc_err) { + enable_a20_kbc(); + if (a20_test_long()) + return 0; + } + + /* Finally, try enabling the "fast A20 gate" */ + enable_a20_fast(); + if (a20_test_long()) + return 0; + } + + return -1; +} diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c new file mode 100644 index 00000000..ee274834 --- /dev/null +++ b/arch/x86/boot/apm.c @@ -0,0 +1,75 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * Original APM BIOS checking by Stephen Rothwell, May 1994 + * (sfr@canb.auug.org.au) + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * Get APM BIOS information + */ + +#include "boot.h" + +int query_apm_bios(void) +{ + struct biosregs ireg, oreg; + + /* APM BIOS installation check */ + initregs(&ireg); + ireg.ah = 0x53; + intcall(0x15, &ireg, &oreg); + + if (oreg.flags & X86_EFLAGS_CF) + return -1; /* No APM BIOS */ + + if (oreg.bx != 0x504d) /* "PM" signature */ + return -1; + + if (!(oreg.cx & 0x02)) /* 32 bits supported? */ + return -1; + + /* Disconnect first, just in case */ + ireg.al = 0x04; + intcall(0x15, &ireg, NULL); + + /* 32-bit connect */ + ireg.al = 0x03; + intcall(0x15, &ireg, &oreg); + + boot_params.apm_bios_info.cseg = oreg.ax; + boot_params.apm_bios_info.offset = oreg.ebx; + boot_params.apm_bios_info.cseg_16 = oreg.cx; + boot_params.apm_bios_info.dseg = oreg.dx; + boot_params.apm_bios_info.cseg_len = oreg.si; + boot_params.apm_bios_info.cseg_16_len = oreg.hsi; + boot_params.apm_bios_info.dseg_len = oreg.di; + + if (oreg.flags & X86_EFLAGS_CF) + return -1; + + /* Redo the installation check as the 32-bit connect; + some BIOSes return different flags this way... 
 */
+
+	ireg.al = 0x00;
+	intcall(0x15, &ireg, &oreg);
+
+	if ((oreg.eflags & X86_EFLAGS_CF) || oreg.bx != 0x504d) {
+		/* Failure with 32-bit connect, try to disconnect and ignore */
+		ireg.al = 0x04;
+		intcall(0x15, &ireg, NULL);
+		return -1;
+	}
+
+	boot_params.apm_bios_info.version = oreg.ax;
+	boot_params.apm_bios_info.flags = oreg.cx;
+	return 0;
+}
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
new file mode 100644
index 00000000..1dfbf64e
--- /dev/null
+++ b/arch/x86/boot/bioscall.S
@@ -0,0 +1,82 @@
+/* -----------------------------------------------------------------------
+ *
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2 or (at your
+ *   option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * "Glove box" for BIOS calls. Avoids the constant problems with BIOSes
+ * touching registers they shouldn't be.
+ */
+
+	.code16gcc
+	.text
+	.globl	intcall
+	.type	intcall, @function
+intcall:
+	/* Self-modify the INT instruction. Ugly, but works. */
+	cmpb	%al, 3f
+	je	1f
+	movb	%al, 3f
+	jmp	1f		/* Synchronize pipeline */
+1:
+	/* Save state */
+	pushfl
+	pushw	%fs
+	pushw	%gs
+	pushal
+
+	/* Copy input state to stack frame */
+	subw	$44, %sp
+	movw	%dx, %si
+	movw	%sp, %di
+	movw	$11, %cx
+	rep; movsd
+
+	/* Pop full state from the stack */
+	popal
+	popw	%gs
+	popw	%fs
+	popw	%es
+	popw	%ds
+	popfl
+
+	/* Actual INT */
+	.byte	0xcd		/* INT opcode */
+3:	.byte	0
+
+	/* Push full state to the stack */
+	pushfl
+	pushw	%ds
+	pushw	%es
+	pushw	%fs
+	pushw	%gs
+	pushal
+
+	/* Re-establish C environment invariants */
+	cld
+	movzwl	%sp, %esp
+	movw	%cs, %ax
+	movw	%ax, %ds
+	movw	%ax, %es
+
+	/* Copy output state from stack frame */
+	movw	68(%esp), %di	/* Original %cx == 3rd argument */
+	andw	%di, %di
+	jz	4f
+	movw	%sp, %si
+	movw	$11, %cx
+	rep; movsd
+4:	addw	$44, %sp
+
+	/* Restore state and return */
+	popal
+	popw	%gs
+	popw	%fs
+	popfl
+	retl
+	.size	intcall, .-intcall
diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h
new file mode 100644
index 00000000..878e4b99
--- /dev/null
+++ b/arch/x86/boot/bitops.h
@@ -0,0 +1,43 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright (C) 1991, 1992 Linus Torvalds
+ *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Very simple bitops for the boot code.
+ */
+
+#ifndef BOOT_BITOPS_H
+#define BOOT_BITOPS_H
+#define _LINUX_BITOPS_H		/* Inhibit inclusion of <linux/bitops.h> */
+
+static inline int constant_test_bit(int nr, const void *addr)
+{
+	const u32 *p = (const u32 *)addr;
+	return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0;
+}
+static inline int variable_test_bit(int nr, const void *addr)
+{
+	u8 v;
+	const u32 *p = (const u32 *)addr;
+
+	asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr));
+	return v;
+}
+
+#define test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ constant_test_bit((nr),(addr)) : \
+ variable_test_bit((nr),(addr)))
+
+static inline void set_bit(int nr, void *addr)
+{
+	asm("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr));
+}
+
+#endif /* BOOT_BITOPS_H */
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
new file mode 100644
index 00000000..18997e5a
--- /dev/null
+++ b/arch/x86/boot/boot.h
@@ -0,0 +1,369 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright (C) 1991, 1992 Linus Torvalds
+ *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Header file for the real-mode kernel code
+ */
+
+#ifndef BOOT_BOOT_H
+#define BOOT_BOOT_H
+
+#define STACK_SIZE	512	/* Minimum number of bytes for stack */
+
+#ifndef __ASSEMBLY__
+
+#include <stdarg.h>
+#include <linux/types.h>
+#include <linux/edd.h>
+#include <asm/boot.h>
+#include <asm/setup.h>
+#include "bitops.h"
+#include <asm/cpufeature.h>
+#include <asm/processor-flags.h>
+#include "ctype.h"
+
+/* Useful macros */
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+
+extern struct setup_header hdr;
+extern struct boot_params boot_params;
+
+#define cpu_relax()	asm volatile("rep; nop")
+
+/* Basic port I/O */
+static inline void outb(u8 v, u16 port)
+{
+	asm volatile("outb %0,%1" : : "a" (v), "dN" (port));
+}
+static inline u8 inb(u16 port)
+{
+	u8 v;
+	asm volatile("inb %1,%0" : "=a" (v) : "dN" (port));
+	return v;
+}
+
+static inline void outw(u16 v, u16 port)
+{
+	asm volatile("outw %0,%1" : : "a" (v), "dN" (port));
+}
+static inline u16 inw(u16 port)
+{
+	u16 v;
+	asm volatile("inw %1,%0" : "=a" (v) : "dN" (port));
+	return v;
+}
+
+static inline void outl(u32 v, u16 port)
+{
+	asm volatile("outl %0,%1" : : "a" (v), "dN" (port));
+}
+static inline u32 inl(u16 port)
+{
+	u32 v;
+	asm volatile("inl %1,%0" : "=a" (v) : "dN" (port));
+	return v;
+}
+
+static inline void io_delay(void)
+{
+	const u16 DELAY_PORT = 0x80;
+	asm volatile("outb %%al,%0" : : "dN" (DELAY_PORT));
+}
+
+/* These functions are used to reference data in other segments.
*/ + +static inline u16 ds(void) +{ + u16 seg; + asm("movw %%ds,%0" : "=rm" (seg)); + return seg; +} + +static inline void set_fs(u16 seg) +{ + asm volatile("movw %0,%%fs" : : "rm" (seg)); +} +static inline u16 fs(void) +{ + u16 seg; + asm volatile("movw %%fs,%0" : "=rm" (seg)); + return seg; +} + +static inline void set_gs(u16 seg) +{ + asm volatile("movw %0,%%gs" : : "rm" (seg)); +} +static inline u16 gs(void) +{ + u16 seg; + asm volatile("movw %%gs,%0" : "=rm" (seg)); + return seg; +} + +typedef unsigned int addr_t; + +static inline u8 rdfs8(addr_t addr) +{ + u8 v; + asm volatile("movb %%fs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr)); + return v; +} +static inline u16 rdfs16(addr_t addr) +{ + u16 v; + asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr)); + return v; +} +static inline u32 rdfs32(addr_t addr) +{ + u32 v; + asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr)); + return v; +} + +static inline void wrfs8(u8 v, addr_t addr) +{ + asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "qi" (v)); +} +static inline void wrfs16(u16 v, addr_t addr) +{ + asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "ri" (v)); +} +static inline void wrfs32(u32 v, addr_t addr) +{ + asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "ri" (v)); +} + +static inline u8 rdgs8(addr_t addr) +{ + u8 v; + asm volatile("movb %%gs:%1,%0" : "=q" (v) : "m" (*(u8 *)addr)); + return v; +} +static inline u16 rdgs16(addr_t addr) +{ + u16 v; + asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr)); + return v; +} +static inline u32 rdgs32(addr_t addr) +{ + u32 v; + asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr)); + return v; +} + +static inline void wrgs8(u8 v, addr_t addr) +{ + asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "qi" (v)); +} +static inline void wrgs16(u16 v, addr_t addr) +{ + asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "ri" (v)); +} +static inline void wrgs32(u32 v, addr_t addr) +{ + asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "ri" (v)); +} + +/* Note: these only return true/false, not a signed return value! */ +static inline int memcmp(const void *s1, const void *s2, size_t len) +{ + u8 diff; + asm("repe; cmpsb; setnz %0" + : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + return diff; +} + +static inline int memcmp_fs(const void *s1, addr_t s2, size_t len) +{ + u8 diff; + asm volatile("fs; repe; cmpsb; setnz %0" + : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + return diff; +} +static inline int memcmp_gs(const void *s1, addr_t s2, size_t len) +{ + u8 diff; + asm volatile("gs; repe; cmpsb; setnz %0" + : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + return diff; +} + +/* Heap -- available for dynamic lists. 
*/ +extern char _end[]; +extern char *HEAP; +extern char *heap_end; +#define RESET_HEAP() ((void *)( HEAP = _end )) +static inline char *__get_heap(size_t s, size_t a, size_t n) +{ + char *tmp; + + HEAP = (char *)(((size_t)HEAP+(a-1)) & ~(a-1)); + tmp = HEAP; + HEAP += s*n; + return tmp; +} +#define GET_HEAP(type, n) \ + ((type *)__get_heap(sizeof(type),__alignof__(type),(n))) + +static inline bool heap_free(size_t n) +{ + return (int)(heap_end-HEAP) >= (int)n; +} + +/* copy.S */ + +void copy_to_fs(addr_t dst, void *src, size_t len); +void *copy_from_fs(void *dst, addr_t src, size_t len); +void copy_to_gs(addr_t dst, void *src, size_t len); +void *copy_from_gs(void *dst, addr_t src, size_t len); +void *memcpy(void *dst, void *src, size_t len); +void *memset(void *dst, int c, size_t len); + +#define memcpy(d,s,l) __builtin_memcpy(d,s,l) +#define memset(d,c,l) __builtin_memset(d,c,l) + +/* a20.c */ +int enable_a20(void); + +/* apm.c */ +int query_apm_bios(void); + +/* bioscall.c */ +struct biosregs { + union { + struct { + u32 edi; + u32 esi; + u32 ebp; + u32 _esp; + u32 ebx; + u32 edx; + u32 ecx; + u32 eax; + u32 _fsgs; + u32 _dses; + u32 eflags; + }; + struct { + u16 di, hdi; + u16 si, hsi; + u16 bp, hbp; + u16 _sp, _hsp; + u16 bx, hbx; + u16 dx, hdx; + u16 cx, hcx; + u16 ax, hax; + u16 gs, fs; + u16 es, ds; + u16 flags, hflags; + }; + struct { + u8 dil, dih, edi2, edi3; + u8 sil, sih, esi2, esi3; + u8 bpl, bph, ebp2, ebp3; + u8 _spl, _sph, _esp2, _esp3; + u8 bl, bh, ebx2, ebx3; + u8 dl, dh, edx2, edx3; + u8 cl, ch, ecx2, ecx3; + u8 al, ah, eax2, eax3; + }; + }; +}; +void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg); + +/* cmdline.c */ +int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize); +int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option); +static inline int cmdline_find_option(const char *option, char *buffer, int bufsize) +{ + return __cmdline_find_option(boot_params.hdr.cmd_line_ptr, option, buffer, bufsize); +} + +static inline int cmdline_find_option_bool(const char *option) +{ + return __cmdline_find_option_bool(boot_params.hdr.cmd_line_ptr, option); +} + + +/* cpu.c, cpucheck.c */ +struct cpu_features { + int level; /* Family, or 64 for x86-64 */ + int model; + u32 flags[NCAPINTS]; +}; +extern struct cpu_features cpu; +int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); +int validate_cpu(void); + +/* early_serial_console.c */ +extern int early_serial_base; +void console_init(void); + +/* edd.c */ +void query_edd(void); + +/* header.S */ +void __attribute__((noreturn)) die(void); + +/* mca.c */ +int query_mca(void); + +/* memory.c */ +int detect_memory(void); + +/* pm.c */ +void __attribute__((noreturn)) go_to_protected_mode(void); + +/* pmjump.S */ +void __attribute__((noreturn)) + protected_mode_jump(u32 entrypoint, u32 bootparams); + +/* printf.c */ +int sprintf(char *buf, const char *fmt, ...); +int vsprintf(char *buf, const char *fmt, va_list args); +int printf(const char *fmt, ...); + +/* regs.c */ +void initregs(struct biosregs *regs); + +/* string.c */ +int strcmp(const char *str1, const char *str2); +int strncmp(const char *cs, const char *ct, size_t count); +size_t strnlen(const char *s, size_t maxlen); +unsigned int atou(const char *s); +unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base); + +/* tty.c */ +void puts(const char *); +void putchar(int); +int getchar(void); +void kbd_flush(void); +int getchar_timeout(void); + +/* video.c */ 
+void set_video(void); + +/* video-mode.c */ +int set_mode(u16 mode); +int mode_defined(u16 mode); +void probe_cards(int unsafe); + +/* video-vesa.c */ +void vesa_store_edid(void); + +#endif /* __ASSEMBLY__ */ + +#endif /* BOOT_BOOT_H */ diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c new file mode 100644 index 00000000..6b3b6f70 --- /dev/null +++ b/arch/x86/boot/cmdline.c @@ -0,0 +1,158 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * Simple command-line parser for early boot. + */ + +#include "boot.h" + +static inline int myisspace(u8 c) +{ + return c <= ' '; /* Close enough approximation */ +} + +/* + * Find a non-boolean option, that is, "option=argument". In accordance + * with standard Linux practice, if this option is repeated, this returns + * the last instance on the command line. + * + * Returns the length of the argument (regardless of if it was + * truncated to fit in the buffer), or -1 on not found. + */ +int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize) +{ + addr_t cptr; + char c; + int len = -1; + const char *opptr = NULL; + char *bufptr = buffer; + enum { + st_wordstart, /* Start of word/after whitespace */ + st_wordcmp, /* Comparing this word */ + st_wordskip, /* Miscompare, skip */ + st_bufcpy /* Copying this to buffer */ + } state = st_wordstart; + + if (!cmdline_ptr || cmdline_ptr >= 0x100000) + return -1; /* No command line, or inaccessible */ + + cptr = cmdline_ptr & 0xf; + set_fs(cmdline_ptr >> 4); + + while (cptr < 0x10000 && (c = rdfs8(cptr++))) { + switch (state) { + case st_wordstart: + if (myisspace(c)) + break; + + /* else */ + state = st_wordcmp; + opptr = option; + /* fall through */ + + case st_wordcmp: + if (c == '=' && !*opptr) { + len = 0; + bufptr = buffer; + state = st_bufcpy; + } else if (myisspace(c)) { + state = st_wordstart; + } else if (c != *opptr++) { + state = st_wordskip; + } + break; + + case st_wordskip: + if (myisspace(c)) + state = st_wordstart; + break; + + case st_bufcpy: + if (myisspace(c)) { + state = st_wordstart; + } else { + if (len < bufsize-1) + *bufptr++ = c; + len++; + } + break; + } + } + + if (bufsize) + *bufptr = '\0'; + + return len; +} + +/* + * Find a boolean option (like quiet,noapic,nosmp....) 
+ * + * Returns the position of that option (starts counting with 1) + * or 0 on not found + */ +int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option) +{ + addr_t cptr; + char c; + int pos = 0, wstart = 0; + const char *opptr = NULL; + enum { + st_wordstart, /* Start of word/after whitespace */ + st_wordcmp, /* Comparing this word */ + st_wordskip, /* Miscompare, skip */ + } state = st_wordstart; + + if (!cmdline_ptr || cmdline_ptr >= 0x100000) + return -1; /* No command line, or inaccessible */ + + cptr = cmdline_ptr & 0xf; + set_fs(cmdline_ptr >> 4); + + while (cptr < 0x10000) { + c = rdfs8(cptr++); + pos++; + + switch (state) { + case st_wordstart: + if (!c) + return 0; + else if (myisspace(c)) + break; + + state = st_wordcmp; + opptr = option; + wstart = pos; + /* fall through */ + + case st_wordcmp: + if (!*opptr) + if (!c || myisspace(c)) + return wstart; + else + state = st_wordskip; + else if (!c) + return 0; + else if (c != *opptr++) + state = st_wordskip; + break; + + case st_wordskip: + if (!c) + return 0; + else if (myisspace(c)) + state = st_wordstart; + break; + } + } + + return 0; /* Buffer overrun */ +} diff --git a/arch/x86/boot/code16gcc.h b/arch/x86/boot/code16gcc.h new file mode 100644 index 00000000..d93e4801 --- /dev/null +++ b/arch/x86/boot/code16gcc.h @@ -0,0 +1,15 @@ +/* + * code16gcc.h + * + * This file is -include'd when compiling 16-bit C code. + * Note: this asm() needs to be emitted before gcc emits any code. + * Depending on gcc version, this requires -fno-unit-at-a-time or + * -fno-toplevel-reorder. + * + * Hopefully gcc will eventually have a real -m16 option so we can + * drop this hack long term. + */ + +#ifndef __ASSEMBLY__ +asm(".code16gcc"); +#endif diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile new file mode 100644 index 00000000..e398bb5d --- /dev/null +++ b/arch/x86/boot/compressed/Makefile @@ -0,0 +1,76 @@ +# +# linux/arch/x86/boot/compressed/Makefile +# +# create a compressed vmlinux image from the original vmlinux +# + +targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo head_$(BITS).o misc.o string.o cmdline.o early_serial_console.o piggy.o + +KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 +KBUILD_CFLAGS += -fno-strict-aliasing -fPIC +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +cflags-$(CONFIG_X86_32) := -march=i386 +cflags-$(CONFIG_X86_64) := -mcmodel=small +KBUILD_CFLAGS += $(cflags-y) +KBUILD_CFLAGS += $(call cc-option,-ffreestanding) +KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) + +KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ +GCOV_PROFILE := n + +LDFLAGS := -m elf_$(UTS_MACHINE) +LDFLAGS_vmlinux := -T + +hostprogs-y := mkpiggy +HOST_EXTRACFLAGS += -I$(srctree)/tools/include + +VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ + $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ + $(obj)/piggy.o + +ifeq ($(CONFIG_EFI_STUB), y) + VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o +endif + +$(obj)/vmlinux: $(VMLINUX_OBJS) FORCE + $(call if_changed,ld) + @: + +OBJCOPYFLAGS_vmlinux.bin := -R .comment -S +$(obj)/vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) + +targets += vmlinux.bin.all vmlinux.relocs + +CMD_RELOCS = arch/x86/tools/relocs +quiet_cmd_relocs = RELOCS $@ + cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $< +$(obj)/vmlinux.relocs: vmlinux FORCE + $(call if_changed,relocs) + +vmlinux.bin.all-y := $(obj)/vmlinux.bin 
+vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs
+
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
+	$(call if_changed,gzip)
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
+	$(call if_changed,bzip2)
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
+	$(call if_changed,lzma)
+$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
+	$(call if_changed,xzkern)
+$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
+	$(call if_changed,lzo)
+
+suffix-$(CONFIG_KERNEL_GZIP)	:= gz
+suffix-$(CONFIG_KERNEL_BZIP2)	:= bz2
+suffix-$(CONFIG_KERNEL_LZMA)	:= lzma
+suffix-$(CONFIG_KERNEL_XZ)	:= xz
+suffix-$(CONFIG_KERNEL_LZO)	:= lzo
+
+quiet_cmd_mkpiggy = MKPIGGY $@
+      cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
+
+targets += piggy.S
+$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
+	$(call if_changed,mkpiggy)
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
new file mode 100644
index 00000000..cb62f786
--- /dev/null
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -0,0 +1,21 @@
+#include "misc.h"
+
+static unsigned long fs;
+static inline void set_fs(unsigned long seg)
+{
+	fs = seg << 4;  /* shift it back */
+}
+typedef unsigned long addr_t;
+static inline char rdfs8(addr_t addr)
+{
+	return *((char *)(fs + addr));
+}
+#include "../cmdline.c"
+int cmdline_find_option(const char *option, char *buffer, int bufsize)
+{
+	return __cmdline_find_option(real_mode->hdr.cmd_line_ptr, option, buffer, bufsize);
+}
+int cmdline_find_option_bool(const char *option)
+{
+	return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option);
+}
diff --git a/arch/x86/boot/compressed/early_serial_console.c b/arch/x86/boot/compressed/early_serial_console.c
new file mode 100644
index 00000000..261e81fb
--- /dev/null
+++ b/arch/x86/boot/compressed/early_serial_console.c
@@ -0,0 +1,5 @@
+#include "misc.h"
+
+int early_serial_base;
+
+#include "../early_serial_console.c"
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
new file mode 100644
index 00000000..0cdfc0d2
--- /dev/null
+++ b/arch/x86/boot/compressed/eboot.c
@@ -0,0 +1,1022 @@
+/* -----------------------------------------------------------------------
+ *
+ *   Copyright 2011 Intel Corporation; author Matt Fleming
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+#include <linux/efi.h>
+#include <asm/efi.h>
+#include <asm/setup.h>
+#include <asm/desc.h>
+
+#include "eboot.h"
+
+static efi_system_table_t *sys_table;
+
+static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size,
+			      unsigned long *desc_size)
+{
+	efi_memory_desc_t *m = NULL;
+	efi_status_t status;
+	unsigned long key;
+	u32 desc_version;
+
+	*map_size = sizeof(*m) * 32;
+again:
+	/*
+	 * Add an additional efi_memory_desc_t because we're doing an
+	 * allocation which may be in a new descriptor region.
+ */ + *map_size += sizeof(*m); + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, *map_size, (void **)&m); + if (status != EFI_SUCCESS) + goto fail; + + status = efi_call_phys5(sys_table->boottime->get_memory_map, map_size, + m, &key, desc_size, &desc_version); + if (status == EFI_BUFFER_TOO_SMALL) { + efi_call_phys1(sys_table->boottime->free_pool, m); + goto again; + } + + if (status != EFI_SUCCESS) + efi_call_phys1(sys_table->boottime->free_pool, m); + +fail: + *map = m; + return status; +} + +/* + * Allocate at the highest possible address that is not above 'max'. + */ +static efi_status_t high_alloc(unsigned long size, unsigned long align, + unsigned long *addr, unsigned long max) +{ + unsigned long map_size, desc_size; + efi_memory_desc_t *map; + efi_status_t status; + unsigned long nr_pages; + u64 max_addr = 0; + int i; + + status = __get_map(&map, &map_size, &desc_size); + if (status != EFI_SUCCESS) + goto fail; + + nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; +again: + for (i = 0; i < map_size / desc_size; i++) { + efi_memory_desc_t *desc; + unsigned long m = (unsigned long)map; + u64 start, end; + + desc = (efi_memory_desc_t *)(m + (i * desc_size)); + if (desc->type != EFI_CONVENTIONAL_MEMORY) + continue; + + if (desc->num_pages < nr_pages) + continue; + + start = desc->phys_addr; + end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT); + + if ((start + size) > end || (start + size) > max) + continue; + + if (end - size > max) + end = max; + + if (round_down(end - size, align) < start) + continue; + + start = round_down(end - size, align); + + /* + * Don't allocate at 0x0. It will confuse code that + * checks pointers against NULL. + */ + if (start == 0x0) + continue; + + if (start > max_addr) + max_addr = start; + } + + if (!max_addr) + status = EFI_NOT_FOUND; + else { + status = efi_call_phys4(sys_table->boottime->allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &max_addr); + if (status != EFI_SUCCESS) { + max = max_addr; + max_addr = 0; + goto again; + } + + *addr = max_addr; + } + +free_pool: + efi_call_phys1(sys_table->boottime->free_pool, map); + +fail: + return status; +} + +/* + * Allocate at the lowest possible address. + */ +static efi_status_t low_alloc(unsigned long size, unsigned long align, + unsigned long *addr) +{ + unsigned long map_size, desc_size; + efi_memory_desc_t *map; + efi_status_t status; + unsigned long nr_pages; + int i; + + status = __get_map(&map, &map_size, &desc_size); + if (status != EFI_SUCCESS) + goto fail; + + nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; + for (i = 0; i < map_size / desc_size; i++) { + efi_memory_desc_t *desc; + unsigned long m = (unsigned long)map; + u64 start, end; + + desc = (efi_memory_desc_t *)(m + (i * desc_size)); + + if (desc->type != EFI_CONVENTIONAL_MEMORY) + continue; + + if (desc->num_pages < nr_pages) + continue; + + start = desc->phys_addr; + end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT); + + /* + * Don't allocate at 0x0. It will confuse code that + * checks pointers against NULL. Skip the first 8 + * bytes so we start at a nice even number. 
+ */ + if (start == 0x0) + start += 8; + + start = round_up(start, align); + if ((start + size) > end) + continue; + + status = efi_call_phys4(sys_table->boottime->allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &start); + if (status == EFI_SUCCESS) { + *addr = start; + break; + } + } + + if (i == map_size / desc_size) + status = EFI_NOT_FOUND; + +free_pool: + efi_call_phys1(sys_table->boottime->free_pool, map); +fail: + return status; +} + +static void low_free(unsigned long size, unsigned long addr) +{ + unsigned long nr_pages; + + nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; + efi_call_phys2(sys_table->boottime->free_pages, addr, size); +} + +static void find_bits(unsigned long mask, u8 *pos, u8 *size) +{ + u8 first, len; + + first = 0; + len = 0; + + if (mask) { + while (!(mask & 0x1)) { + mask = mask >> 1; + first++; + } + + while (mask & 0x1) { + mask = mask >> 1; + len++; + } + } + + *pos = first; + *size = len; +} + +/* + * See if we have Graphics Output Protocol + */ +static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, + unsigned long size) +{ + struct efi_graphics_output_protocol *gop, *first_gop; + struct efi_pixel_bitmask pixel_info; + unsigned long nr_gops; + efi_status_t status; + void **gop_handle; + u16 width, height; + u32 fb_base, fb_size; + u32 pixels_per_scan_line; + int pixel_format; + int i; + + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, size, &gop_handle); + if (status != EFI_SUCCESS) + return status; + + status = efi_call_phys5(sys_table->boottime->locate_handle, + EFI_LOCATE_BY_PROTOCOL, proto, + NULL, &size, gop_handle); + if (status != EFI_SUCCESS) + goto free_handle; + + first_gop = NULL; + + nr_gops = size / sizeof(void *); + for (i = 0; i < nr_gops; i++) { + struct efi_graphics_output_mode_info *info; + efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; + void *pciio; + void *h = gop_handle[i]; + + status = efi_call_phys3(sys_table->boottime->handle_protocol, + h, proto, &gop); + if (status != EFI_SUCCESS) + continue; + + efi_call_phys3(sys_table->boottime->handle_protocol, + h, &pciio_proto, &pciio); + + status = efi_call_phys4(gop->query_mode, gop, + gop->mode->mode, &size, &info); + if (status == EFI_SUCCESS && (!first_gop || pciio)) { + /* + * Apple provide GOPs that are not backed by + * real hardware (they're used to handle + * multiple displays). The workaround is to + * search for a GOP implementing the PCIIO + * protocol, and if one isn't found, to just + * fallback to the first GOP. + */ + width = info->horizontal_resolution; + height = info->vertical_resolution; + fb_base = gop->mode->frame_buffer_base; + fb_size = gop->mode->frame_buffer_size; + pixel_format = info->pixel_format; + pixel_info = info->pixel_information; + pixels_per_scan_line = info->pixels_per_scan_line; + + /* + * Once we've found a GOP supporting PCIIO, + * don't bother looking any further. + */ + if (pciio) + break; + + first_gop = gop; + } + } + + /* Did we find any GOPs? 
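+ *
+ * If so, the chosen mode is translated into the lfb_* fields of
+ * struct screen_info below. For the PIXEL_BIT_MASK case, find_bits()
+ * reduces each channel mask to a (position, width) pair, e.g.
+ *
+ *	find_bits(0x00ff0000, &pos, &size)  ->  pos = 16, size = 8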
*/ + if (!first_gop) + goto free_handle; + + /* EFI framebuffer */ + si->orig_video_isVGA = VIDEO_TYPE_EFI; + + si->lfb_width = width; + si->lfb_height = height; + si->lfb_base = fb_base; + si->lfb_size = fb_size; + si->pages = 1; + + if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) { + si->lfb_depth = 32; + si->lfb_linelength = pixels_per_scan_line * 4; + si->red_size = 8; + si->red_pos = 0; + si->green_size = 8; + si->green_pos = 8; + si->blue_size = 8; + si->blue_pos = 16; + si->rsvd_size = 8; + si->rsvd_pos = 24; + } else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) { + si->lfb_depth = 32; + si->lfb_linelength = pixels_per_scan_line * 4; + si->red_size = 8; + si->red_pos = 16; + si->green_size = 8; + si->green_pos = 8; + si->blue_size = 8; + si->blue_pos = 0; + si->rsvd_size = 8; + si->rsvd_pos = 24; + } else if (pixel_format == PIXEL_BIT_MASK) { + find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size); + find_bits(pixel_info.green_mask, &si->green_pos, + &si->green_size); + find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size); + find_bits(pixel_info.reserved_mask, &si->rsvd_pos, + &si->rsvd_size); + si->lfb_depth = si->red_size + si->green_size + + si->blue_size + si->rsvd_size; + si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8; + } else { + si->lfb_depth = 4; + si->lfb_linelength = si->lfb_width / 2; + si->red_size = 0; + si->red_pos = 0; + si->green_size = 0; + si->green_pos = 0; + si->blue_size = 0; + si->blue_pos = 0; + si->rsvd_size = 0; + si->rsvd_pos = 0; + } + +free_handle: + efi_call_phys1(sys_table->boottime->free_pool, gop_handle); + return status; +} + +/* + * See if we have Universal Graphics Adapter (UGA) protocol + */ +static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, + unsigned long size) +{ + struct efi_uga_draw_protocol *uga, *first_uga; + unsigned long nr_ugas; + efi_status_t status; + u32 width, height; + void **uga_handle = NULL; + int i; + + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, size, &uga_handle); + if (status != EFI_SUCCESS) + return status; + + status = efi_call_phys5(sys_table->boottime->locate_handle, + EFI_LOCATE_BY_PROTOCOL, uga_proto, + NULL, &size, uga_handle); + if (status != EFI_SUCCESS) + goto free_handle; + + first_uga = NULL; + + nr_ugas = size / sizeof(void *); + for (i = 0; i < nr_ugas; i++) { + efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; + void *handle = uga_handle[i]; + u32 w, h, depth, refresh; + void *pciio; + + status = efi_call_phys3(sys_table->boottime->handle_protocol, + handle, uga_proto, &uga); + if (status != EFI_SUCCESS) + continue; + + efi_call_phys3(sys_table->boottime->handle_protocol, + handle, &pciio_proto, &pciio); + + status = efi_call_phys5(uga->get_mode, uga, &w, &h, + &depth, &refresh); + if (status == EFI_SUCCESS && (!first_uga || pciio)) { + width = w; + height = h; + + /* + * Once we've found a UGA supporting PCIIO, + * don't bother looking any further. 
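+ *
+ * Note that UGA, unlike GOP, reports only width, height, depth and
+ * refresh rate and says nothing about the framebuffer layout, so
+ * the code below has to assume the common 32bpp BGRX ordering:
+ *
+ *	bits  0..7   8..15   16..23   24..31
+ *	      blue   green   red      reserved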
+ */ + if (pciio) + break; + + first_uga = uga; + } + } + + if (!first_uga) + goto free_handle; + + /* EFI framebuffer */ + si->orig_video_isVGA = VIDEO_TYPE_EFI; + + si->lfb_depth = 32; + si->lfb_width = width; + si->lfb_height = height; + + si->red_size = 8; + si->red_pos = 16; + si->green_size = 8; + si->green_pos = 8; + si->blue_size = 8; + si->blue_pos = 0; + si->rsvd_size = 8; + si->rsvd_pos = 24; + + +free_handle: + efi_call_phys1(sys_table->boottime->free_pool, uga_handle); + return status; +} + +void setup_graphics(struct boot_params *boot_params) +{ + efi_guid_t graphics_proto = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID; + struct screen_info *si; + efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; + efi_status_t status; + unsigned long size; + void **gop_handle = NULL; + void **uga_handle = NULL; + + si = &boot_params->screen_info; + memset(si, 0, sizeof(*si)); + + size = 0; + status = efi_call_phys5(sys_table->boottime->locate_handle, + EFI_LOCATE_BY_PROTOCOL, &graphics_proto, + NULL, &size, gop_handle); + if (status == EFI_BUFFER_TOO_SMALL) + status = setup_gop(si, &graphics_proto, size); + + if (status != EFI_SUCCESS) { + size = 0; + status = efi_call_phys5(sys_table->boottime->locate_handle, + EFI_LOCATE_BY_PROTOCOL, &uga_proto, + NULL, &size, uga_handle); + if (status == EFI_BUFFER_TOO_SMALL) + setup_uga(si, &uga_proto, size); + } +} + +struct initrd { + efi_file_handle_t *handle; + u64 size; +}; + +/* + * Check the cmdline for a LILO-style initrd= arguments. + * + * We only support loading an initrd from the same filesystem as the + * kernel image. + */ +static efi_status_t handle_ramdisks(efi_loaded_image_t *image, + struct setup_header *hdr) +{ + struct initrd *initrds; + unsigned long initrd_addr; + efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID; + u64 initrd_total; + efi_file_io_interface_t *io; + efi_file_handle_t *fh; + efi_status_t status; + int nr_initrds; + char *str; + int i, j, k; + + initrd_addr = 0; + initrd_total = 0; + + str = (char *)(unsigned long)hdr->cmd_line_ptr; + + j = 0; /* See close_handles */ + + if (!str || !*str) + return EFI_SUCCESS; + + for (nr_initrds = 0; *str; nr_initrds++) { + str = strstr(str, "initrd="); + if (!str) + break; + + str += 7; + + /* Skip any leading slashes */ + while (*str == '/' || *str == '\\') + str++; + + while (*str && *str != ' ' && *str != '\n') + str++; + } + + if (!nr_initrds) + return EFI_SUCCESS; + + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, + nr_initrds * sizeof(*initrds), + &initrds); + if (status != EFI_SUCCESS) + goto fail; + + str = (char *)(unsigned long)hdr->cmd_line_ptr; + for (i = 0; i < nr_initrds; i++) { + struct initrd *initrd; + efi_file_handle_t *h; + efi_file_info_t *info; + efi_char16_t filename_16[256]; + unsigned long info_sz; + efi_guid_t info_guid = EFI_FILE_INFO_ID; + efi_char16_t *p; + u64 file_sz; + + str = strstr(str, "initrd="); + if (!str) + break; + + str += 7; + + initrd = &initrds[i]; + p = filename_16; + + /* Skip any leading slashes */ + while (*str == '/' || *str == '\\') + str++; + + while (*str && *str != ' ' && *str != '\n') { + if ((u8 *)p >= (u8 *)filename_16 + sizeof(filename_16)) + break; + + *p++ = *str++; + } + + *p = '\0'; + + /* Only open the volume once. 
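+ *
+ * (All initrds must live on the filesystem the kernel image was
+ * loaded from - see the function comment above - so a single
+ * volume handle 'fh' is shared by every open that follows.)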
*/ + if (!i) { + efi_boot_services_t *boottime; + + boottime = sys_table->boottime; + + status = efi_call_phys3(boottime->handle_protocol, + image->device_handle, &fs_proto, &io); + if (status != EFI_SUCCESS) + goto free_initrds; + + status = efi_call_phys2(io->open_volume, io, &fh); + if (status != EFI_SUCCESS) + goto free_initrds; + } + + status = efi_call_phys5(fh->open, fh, &h, filename_16, + EFI_FILE_MODE_READ, (u64)0); + if (status != EFI_SUCCESS) + goto close_handles; + + initrd->handle = h; + + info_sz = 0; + status = efi_call_phys4(h->get_info, h, &info_guid, + &info_sz, NULL); + if (status != EFI_BUFFER_TOO_SMALL) + goto close_handles; + +grow: + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, info_sz, &info); + if (status != EFI_SUCCESS) + goto close_handles; + + status = efi_call_phys4(h->get_info, h, &info_guid, + &info_sz, info); + if (status == EFI_BUFFER_TOO_SMALL) { + efi_call_phys1(sys_table->boottime->free_pool, info); + goto grow; + } + + file_sz = info->file_size; + efi_call_phys1(sys_table->boottime->free_pool, info); + + if (status != EFI_SUCCESS) + goto close_handles; + + initrd->size = file_sz; + initrd_total += file_sz; + } + + if (initrd_total) { + unsigned long addr; + + /* + * Multiple initrd's need to be at consecutive + * addresses in memory, so allocate enough memory for + * all the initrd's. + */ + status = high_alloc(initrd_total, 0x1000, + &initrd_addr, hdr->initrd_addr_max); + if (status != EFI_SUCCESS) + goto close_handles; + + /* We've run out of free low memory. */ + if (initrd_addr > hdr->initrd_addr_max) { + status = EFI_INVALID_PARAMETER; + goto free_initrd_total; + } + + addr = initrd_addr; + for (j = 0; j < nr_initrds; j++) { + u64 size; + + size = initrds[j].size; + while (size) { + u64 chunksize; + if (size > EFI_READ_CHUNK_SIZE) + chunksize = EFI_READ_CHUNK_SIZE; + else + chunksize = size; + status = efi_call_phys3(fh->read, + initrds[j].handle, + &chunksize, addr); + if (status != EFI_SUCCESS) + goto free_initrd_total; + addr += chunksize; + size -= chunksize; + } + + efi_call_phys1(fh->close, initrds[j].handle); + } + + } + + efi_call_phys1(sys_table->boottime->free_pool, initrds); + + hdr->ramdisk_image = initrd_addr; + hdr->ramdisk_size = initrd_total; + + return status; + +free_initrd_total: + low_free(initrd_total, initrd_addr); + +close_handles: + for (k = j; k < nr_initrds; k++) + efi_call_phys1(fh->close, initrds[k].handle); +free_initrds: + efi_call_phys1(sys_table->boottime->free_pool, initrds); +fail: + hdr->ramdisk_image = 0; + hdr->ramdisk_size = 0; + + return status; +} + +/* + * Because the x86 boot code expects to be passed a boot_params we + * need to create one ourselves (usually the bootloader would create + * one for us). 
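+ *
+ * In outline, make_boot_params():
+ *
+ *	1. converts the UTF-16 load options into an ASCII cmdline;
+ *	2. loads any "initrd=" files via handle_ramdisks();
+ *	3. fills in screen_info via setup_graphics();
+ *	4. fetches the EFI memory map and exits boot services;
+ *	5. converts that map into the e820 table the kernel expects.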
+ */ +static efi_status_t make_boot_params(struct boot_params *boot_params, + efi_loaded_image_t *image, + void *handle) +{ + struct efi_info *efi = &boot_params->efi_info; + struct apm_bios_info *bi = &boot_params->apm_bios_info; + struct sys_desc_table *sdt = &boot_params->sys_desc_table; + struct e820entry *e820_map = &boot_params->e820_map[0]; + struct e820entry *prev = NULL; + struct setup_header *hdr = &boot_params->hdr; + unsigned long size, key, desc_size, _size; + efi_memory_desc_t *mem_map; + void *options = image->load_options; + u32 load_options_size = image->load_options_size / 2; /* ASCII */ + int options_size = 0; + efi_status_t status; + __u32 desc_version; + unsigned long cmdline; + u8 nr_entries; + u16 *s2; + u8 *s1; + int i; + + hdr->type_of_loader = 0x21; + + /* Convert unicode cmdline to ascii */ + cmdline = 0; + s2 = (u16 *)options; + + if (s2) { + while (*s2 && *s2 != '\n' && options_size < load_options_size) { + s2++; + options_size++; + } + + if (options_size) { + if (options_size > hdr->cmdline_size) + options_size = hdr->cmdline_size; + + options_size++; /* NUL termination */ + + status = low_alloc(options_size, 1, &cmdline); + if (status != EFI_SUCCESS) + goto fail; + + s1 = (u8 *)(unsigned long)cmdline; + s2 = (u16 *)options; + + for (i = 0; i < options_size - 1; i++) + *s1++ = *s2++; + + *s1 = '\0'; + } + } + + hdr->cmd_line_ptr = cmdline; + + hdr->ramdisk_image = 0; + hdr->ramdisk_size = 0; + + status = handle_ramdisks(image, hdr); + if (status != EFI_SUCCESS) + goto free_cmdline; + + setup_graphics(boot_params); + + /* Clear APM BIOS info */ + memset(bi, 0, sizeof(*bi)); + + memset(sdt, 0, sizeof(*sdt)); + + memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32)); + + size = sizeof(*mem_map) * 32; + +again: + size += sizeof(*mem_map); + _size = size; + status = low_alloc(size, 1, (unsigned long *)&mem_map); + if (status != EFI_SUCCESS) + goto free_cmdline; + + status = efi_call_phys5(sys_table->boottime->get_memory_map, &size, + mem_map, &key, &desc_size, &desc_version); + if (status == EFI_BUFFER_TOO_SMALL) { + low_free(_size, (unsigned long)mem_map); + goto again; + } + + if (status != EFI_SUCCESS) + goto free_mem_map; + + efi->efi_systab = (unsigned long)sys_table; + efi->efi_memdesc_size = desc_size; + efi->efi_memdesc_version = desc_version; + efi->efi_memmap = (unsigned long)mem_map; + efi->efi_memmap_size = size; + +#ifdef CONFIG_X86_64 + efi->efi_systab_hi = (unsigned long)sys_table >> 32; + efi->efi_memmap_hi = (unsigned long)mem_map >> 32; +#endif + + /* Might as well exit boot services now */ + status = efi_call_phys2(sys_table->boottime->exit_boot_services, + handle, key); + if (status != EFI_SUCCESS) + goto free_mem_map; + + /* Historic? */ + boot_params->alt_mem_k = 32 * 1024; + + /* + * Convert the EFI memory map to E820. 
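+ *
+ * The mapping implemented by the switch below is:
+ *
+ *	EFI_LOADER_{CODE,DATA}, EFI_BOOT_SERVICES_{CODE,DATA},
+ *	EFI_CONVENTIONAL_MEMORY                 -> E820_RAM
+ *	EFI_ACPI_RECLAIM_MEMORY                 -> E820_ACPI
+ *	EFI_ACPI_MEMORY_NVS                     -> E820_NVS
+ *	EFI_UNUSABLE_MEMORY                     -> E820_UNUSABLE
+ *	reserved/runtime/MMIO/PAL ranges        -> E820_RESERVED
+ *	anything else                           -> skipped
+ *
+ * with adjacent ranges of the same type merged into one entry.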
+ */ + nr_entries = 0; + for (i = 0; i < size / desc_size; i++) { + efi_memory_desc_t *d; + unsigned int e820_type = 0; + unsigned long m = (unsigned long)mem_map; + + d = (efi_memory_desc_t *)(m + (i * desc_size)); + switch (d->type) { + case EFI_RESERVED_TYPE: + case EFI_RUNTIME_SERVICES_CODE: + case EFI_RUNTIME_SERVICES_DATA: + case EFI_MEMORY_MAPPED_IO: + case EFI_MEMORY_MAPPED_IO_PORT_SPACE: + case EFI_PAL_CODE: + e820_type = E820_RESERVED; + break; + + case EFI_UNUSABLE_MEMORY: + e820_type = E820_UNUSABLE; + break; + + case EFI_ACPI_RECLAIM_MEMORY: + e820_type = E820_ACPI; + break; + + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + e820_type = E820_RAM; + break; + + case EFI_ACPI_MEMORY_NVS: + e820_type = E820_NVS; + break; + + default: + continue; + } + + /* Merge adjacent mappings */ + if (prev && prev->type == e820_type && + (prev->addr + prev->size) == d->phys_addr) + prev->size += d->num_pages << 12; + else { + e820_map->addr = d->phys_addr; + e820_map->size = d->num_pages << 12; + e820_map->type = e820_type; + prev = e820_map++; + nr_entries++; + } + } + + boot_params->e820_entries = nr_entries; + + return EFI_SUCCESS; + +free_mem_map: + low_free(_size, (unsigned long)mem_map); +free_cmdline: + if (options_size) + low_free(options_size, hdr->cmd_line_ptr); +fail: + return status; +} + +/* + * On success we return a pointer to a boot_params structure, and NULL + * on failure. + */ +struct boot_params *efi_main(void *handle, efi_system_table_t *_table) +{ + struct boot_params *boot_params; + unsigned long start, nr_pages; + struct desc_ptr *gdt, *idt; + efi_loaded_image_t *image; + struct setup_header *hdr; + efi_status_t status; + efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID; + struct desc_struct *desc; + + sys_table = _table; + + /* Check if we were booted by the EFI firmware */ + if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) + goto fail; + + status = efi_call_phys3(sys_table->boottime->handle_protocol, + handle, &proto, (void *)&image); + if (status != EFI_SUCCESS) + goto fail; + + status = low_alloc(0x4000, 1, (unsigned long *)&boot_params); + if (status != EFI_SUCCESS) + goto fail; + + memset(boot_params, 0x0, 0x4000); + + /* Copy first two sectors to boot_params */ + memcpy(boot_params, image->image_base, 1024); + + hdr = &boot_params->hdr; + + /* + * The EFI firmware loader could have placed the kernel image + * anywhere in memory, but the kernel has various restrictions + * on the max physical address it can run at. Attempt to move + * the kernel to boot_params.pref_address, or as low as + * possible. 
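+ *
+ * (EFI_ALLOCATE_ADDRESS asks the firmware for pages at exactly the
+ * address passed in and fails rather than relocating the request,
+ * hence the low_alloc() fallback below when pref_address is taken.)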
+ */ + start = hdr->pref_address; + nr_pages = round_up(hdr->init_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; + + status = efi_call_phys4(sys_table->boottime->allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &start); + if (status != EFI_SUCCESS) { + status = low_alloc(hdr->init_size, hdr->kernel_alignment, + &start); + if (status != EFI_SUCCESS) + goto fail; + } + + hdr->code32_start = (__u32)start; + hdr->pref_address = (__u64)(unsigned long)image->image_base; + + memcpy((void *)start, image->image_base, image->image_size); + + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, sizeof(*gdt), + (void **)&gdt); + if (status != EFI_SUCCESS) + goto fail; + + gdt->size = 0x800; + status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address); + if (status != EFI_SUCCESS) + goto fail; + + status = efi_call_phys3(sys_table->boottime->allocate_pool, + EFI_LOADER_DATA, sizeof(*idt), + (void **)&idt); + if (status != EFI_SUCCESS) + goto fail; + + idt->size = 0; + idt->address = 0; + + status = make_boot_params(boot_params, image, handle); + if (status != EFI_SUCCESS) + goto fail; + + memset((char *)gdt->address, 0x0, gdt->size); + desc = (struct desc_struct *)gdt->address; + + /* The first GDT is a dummy and the second is unused. */ + desc += 2; + + desc->limit0 = 0xffff; + desc->base0 = 0x0000; + desc->base1 = 0x0000; + desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ; + desc->s = DESC_TYPE_CODE_DATA; + desc->dpl = 0; + desc->p = 1; + desc->limit = 0xf; + desc->avl = 0; + desc->l = 0; + desc->d = SEG_OP_SIZE_32BIT; + desc->g = SEG_GRANULARITY_4KB; + desc->base2 = 0x00; + + desc++; + desc->limit0 = 0xffff; + desc->base0 = 0x0000; + desc->base1 = 0x0000; + desc->type = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE; + desc->s = DESC_TYPE_CODE_DATA; + desc->dpl = 0; + desc->p = 1; + desc->limit = 0xf; + desc->avl = 0; + desc->l = 0; + desc->d = SEG_OP_SIZE_32BIT; + desc->g = SEG_GRANULARITY_4KB; + desc->base2 = 0x00; + +#ifdef CONFIG_X86_64 + /* Task segment value */ + desc++; + desc->limit0 = 0x0000; + desc->base0 = 0x0000; + desc->base1 = 0x0000; + desc->type = SEG_TYPE_TSS; + desc->s = 0; + desc->dpl = 0; + desc->p = 1; + desc->limit = 0x0; + desc->avl = 0; + desc->l = 0; + desc->d = 0; + desc->g = SEG_GRANULARITY_4KB; + desc->base2 = 0x00; +#endif /* CONFIG_X86_64 */ + + asm volatile ("lidt %0" : : "m" (*idt)); + asm volatile ("lgdt %0" : : "m" (*gdt)); + + asm volatile("cli"); + + return boot_params; +fail: + return NULL; +} diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h new file mode 100644 index 00000000..39251663 --- /dev/null +++ b/arch/x86/boot/compressed/eboot.h @@ -0,0 +1,61 @@ +#ifndef BOOT_COMPRESSED_EBOOT_H +#define BOOT_COMPRESSED_EBOOT_H + +#define SEG_TYPE_DATA (0 << 3) +#define SEG_TYPE_READ_WRITE (1 << 1) +#define SEG_TYPE_CODE (1 << 3) +#define SEG_TYPE_EXEC_READ (1 << 1) +#define SEG_TYPE_TSS ((1 << 3) | (1 << 0)) +#define SEG_OP_SIZE_32BIT (1 << 0) +#define SEG_GRANULARITY_4KB (1 << 0) + +#define DESC_TYPE_CODE_DATA (1 << 0) + +#define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT) +#define EFI_READ_CHUNK_SIZE (1024 * 1024) + +#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0 +#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1 +#define PIXEL_BIT_MASK 2 +#define PIXEL_BLT_ONLY 3 +#define PIXEL_FORMAT_MAX 4 + +struct efi_pixel_bitmask { + u32 red_mask; + u32 green_mask; + u32 blue_mask; + u32 reserved_mask; +}; + +struct efi_graphics_output_mode_info { + u32 version; + u32 horizontal_resolution; + u32 vertical_resolution; + int 
pixel_format; + struct efi_pixel_bitmask pixel_information; + u32 pixels_per_scan_line; +} __packed; + +struct efi_graphics_output_protocol_mode { + u32 max_mode; + u32 mode; + unsigned long info; + unsigned long size_of_info; + u64 frame_buffer_base; + unsigned long frame_buffer_size; +} __packed; + +struct efi_graphics_output_protocol { + void *query_mode; + unsigned long set_mode; + unsigned long blt; + struct efi_graphics_output_protocol_mode *mode; +}; + +struct efi_uga_draw_protocol { + void *get_mode; + void *set_mode; + void *blt; +}; + +#endif /* BOOT_COMPRESSED_EBOOT_H */ diff --git a/arch/x86/boot/compressed/efi_stub_32.S b/arch/x86/boot/compressed/efi_stub_32.S new file mode 100644 index 00000000..a53440e8 --- /dev/null +++ b/arch/x86/boot/compressed/efi_stub_32.S @@ -0,0 +1,86 @@ +/* + * EFI call stub for IA32. + * + * This stub allows us to make EFI calls in physical mode with interrupts + * turned off. Note that this implementation is different from the one in + * arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical + * mode at this point. + */ + +#include +#include + +/* + * efi_call_phys(void *, ...) is a function with variable parameters. + * All the callers of this function assure that all the parameters are 4-bytes. + */ + +/* + * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. + * So we'd better save all of them at the beginning of this function and restore + * at the end no matter how many we use, because we can not assure EFI runtime + * service functions will comply with gcc calling convention, too. + */ + +.text +ENTRY(efi_call_phys) + /* + * 0. The function can only be called in Linux kernel. So CS has been + * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found + * the values of these registers are the same. And, the corresponding + * GDT entries are identical. So I will do nothing about segment reg + * and GDT, but change GDT base register in prelog and epilog. + */ + + /* + * 1. Because we haven't been relocated by this point we need to + * use relative addressing. + */ + call 1f +1: popl %edx + subl $1b, %edx + + /* + * 2. Now on the top of stack is the return + * address in the caller of efi_call_phys(), then parameter 1, + * parameter 2, ..., param n. To make things easy, we save the return + * address of efi_call_phys in a global variable. + */ + popl %ecx + movl %ecx, saved_return_addr(%edx) + /* get the function pointer into ECX*/ + popl %ecx + movl %ecx, efi_rt_function_ptr(%edx) + + /* + * 3. Call the physical function. + */ + call *%ecx + + /* + * 4. Balance the stack. And because EAX contain the return value, + * we'd better not clobber it. We need to calculate our address + * again because %ecx and %edx are not preserved across EFI function + * calls. + */ + call 1f +1: popl %edx + subl $1b, %edx + + movl efi_rt_function_ptr(%edx), %ecx + pushl %ecx + + /* + * 10. Push the saved return address onto the stack and return. 
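+ *
+ * Taken together: on entry the stack is [ret][fn][arg1][arg2]...;
+ * popping ret and fn into the globals leaves the firmware function
+ * a normal cdecl frame with arg1 on top. Afterwards fn is pushed
+ * back so the caller's argument cleanup still balances, and the
+ * saved return address is restored here by an explicit push + ret.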
+ */ + movl saved_return_addr(%edx), %ecx + pushl %ecx + ret +ENDPROC(efi_call_phys) +.previous + +.data +saved_return_addr: + .long 0 +efi_rt_function_ptr: + .long 0 diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S new file mode 100644 index 00000000..cedc60de --- /dev/null +++ b/arch/x86/boot/compressed/efi_stub_64.S @@ -0,0 +1 @@ +#include "../../platform/efi/efi_stub_64.S" diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S new file mode 100644 index 00000000..c85e3ac9 --- /dev/null +++ b/arch/x86/boot/compressed/head_32.S @@ -0,0 +1,225 @@ +/* + * linux/boot/head.S + * + * Copyright (C) 1991, 1992, 1993 Linus Torvalds + */ + +/* + * head.S contains the 32-bit startup code. + * + * NOTE!!! Startup happens at absolute address 0x00001000, which is also where + * the page directory will exist. The startup code will be overwritten by + * the page directory. [According to comments etc elsewhere on a compressed + * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC] + * + * Page 0 is deliberately kept safe, since System Management Mode code in + * laptops may need to access the BIOS data stored there. This is also + * useful for future device drivers that either access the BIOS via VM86 + * mode. + */ + +/* + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 + */ + .text + +#include +#include +#include +#include +#include +#include + + __HEAD +ENTRY(startup_32) +#ifdef CONFIG_EFI_STUB + jmp preferred_addr + + .balign 0x10 + /* + * We don't need the return address, so set up the stack so + * efi_main() can find its arugments. + */ + add $0x4, %esp + + call efi_main + cmpl $0, %eax + movl %eax, %esi + jne 2f +1: + /* EFI init failed, so hang. */ + hlt + jmp 1b +2: + call 3f +3: + popl %eax + subl $3b, %eax + subl BP_pref_address(%esi), %eax + add BP_code32_start(%esi), %eax + leal preferred_addr(%eax), %eax + jmp *%eax + +preferred_addr: +#endif + cld + /* + * Test KEEP_SEGMENTS flag to see if the bootloader is asking + * us to not reload segments + */ + testb $(1<<6), BP_loadflags(%esi) + jnz 1f + + cli + movl $__BOOT_DS, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %fs + movl %eax, %gs + movl %eax, %ss +1: + +/* + * Calculate the delta between where we were compiled to run + * at and where we were actually loaded at. This can only be done + * with a short local call on x86. Nothing else will tell us what + * address we are running at. The reserved chunk of the real-mode + * data at 0x1e4 (defined as a scratch field) are used as the stack + * for this calculation. Only 4 bytes are needed. + */ + leal (BP_scratch+4)(%esi), %esp + call 1f +1: popl %ebp + subl $1b, %ebp + +/* + * %ebp contains the address we are loaded at by the boot loader and %ebx + * contains the address where we should move the kernel image temporarily + * for safe in-place decompression. + */ + +#ifdef CONFIG_RELOCATABLE + movl %ebp, %ebx + movl BP_kernel_alignment(%esi), %eax + decl %eax + addl %eax, %ebx + notl %eax + andl %eax, %ebx +#else + movl $LOAD_PHYSICAL_ADDR, %ebx +#endif + + /* Target address to relocate to for decompression */ + addl $z_extract_offset, %ebx + + /* Set up the stack */ + leal boot_stack_end(%ebx), %esp + + /* Zero EFLAGS */ + pushl $0 + popfl + +/* + * Copy the compressed kernel to the end of our buffer + * where decompression in place becomes safe. 
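+ *
+ * The copy must run backwards (std; rep movsl below) because the
+ * two ranges may overlap with the destination above the source;
+ * this is the classic memmove() top-down case, in C roughly:
+ *
+ *	for (i = n / 4; i-- > 0; )
+ *		dst[i] = src[i];	// 32-bit words, top down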
+ */ + pushl %esi + leal (_bss-4)(%ebp), %esi + leal (_bss-4)(%ebx), %edi + movl $(_bss - startup_32), %ecx + shrl $2, %ecx + std + rep movsl + cld + popl %esi + +/* + * Jump to the relocated address. + */ + leal relocated(%ebx), %eax + jmp *%eax +ENDPROC(startup_32) + + .text +relocated: + +/* + * Clear BSS (stack is currently empty) + */ + xorl %eax, %eax + leal _bss(%ebx), %edi + leal _ebss(%ebx), %ecx + subl %edi, %ecx + shrl $2, %ecx + rep stosl + +/* + * Adjust our own GOT + */ + leal _got(%ebx), %edx + leal _egot(%ebx), %ecx +1: + cmpl %ecx, %edx + jae 2f + addl %ebx, (%edx) + addl $4, %edx + jmp 1b +2: + +/* + * Do the decompression, and jump to the new kernel.. + */ + leal z_extract_offset_negative(%ebx), %ebp + /* push arguments for decompress_kernel: */ + pushl %ebp /* output address */ + pushl $z_input_len /* input_len */ + leal input_data(%ebx), %eax + pushl %eax /* input_data */ + leal boot_heap(%ebx), %eax + pushl %eax /* heap area */ + pushl %esi /* real mode pointer */ + call decompress_kernel + addl $20, %esp + +#if CONFIG_RELOCATABLE +/* + * Find the address of the relocations. + */ + leal z_output_len(%ebp), %edi + +/* + * Calculate the delta between where vmlinux was compiled to run + * and where it was actually loaded. + */ + movl %ebp, %ebx + subl $LOAD_PHYSICAL_ADDR, %ebx + jz 2f /* Nothing to be done if loaded at compiled addr. */ +/* + * Process relocations. + */ + +1: subl $4, %edi + movl (%edi), %ecx + testl %ecx, %ecx + jz 2f + addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) + jmp 1b +2: +#endif + +/* + * Jump to the decompressed kernel. + */ + xorl %ebx, %ebx + jmp *%ebp + +/* + * Stack and heap for uncompression + */ + .bss + .balign 4 +boot_heap: + .fill BOOT_HEAP_SIZE, 1, 0 +boot_stack: + .fill BOOT_STACK_SIZE, 1, 0 +boot_stack_end: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S new file mode 100644 index 00000000..87e03a13 --- /dev/null +++ b/arch/x86/boot/compressed/head_64.S @@ -0,0 +1,371 @@ +/* + * linux/boot/head.S + * + * Copyright (C) 1991, 1992, 1993 Linus Torvalds + */ + +/* + * head.S contains the 32-bit startup code. + * + * NOTE!!! Startup happens at absolute address 0x00001000, which is also where + * the page directory will exist. The startup code will be overwritten by + * the page directory. [According to comments etc elsewhere on a compressed + * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC] + * + * Page 0 is deliberately kept safe, since System Management Mode code in + * laptops may need to access the BIOS data stored there. This is also + * useful for future device drivers that either access the BIOS via VM86 + * mode. + */ + +/* + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 + */ + .code32 + .text + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + __HEAD + .code32 +ENTRY(startup_32) + cld + /* + * Test KEEP_SEGMENTS flag to see if the bootloader is asking + * us to not reload segments + */ + testb $(1<<6), BP_loadflags(%esi) + jnz 1f + + cli + movl $(__KERNEL_DS), %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss +1: + +/* + * Calculate the delta between where we were compiled to run + * at and where we were actually loaded at. This can only be done + * with a short local call on x86. Nothing else will tell us what + * address we are running at. The reserved chunk of the real-mode + * data at 0x1e4 (defined as a scratch field) are used as the stack + * for this calculation. Only 4 bytes are needed. 
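+ *
+ * (A near call pushes the address of the next instruction, so after
+ * "call 1f; 1: popl %ebp" the register holds the run-time address
+ * of label 1; subtracting the link-time constant $1b leaves the
+ * load/link delta that the later address fixups are based on.)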
+ */ + leal (BP_scratch+4)(%esi), %esp + call 1f +1: popl %ebp + subl $1b, %ebp + +/* setup a stack and make sure cpu supports long mode. */ + movl $boot_stack_end, %eax + addl %ebp, %eax + movl %eax, %esp + + call verify_cpu + testl %eax, %eax + jnz no_longmode + +/* + * Compute the delta between where we were compiled to run at + * and where the code will actually run at. + * + * %ebp contains the address we are loaded at by the boot loader and %ebx + * contains the address where we should move the kernel image temporarily + * for safe in-place decompression. + */ + +#ifdef CONFIG_RELOCATABLE + movl %ebp, %ebx + movl BP_kernel_alignment(%esi), %eax + decl %eax + addl %eax, %ebx + notl %eax + andl %eax, %ebx +#else + movl $LOAD_PHYSICAL_ADDR, %ebx +#endif + + /* Target address to relocate to for decompression */ + addl $z_extract_offset, %ebx + +/* + * Prepare for entering 64 bit mode + */ + + /* Load new GDT with the 64bit segments using 32bit descriptor */ + leal gdt(%ebp), %eax + movl %eax, gdt+2(%ebp) + lgdt gdt(%ebp) + + /* Enable PAE mode */ + movl $(X86_CR4_PAE), %eax + movl %eax, %cr4 + + /* + * Build early 4G boot pagetable + */ + /* Initialize Page tables to 0 */ + leal pgtable(%ebx), %edi + xorl %eax, %eax + movl $((4096*6)/4), %ecx + rep stosl + + /* Build Level 4 */ + leal pgtable + 0(%ebx), %edi + leal 0x1007 (%edi), %eax + movl %eax, 0(%edi) + + /* Build Level 3 */ + leal pgtable + 0x1000(%ebx), %edi + leal 0x1007(%edi), %eax + movl $4, %ecx +1: movl %eax, 0x00(%edi) + addl $0x00001000, %eax + addl $8, %edi + decl %ecx + jnz 1b + + /* Build Level 2 */ + leal pgtable + 0x2000(%ebx), %edi + movl $0x00000183, %eax + movl $2048, %ecx +1: movl %eax, 0(%edi) + addl $0x00200000, %eax + addl $8, %edi + decl %ecx + jnz 1b + + /* Enable the boot page tables */ + leal pgtable(%ebx), %eax + movl %eax, %cr3 + + /* Enable Long mode in EFER (Extended Feature Enable Register) */ + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + + /* + * Setup for the jump to 64bit mode + * + * When the jump is performend we will be in long mode but + * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1 + * (and in turn EFER.LMA = 1). To jump into 64bit mode we use + * the new gdt/idt that has __KERNEL_CS with CS.L = 1. + * We place all of the values on our mini stack so lret can + * used to perform that far jump. + */ + pushl $__KERNEL_CS + leal startup_64(%ebp), %eax + pushl %eax + + /* Enter paged protected Mode, activating Long Mode */ + movl $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */ + movl %eax, %cr0 + + /* Jump from 32bit compatibility mode into 64bit mode. */ + lret +ENDPROC(startup_32) + +no_longmode: + /* This isn't an x86-64 CPU so hang */ +1: + hlt + jmp 1b + +#include "../../kernel/verify_cpu.S" + + /* + * Be careful here startup_64 needs to be at a predictable + * address so I can export it in an ELF header. Bootloaders + * should look at the ELF header to find this address, as + * it may change in the future. + */ + .code64 + .org 0x200 +ENTRY(startup_64) + /* + * We come here either from startup_32 or directly from a + * 64bit bootloader. If we come here from a bootloader we depend on + * an identity mapped page table being provied that maps our + * entire text+data+bss and hopefully all of memory. + */ +#ifdef CONFIG_EFI_STUB + /* + * The entry point for the PE/COFF executable is 0x210, so only + * legacy boot loaders will execute this jmp. 
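+ *
+ * Firmware enters with the Microsoft x64 calling convention (image
+ * handle in %rcx, EFI system table pointer in %rdx), so the two
+ * moves below simply translate those into the SysV registers
+ * (%rdi, %rsi) that the C function
+ *
+ *	efi_main(void *handle, efi_system_table_t *table)
+ *
+ * expects.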
+ */ + jmp preferred_addr + + .org 0x210 + mov %rcx, %rdi + mov %rdx, %rsi + call efi_main + movq %rax,%rsi + cmpq $0,%rax + jne 2f +1: + /* EFI init failed, so hang. */ + hlt + jmp 1b +2: + call 3f +3: + popq %rax + subq $3b, %rax + subq BP_pref_address(%rsi), %rax + add BP_code32_start(%esi), %eax + leaq preferred_addr(%rax), %rax + jmp *%rax + +preferred_addr: +#endif + + /* Setup data segments. */ + xorl %eax, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + movl %eax, %fs + movl %eax, %gs + lldt %ax + movl $0x20, %eax + ltr %ax + + /* + * Compute the decompressed kernel start address. It is where + * we were loaded at aligned to a 2M boundary. %rbp contains the + * decompressed kernel start address. + * + * If it is a relocatable kernel then decompress and run the kernel + * from load address aligned to 2MB addr, otherwise decompress and + * run the kernel from LOAD_PHYSICAL_ADDR + * + * We cannot rely on the calculation done in 32-bit mode, since we + * may have been invoked via the 64-bit entry point. + */ + + /* Start with the delta to where the kernel will run at. */ +#ifdef CONFIG_RELOCATABLE + leaq startup_32(%rip) /* - $startup_32 */, %rbp + movl BP_kernel_alignment(%rsi), %eax + decl %eax + addq %rax, %rbp + notq %rax + andq %rax, %rbp +#else + movq $LOAD_PHYSICAL_ADDR, %rbp +#endif + + /* Target address to relocate to for decompression */ + leaq z_extract_offset(%rbp), %rbx + + /* Set up the stack */ + leaq boot_stack_end(%rbx), %rsp + + /* Zero EFLAGS */ + pushq $0 + popfq + +/* + * Copy the compressed kernel to the end of our buffer + * where decompression in place becomes safe. + */ + pushq %rsi + leaq (_bss-8)(%rip), %rsi + leaq (_bss-8)(%rbx), %rdi + movq $_bss /* - $startup_32 */, %rcx + shrq $3, %rcx + std + rep movsq + cld + popq %rsi + +/* + * Jump to the relocated address. + */ + leaq relocated(%rbx), %rax + jmp *%rax + + .text +relocated: + +/* + * Clear BSS (stack is currently empty) + */ + xorl %eax, %eax + leaq _bss(%rip), %rdi + leaq _ebss(%rip), %rcx + subq %rdi, %rcx + shrq $3, %rcx + rep stosq + +/* + * Adjust our own GOT + */ + leaq _got(%rip), %rdx + leaq _egot(%rip), %rcx +1: + cmpq %rcx, %rdx + jae 2f + addq %rbx, (%rdx) + addq $8, %rdx + jmp 1b +2: + +/* + * Do the decompression, and jump to the new kernel.. + */ + pushq %rsi /* Save the real mode argument */ + movq %rsi, %rdi /* real mode address */ + leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ + leaq input_data(%rip), %rdx /* input_data */ + movl $z_input_len, %ecx /* input_len */ + movq %rbp, %r8 /* output target address */ + call decompress_kernel + popq %rsi + +/* + * Jump to the decompressed kernel. 
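+ *
+ * %rbp still holds the 2 MiB-aligned load address computed above
+ * and %rsi has just been popped back to the boot_params pointer,
+ * which is the register state the 64-bit kernel entry code expects.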
+ */ + jmp *%rbp + + .data +gdt: + .word gdt_end - gdt + .long gdt + .word 0 + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x00af9a000000ffff /* __KERNEL_CS */ + .quad 0x00cf92000000ffff /* __KERNEL_DS */ + .quad 0x0080890000000000 /* TS descriptor */ + .quad 0x0000000000000000 /* TS continued */ +gdt_end: + +/* + * Stack and heap for uncompression + */ + .bss + .balign 4 +boot_heap: + .fill BOOT_HEAP_SIZE, 1, 0 +boot_stack: + .fill BOOT_STACK_SIZE, 1, 0 +boot_stack_end: + +/* + * Space for page tables (not in .bss so not zeroed) + */ + .section ".pgtable","a",@nobits + .balign 4096 +pgtable: + .fill 6*4096, 1, 0 diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c new file mode 100644 index 00000000..7116dcba --- /dev/null +++ b/arch/x86/boot/compressed/misc.c @@ -0,0 +1,379 @@ +/* + * misc.c + * + * This is a collection of several routines from gzip-1.0.3 + * adapted for Linux. + * + * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 + * puts by Nick Holloway 1993, better puts by Martin Mares 1995 + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 + */ + +#include "misc.h" + +/* WARNING!! + * This code is compiled with -fPIC and it is relocated dynamically + * at run time, but no relocation processing is performed. + * This means that it is not safe to place pointers in static structures. + */ + +/* + * Getting to provable safe in place decompression is hard. + * Worst case behaviours need to be analyzed. + * Background information: + * + * The file layout is: + * magic[2] + * method[1] + * flags[1] + * timestamp[4] + * extraflags[1] + * os[1] + * compressed data blocks[N] + * crc[4] orig_len[4] + * + * resulting in 18 bytes of non compressed data overhead. + * + * Files divided into blocks + * 1 bit (last block flag) + * 2 bits (block type) + * + * 1 block occurs every 32K -1 bytes or when there 50% compression + * has been achieved. The smallest block type encoding is always used. + * + * stored: + * 32 bits length in bytes. + * + * fixed: + * magic fixed tree. + * symbols. + * + * dynamic: + * dynamic tree encoding. + * symbols. + * + * + * The buffer for decompression in place is the length of the + * uncompressed data, plus a small amount extra to keep the algorithm safe. + * The compressed data is placed at the end of the buffer. The output + * pointer is placed at the start of the buffer and the input pointer + * is placed where the compressed data starts. Problems will occur + * when the output pointer overruns the input pointer. + * + * The output pointer can only overrun the input pointer if the input + * pointer is moving faster than the output pointer. A condition only + * triggered by data whose compressed form is larger than the uncompressed + * form. + * + * The worst case at the block level is a growth of the compressed data + * of 5 bytes per 32767 bytes. + * + * The worst case internal to a compressed block is very hard to figure. + * The worst case can at least be boundined by having one bit that represents + * 32764 bytes and then all of the rest of the bytes representing the very + * very last byte. + * + * All of which is enough to compute an amount of extra data that is required + * to be safe. To avoid problems at the block level allocating 5 extra bytes + * per 32767 bytes of data is sufficient. 
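+ *
+ * (Worked example: a 20 MiB image spans 20971520/32767, about 640
+ * blocks, so at most about 3200 bytes of block-level growth; the
+ * (uncompressed_size >> 12) term of the formula below contributes
+ * 5120 bytes for it, which is comfortably enough.)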
To avoind problems internal to a + * block adding an extra 32767 bytes (the worst case uncompressed block size) + * is sufficient, to ensure that in the worst case the decompressed data for + * block will stop the byte before the compressed data for a block begins. + * To avoid problems with the compressed data's meta information an extra 18 + * bytes are needed. Leading to the formula: + * + * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size. + * + * Adding 8 bytes per 32K is a bit excessive but much easier to calculate. + * Adding 32768 instead of 32767 just makes for round numbers. + * Adding the decompressor_size is necessary as it musht live after all + * of the data as well. Last I measured the decompressor is about 14K. + * 10K of actual data and 4K of bss. + * + */ + +/* + * gzip declarations + */ +#define STATIC static + +#undef memset +#undef memcpy +#define memzero(s, n) memset((s), 0, (n)) + + +static void error(char *m); + +/* + * This is set up by the setup-routine at boot-time + */ +struct boot_params *real_mode; /* Pointer to real-mode data */ +static int quiet; +static int debug; + +void *memset(void *s, int c, size_t n); +void *memcpy(void *dest, const void *src, size_t n); + +#ifdef CONFIG_X86_64 +#define memptr long +#else +#define memptr unsigned +#endif + +static memptr free_mem_ptr; +static memptr free_mem_end_ptr; + +static char *vidmem; +static int vidport; +static int lines, cols; + +#ifdef CONFIG_KERNEL_GZIP +#include "../../../../lib/decompress_inflate.c" +#endif + +#ifdef CONFIG_KERNEL_BZIP2 +#include "../../../../lib/decompress_bunzip2.c" +#endif + +#ifdef CONFIG_KERNEL_LZMA +#include "../../../../lib/decompress_unlzma.c" +#endif + +#ifdef CONFIG_KERNEL_XZ +#include "../../../../lib/decompress_unxz.c" +#endif + +#ifdef CONFIG_KERNEL_LZO +#include "../../../../lib/decompress_unlzo.c" +#endif + +static void scroll(void) +{ + int i; + + memcpy(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2); + for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2) + vidmem[i] = ' '; +} + +#define XMTRDY 0x20 + +#define TXR 0 /* Transmit register (WRITE) */ +#define LSR 5 /* Line Status */ +static void serial_putchar(int ch) +{ + unsigned timeout = 0xffff; + + while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) + cpu_relax(); + + outb(ch, early_serial_base + TXR); +} + +void __putstr(int error, const char *s) +{ + int x, y, pos; + char c; + +#ifndef CONFIG_X86_VERBOSE_BOOTUP + if (!error) + return; +#endif + if (early_serial_base) { + const char *str = s; + while (*str) { + if (*str == '\n') + serial_putchar('\r'); + serial_putchar(*str++); + } + } + + if (real_mode->screen_info.orig_video_mode == 0 && + lines == 0 && cols == 0) + return; + + x = real_mode->screen_info.orig_x; + y = real_mode->screen_info.orig_y; + + while ((c = *s++) != '\0') { + if (c == '\n') { + x = 0; + if (++y >= lines) { + scroll(); + y--; + } + } else { + vidmem[(x + cols * y) * 2] = c; + if (++x >= cols) { + x = 0; + if (++y >= lines) { + scroll(); + y--; + } + } + } + } + + real_mode->screen_info.orig_x = x; + real_mode->screen_info.orig_y = y; + + pos = (x + cols * y) * 2; /* Update cursor position */ + outb(14, vidport); + outb(0xff & (pos >> 9), vidport+1); + outb(15, vidport); + outb(0xff & (pos >> 1), vidport+1); +} + +void *memset(void *s, int c, size_t n) +{ + int i; + char *ss = s; + + for (i = 0; i < n; i++) + ss[i] = c; + return s; +} +#ifdef CONFIG_X86_32 +void *memcpy(void *dest, const void *src, size_t n) +{ + int d0, d1, d2; + asm volatile( + 
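+	/*
+	 * Head/tail copy: "0" (n >> 2) preloads %ecx with the dword
+	 * count for movsl, then %4 (n & 3) reloads it with the byte
+	 * remainder for movsb; dest and src ride in %edi/%esi via
+	 * the tied "1"/"2" operands.
+	 */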
"rep ; movsl\n\t" + "movl %4,%%ecx\n\t" + "rep ; movsb\n\t" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + : "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src) + : "memory"); + + return dest; +} +#else +void *memcpy(void *dest, const void *src, size_t n) +{ + long d0, d1, d2; + asm volatile( + "rep ; movsq\n\t" + "movq %4,%%rcx\n\t" + "rep ; movsb\n\t" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + : "0" (n >> 3), "g" (n & 7), "1" (dest), "2" (src) + : "memory"); + + return dest; +} +#endif + +static void error(char *x) +{ + __putstr(1, "\n\n"); + __putstr(1, x); + __putstr(1, "\n\n -- System halted"); + + while (1) + asm("hlt"); +} + +static void parse_elf(void *output) +{ +#ifdef CONFIG_X86_64 + Elf64_Ehdr ehdr; + Elf64_Phdr *phdrs, *phdr; +#else + Elf32_Ehdr ehdr; + Elf32_Phdr *phdrs, *phdr; +#endif + void *dest; + int i; + + memcpy(&ehdr, output, sizeof(ehdr)); + if (ehdr.e_ident[EI_MAG0] != ELFMAG0 || + ehdr.e_ident[EI_MAG1] != ELFMAG1 || + ehdr.e_ident[EI_MAG2] != ELFMAG2 || + ehdr.e_ident[EI_MAG3] != ELFMAG3) { + error("Kernel is not a valid ELF file"); + return; + } + + if (!quiet) + putstr("Parsing ELF... "); + + phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum); + if (!phdrs) + error("Failed to allocate space for phdrs"); + + memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum); + + for (i = 0; i < ehdr.e_phnum; i++) { + phdr = &phdrs[i]; + + switch (phdr->p_type) { + case PT_LOAD: +#ifdef CONFIG_RELOCATABLE + dest = output; + dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); +#else + dest = (void *)(phdr->p_paddr); +#endif + memcpy(dest, + output + phdr->p_offset, + phdr->p_filesz); + break; + default: /* Ignore other PT_* */ break; + } + } + + free(phdrs); +} + +asmlinkage void decompress_kernel(void *rmode, memptr heap, + unsigned char *input_data, + unsigned long input_len, + unsigned char *output) +{ + real_mode = rmode; + + if (cmdline_find_option_bool("quiet")) + quiet = 1; + if (cmdline_find_option_bool("debug")) + debug = 1; + + if (real_mode->screen_info.orig_video_mode == 7) { + vidmem = (char *) 0xb0000; + vidport = 0x3b4; + } else { + vidmem = (char *) 0xb8000; + vidport = 0x3d4; + } + + lines = real_mode->screen_info.orig_video_lines; + cols = real_mode->screen_info.orig_video_cols; + + console_init(); + if (debug) + putstr("early console in decompress_kernel\n"); + + free_mem_ptr = heap; /* Heap */ + free_mem_end_ptr = heap + BOOT_HEAP_SIZE; + + if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) + error("Destination address inappropriately aligned"); +#ifdef CONFIG_X86_64 + if (heap > 0x3fffffffffffUL) + error("Destination address too large"); +#else + if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff)) + error("Destination address too large"); +#endif +#ifndef CONFIG_RELOCATABLE + if ((unsigned long)output != LOAD_PHYSICAL_ADDR) + error("Wrong destination address"); +#endif + + if (!quiet) + putstr("\nDecompressing Linux... "); + decompress(input_data, input_len, NULL, NULL, output, NULL, error); + parse_elf(output); + if (!quiet) + putstr("done.\nBooting the kernel.\n"); + return; +} diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h new file mode 100644 index 00000000..3f19c81a --- /dev/null +++ b/arch/x86/boot/compressed/misc.h @@ -0,0 +1,39 @@ +#ifndef BOOT_COMPRESSED_MISC_H +#define BOOT_COMPRESSED_MISC_H + +/* + * we have to be careful, because no indirections are allowed here, and + * paravirt_ops is a kind of one. 
As it will only run in baremetal anyway, + * we just keep it from happening + */ +#undef CONFIG_PARAVIRT +#ifdef CONFIG_X86_32 +#define _ASM_X86_DESC_H 1 +#endif + +#include +#include +#include +#include +#include +#include +#include + +#define BOOT_BOOT_H +#include "../ctype.h" + +/* misc.c */ +extern struct boot_params *real_mode; /* Pointer to real-mode data */ +void __putstr(int error, const char *s); +#define putstr(__x) __putstr(0, __x) +#define puts(__x) __putstr(0, __x) + +/* cmdline.c */ +int cmdline_find_option(const char *option, char *buffer, int bufsize); +int cmdline_find_option_bool(const char *option); + +/* early_serial_console.c */ +extern int early_serial_base; +void console_init(void); + +#endif diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c new file mode 100644 index 00000000..958a6414 --- /dev/null +++ b/arch/x86/boot/compressed/mkpiggy.c @@ -0,0 +1,95 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright (C) 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * H. Peter Anvin + * + * ----------------------------------------------------------------------- */ + +/* + * Compute the desired load offset from a compressed program; outputs + * a small assembly wrapper with the appropriate symbols defined. + */ + +#include +#include +#include +#include +#include + +int main(int argc, char *argv[]) +{ + uint32_t olen; + long ilen; + unsigned long offs; + FILE *f; + + if (argc < 2) { + fprintf(stderr, "Usage: %s compressed_file\n", argv[0]); + return 1; + } + + /* Get the information for the compressed kernel image first */ + + f = fopen(argv[1], "r"); + if (!f) { + perror(argv[1]); + return 1; + } + + + if (fseek(f, -4L, SEEK_END)) { + perror(argv[1]); + } + + if (fread(&olen, sizeof(olen), 1, f) != 1) { + perror(argv[1]); + return 1; + } + + ilen = ftell(f); + olen = get_unaligned_le32(&olen); + fclose(f); + + /* + * Now we have the input (compressed) and output (uncompressed) + * sizes, compute the necessary decompression offset... + */ + + offs = (olen > ilen) ? 
olen - ilen : 0; + offs += olen >> 12; /* Add 8 bytes for each 32K block */ + offs += 64*1024 + 128; /* Add 64K + 128 bytes slack */ + offs = (offs+4095) & ~4095; /* Round to a 4K boundary */ + + printf(".section \".rodata..compressed\",\"a\",@progbits\n"); + printf(".globl z_input_len\n"); + printf("z_input_len = %lu\n", ilen); + printf(".globl z_output_len\n"); + printf("z_output_len = %lu\n", (unsigned long)olen); + printf(".globl z_extract_offset\n"); + printf("z_extract_offset = 0x%lx\n", offs); + /* z_extract_offset_negative allows simplification of head_32.S */ + printf(".globl z_extract_offset_negative\n"); + printf("z_extract_offset_negative = -0x%lx\n", offs); + + printf(".globl input_data, input_data_end\n"); + printf("input_data:\n"); + printf(".incbin \"%s\"\n", argv[1]); + printf("input_data_end:\n"); + + return 0; +} diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c new file mode 100644 index 00000000..ffb9c5c9 --- /dev/null +++ b/arch/x86/boot/compressed/string.c @@ -0,0 +1,11 @@ +#include "misc.h" + +int memcmp(const void *s1, const void *s2, size_t len) +{ + u8 diff; + asm("repe; cmpsb; setnz %0" + : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + return diff; +} + +#include "../string.c" diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S new file mode 100644 index 00000000..34d047c9 --- /dev/null +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -0,0 +1,74 @@ +#include + +OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) + +#undef i386 + +#include +#include + +#ifdef CONFIG_X86_64 +OUTPUT_ARCH(i386:x86-64) +ENTRY(startup_64) +#else +OUTPUT_ARCH(i386) +ENTRY(startup_32) +#endif + +SECTIONS +{ + /* Be careful parts of head_64.S assume startup_32 is at + * address 0. + */ + . = 0; + .head.text : { + _head = . ; + HEAD_TEXT + _ehead = . ; + } + .rodata..compressed : { + *(.rodata..compressed) + } + .text : { + _text = .; /* Text */ + *(.text) + *(.text.*) + _etext = . ; + } + .rodata : { + _rodata = . ; + *(.rodata) /* read-only data */ + *(.rodata.*) + _erodata = . ; + } + .got : { + _got = .; + KEEP(*(.got.plt)) + KEEP(*(.got)) + _egot = .; + } + .data : { + _data = . ; + *(.data) + *(.data.*) + _edata = . ; + } + . = ALIGN(L1_CACHE_BYTES); + .bss : { + _bss = . ; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(8); /* For convenience during zeroing */ + _ebss = .; + } +#ifdef CONFIG_X86_64 + . = ALIGN(PAGE_SIZE); + .pgtable : { + _pgtable = . ; + *(.pgtable) + _epgtable = . ; + } +#endif + _end = .; +} diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S new file mode 100644 index 00000000..11f272c6 --- /dev/null +++ b/arch/x86/boot/copy.S @@ -0,0 +1,87 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. 
+ * + * ----------------------------------------------------------------------- */ + +#include + +/* + * Memory copy routines + */ + + .code16gcc + .text + +GLOBAL(memcpy) + pushw %si + pushw %di + movw %ax, %di + movw %dx, %si + pushw %cx + shrw $2, %cx + rep; movsl + popw %cx + andw $3, %cx + rep; movsb + popw %di + popw %si + ret +ENDPROC(memcpy) + +GLOBAL(memset) + pushw %di + movw %ax, %di + movzbl %dl, %eax + imull $0x01010101,%eax + pushw %cx + shrw $2, %cx + rep; stosl + popw %cx + andw $3, %cx + rep; stosb + popw %di + ret +ENDPROC(memset) + +GLOBAL(copy_from_fs) + pushw %ds + pushw %fs + popw %ds + call memcpy + popw %ds + ret +ENDPROC(copy_from_fs) + +GLOBAL(copy_to_fs) + pushw %es + pushw %fs + popw %es + call memcpy + popw %es + ret +ENDPROC(copy_to_fs) + +#if 0 /* Not currently used, but can be enabled as needed */ +GLOBAL(copy_from_gs) + pushw %ds + pushw %gs + popw %ds + call memcpy + popw %ds + ret +ENDPROC(copy_from_gs) + +GLOBAL(copy_to_gs) + pushw %es + pushw %gs + popw %es + call memcpy + popw %es + ret +ENDPROC(copy_to_gs) +#endif diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c new file mode 100644 index 00000000..6ec6bb6e --- /dev/null +++ b/arch/x86/boot/cpu.c @@ -0,0 +1,85 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007-2008 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/x86/boot/cpu.c + * + * Check for obligatory CPU features and abort if the features are not + * present. + */ + +#include "boot.h" +#include "cpustr.h" + +static char *cpu_name(int level) +{ + static char buf[6]; + + if (level == 64) { + return "x86-64"; + } else { + if (level == 15) + level = 6; + sprintf(buf, "i%d86", level); + return buf; + } +} + +int validate_cpu(void) +{ + u32 *err_flags; + int cpu_level, req_level; + const unsigned char *msg_strs; + + check_cpu(&cpu_level, &req_level, &err_flags); + + if (cpu_level < req_level) { + printf("This kernel requires an %s CPU, ", + cpu_name(req_level)); + printf("but only detected an %s CPU.\n", + cpu_name(cpu_level)); + return -1; + } + + if (err_flags) { + int i, j; + puts("This kernel requires the following features " + "not present on the CPU:\n"); + + msg_strs = (const unsigned char *)x86_cap_strs; + + for (i = 0; i < NCAPINTS; i++) { + u32 e = err_flags[i]; + + for (j = 0; j < 32; j++) { + if (msg_strs[0] < i || + (msg_strs[0] == i && msg_strs[1] < j)) { + /* Skip to the next string */ + msg_strs += 2; + while (*msg_strs++) + ; + } + if (e & 1) { + if (msg_strs[0] == i && + msg_strs[1] == j && + msg_strs[2]) + printf("%s ", msg_strs+2); + else + printf("%d:%d ", i, j); + } + e >>= 1; + } + } + putchar('\n'); + return -1; + } else { + return 0; + } +} diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c new file mode 100644 index 00000000..4d3ff037 --- /dev/null +++ b/arch/x86/boot/cpucheck.c @@ -0,0 +1,252 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. 
+ * + * ----------------------------------------------------------------------- */ + +/* + * Check for obligatory CPU features and abort if the features are not + * present. This code should be compilable as 16-, 32- or 64-bit + * code, so be very careful with types and inline assembly. + * + * This code should not contain any messages; that requires an + * additional wrapper. + * + * As written, this code is not safe for inclusion into the kernel + * proper (after FPU initialization, in particular). + */ + +#ifdef _SETUP +# include "boot.h" +#endif +#include +#include +#include +#include + +struct cpu_features cpu; +static u32 cpu_vendor[3]; +static u32 err_flags[NCAPINTS]; + +static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY; + +static const u32 req_flags[NCAPINTS] = +{ + REQUIRED_MASK0, + REQUIRED_MASK1, + 0, /* REQUIRED_MASK2 not implemented in this file */ + 0, /* REQUIRED_MASK3 not implemented in this file */ + REQUIRED_MASK4, + 0, /* REQUIRED_MASK5 not implemented in this file */ + REQUIRED_MASK6, + 0, /* REQUIRED_MASK7 not implemented in this file */ +}; + +#define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a)) + +static int is_amd(void) +{ + return cpu_vendor[0] == A32('A', 'u', 't', 'h') && + cpu_vendor[1] == A32('e', 'n', 't', 'i') && + cpu_vendor[2] == A32('c', 'A', 'M', 'D'); +} + +static int is_centaur(void) +{ + return cpu_vendor[0] == A32('C', 'e', 'n', 't') && + cpu_vendor[1] == A32('a', 'u', 'r', 'H') && + cpu_vendor[2] == A32('a', 'u', 'l', 's'); +} + +static int is_transmeta(void) +{ + return cpu_vendor[0] == A32('G', 'e', 'n', 'u') && + cpu_vendor[1] == A32('i', 'n', 'e', 'T') && + cpu_vendor[2] == A32('M', 'x', '8', '6'); +} + +static int has_fpu(void) +{ + u16 fcw = -1, fsw = -1; + u32 cr0; + + asm("movl %%cr0,%0" : "=r" (cr0)); + if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { + cr0 &= ~(X86_CR0_EM|X86_CR0_TS); + asm volatile("movl %0,%%cr0" : : "r" (cr0)); + } + + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" + : "+m" (fsw), "+m" (fcw)); + + return fsw == 0 && (fcw & 0x103f) == 0x003f; +} + +static int has_eflag(u32 mask) +{ + u32 f0, f1; + + asm("pushfl ; " + "pushfl ; " + "popl %0 ; " + "movl %0,%1 ; " + "xorl %2,%1 ; " + "pushl %1 ; " + "popfl ; " + "pushfl ; " + "popl %1 ; " + "popfl" + : "=&r" (f0), "=&r" (f1) + : "ri" (mask)); + + return !!((f0^f1) & mask); +} + +static void get_flags(void) +{ + u32 max_intel_level, max_amd_level; + u32 tfms; + + if (has_fpu()) + set_bit(X86_FEATURE_FPU, cpu.flags); + + if (has_eflag(X86_EFLAGS_ID)) { + asm("cpuid" + : "=a" (max_intel_level), + "=b" (cpu_vendor[0]), + "=d" (cpu_vendor[1]), + "=c" (cpu_vendor[2]) + : "a" (0)); + + if (max_intel_level >= 0x00000001 && + max_intel_level <= 0x0000ffff) { + asm("cpuid" + : "=a" (tfms), + "=c" (cpu.flags[4]), + "=d" (cpu.flags[0]) + : "a" (0x00000001) + : "ebx"); + cpu.level = (tfms >> 8) & 15; + cpu.model = (tfms >> 4) & 15; + if (cpu.level >= 6) + cpu.model += ((tfms >> 16) & 0xf) << 4; + } + + asm("cpuid" + : "=a" (max_amd_level) + : "a" (0x80000000) + : "ebx", "ecx", "edx"); + + if (max_amd_level >= 0x80000001 && + max_amd_level <= 0x8000ffff) { + u32 eax = 0x80000001; + asm("cpuid" + : "+a" (eax), + "=c" (cpu.flags[6]), + "=d" (cpu.flags[1]) + : : "ebx"); + } + } +} + +/* Returns a bitmask of which words we have error bits in */ +static int check_flags(void) +{ + u32 err; + int i; + + err = 0; + for (i = 0; i < NCAPINTS; i++) { + err_flags[i] = req_flags[i] & ~cpu.flags[i]; + if (err_flags[i]) + err |= 1 << i; + } + + return err; +} + +/* + * Returns -1 on error. 
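+ *
+ * A typical caller (cf. validate_cpu() in cpu.c) does:
+ *
+ *	int cpu_level, req_level;
+ *	u32 *err_flags;
+ *
+ *	check_cpu(&cpu_level, &req_level, &err_flags);
+ *	if (cpu_level < req_level || err_flags)
+ *		... report the missing features and fail ...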
+ *
+ * *cpu_level is set to the current CPU level; *req_level to the required
+ * level.  x86-64 is considered level 64 for this purpose.
+ *
+ * *err_flags_ptr is set to the flags error array if there are flags missing.
+ */
+int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
+{
+	int err;
+
+	memset(&cpu.flags, 0, sizeof cpu.flags);
+	cpu.level = 3;
+
+	if (has_eflag(X86_EFLAGS_AC))
+		cpu.level = 4;
+
+	get_flags();
+	err = check_flags();
+
+	if (test_bit(X86_FEATURE_LM, cpu.flags))
+		cpu.level = 64;
+
+	if (err == 0x01 &&
+	    !(err_flags[0] &
+	      ~((1 << X86_FEATURE_XMM)|(1 << X86_FEATURE_XMM2))) &&
+	    is_amd()) {
+		/* If this is an AMD and we're only missing SSE+SSE2, try to
+		   turn them on */
+
+		u32 ecx = MSR_K7_HWCR;
+		u32 eax, edx;
+
+		asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
+		eax &= ~(1 << 15);
+		asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+
+		get_flags();	/* Make sure it really did something */
+		err = check_flags();
+	} else if (err == 0x01 &&
+		   !(err_flags[0] & ~(1 << X86_FEATURE_CX8)) &&
+		   is_centaur() && cpu.model >= 6) {
+		/* If this is a VIA C3, we might have to enable CX8
+		   explicitly */
+
+		u32 ecx = MSR_VIA_FCR;
+		u32 eax, edx;
+
+		asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
+		eax |= (1<<1)|(1<<7);
+		asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+
+		set_bit(X86_FEATURE_CX8, cpu.flags);
+		err = check_flags();
+	} else if (err == 0x01 && is_transmeta()) {
+		/* Transmeta might have masked feature bits in word 0 */
+
+		u32 ecx = 0x80860004;
+		u32 eax, edx;
+		u32 level = 1;
+
+		asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx));
+		asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx));
+		asm("cpuid"
+		    : "+a" (level), "=d" (cpu.flags[0])
+		    : : "ecx", "ebx");
+		asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
+
+		err = check_flags();
+	}
+
+	if (err_flags_ptr)
+		*err_flags_ptr = err ? err_flags : NULL;
+	if (cpu_level_ptr)
+		*cpu_level_ptr = cpu.level;
+	if (req_level_ptr)
+		*req_level_ptr = req_level;
+
+	return (cpu.level < req_level || err) ?
-1 : 0; +} diff --git a/arch/x86/boot/ctype.h b/arch/x86/boot/ctype.h new file mode 100644 index 00000000..25e13403 --- /dev/null +++ b/arch/x86/boot/ctype.h @@ -0,0 +1,21 @@ +#ifndef BOOT_ISDIGIT_H + +#define BOOT_ISDIGIT_H + +static inline int isdigit(int ch) +{ + return (ch >= '0') && (ch <= '9'); +} + +static inline int isxdigit(int ch) +{ + if (isdigit(ch)) + return true; + + if ((ch >= 'a') && (ch <= 'f')) + return true; + + return (ch >= 'A') && (ch <= 'F'); +} + +#endif diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c new file mode 100644 index 00000000..5df2869c --- /dev/null +++ b/arch/x86/boot/early_serial_console.c @@ -0,0 +1,151 @@ +#include "boot.h" + +#define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */ + +#define XMTRDY 0x20 + +#define DLAB 0x80 + +#define TXR 0 /* Transmit register (WRITE) */ +#define RXR 0 /* Receive register (READ) */ +#define IER 1 /* Interrupt Enable */ +#define IIR 2 /* Interrupt ID */ +#define FCR 2 /* FIFO control */ +#define LCR 3 /* Line control */ +#define MCR 4 /* Modem control */ +#define LSR 5 /* Line Status */ +#define MSR 6 /* Modem Status */ +#define DLL 0 /* Divisor Latch Low */ +#define DLH 1 /* Divisor latch High */ + +#define DEFAULT_BAUD 9600 + +static void early_serial_init(int port, int baud) +{ + unsigned char c; + unsigned divisor; + + outb(0x3, port + LCR); /* 8n1 */ + outb(0, port + IER); /* no interrupt */ + outb(0, port + FCR); /* no fifo */ + outb(0x3, port + MCR); /* DTR + RTS */ + + divisor = 115200 / baud; + c = inb(port + LCR); + outb(c | DLAB, port + LCR); + outb(divisor & 0xff, port + DLL); + outb((divisor >> 8) & 0xff, port + DLH); + outb(c & ~DLAB, port + LCR); + + early_serial_base = port; +} + +static void parse_earlyprintk(void) +{ + int baud = DEFAULT_BAUD; + char arg[32]; + int pos = 0; + int port = 0; + + if (cmdline_find_option("earlyprintk", arg, sizeof arg) > 0) { + char *e; + + if (!strncmp(arg, "serial", 6)) { + port = DEFAULT_SERIAL_PORT; + pos += 6; + } + + if (arg[pos] == ',') + pos++; + + /* + * make sure we have + * "serial,0x3f8,115200" + * "serial,ttyS0,115200" + * "ttyS0,115200" + */ + if (pos == 7 && !strncmp(arg + pos, "0x", 2)) { + port = simple_strtoull(arg + pos, &e, 16); + if (port == 0 || arg + pos == e) + port = DEFAULT_SERIAL_PORT; + else + pos = e - arg; + } else if (!strncmp(arg + pos, "ttyS", 4)) { + static const int bases[] = { 0x3f8, 0x2f8 }; + int idx = 0; + + if (!strncmp(arg + pos, "ttyS", 4)) + pos += 4; + + if (arg[pos++] == '1') + idx = 1; + + port = bases[idx]; + } + + if (arg[pos] == ',') + pos++; + + baud = simple_strtoull(arg + pos, &e, 0); + if (baud == 0 || arg + pos == e) + baud = DEFAULT_BAUD; + } + + if (port) + early_serial_init(port, baud); +} + +#define BASE_BAUD (1843200/16) +static unsigned int probe_baud(int port) +{ + unsigned char lcr, dll, dlh; + unsigned int quot; + + lcr = inb(port + LCR); + outb(lcr | DLAB, port + LCR); + dll = inb(port + DLL); + dlh = inb(port + DLH); + outb(lcr, port + LCR); + quot = (dlh << 8) | dll; + + return BASE_BAUD / quot; +} + +static void parse_console_uart8250(void) +{ + char optstr[64], *options; + int baud = DEFAULT_BAUD; + int port = 0; + + /* + * console=uart8250,io,0x3f8,115200n8 + * need to make sure it is last one console ! 
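+ *
+ * Worked example of the parse below (a sketch of the data flow, not
+ * new behaviour): "console=uart8250,io,0x3f8,115200n8" yields
+ * port = 0x3f8 and baud = 115200; simple_strtoull() stops at the
+ * 'n', so the "n8" framing suffix is ignored.  A bare
+ * "console=uart,io,0x2f8" leaves the baud rate to probe_baud(),
+ * which reads the divisor latch back and computes 115200 / divisor
+ * (e.g. a latched divisor of 12 means 9600 baud).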
+ */ + if (cmdline_find_option("console", optstr, sizeof optstr) <= 0) + return; + + options = optstr; + + if (!strncmp(options, "uart8250,io,", 12)) + port = simple_strtoull(options + 12, &options, 0); + else if (!strncmp(options, "uart,io,", 8)) + port = simple_strtoull(options + 8, &options, 0); + else + return; + + if (options && (options[0] == ',')) + baud = simple_strtoull(options + 1, &options, 0); + else + baud = probe_baud(port); + + if (port) + early_serial_init(port, baud); +} + +void console_init(void) +{ + parse_earlyprintk(); + + if (!early_serial_base) + parse_console_uart8250(); +} diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c new file mode 100644 index 00000000..c501a5b4 --- /dev/null +++ b/arch/x86/boot/edd.c @@ -0,0 +1,181 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * Get EDD BIOS disk information + */ + +#include "boot.h" +#include + +#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) + +/* + * Read the MBR (first sector) from a specific device. + */ +static int read_mbr(u8 devno, void *buf) +{ + struct biosregs ireg, oreg; + + initregs(&ireg); + ireg.ax = 0x0201; /* Legacy Read, one sector */ + ireg.cx = 0x0001; /* Sector 0-0-1 */ + ireg.dl = devno; + ireg.bx = (size_t)buf; + + intcall(0x13, &ireg, &oreg); + + return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */ +} + +static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig) +{ + int sector_size; + char *mbrbuf_ptr, *mbrbuf_end; + u32 buf_base, mbr_base; + extern char _end[]; + u16 mbr_magic; + + sector_size = ei->params.bytes_per_sector; + if (!sector_size) + sector_size = 512; /* Best available guess */ + + /* Produce a naturally aligned buffer on the heap */ + buf_base = (ds() << 4) + (u32)&_end; + mbr_base = (buf_base+sector_size-1) & ~(sector_size-1); + mbrbuf_ptr = _end + (mbr_base-buf_base); + mbrbuf_end = mbrbuf_ptr + sector_size; + + /* Make sure we actually have space on the heap... */ + if (!(boot_params.hdr.loadflags & CAN_USE_HEAP)) + return -1; + if (mbrbuf_end > (char *)(size_t)boot_params.hdr.heap_end_ptr) + return -1; + + memset(mbrbuf_ptr, 0, sector_size); + if (read_mbr(devno, mbrbuf_ptr)) + return -1; + + *mbrsig = *(u32 *)&mbrbuf_ptr[EDD_MBR_SIG_OFFSET]; + mbr_magic = *(u16 *)&mbrbuf_ptr[510]; + + /* check for valid MBR magic */ + return mbr_magic == 0xAA55 ? 
0 : -1; +} + +static int get_edd_info(u8 devno, struct edd_info *ei) +{ + struct biosregs ireg, oreg; + + memset(ei, 0, sizeof *ei); + + /* Check Extensions Present */ + + initregs(&ireg); + ireg.ah = 0x41; + ireg.bx = EDDMAGIC1; + ireg.dl = devno; + intcall(0x13, &ireg, &oreg); + + if (oreg.eflags & X86_EFLAGS_CF) + return -1; /* No extended information */ + + if (oreg.bx != EDDMAGIC2) + return -1; + + ei->device = devno; + ei->version = oreg.ah; /* EDD version number */ + ei->interface_support = oreg.cx; /* EDD functionality subsets */ + + /* Extended Get Device Parameters */ + + ei->params.length = sizeof(ei->params); + ireg.ah = 0x48; + ireg.si = (size_t)&ei->params; + intcall(0x13, &ireg, &oreg); + + /* Get legacy CHS parameters */ + + /* Ralf Brown recommends setting ES:DI to 0:0 */ + ireg.ah = 0x08; + ireg.es = 0; + intcall(0x13, &ireg, &oreg); + + if (!(oreg.eflags & X86_EFLAGS_CF)) { + ei->legacy_max_cylinder = oreg.ch + ((oreg.cl & 0xc0) << 2); + ei->legacy_max_head = oreg.dh; + ei->legacy_sectors_per_track = oreg.cl & 0x3f; + } + + return 0; +} + +void query_edd(void) +{ + char eddarg[8]; + int do_mbr = 1; +#ifdef CONFIG_EDD_OFF + int do_edd = 0; +#else + int do_edd = 1; +#endif + int be_quiet; + int devno; + struct edd_info ei, *edp; + u32 *mbrptr; + + if (cmdline_find_option("edd", eddarg, sizeof eddarg) > 0) { + if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) { + do_edd = 1; + do_mbr = 0; + } + else if (!strcmp(eddarg, "off")) + do_edd = 0; + else if (!strcmp(eddarg, "on")) + do_edd = 1; + } + + be_quiet = cmdline_find_option_bool("quiet"); + + edp = boot_params.eddbuf; + mbrptr = boot_params.edd_mbr_sig_buffer; + + if (!do_edd) + return; + + /* Bugs in OnBoard or AddOnCards Bios may hang the EDD probe, + * so give a hint if this happens. + */ + + if (!be_quiet) + printf("Probing EDD (edd=off to disable)... "); + + for (devno = 0x80; devno < 0x80+EDD_MBR_SIG_MAX; devno++) { + /* + * Scan the BIOS-supported hard disks and query EDD + * information... + */ + if (!get_edd_info(devno, &ei) + && boot_params.eddbuf_entries < EDDMAXNR) { + memcpy(edp, &ei, sizeof ei); + edp++; + boot_params.eddbuf_entries++; + } + + if (do_mbr && !read_mbr_sig(devno, &ei, mbrptr++)) + boot_params.edd_mbr_sig_buf_entries = devno-0x80+1; + } + + if (!be_quiet) + printf("ok\n"); +} + +#endif diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S new file mode 100644 index 00000000..f1bbeeb0 --- /dev/null +++ b/arch/x86/boot/header.S @@ -0,0 +1,478 @@ +/* + * header.S + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Based on bootsect.S and setup.S + * modified by more people than can be counted + * + * Rewritten as a common file by H. Peter Anvin (Apr 2007) + * + * BIG FAT NOTE: We're in real mode using 64k segments. Therefore segment + * addresses must be multiplied by 16 to obtain their respective linear + * addresses. To avoid confusion, linear addresses are written using leading + * hex while segment addresses are written as segment:offset. 
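+ * For example, the traditional boot-sector segment:offset of
+ * 0x07C0:0x0000 corresponds to linear address 0x07C00, i.e.
+ * (0x07C0 << 4) + 0x0000.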
+ * + */ + +#include +#include +#include +#include +#include +#include +#include "boot.h" +#include "voffset.h" +#include "zoffset.h" + +BOOTSEG = 0x07C0 /* original address of boot-sector */ +SYSSEG = 0x1000 /* historical load address >> 4 */ + +#ifndef SVGA_MODE +#define SVGA_MODE ASK_VGA +#endif + +#ifndef RAMDISK +#define RAMDISK 0 +#endif + +#ifndef ROOT_RDONLY +#define ROOT_RDONLY 1 +#endif + + .code16 + .section ".bstext", "ax" + + .global bootsect_start +bootsect_start: +#ifdef CONFIG_EFI_STUB + # "MZ", MS-DOS header + .byte 0x4d + .byte 0x5a +#endif + + # Normalize the start address + ljmp $BOOTSEG, $start2 + +start2: + movw %cs, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + xorw %sp, %sp + sti + cld + + movw $bugger_off_msg, %si + +msg_loop: + lodsb + andb %al, %al + jz bs_die + movb $0xe, %ah + movw $7, %bx + int $0x10 + jmp msg_loop + +bs_die: + # Allow the user to press a key, then reboot + xorw %ax, %ax + int $0x16 + int $0x19 + + # int 0x19 should never return. In case it does anyway, + # invoke the BIOS reset code... + ljmp $0xf000,$0xfff0 + +#ifdef CONFIG_EFI_STUB + .org 0x3c + # + # Offset to the PE header. + # + .long pe_header +#endif /* CONFIG_EFI_STUB */ + + .section ".bsdata", "a" +bugger_off_msg: + .ascii "Direct booting from floppy is no longer supported.\r\n" + .ascii "Please use a boot loader program instead.\r\n" + .ascii "\n" + .ascii "Remove disk and press any key to reboot . . .\r\n" + .byte 0 + +#ifdef CONFIG_EFI_STUB +pe_header: + .ascii "PE" + .word 0 + +coff_header: +#ifdef CONFIG_X86_32 + .word 0x14c # i386 +#else + .word 0x8664 # x86-64 +#endif + .word 2 # nr_sections + .long 0 # TimeDateStamp + .long 0 # PointerToSymbolTable + .long 1 # NumberOfSymbols + .word section_table - optional_header # SizeOfOptionalHeader +#ifdef CONFIG_X86_32 + .word 0x306 # Characteristics. 
+ # IMAGE_FILE_32BIT_MACHINE | + # IMAGE_FILE_DEBUG_STRIPPED | + # IMAGE_FILE_EXECUTABLE_IMAGE | + # IMAGE_FILE_LINE_NUMS_STRIPPED +#else + .word 0x206 # Characteristics + # IMAGE_FILE_DEBUG_STRIPPED | + # IMAGE_FILE_EXECUTABLE_IMAGE | + # IMAGE_FILE_LINE_NUMS_STRIPPED +#endif + +optional_header: +#ifdef CONFIG_X86_32 + .word 0x10b # PE32 format +#else + .word 0x20b # PE32+ format +#endif + .byte 0x02 # MajorLinkerVersion + .byte 0x14 # MinorLinkerVersion + + # Filled in by build.c + .long 0 # SizeOfCode + + .long 0 # SizeOfInitializedData + .long 0 # SizeOfUninitializedData + + # Filled in by build.c + .long 0x0000 # AddressOfEntryPoint + + .long 0x0000 # BaseOfCode +#ifdef CONFIG_X86_32 + .long 0 # data +#endif + +extra_header_fields: +#ifdef CONFIG_X86_32 + .long 0 # ImageBase +#else + .quad 0 # ImageBase +#endif + .long 0x1000 # SectionAlignment + .long 0x200 # FileAlignment + .word 0 # MajorOperatingSystemVersion + .word 0 # MinorOperatingSystemVersion + .word 0 # MajorImageVersion + .word 0 # MinorImageVersion + .word 0 # MajorSubsystemVersion + .word 0 # MinorSubsystemVersion + .long 0 # Win32VersionValue + + # + # The size of the bzImage is written in tools/build.c + # + .long 0 # SizeOfImage + + .long 0x200 # SizeOfHeaders + .long 0 # CheckSum + .word 0xa # Subsystem (EFI application) + .word 0 # DllCharacteristics +#ifdef CONFIG_X86_32 + .long 0 # SizeOfStackReserve + .long 0 # SizeOfStackCommit + .long 0 # SizeOfHeapReserve + .long 0 # SizeOfHeapCommit +#else + .quad 0 # SizeOfStackReserve + .quad 0 # SizeOfStackCommit + .quad 0 # SizeOfHeapReserve + .quad 0 # SizeOfHeapCommit +#endif + .long 0 # LoaderFlags + .long 0x1 # NumberOfRvaAndSizes + + .quad 0 # ExportTable + .quad 0 # ImportTable + .quad 0 # ResourceTable + .quad 0 # ExceptionTable + .quad 0 # CertificationTable + .quad 0 # BaseRelocationTable + + # Section table +section_table: + .ascii ".text" + .byte 0 + .byte 0 + .byte 0 + .long 0 + .long 0x0 # startup_{32,64} + .long 0 # Size of initialized data + # on disk + .long 0x0 # startup_{32,64} + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long 0x60500020 # Characteristics (section flags) + + # + # The EFI application loader requires a relocation section + # because EFI applications are relocatable and not having + # this section seems to confuse it. But since we don't need + # the loader to fixup any relocs for us just fill it with a + # single dummy reloc. + # + .ascii ".reloc" + .byte 0 + .byte 0 + .long reloc_end - reloc_start + .long reloc_start + .long reloc_end - reloc_start # SizeOfRawData + .long reloc_start # PointerToRawData + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long 0x42100040 # Characteristics (section flags) +#endif /* CONFIG_EFI_STUB */ + + # Kernel attributes; used by setup. This is part 1 of the + # header, from the old boot sector. + + .section ".header", "a" + .globl hdr +hdr: +setup_sects: .byte 0 /* Filled in by build.c */ +root_flags: .word ROOT_RDONLY +syssize: .long 0 /* Filled in by build.c */ +ram_size: .word 0 /* Obsolete */ +vid_mode: .word SVGA_MODE +root_dev: .word 0 /* Filled in by build.c */ +boot_flag: .word 0xAA55 + + # offset 512, entry point + + .globl _start +_start: + # Explicitly enter this as bytes, or the assembler + # tries to generate a 3-byte jump here, which causes + # everything else to push off to the wrong offset. 
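+	# (0xeb is the short-jump opcode; its single operand byte is a
+	# signed 8-bit displacement counted from the end of the two-byte
+	# instruction, which is exactly what the "start_of_setup-1f"
+	# expression below computes.)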
+ .byte 0xeb # short (2-byte) jump + .byte start_of_setup-1f +1: + + # Part 2 of the header, from the old setup.S + + .ascii "HdrS" # header signature + .word 0x020a # header version number (>= 0x0105) + # or else old loadlin-1.5 will fail) + .globl realmode_swtch +realmode_swtch: .word 0, 0 # default_switch, SETUPSEG +start_sys_seg: .word SYSSEG # obsolete and meaningless, but just + # in case something decided to "use" it + .word kernel_version-512 # pointing to kernel version string + # above section of header is compatible + # with loadlin-1.5 (header v1.5). Don't + # change it. + +type_of_loader: .byte 0 # 0 means ancient bootloader, newer + # bootloaders know to change this. + # See Documentation/x86/boot.txt for + # assigned ids + +# flags, unused bits must be zero (RFU) bit within loadflags +loadflags: +LOADED_HIGH = 1 # If set, the kernel is loaded high +CAN_USE_HEAP = 0x80 # If set, the loader also has set + # heap_end_ptr to tell how much + # space behind setup.S can be used for + # heap purposes. + # Only the loader knows what is free + .byte LOADED_HIGH + +setup_move_size: .word 0x8000 # size to move, when setup is not + # loaded at 0x90000. We will move setup + # to 0x90000 then just before jumping + # into the kernel. However, only the + # loader knows how much data behind + # us also needs to be loaded. + +code32_start: # here loaders can put a different + # start address for 32-bit code. + .long 0x100000 # 0x100000 = default for big kernel + +ramdisk_image: .long 0 # address of loaded ramdisk image + # Here the loader puts the 32-bit + # address where it loaded the image. + # This only will be read by the kernel. + +ramdisk_size: .long 0 # its size in bytes + +bootsect_kludge: + .long 0 # obsolete + +heap_end_ptr: .word _end+STACK_SIZE-512 + # (Header version 0x0201 or later) + # space from here (exclusive) down to + # end of setup code can be used by setup + # for local heap purposes. + +ext_loader_ver: + .byte 0 # Extended boot loader version +ext_loader_type: + .byte 0 # Extended boot loader type + +cmd_line_ptr: .long 0 # (Header version 0x0202 or later) + # If nonzero, a 32-bit pointer + # to the kernel command line. + # The command line should be + # located between the start of + # setup and the end of low + # memory (0xa0000), or it may + # get overwritten before it + # gets read. If this field is + # used, there is no longer + # anything magical about the + # 0x90000 segment; the setup + # can be located anywhere in + # low memory 0x10000 or higher. + +ramdisk_max: .long 0x7fffffff + # (Header version 0x0203 or later) + # The highest safe address for + # the contents of an initrd + # The current kernel allows up to 4 GB, + # but leave it at 2 GB to avoid + # possible bootloader bugs. 
+ +kernel_alignment: .long CONFIG_PHYSICAL_ALIGN #physical addr alignment + #required for protected mode + #kernel +#ifdef CONFIG_RELOCATABLE +relocatable_kernel: .byte 1 +#else +relocatable_kernel: .byte 0 +#endif +min_alignment: .byte MIN_KERNEL_ALIGN_LG2 # minimum alignment +pad3: .word 0 + +cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, + #added with boot protocol + #version 2.06 + +hardware_subarch: .long 0 # subarchitecture, added with 2.07 + # default to 0 for normal x86 PC + +hardware_subarch_data: .quad 0 + +payload_offset: .long ZO_input_data +payload_length: .long ZO_z_input_len + +setup_data: .quad 0 # 64-bit physical pointer to + # single linked list of + # struct setup_data + +pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr + +#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset) +#define VO_INIT_SIZE (VO__end - VO__text) +#if ZO_INIT_SIZE > VO_INIT_SIZE +#define INIT_SIZE ZO_INIT_SIZE +#else +#define INIT_SIZE VO_INIT_SIZE +#endif +init_size: .long INIT_SIZE # kernel initialization size + +# End of setup header ##################################################### + + .section ".entrytext", "ax" +start_of_setup: +#ifdef SAFE_RESET_DISK_CONTROLLER +# Reset the disk controller. + movw $0x0000, %ax # Reset disk controller + movb $0x80, %dl # All disks + int $0x13 +#endif + +# Force %es = %ds + movw %ds, %ax + movw %ax, %es + cld + +# Apparently some ancient versions of LILO invoked the kernel with %ss != %ds, +# which happened to work by accident for the old code. Recalculate the stack +# pointer if %ss is invalid. Otherwise leave it alone, LOADLIN sets up the +# stack behind its own code, so we can't blindly put it directly past the heap. + + movw %ss, %dx + cmpw %ax, %dx # %ds == %ss? + movw %sp, %dx + je 2f # -> assume %sp is reasonably set + + # Invalid %ss, make up a new stack + movw $_end, %dx + testb $CAN_USE_HEAP, loadflags + jz 1f + movw heap_end_ptr, %dx +1: addw $STACK_SIZE, %dx + jnc 2f + xorw %dx, %dx # Prevent wraparound + +2: # Now %dx should point to the end of our stack space + andw $~3, %dx # dword align (might as well...) + jnz 3f + movw $0xfffc, %dx # Make sure we're not zero +3: movw %ax, %ss + movzwl %dx, %esp # Clear upper half of %esp + sti # Now we should have a working stack + +# We will have entered with %cs = %ds+0x20, normalize %cs so +# it is on par with the other segments. + pushw %ds + pushw $6f + lretw +6: + +# Check signature at end of setup + cmpl $0x5a5aaa55, setup_sig + jne setup_bad + +# Zero the bss + movw $__bss_start, %di + movw $_end+3, %cx + xorl %eax, %eax + subw %di, %cx + shrw $2, %cx + rep; stosl + +# Jump to C code (should not return) + calll main + +# Setup corrupt somehow... +setup_bad: + movl $setup_corrupt, %eax + calll puts + # Fall through... + + .globl die + .type die, @function +die: + hlt + jmp die + + .size die, .-die + + .section ".initdata", "a" +setup_corrupt: + .byte 7 + .string "No setup signature found...\n" + + .data +dummy: .long 0 + + .section .reloc +reloc_start: + .long dummy - reloc_start + .long 10 + .word 0 +reloc_end: diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh new file mode 100644 index 00000000..d13ec1c3 --- /dev/null +++ b/arch/x86/boot/install.sh @@ -0,0 +1,59 @@ +#!/bin/sh +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. 
+# +# Copyright (C) 1995 by Linus Torvalds +# +# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin +# +# "make install" script for i386 architecture +# +# Arguments: +# $1 - kernel version +# $2 - kernel image file +# $3 - kernel map file +# $4 - default install path (blank if root directory) +# + +verify () { + if [ ! -f "$1" ]; then + echo "" 1>&2 + echo " *** Missing file: $1" 1>&2 + echo ' *** You need to run "make" before "make install".' 1>&2 + echo "" 1>&2 + exit 1 + fi +} + +# Make sure the files actually exist +verify "$2" +verify "$3" + +# User may have a custom install script + +if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi +if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi + +# Default install - same as make zlilo + +if [ -f $4/vmlinuz ]; then + mv $4/vmlinuz $4/vmlinuz.old +fi + +if [ -f $4/System.map ]; then + mv $4/System.map $4/System.old +fi + +cat $2 > $4/vmlinuz +cp $3 $4/System.map + +if [ -x /sbin/lilo ]; then + /sbin/lilo +elif [ -x /etc/lilo/install ]; then + /etc/lilo/install +else + sync + echo "Cannot find LILO." +fi diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c new file mode 100644 index 00000000..40358c89 --- /dev/null +++ b/arch/x86/boot/main.c @@ -0,0 +1,178 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * Main module for the real-mode kernel code + */ + +#include "boot.h" + +struct boot_params boot_params __attribute__((aligned(16))); + +char *HEAP = _end; +char *heap_end = _end; /* Default end of heap = no heap */ + +/* + * Copy the header into the boot parameter block. Since this + * screws up the old-style command line protocol, adjust by + * filling in the new-style command line pointer instead. + */ + +static void copy_boot_params(void) +{ + struct old_cmdline { + u16 cl_magic; + u16 cl_offset; + }; + const struct old_cmdline * const oldcmd = + (const struct old_cmdline *)OLD_CL_ADDRESS; + + BUILD_BUG_ON(sizeof boot_params != 4096); + memcpy(&boot_params.hdr, &hdr, sizeof hdr); + + if (!boot_params.hdr.cmd_line_ptr && + oldcmd->cl_magic == OLD_CL_MAGIC) { + /* Old-style command line protocol. */ + u16 cmdline_seg; + + /* Figure out if the command line falls in the region + of memory that an old kernel would have copied up + to 0x90000... */ + if (oldcmd->cl_offset < boot_params.hdr.setup_move_size) + cmdline_seg = ds(); + else + cmdline_seg = 0x9000; + + boot_params.hdr.cmd_line_ptr = + (cmdline_seg << 4) + oldcmd->cl_offset; + } +} + +/* + * Set the keyboard repeat rate to maximum. Unclear why this + * is done here; this might be possible to kill off as stale code. + */ +static void keyboard_set_repeat(void) +{ + struct biosregs ireg; + initregs(&ireg); + ireg.ax = 0x0305; + intcall(0x16, &ireg, NULL); +} + +/* + * Get Intel SpeedStep (IST) information. + */ +static void query_ist(void) +{ + struct biosregs ireg, oreg; + + /* Some older BIOSes apparently crash on this call, so filter + it from machines too old to have SpeedStep at all. 
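+	   (The request value 0x47534943 loaded below is simply the
+	   ASCII codes for "GSIC".)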
*/ + if (cpu.level < 6) + return; + + initregs(&ireg); + ireg.ax = 0xe980; /* IST Support */ + ireg.edx = 0x47534943; /* Request value */ + intcall(0x15, &ireg, &oreg); + + boot_params.ist_info.signature = oreg.eax; + boot_params.ist_info.command = oreg.ebx; + boot_params.ist_info.event = oreg.ecx; + boot_params.ist_info.perf_level = oreg.edx; +} + +/* + * Tell the BIOS what CPU mode we intend to run in. + */ +static void set_bios_mode(void) +{ +#ifdef CONFIG_X86_64 + struct biosregs ireg; + + initregs(&ireg); + ireg.ax = 0xec00; + ireg.bx = 2; + intcall(0x15, &ireg, NULL); +#endif +} + +static void init_heap(void) +{ + char *stack_end; + + if (boot_params.hdr.loadflags & CAN_USE_HEAP) { + asm("leal %P1(%%esp),%0" + : "=r" (stack_end) : "i" (-STACK_SIZE)); + + heap_end = (char *) + ((size_t)boot_params.hdr.heap_end_ptr + 0x200); + if (heap_end > stack_end) + heap_end = stack_end; + } else { + /* Boot protocol 2.00 only, no heap available */ + puts("WARNING: Ancient bootloader, some functionality " + "may be limited!\n"); + } +} + +void main(void) +{ + /* First, copy the boot header into the "zeropage" */ + copy_boot_params(); + + /* Initialize the early-boot console */ + console_init(); + if (cmdline_find_option_bool("debug")) + puts("early console in setup code\n"); + + /* End of heap check */ + init_heap(); + + /* Make sure we have all the proper CPU support */ + if (validate_cpu()) { + puts("Unable to boot - please use a kernel appropriate " + "for your CPU.\n"); + die(); + } + + /* Tell the BIOS what CPU mode we intend to run in. */ + set_bios_mode(); + + /* Detect memory layout */ + detect_memory(); + + /* Set keyboard repeat rate (why?) */ + keyboard_set_repeat(); + + /* Query MCA information */ + query_mca(); + + /* Query Intel SpeedStep (IST) information */ + query_ist(); + + /* Query APM information */ +#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE) + query_apm_bios(); +#endif + + /* Query EDD information */ +#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) + query_edd(); +#endif + + /* Set the video mode */ + set_video(); + + /* Do the last things and invoke protected mode */ + go_to_protected_mode(); +} diff --git a/arch/x86/boot/mca.c b/arch/x86/boot/mca.c new file mode 100644 index 00000000..a95a5311 --- /dev/null +++ b/arch/x86/boot/mca.c @@ -0,0 +1,38 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. 
+ * + * ----------------------------------------------------------------------- */ + +/* + * Get the MCA system description table + */ + +#include "boot.h" + +int query_mca(void) +{ + struct biosregs ireg, oreg; + u16 len; + + initregs(&ireg); + ireg.ah = 0xc0; + intcall(0x15, &ireg, &oreg); + + if (oreg.eflags & X86_EFLAGS_CF) + return -1; /* No MCA present */ + + set_fs(oreg.es); + len = rdfs16(oreg.bx); + + if (len > sizeof(boot_params.sys_desc_table)) + len = sizeof(boot_params.sys_desc_table); + + copy_from_fs(&boot_params.sys_desc_table, oreg.bx, len); + return 0; +} diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c new file mode 100644 index 00000000..db75d07c --- /dev/null +++ b/arch/x86/boot/memory.c @@ -0,0 +1,136 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * Memory detection code + */ + +#include "boot.h" + +#define SMAP 0x534d4150 /* ASCII "SMAP" */ + +static int detect_memory_e820(void) +{ + int count = 0; + struct biosregs ireg, oreg; + struct e820entry *desc = boot_params.e820_map; + static struct e820entry buf; /* static so it is zeroed */ + + initregs(&ireg); + ireg.ax = 0xe820; + ireg.cx = sizeof buf; + ireg.edx = SMAP; + ireg.di = (size_t)&buf; + + /* + * Note: at least one BIOS is known which assumes that the + * buffer pointed to by one e820 call is the same one as + * the previous call, and only changes modified fields. Therefore, + * we use a temporary buffer and copy the results entry by entry. + * + * This routine deliberately does not try to account for + * ACPI 3+ extended attributes. This is because there are + * BIOSes in the field which report zero for the valid bit for + * all ranges, and we don't currently make any use of the + * other attribute bits. Revisit this if we see the extended + * attribute bits deployed in a meaningful way in the future. + */ + + do { + intcall(0x15, &ireg, &oreg); + ireg.ebx = oreg.ebx; /* for next iteration... */ + + /* BIOSes which terminate the chain with CF = 1 as opposed + to %ebx = 0 don't always report the SMAP signature on + the final, failing, probe. */ + if (oreg.eflags & X86_EFLAGS_CF) + break; + + /* Some BIOSes stop returning SMAP in the middle of + the search loop. We don't know exactly how the BIOS + screwed up the map at that point, we might have a + partial map, the full map, or complete garbage, so + just return failure. */ + if (oreg.eax != SMAP) { + count = 0; + break; + } + + *desc++ = buf; + count++; + } while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map)); + + return boot_params.e820_entries = count; +} + +static int detect_memory_e801(void) +{ + struct biosregs ireg, oreg; + + initregs(&ireg); + ireg.ax = 0xe801; + intcall(0x15, &ireg, &oreg); + + if (oreg.eflags & X86_EFLAGS_CF) + return -1; + + /* Do we really need to do this? */ + if (oreg.cx || oreg.dx) { + oreg.ax = oreg.cx; + oreg.bx = oreg.dx; + } + + if (oreg.ax > 15*1024) { + return -1; /* Bogus! */ + } else if (oreg.ax == 15*1024) { + boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax; + } else { + /* + * This ignores memory above 16MB if we have a memory + * hole there. 
If someone actually finds a machine + * with a memory hole at 16MB and no support for + * 0E820h they should probably generate a fake e820 + * map. + */ + boot_params.alt_mem_k = oreg.ax; + } + + return 0; +} + +static int detect_memory_88(void) +{ + struct biosregs ireg, oreg; + + initregs(&ireg); + ireg.ah = 0x88; + intcall(0x15, &ireg, &oreg); + + boot_params.screen_info.ext_mem_k = oreg.ax; + + return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */ +} + +int detect_memory(void) +{ + int err = -1; + + if (detect_memory_e820() > 0) + err = 0; + + if (!detect_memory_e801()) + err = 0; + + if (!detect_memory_88()) + err = 0; + + return err; +} diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c new file mode 100644 index 00000000..919257f5 --- /dev/null +++ b/arch/x86/boot/mkcpustr.c @@ -0,0 +1,49 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2008 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2 or (at your + * option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * This is a host program to preprocess the CPU strings into a + * compact format suitable for the setup code. + */ + +#include + +#include "../kernel/cpu/capflags.c" + +int main(void) +{ + int i, j; + const char *str; + + printf("static const char x86_cap_strs[] =\n"); + + for (i = 0; i < NCAPINTS; i++) { + for (j = 0; j < 32; j++) { + str = x86_cap_flags[i*32+j]; + + if (i == NCAPINTS-1 && j == 31) { + /* The last entry must be unconditional; this + also consumes the compiler-added null + character */ + if (!str) + str = ""; + printf("\t\"\\x%02x\\x%02x\"\"%s\"\n", + i, j, str); + } else if (str) { + printf("#if REQUIRED_MASK%d & (1 << %d)\n" + "\t\"\\x%02x\\x%02x\"\"%s\\0\"\n" + "#endif\n", + i, j, i, j, str); + } + } + } + printf("\t;\n"); + return 0; +} diff --git a/arch/x86/boot/mtools.conf.in b/arch/x86/boot/mtools.conf.in new file mode 100644 index 00000000..efd6d249 --- /dev/null +++ b/arch/x86/boot/mtools.conf.in @@ -0,0 +1,17 @@ +# +# mtools configuration file for "make (b)zdisk" +# + +# Actual floppy drive +drive a: + file="/dev/fd0" + +# 1.44 MB floppy disk image +drive v: + file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=18 filter + +# 2.88 MB floppy disk image (mostly for virtual uses) +drive w: + file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=36 filter + + diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c new file mode 100644 index 00000000..8062f891 --- /dev/null +++ b/arch/x86/boot/pm.c @@ -0,0 +1,126 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * Prepare the machine for transition to protected mode. + */ + +#include "boot.h" +#include + +/* + * Invoke the realmode switch hook if present; otherwise + * disable all interrupts. 
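+ *
+ * (The hook, when present, is entered via a 16-bit far call.  The
+ * fallback path also masks NMI by writing 0x80 to the CMOS index
+ * port; bit 7 of port 0x70 is the NMI-disable bit.)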
+ */ +static void realmode_switch_hook(void) +{ + if (boot_params.hdr.realmode_swtch) { + asm volatile("lcallw *%0" + : : "m" (boot_params.hdr.realmode_swtch) + : "eax", "ebx", "ecx", "edx"); + } else { + asm volatile("cli"); + outb(0x80, 0x70); /* Disable NMI */ + io_delay(); + } +} + +/* + * Disable all interrupts at the legacy PIC. + */ +static void mask_all_interrupts(void) +{ + outb(0xff, 0xa1); /* Mask all interrupts on the secondary PIC */ + io_delay(); + outb(0xfb, 0x21); /* Mask all but cascade on the primary PIC */ + io_delay(); +} + +/* + * Reset IGNNE# if asserted in the FPU. + */ +static void reset_coprocessor(void) +{ + outb(0, 0xf0); + io_delay(); + outb(0, 0xf1); + io_delay(); +} + +/* + * Set up the GDT + */ + +struct gdt_ptr { + u16 len; + u32 ptr; +} __attribute__((packed)); + +static void setup_gdt(void) +{ + /* There are machines which are known to not boot with the GDT + being 8-byte unaligned. Intel recommends 16 byte alignment. */ + static const u64 boot_gdt[] __attribute__((aligned(16))) = { + /* CS: code, read/execute, 4 GB, base 0 */ + [GDT_ENTRY_BOOT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff), + /* DS: data, read/write, 4 GB, base 0 */ + [GDT_ENTRY_BOOT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff), + /* TSS: 32-bit tss, 104 bytes, base 4096 */ + /* We only have a TSS here to keep Intel VT happy; + we don't actually use it for anything. */ + [GDT_ENTRY_BOOT_TSS] = GDT_ENTRY(0x0089, 4096, 103), + }; + /* Xen HVM incorrectly stores a pointer to the gdt_ptr, instead + of the gdt_ptr contents. Thus, make it static so it will + stay in memory, at least long enough that we switch to the + proper kernel GDT. */ + static struct gdt_ptr gdt; + + gdt.len = sizeof(boot_gdt)-1; + gdt.ptr = (u32)&boot_gdt + (ds() << 4); + + asm volatile("lgdtl %0" : : "m" (gdt)); +} + +/* + * Set up the IDT + */ +static void setup_idt(void) +{ + static const struct gdt_ptr null_idt = {0, 0}; + asm volatile("lidtl %0" : : "m" (null_idt)); +} + +/* + * Actual invocation sequence + */ +void go_to_protected_mode(void) +{ + /* Hook before leaving real mode, also disables interrupts */ + realmode_switch_hook(); + + /* Enable the A20 gate */ + if (enable_a20()) { + puts("A20 gate not responding, unable to boot...\n"); + die(); + } + + /* Reset coprocessor (IGNNE#) */ + reset_coprocessor(); + + /* Mask all interrupts in the PIC */ + mask_all_interrupts(); + + /* Actual transition to protected mode... */ + setup_idt(); + setup_gdt(); + protected_mode_jump(boot_params.hdr.code32_start, + (u32)&boot_params + (ds() << 4)); +} diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S new file mode 100644 index 00000000..3e0edc6d --- /dev/null +++ b/arch/x86/boot/pmjump.S @@ -0,0 +1,77 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. 
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * The actual transition into protected mode
+ */
+
+#include <asm/boot.h>
+#include <asm/processor-flags.h>
+#include <asm/segment.h>
+#include <linux/linkage.h>
+
+	.text
+	.code16
+
+/*
+ * void protected_mode_jump(u32 entrypoint, u32 bootparams);
+ */
+GLOBAL(protected_mode_jump)
+	movl	%edx, %esi		# Pointer to boot_params table
+
+	xorl	%ebx, %ebx
+	movw	%cs, %bx
+	shll	$4, %ebx
+	addl	%ebx, 2f
+	jmp	1f			# Short jump to serialize on 386/486
+1:
+
+	movw	$__BOOT_DS, %cx
+	movw	$__BOOT_TSS, %di
+
+	movl	%cr0, %edx
+	orb	$X86_CR0_PE, %dl	# Protected mode
+	movl	%edx, %cr0
+
+	# Transition to 32-bit mode
+	.byte	0x66, 0xea		# ljmpl opcode
+2:	.long	in_pm32			# offset
+	.word	__BOOT_CS		# segment
+ENDPROC(protected_mode_jump)
+
+	.code32
+	.section ".text32","ax"
+GLOBAL(in_pm32)
+	# Set up data segments for flat 32-bit mode
+	movl	%ecx, %ds
+	movl	%ecx, %es
+	movl	%ecx, %fs
+	movl	%ecx, %gs
+	movl	%ecx, %ss
+	# The 32-bit code sets up its own stack, but this way we do have
+	# a valid stack if some debugging hack wants to use it.
+	addl	%ebx, %esp
+
+	# Set up TR to make Intel VT happy
+	ltr	%di
+
+	# Clear registers to allow for future extensions to the
+	# 32-bit boot protocol
+	xorl	%ecx, %ecx
+	xorl	%edx, %edx
+	xorl	%ebx, %ebx
+	xorl	%ebp, %ebp
+	xorl	%edi, %edi
+
+	# Set up LDTR to make Intel VT happy
+	lldt	%cx
+
+	jmpl	*%eax			# Jump to the 32-bit entrypoint
+ENDPROC(in_pm32)
diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c
new file mode 100644
index 00000000..cdac91ca
--- /dev/null
+++ b/arch/x86/boot/printf.c
@@ -0,0 +1,309 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright (C) 1991, 1992 Linus Torvalds
+ *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Oh, it's a waste of space, but oh-so-yummy for debugging.  This
+ * version of printf() does not include 64-bit support.  "Live with
+ * it."
+ *
+ */
+
+#include "boot.h"
+
+static int skip_atoi(const char **s)
+{
+	int i = 0;
+
+	while (isdigit(**s))
+		i = i * 10 + *((*s)++) - '0';
+	return i;
+}
+
+#define ZEROPAD	1		/* pad with zero */
+#define SIGN	2		/* unsigned/signed long */
+#define PLUS	4		/* show plus */
+#define SPACE	8		/* space if plus */
+#define LEFT	16		/* left justified */
+#define SMALL	32		/* Must be 32 == 0x20 */
+#define SPECIAL	64		/* 0x */
+
+#define __do_div(n, base) ({ \
+int __res; \
+__res = ((unsigned long) n) % (unsigned) base; \
+n = ((unsigned long) n) / (unsigned) base; \
+__res; })
+
+static char *number(char *str, long num, int base, int size, int precision,
+		    int type)
+{
+	/* we are called with base 8, 10 or 16, only, thus don't need "G..." */
+	static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+	char tmp[66];
+	char c, sign, locase;
+	int i;
+
+	/* locase = 0 or 0x20. ORing digits or letters with 'locase'
+	 * produces same digits or (maybe lowercased) letters */
+	locase = (type & SMALL);
+	if (type & LEFT)
+		type &= ~ZEROPAD;
+	if (base < 2 || base > 36)
+		return NULL;
+	c = (type & ZEROPAD) ?
'0' : ' '; + sign = 0; + if (type & SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++] = '0'; + else + while (num != 0) + tmp[i++] = (digits[__do_div(num, base)] | locase); + if (i > precision) + precision = i; + size -= precision; + if (!(type & (ZEROPAD + LEFT))) + while (size-- > 0) + *str++ = ' '; + if (sign) + *str++ = sign; + if (type & SPECIAL) { + if (base == 8) + *str++ = '0'; + else if (base == 16) { + *str++ = '0'; + *str++ = ('X' | locase); + } + } + if (!(type & LEFT)) + while (size-- > 0) + *str++ = c; + while (i < precision--) + *str++ = '0'; + while (i-- > 0) + *str++ = tmp[i]; + while (size-- > 0) + *str++ = ' '; + return str; +} + +int vsprintf(char *buf, const char *fmt, va_list args) +{ + int len; + unsigned long num; + int i, base; + char *str; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + + for (str = buf; *fmt; ++fmt) { + if (*fmt != '%') { + *str++ = *fmt; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': + flags |= LEFT; + goto repeat; + case '+': + flags |= PLUS; + goto repeat; + case ' ': + flags |= SPACE; + goto repeat; + case '#': + flags |= SPECIAL; + goto repeat; + case '0': + flags |= ZEROPAD; + goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) + while (--field_width > 0) + *str++ = ' '; + *str++ = (unsigned char)va_arg(args, int); + while (--field_width > 0) + *str++ = ' '; + continue; + + case 's': + s = va_arg(args, char *); + len = strnlen(s, precision); + + if (!(flags & LEFT)) + while (len < field_width--) + *str++ = ' '; + for (i = 0; i < len; ++i) + *str++ = *s++; + while (len < field_width--) + *str++ = ' '; + continue; + + case 'p': + if (field_width == -1) { + field_width = 2 * sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, + (unsigned long)va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + case 'n': + if (qualifier == 'l') { + long *ip = va_arg(args, long *); + *ip = (str - buf); + } else { + int *ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + *str++ = '%'; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'x': + flags |= SMALL; + case 'X': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + 
default: + *str++ = '%'; + if (*fmt) + *str++ = *fmt; + else + --fmt; + continue; + } + if (qualifier == 'l') + num = va_arg(args, unsigned long); + else if (qualifier == 'h') { + num = (unsigned short)va_arg(args, int); + if (flags & SIGN) + num = (short)num; + } else if (flags & SIGN) + num = va_arg(args, int); + else + num = va_arg(args, unsigned int); + str = number(str, num, base, field_width, precision, flags); + } + *str = '\0'; + return str - buf; +} + +int sprintf(char *buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = vsprintf(buf, fmt, args); + va_end(args); + return i; +} + +int printf(const char *fmt, ...) +{ + char printf_buf[1024]; + va_list args; + int printed; + + va_start(args, fmt); + printed = vsprintf(printf_buf, fmt, args); + va_end(args); + + puts(printf_buf); + + return printed; +} diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c new file mode 100644 index 00000000..958019b1 --- /dev/null +++ b/arch/x86/boot/regs.c @@ -0,0 +1,29 @@ +/* ----------------------------------------------------------------------- + * + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2 or (at your + * option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * Simple helper function for initializing a register set. + * + * Note that this sets EFLAGS_CF in the input register set; this + * makes it easier to catch functions which do nothing but don't + * explicitly set CF. + */ + +#include "boot.h" + +void initregs(struct biosregs *reg) +{ + memset(reg, 0, sizeof *reg); + reg->eflags |= X86_EFLAGS_CF; + reg->ds = ds(); + reg->es = ds(); + reg->fs = fs(); + reg->gs = gs(); +} diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld new file mode 100644 index 00000000..03c06836 --- /dev/null +++ b/arch/x86/boot/setup.ld @@ -0,0 +1,64 @@ +/* + * setup.ld + * + * Linker script for the i386 setup code + */ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) + +SECTIONS +{ + . = 0; + .bstext : { *(.bstext) } + .bsdata : { *(.bsdata) } + + . = 497; + .header : { *(.header) } + .entrytext : { *(.entrytext) } + .inittext : { *(.inittext) } + .initdata : { *(.initdata) } + __end_init = .; + + .text : { *(.text) } + .text32 : { *(.text32) } + + . = ALIGN(16); + .rodata : { *(.rodata*) } + + .videocards : { + video_cards = .; + *(.videocards) + video_cards_end = .; + } + + . = ALIGN(16); + .data : { *(.data*) } + + .signature : { + setup_sig = .; + LONG(0x5a5aaa55) + } + + + . = ALIGN(16); + .bss : + { + __bss_start = .; + *(.bss) + __bss_end = .; + } + . = ALIGN(16); + _end = .; + + /DISCARD/ : { *(.note*) } + + /* + * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: + */ + . = ASSERT(_end <= 0x8000, "Setup too big!"); + . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); + /* Necessary for the very-old-loader check to work... */ + . = ASSERT(__end_init <= 5*512, "init sections too big!"); + +} diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c new file mode 100644 index 00000000..574dedfe --- /dev/null +++ b/arch/x86/boot/string.c @@ -0,0 +1,148 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. 
- All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * Very basic string functions + */ + +#include "boot.h" + +int strcmp(const char *str1, const char *str2) +{ + const unsigned char *s1 = (const unsigned char *)str1; + const unsigned char *s2 = (const unsigned char *)str2; + int delta = 0; + + while (*s1 || *s2) { + delta = *s2 - *s1; + if (delta) + return delta; + s1++; + s2++; + } + return 0; +} + +int strncmp(const char *cs, const char *ct, size_t count) +{ + unsigned char c1, c2; + + while (count) { + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) + break; + count--; + } + return 0; +} + +size_t strnlen(const char *s, size_t maxlen) +{ + const char *es = s; + while (*es && maxlen) { + es++; + maxlen--; + } + + return (es - s); +} + +unsigned int atou(const char *s) +{ + unsigned int i = 0; + while (isdigit(*s)) + i = i * 10 + (*s++ - '0'); + return i; +} + +/* Works only for digits and letters, but small and fast */ +#define TOLOWER(x) ((x) | 0x20) + +static unsigned int simple_guess_base(const char *cp) +{ + if (cp[0] == '0') { + if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2])) + return 16; + else + return 8; + } else { + return 10; + } +} + +/** + * simple_strtoull - convert a string to an unsigned long long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ + +unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) +{ + unsigned long long result = 0; + + if (!base) + base = simple_guess_base(cp); + + if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x') + cp += 2; + + while (isxdigit(*cp)) { + unsigned int value; + + value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10; + if (value >= base) + break; + result = result * base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + + return result; +} + +/** + * strlen - Find the length of a string + * @s: The string to be sized + */ +size_t strlen(const char *s) +{ + const char *sc; + + for (sc = s; *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +/** + * strstr - Find the first substring in a %NUL terminated string + * @s1: The string to be searched + * @s2: The string to search for + */ +char *strstr(const char *s1, const char *s2) +{ + size_t l1, l2; + + l2 = strlen(s2); + if (!l2) + return (char *)s1; + l1 = strlen(s1); + while (l1 >= l2) { + l1--; + if (!memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c new file mode 100644 index 00000000..24443a33 --- /dev/null +++ b/arch/x86/boot/tools/build.c @@ -0,0 +1,266 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1997 Martin Mares + * Copyright (C) 2007 H. Peter Anvin + */ + +/* + * This file builds a disk-image from two different files: + * + * - setup: 8086 machine code, sets up system parm + * - system: 80386 code for actual system + * + * It does some checking that all files are of the correct type, and + * just writes the result to stdout, removing headers and padding to + * the right amount. It also writes some system data to stderr. + */ + +/* + * Changes by tytso to allow root device specification + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 
1996 + * Cross compiling fixes by Gertjan van Wingerde, July 1996 + * Rewritten by Martin Mares, April 1997 + * Substantially overhauled by H. Peter Anvin, April 2007 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; + +#define DEFAULT_MAJOR_ROOT 0 +#define DEFAULT_MINOR_ROOT 0 +#define DEFAULT_ROOT_DEV (DEFAULT_MAJOR_ROOT << 8 | DEFAULT_MINOR_ROOT) + +/* Minimal number of setup sectors */ +#define SETUP_SECT_MIN 5 +#define SETUP_SECT_MAX 64 + +/* This must be large enough to hold the entire setup */ +u8 buf[SETUP_SECT_MAX*512]; +int is_big_kernel; + +/*----------------------------------------------------------------------*/ + +static const u32 crctab32[] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 
0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d +}; + +static u32 partial_crc32_one(u8 c, u32 crc) +{ + return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8); +} + +static u32 partial_crc32(const u8 *s, int len, u32 crc) +{ + while (len--) + crc = partial_crc32_one(*s++, crc); + return crc; +} + +static void die(const char * str, ...) +{ + va_list args; + va_start(args, str); + vfprintf(stderr, str, args); + fputc('\n', stderr); + exit(1); +} + +static void usage(void) +{ + die("Usage: build setup system [> image]"); +} + +int main(int argc, char ** argv) +{ +#ifdef CONFIG_EFI_STUB + unsigned int file_sz, pe_header; +#endif + unsigned int i, sz, setup_sectors; + int c; + u32 sys_size; + struct stat sb; + FILE *file; + int fd; + void *kernel; + u32 crc = 0xffffffffUL; + + if (argc != 3) + usage(); + + /* Copy the setup code */ + file = fopen(argv[1], "r"); + if (!file) + die("Unable to open `%s': %m", argv[1]); + c = fread(buf, 1, sizeof(buf), file); + if (ferror(file)) + die("read-error on `setup'"); + if (c < 1024) + die("The setup must be at least 1024 bytes"); + if (get_unaligned_le16(&buf[510]) != 0xAA55) + die("Boot block hasn't got boot flag (0xAA55)"); + fclose(file); + + /* Pad unused space with zeros */ + setup_sectors = (c + 511) / 512; + if (setup_sectors < SETUP_SECT_MIN) + setup_sectors = SETUP_SECT_MIN; + i = setup_sectors*512; + memset(buf+c, 0, i-c); + + /* Set the default root device */ + put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]); + + fprintf(stderr, "Setup is %d bytes (padded to %d bytes).\n", c, i); + + /* Open and stat the kernel file */ + fd = open(argv[2], O_RDONLY); + if (fd < 0) + die("Unable to open `%s': %m", argv[2]); + if (fstat(fd, &sb)) + die("Unable to stat `%s': %m", argv[2]); + sz = sb.st_size; + fprintf (stderr, "System is %d kB\n", (sz+1023)/1024); + kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0); + if (kernel == MAP_FAILED) + die("Unable to mmap '%s': %m", argv[2]); + /* Number of 16-byte paragraphs, including space for a 4-byte CRC */ + sys_size = (sz + 15 + 4) / 16; + + /* Patch the setup code with the appropriate size parameters */ + buf[0x1f1] = setup_sectors-1; + put_unaligned_le32(sys_size, &buf[0x1f4]); + +#ifdef CONFIG_EFI_STUB + file_sz = sz + i + ((sys_size * 16) - sz); + + pe_header = get_unaligned_le32(&buf[0x3c]); + + /* Size of code */ + put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]); + + /* Size of image */ + put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); + +#ifdef CONFIG_X86_32 + /* + * Address of entry point. + * + * The EFI stub entry point is +16 bytes from the start of + * the .text section. + */ + put_unaligned_le32(i + 16, &buf[pe_header + 0x28]); + + /* .text size */ + put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]); + + /* .text size of initialised data */ + put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]); +#else + /* + * Address of entry point. startup_32 is at the beginning and + * the 64-bit entry point (startup_64) is always 512 bytes + * after. 
The EFI stub entry point is 16 bytes after that, as + * the first instruction allows legacy loaders to jump over + * the EFI stub initialisation + */ + put_unaligned_le32(i + 528, &buf[pe_header + 0x28]); + + /* .text size */ + put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]); + + /* .text size of initialised data */ + put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]); + +#endif /* CONFIG_X86_32 */ +#endif /* CONFIG_EFI_STUB */ + + crc = partial_crc32(buf, i, crc); + if (fwrite(buf, 1, i, stdout) != i) + die("Writing setup failed"); + + /* Copy the kernel code */ + crc = partial_crc32(kernel, sz, crc); + if (fwrite(kernel, 1, sz, stdout) != sz) + die("Writing kernel failed"); + + /* Add padding leaving 4 bytes for the checksum */ + while (sz++ < (sys_size*16) - 4) { + crc = partial_crc32_one('\0', crc); + if (fwrite("\0", 1, 1, stdout) != 1) + die("Writing padding failed"); + } + + /* Write the CRC */ + fprintf(stderr, "CRC %x\n", crc); + put_unaligned_le32(crc, buf); + if (fwrite(buf, 1, 4, stdout) != 4) + die("Writing CRC failed"); + + close(fd); + + /* Everything is OK */ + return 0; +} diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c new file mode 100644 index 00000000..def2451f --- /dev/null +++ b/arch/x86/boot/tty.c @@ -0,0 +1,139 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * Very simple screen and serial I/O + */ + +#include "boot.h" + +int early_serial_base; + +#define XMTRDY 0x20 + +#define TXR 0 /* Transmit register (WRITE) */ +#define LSR 5 /* Line Status */ + +/* + * These functions are in .inittext so they can be used to signal + * error during initialization. + */ + +static void __attribute__((section(".inittext"))) serial_putchar(int ch) +{ + unsigned timeout = 0xffff; + + while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) + cpu_relax(); + + outb(ch, early_serial_base + TXR); +} + +static void __attribute__((section(".inittext"))) bios_putchar(int ch) +{ + struct biosregs ireg; + + initregs(&ireg); + ireg.bx = 0x0007; + ireg.cx = 0x0001; + ireg.ah = 0x0e; + ireg.al = ch; + intcall(0x10, &ireg, NULL); +} + +void __attribute__((section(".inittext"))) putchar(int ch) +{ + if (ch == '\n') + putchar('\r'); /* \n -> \r\n */ + + bios_putchar(ch); + + if (early_serial_base != 0) + serial_putchar(ch); +} + +void __attribute__((section(".inittext"))) puts(const char *str) +{ + while (*str) + putchar(*str++); +} + +/* + * Read the CMOS clock through the BIOS, and return the + * seconds in BCD. 
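+ * (BIOS INT 1Ah, AH=02h reads the RTC; DH returns the seconds field
+ * in BCD, e.g. 0x59 for 59 seconds. The caller below only compares
+ * successive readings for inequality to detect one-second ticks, so
+ * the BCD value never needs decoding.)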
+ */
+static u8 gettime(void)
+{
+	struct biosregs ireg, oreg;
+
+	initregs(&ireg);
+	ireg.ah = 0x02;
+	intcall(0x1a, &ireg, &oreg);
+
+	return oreg.dh;
+}
+
+/*
+ * Read from the keyboard
+ */
+int getchar(void)
+{
+	struct biosregs ireg, oreg;
+
+	initregs(&ireg);
+	/* ireg.ah = 0x00; */
+	intcall(0x16, &ireg, &oreg);
+
+	return oreg.al;
+}
+
+static int kbd_pending(void)
+{
+	struct biosregs ireg, oreg;
+
+	initregs(&ireg);
+	ireg.ah = 0x01;
+	intcall(0x16, &ireg, &oreg);
+
+	return !(oreg.eflags & X86_EFLAGS_ZF);
+}
+
+void kbd_flush(void)
+{
+	for (;;) {
+		if (!kbd_pending())
+			break;
+		getchar();
+	}
+}
+
+int getchar_timeout(void)
+{
+	int cnt = 30;
+	int t0, t1;
+
+	t0 = gettime();
+
+	while (cnt) {
+		if (kbd_pending())
+			return getchar();
+
+		t1 = gettime();
+		if (t0 != t1) {
+			cnt--;
+			t0 = t1;
+		}
+	}
+
+	return 0;	/* Timeout! */
+}
diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c
new file mode 100644
index 00000000..2b15aa48
--- /dev/null
+++ b/arch/x86/boot/version.c
@@ -0,0 +1,21 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright (C) 1991, 1992 Linus Torvalds
+ *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Kernel version string
+ */
+
+#include "boot.h"
+#include <generated/utsrelease.h>
+#include <generated/compile.h>
+
+const char kernel_version[] =
+	UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") "
+	UTS_VERSION;
diff --git a/arch/x86/boot/vesa.h b/arch/x86/boot/vesa.h
new file mode 100644
index 00000000..468e4446
--- /dev/null
+++ b/arch/x86/boot/vesa.h
@@ -0,0 +1,72 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 1999-2007 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ * + * ----------------------------------------------------------------------- */ + +#ifndef BOOT_VESA_H +#define BOOT_VESA_H + +typedef struct { + u16 off, seg; +} far_ptr; + +/* VESA General Information table */ +struct vesa_general_info { + u32 signature; /* 0 Magic number = "VESA" */ + u16 version; /* 4 */ + far_ptr vendor_string; /* 6 */ + u32 capabilities; /* 10 */ + far_ptr video_mode_ptr; /* 14 */ + u16 total_memory; /* 18 */ + + u8 reserved[236]; /* 20 */ +} __attribute__ ((packed)); + +#define VESA_MAGIC ('V' + ('E' << 8) + ('S' << 16) + ('A' << 24)) + +struct vesa_mode_info { + u16 mode_attr; /* 0 */ + u8 win_attr[2]; /* 2 */ + u16 win_grain; /* 4 */ + u16 win_size; /* 6 */ + u16 win_seg[2]; /* 8 */ + far_ptr win_scheme; /* 12 */ + u16 logical_scan; /* 16 */ + + u16 h_res; /* 18 */ + u16 v_res; /* 20 */ + u8 char_width; /* 22 */ + u8 char_height; /* 23 */ + u8 memory_planes; /* 24 */ + u8 bpp; /* 25 */ + u8 banks; /* 26 */ + u8 memory_layout; /* 27 */ + u8 bank_size; /* 28 */ + u8 image_planes; /* 29 */ + u8 page_function; /* 30 */ + + u8 rmask; /* 31 */ + u8 rpos; /* 32 */ + u8 gmask; /* 33 */ + u8 gpos; /* 34 */ + u8 bmask; /* 35 */ + u8 bpos; /* 36 */ + u8 resv_mask; /* 37 */ + u8 resv_pos; /* 38 */ + u8 dcm_info; /* 39 */ + + u32 lfb_ptr; /* 40 Linear frame buffer address */ + u32 offscreen_ptr; /* 44 Offscreen memory address */ + u16 offscreen_size; /* 48 */ + + u8 reserved[206]; /* 50 */ +} __attribute__ ((packed)); + +#endif /* LIB_SYS_VESA_H */ diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c new file mode 100644 index 00000000..49e0c188 --- /dev/null +++ b/arch/x86/boot/video-bios.c @@ -0,0 +1,128 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * Standard video BIOS modes + * + * We have two options for this; silent and scanned. + */ + +#include "boot.h" +#include "video.h" + +static __videocard video_bios; + +/* Set a conventional BIOS mode */ +static int set_bios_mode(u8 mode); + +static int bios_set_mode(struct mode_info *mi) +{ + return set_bios_mode(mi->mode - VIDEO_FIRST_BIOS); +} + +static int set_bios_mode(u8 mode) +{ + struct biosregs ireg, oreg; + u8 new_mode; + + initregs(&ireg); + ireg.al = mode; /* AH=0x00 Set Video Mode */ + intcall(0x10, &ireg, NULL); + + ireg.ah = 0x0f; /* Get Current Video Mode */ + intcall(0x10, &ireg, &oreg); + + do_restore = 1; /* Assume video contents were lost */ + + /* Not all BIOSes are clean with the top bit */ + new_mode = oreg.al & 0x7f; + + if (new_mode == mode) + return 0; /* Mode change OK */ + +#ifndef _WAKEUP + if (new_mode != boot_params.screen_info.orig_video_mode) { + /* Mode setting failed, but we didn't end up where we + started. That's bad. Try to revert to the original + video mode. 
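+		   (INT 10h AH=00h reports no error status, so comparing
+		   the mode read back via AH=0Fh above is the only failure
+		   check the BIOS offers.)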
*/ + ireg.ax = boot_params.screen_info.orig_video_mode; + intcall(0x10, &ireg, NULL); + } +#endif + return -1; +} + +static int bios_probe(void) +{ + u8 mode; +#ifdef _WAKEUP + u8 saved_mode = 0x03; +#else + u8 saved_mode = boot_params.screen_info.orig_video_mode; +#endif + u16 crtc; + struct mode_info *mi; + int nmodes = 0; + + if (adapter != ADAPTER_EGA && adapter != ADAPTER_VGA) + return 0; + + set_fs(0); + crtc = vga_crtc(); + + video_bios.modes = GET_HEAP(struct mode_info, 0); + + for (mode = 0x14; mode <= 0x7f; mode++) { + if (!heap_free(sizeof(struct mode_info))) + break; + + if (mode_defined(VIDEO_FIRST_BIOS+mode)) + continue; + + if (set_bios_mode(mode)) + continue; + + /* Try to verify that it's a text mode. */ + + /* Attribute Controller: make graphics controller disabled */ + if (in_idx(0x3c0, 0x10) & 0x01) + continue; + + /* Graphics Controller: verify Alpha addressing enabled */ + if (in_idx(0x3ce, 0x06) & 0x01) + continue; + + /* CRTC cursor location low should be zero(?) */ + if (in_idx(crtc, 0x0f)) + continue; + + mi = GET_HEAP(struct mode_info, 1); + mi->mode = VIDEO_FIRST_BIOS+mode; + mi->depth = 0; /* text */ + mi->x = rdfs16(0x44a); + mi->y = rdfs8(0x484)+1; + nmodes++; + } + + set_bios_mode(saved_mode); + + return nmodes; +} + +static __videocard video_bios = +{ + .card_name = "BIOS", + .probe = bios_probe, + .set_mode = bios_set_mode, + .unsafe = 1, + .xmode_first = VIDEO_FIRST_BIOS, + .xmode_n = 0x80, +}; diff --git a/arch/x86/boot/video-mode.c b/arch/x86/boot/video-mode.c new file mode 100644 index 00000000..748e8d06 --- /dev/null +++ b/arch/x86/boot/video-mode.c @@ -0,0 +1,173 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007-2008 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/video-mode.c + * + * Set the video mode. This is separated out into a different + * file in order to be shared with the ACPI wakeup code. + */ + +#include "boot.h" +#include "video.h" +#include "vesa.h" + +/* + * Common variables + */ +int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */ +u16 video_segment; +int force_x, force_y; /* Don't query the BIOS for cols/rows */ + +int do_restore; /* Screen contents changed during mode flip */ +int graphic_mode; /* Graphic mode with linear frame buffer */ + +/* Probe the video drivers and have them generate their mode lists. 
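+   The unsafe == 1 pass is only requested from the mode menu's "scan"
+   command in video.c; the unsafe == 0 pass runs unconditionally from
+   set_video(). The probed[] array makes each pass run at most once.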
*/ +void probe_cards(int unsafe) +{ + struct card_info *card; + static u8 probed[2]; + + if (probed[unsafe]) + return; + + probed[unsafe] = 1; + + for (card = video_cards; card < video_cards_end; card++) { + if (card->unsafe == unsafe) { + if (card->probe) + card->nmodes = card->probe(); + else + card->nmodes = 0; + } + } +} + +/* Test if a mode is defined */ +int mode_defined(u16 mode) +{ + struct card_info *card; + struct mode_info *mi; + int i; + + for (card = video_cards; card < video_cards_end; card++) { + mi = card->modes; + for (i = 0; i < card->nmodes; i++, mi++) { + if (mi->mode == mode) + return 1; + } + } + + return 0; +} + +/* Set mode (without recalc) */ +static int raw_set_mode(u16 mode, u16 *real_mode) +{ + int nmode, i; + struct card_info *card; + struct mode_info *mi; + + /* Drop the recalc bit if set */ + mode &= ~VIDEO_RECALC; + + /* Scan for mode based on fixed ID, position, or resolution */ + nmode = 0; + for (card = video_cards; card < video_cards_end; card++) { + mi = card->modes; + for (i = 0; i < card->nmodes; i++, mi++) { + int visible = mi->x || mi->y; + + if ((mode == nmode && visible) || + mode == mi->mode || + mode == (mi->y << 8)+mi->x) { + *real_mode = mi->mode; + return card->set_mode(mi); + } + + if (visible) + nmode++; + } + } + + /* Nothing found? Is it an "exceptional" (unprobed) mode? */ + for (card = video_cards; card < video_cards_end; card++) { + if (mode >= card->xmode_first && + mode < card->xmode_first+card->xmode_n) { + struct mode_info mix; + *real_mode = mix.mode = mode; + mix.x = mix.y = 0; + return card->set_mode(&mix); + } + } + + /* Otherwise, failure... */ + return -1; +} + +/* + * Recalculate the vertical video cutoff (hack!) + */ +static void vga_recalc_vertical(void) +{ + unsigned int font_size, rows; + u16 crtc; + u8 pt, ov; + + set_fs(0); + font_size = rdfs8(0x485); /* BIOS: font size (pixels) */ + rows = force_y ? force_y : rdfs8(0x484)+1; /* Text rows */ + + rows *= font_size; /* Visible scan lines */ + rows--; /* ... minus one */ + + crtc = vga_crtc(); + + pt = in_idx(crtc, 0x11); + pt &= ~0x80; /* Unlock CR0-7 */ + out_idx(pt, crtc, 0x11); + + out_idx((u8)rows, crtc, 0x12); /* Lower height register */ + + ov = in_idx(crtc, 0x07); /* Overflow register */ + ov &= 0xbd; + ov |= (rows >> (8-1)) & 0x02; + ov |= (rows >> (9-6)) & 0x40; + out_idx(ov, crtc, 0x07); +} + +/* Set mode (with recalc if specified) */ +int set_mode(u16 mode) +{ + int rv; + u16 real_mode; + + /* Very special mode numbers... */ + if (mode == VIDEO_CURRENT_MODE) + return 0; /* Nothing to do... */ + else if (mode == NORMAL_VGA) + mode = VIDEO_80x25; + else if (mode == EXTENDED_VGA) + mode = VIDEO_8POINT; + + rv = raw_set_mode(mode, &real_mode); + if (rv) + return rv; + + if (mode & VIDEO_RECALC) + vga_recalc_vertical(); + + /* Save the canonical mode number for the kernel, not + an alias, size specification or menu position */ +#ifndef _WAKEUP + boot_params.hdr.vid_mode = real_mode; +#endif + return 0; +} diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c new file mode 100644 index 00000000..11e8c6eb --- /dev/null +++ b/arch/x86/boot/video-vesa.c @@ -0,0 +1,280 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. 
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * VESA text modes
+ */
+
+#include "boot.h"
+#include "video.h"
+#include "vesa.h"
+
+/* VESA information */
+static struct vesa_general_info vginfo;
+static struct vesa_mode_info vminfo;
+
+static __videocard video_vesa;
+
+#ifndef _WAKEUP
+static void vesa_store_mode_params_graphics(void);
+#else /* _WAKEUP */
+static inline void vesa_store_mode_params_graphics(void) {}
+#endif /* _WAKEUP */
+
+static int vesa_probe(void)
+{
+	struct biosregs ireg, oreg;
+	u16 mode;
+	addr_t mode_ptr;
+	struct mode_info *mi;
+	int nmodes = 0;
+
+	video_vesa.modes = GET_HEAP(struct mode_info, 0);
+
+	initregs(&ireg);
+	ireg.ax = 0x4f00;
+	ireg.di = (size_t)&vginfo;
+	intcall(0x10, &ireg, &oreg);
+
+	if (oreg.ax != 0x004f ||
+	    vginfo.signature != VESA_MAGIC ||
+	    vginfo.version < 0x0102)
+		return 0;	/* Not present */
+
+	set_fs(vginfo.video_mode_ptr.seg);
+	mode_ptr = vginfo.video_mode_ptr.off;
+
+	while ((mode = rdfs16(mode_ptr)) != 0xffff) {
+		mode_ptr += 2;
+
+		if (!heap_free(sizeof(struct mode_info)))
+			break;	/* Heap full, can't save mode info */
+
+		if (mode & ~0x1ff)
+			continue;
+
+		memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
+
+		ireg.ax = 0x4f01;
+		ireg.cx = mode;
+		ireg.di = (size_t)&vminfo;
+		intcall(0x10, &ireg, &oreg);
+
+		if (oreg.ax != 0x004f)
+			continue;
+
+		if ((vminfo.mode_attr & 0x15) == 0x05) {
+			/* Text Mode, TTY BIOS supported,
+			   supported by hardware */
+			mi = GET_HEAP(struct mode_info, 1);
+			mi->mode = mode + VIDEO_FIRST_VESA;
+			mi->depth = 0; /* text */
+			mi->x = vminfo.h_res;
+			mi->y = vminfo.v_res;
+			nmodes++;
+		} else if ((vminfo.mode_attr & 0x99) == 0x99 &&
+			   (vminfo.memory_layout == 4 ||
+			    vminfo.memory_layout == 6) &&
+			   vminfo.memory_planes == 1) {
+#ifdef CONFIG_FB_BOOT_VESA_SUPPORT
+			/* Graphics mode, color, linear frame buffer
+			   supported. Only register the mode if the
+			   framebuffer is configured, however,
+			   otherwise the user will be left without a screen. */
+			mi = GET_HEAP(struct mode_info, 1);
+			mi->mode = mode + VIDEO_FIRST_VESA;
+			mi->depth = vminfo.bpp;
+			mi->x = vminfo.h_res;
+			mi->y = vminfo.v_res;
+			nmodes++;
+#endif
+		}
+	}
+
+	return nmodes;
+}
+
+static int vesa_set_mode(struct mode_info *mode)
+{
+	struct biosregs ireg, oreg;
+	int is_graphic;
+	u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA;
+
+	memset(&vminfo, 0, sizeof vminfo); /* Just in case...
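+	   (defensive: a buggy VBE BIOS may fill in only part of the
+	   buffer on the 0x4f01 call below)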
*/ + + initregs(&ireg); + ireg.ax = 0x4f01; + ireg.cx = vesa_mode; + ireg.di = (size_t)&vminfo; + intcall(0x10, &ireg, &oreg); + + if (oreg.ax != 0x004f) + return -1; + + if ((vminfo.mode_attr & 0x15) == 0x05) { + /* It's a supported text mode */ + is_graphic = 0; +#ifdef CONFIG_FB_BOOT_VESA_SUPPORT + } else if ((vminfo.mode_attr & 0x99) == 0x99) { + /* It's a graphics mode with linear frame buffer */ + is_graphic = 1; + vesa_mode |= 0x4000; /* Request linear frame buffer */ +#endif + } else { + return -1; /* Invalid mode */ + } + + + initregs(&ireg); + ireg.ax = 0x4f02; + ireg.bx = vesa_mode; + intcall(0x10, &ireg, &oreg); + + if (oreg.ax != 0x004f) + return -1; + + graphic_mode = is_graphic; + if (!is_graphic) { + /* Text mode */ + force_x = mode->x; + force_y = mode->y; + do_restore = 1; + } else { + /* Graphics mode */ + vesa_store_mode_params_graphics(); + } + + return 0; +} + + +#ifndef _WAKEUP + +/* Switch DAC to 8-bit mode */ +static void vesa_dac_set_8bits(void) +{ + struct biosregs ireg, oreg; + u8 dac_size = 6; + + /* If possible, switch the DAC to 8-bit mode */ + if (vginfo.capabilities & 1) { + initregs(&ireg); + ireg.ax = 0x4f08; + ireg.bh = 0x08; + intcall(0x10, &ireg, &oreg); + if (oreg.ax == 0x004f) + dac_size = oreg.bh; + } + + /* Set the color sizes to the DAC size, and offsets to 0 */ + boot_params.screen_info.red_size = dac_size; + boot_params.screen_info.green_size = dac_size; + boot_params.screen_info.blue_size = dac_size; + boot_params.screen_info.rsvd_size = dac_size; + + boot_params.screen_info.red_pos = 0; + boot_params.screen_info.green_pos = 0; + boot_params.screen_info.blue_pos = 0; + boot_params.screen_info.rsvd_pos = 0; +} + +/* Save the VESA protected mode info */ +static void vesa_store_pm_info(void) +{ + struct biosregs ireg, oreg; + + initregs(&ireg); + ireg.ax = 0x4f0a; + intcall(0x10, &ireg, &oreg); + + if (oreg.ax != 0x004f) + return; + + boot_params.screen_info.vesapm_seg = oreg.es; + boot_params.screen_info.vesapm_off = oreg.di; +} + +/* + * Save video mode parameters for graphics mode + */ +static void vesa_store_mode_params_graphics(void) +{ + /* Tell the kernel we're in VESA graphics mode */ + boot_params.screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB; + + /* Mode parameters */ + boot_params.screen_info.vesa_attributes = vminfo.mode_attr; + boot_params.screen_info.lfb_linelength = vminfo.logical_scan; + boot_params.screen_info.lfb_width = vminfo.h_res; + boot_params.screen_info.lfb_height = vminfo.v_res; + boot_params.screen_info.lfb_depth = vminfo.bpp; + boot_params.screen_info.pages = vminfo.image_planes; + boot_params.screen_info.lfb_base = vminfo.lfb_ptr; + memcpy(&boot_params.screen_info.red_size, + &vminfo.rmask, 8); + + /* General parameters */ + boot_params.screen_info.lfb_size = vginfo.total_memory; + + if (vminfo.bpp <= 8) + vesa_dac_set_8bits(); + + vesa_store_pm_info(); +} + +/* + * Save EDID information for the kernel; this is invoked, separately, + * after mode-setting. + */ +void vesa_store_edid(void) +{ +#ifdef CONFIG_FIRMWARE_EDID + struct biosregs ireg, oreg; + + /* Apparently used as a nonsense token... 
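+	   (presumably: 0x13 is a recognizable fill pattern, so a block
+	   that the DDC call below never overwrites will not masquerade
+	   as valid all-zero EDID data)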
*/ + memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info); + + if (vginfo.version < 0x0200) + return; /* EDID requires VBE 2.0+ */ + + initregs(&ireg); + ireg.ax = 0x4f15; /* VBE DDC */ + /* ireg.bx = 0x0000; */ /* Report DDC capabilities */ + /* ireg.cx = 0; */ /* Controller 0 */ + ireg.es = 0; /* ES:DI must be 0 by spec */ + intcall(0x10, &ireg, &oreg); + + if (oreg.ax != 0x004f) + return; /* No EDID */ + + /* BH = time in seconds to transfer EDD information */ + /* BL = DDC level supported */ + + ireg.ax = 0x4f15; /* VBE DDC */ + ireg.bx = 0x0001; /* Read EDID */ + /* ireg.cx = 0; */ /* Controller 0 */ + /* ireg.dx = 0; */ /* EDID block number */ + ireg.es = ds(); + ireg.di =(size_t)&boot_params.edid_info; /* (ES:)Pointer to block */ + intcall(0x10, &ireg, &oreg); +#endif /* CONFIG_FIRMWARE_EDID */ +} + +#endif /* not _WAKEUP */ + +static __videocard video_vesa = +{ + .card_name = "VESA", + .probe = vesa_probe, + .set_mode = vesa_set_mode, + .xmode_first = VIDEO_FIRST_VESA, + .xmode_n = 0x200, +}; diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c new file mode 100644 index 00000000..45bc9402 --- /dev/null +++ b/arch/x86/boot/video-vga.c @@ -0,0 +1,288 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * Common all-VGA modes + */ + +#include "boot.h" +#include "video.h" + +static struct mode_info vga_modes[] = { + { VIDEO_80x25, 80, 25, 0 }, + { VIDEO_8POINT, 80, 50, 0 }, + { VIDEO_80x43, 80, 43, 0 }, + { VIDEO_80x28, 80, 28, 0 }, + { VIDEO_80x30, 80, 30, 0 }, + { VIDEO_80x34, 80, 34, 0 }, + { VIDEO_80x60, 80, 60, 0 }, +}; + +static struct mode_info ega_modes[] = { + { VIDEO_80x25, 80, 25, 0 }, + { VIDEO_8POINT, 80, 43, 0 }, +}; + +static struct mode_info cga_modes[] = { + { VIDEO_80x25, 80, 25, 0 }, +}; + +static __videocard video_vga; + +/* Set basic 80x25 mode */ +static u8 vga_set_basic_mode(void) +{ + struct biosregs ireg, oreg; + u8 mode; + + initregs(&ireg); + + /* Query current mode */ + ireg.ax = 0x0f00; + intcall(0x10, &ireg, &oreg); + mode = oreg.al; + + if (mode != 3 && mode != 7) + mode = 3; + + /* Set the mode */ + ireg.ax = mode; /* AH=0: set mode */ + intcall(0x10, &ireg, NULL); + do_restore = 1; + return mode; +} + +static void vga_set_8font(void) +{ + /* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */ + struct biosregs ireg; + + initregs(&ireg); + + /* Set 8x8 font */ + ireg.ax = 0x1112; + /* ireg.bl = 0; */ + intcall(0x10, &ireg, NULL); + + /* Use alternate print screen */ + ireg.ax = 0x1200; + ireg.bl = 0x20; + intcall(0x10, &ireg, NULL); + + /* Turn off cursor emulation */ + ireg.ax = 0x1201; + ireg.bl = 0x34; + intcall(0x10, &ireg, NULL); + + /* Cursor is scan lines 6-7 */ + ireg.ax = 0x0100; + ireg.cx = 0x0607; + intcall(0x10, &ireg, NULL); +} + +static void vga_set_14font(void) +{ + /* Set 9x14 font - 80x28 on VGA */ + struct biosregs ireg; + + initregs(&ireg); + + /* Set 9x14 font */ + ireg.ax = 0x1111; + /* ireg.bl = 0; */ + intcall(0x10, &ireg, NULL); + + /* Turn off cursor emulation */ + ireg.ax = 0x1201; + ireg.bl = 0x34; + intcall(0x10, &ireg, NULL); + + /* Cursor is scan lines 11-12 */ + ireg.ax = 0x0100; + ireg.cx = 
0x0b0c; + intcall(0x10, &ireg, NULL); +} + +static void vga_set_80x43(void) +{ + /* Set 80x43 mode on VGA (not EGA) */ + struct biosregs ireg; + + initregs(&ireg); + + /* Set 350 scans */ + ireg.ax = 0x1201; + ireg.bl = 0x30; + intcall(0x10, &ireg, NULL); + + /* Reset video mode */ + ireg.ax = 0x0003; + intcall(0x10, &ireg, NULL); + + vga_set_8font(); +} + +/* I/O address of the VGA CRTC */ +u16 vga_crtc(void) +{ + return (inb(0x3cc) & 1) ? 0x3d4 : 0x3b4; +} + +static void vga_set_480_scanlines(void) +{ + u16 crtc; /* CRTC base address */ + u8 csel; /* CRTC miscellaneous output register */ + + crtc = vga_crtc(); + + out_idx(0x0c, crtc, 0x11); /* Vertical sync end, unlock CR0-7 */ + out_idx(0x0b, crtc, 0x06); /* Vertical total */ + out_idx(0x3e, crtc, 0x07); /* Vertical overflow */ + out_idx(0xea, crtc, 0x10); /* Vertical sync start */ + out_idx(0xdf, crtc, 0x12); /* Vertical display end */ + out_idx(0xe7, crtc, 0x15); /* Vertical blank start */ + out_idx(0x04, crtc, 0x16); /* Vertical blank end */ + csel = inb(0x3cc); + csel &= 0x0d; + csel |= 0xe2; + outb(csel, 0x3c2); +} + +static void vga_set_vertical_end(int lines) +{ + u16 crtc; /* CRTC base address */ + u8 ovfw; /* CRTC overflow register */ + int end = lines-1; + + crtc = vga_crtc(); + + ovfw = 0x3c | ((end >> (8-1)) & 0x02) | ((end >> (9-6)) & 0x40); + + out_idx(ovfw, crtc, 0x07); /* Vertical overflow */ + out_idx(end, crtc, 0x12); /* Vertical display end */ +} + +static void vga_set_80x30(void) +{ + vga_set_480_scanlines(); + vga_set_vertical_end(30*16); +} + +static void vga_set_80x34(void) +{ + vga_set_480_scanlines(); + vga_set_14font(); + vga_set_vertical_end(34*14); +} + +static void vga_set_80x60(void) +{ + vga_set_480_scanlines(); + vga_set_8font(); + vga_set_vertical_end(60*8); +} + +static int vga_set_mode(struct mode_info *mode) +{ + /* Set the basic mode */ + vga_set_basic_mode(); + + /* Override a possibly broken BIOS */ + force_x = mode->x; + force_y = mode->y; + + switch (mode->mode) { + case VIDEO_80x25: + break; + case VIDEO_8POINT: + vga_set_8font(); + break; + case VIDEO_80x43: + vga_set_80x43(); + break; + case VIDEO_80x28: + vga_set_14font(); + break; + case VIDEO_80x30: + vga_set_80x30(); + break; + case VIDEO_80x34: + vga_set_80x34(); + break; + case VIDEO_80x60: + vga_set_80x60(); + break; + } + + return 0; +} + +/* + * Note: this probe includes basic information required by all + * systems. It should be executed first, by making sure + * video-vga.c is listed first in the Makefile. 
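+ * (bios_probe(), for instance, bails out immediately unless this
+ * routine has already set 'adapter' to ADAPTER_EGA or ADAPTER_VGA.)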
+ */ +static int vga_probe(void) +{ + static const char *card_name[] = { + "CGA/MDA/HGC", "EGA", "VGA" + }; + static struct mode_info *mode_lists[] = { + cga_modes, + ega_modes, + vga_modes, + }; + static int mode_count[] = { + sizeof(cga_modes)/sizeof(struct mode_info), + sizeof(ega_modes)/sizeof(struct mode_info), + sizeof(vga_modes)/sizeof(struct mode_info), + }; + + struct biosregs ireg, oreg; + + initregs(&ireg); + + ireg.ax = 0x1200; + ireg.bl = 0x10; /* Check EGA/VGA */ + intcall(0x10, &ireg, &oreg); + +#ifndef _WAKEUP + boot_params.screen_info.orig_video_ega_bx = oreg.bx; +#endif + + /* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */ + if (oreg.bl != 0x10) { + /* EGA/VGA */ + ireg.ax = 0x1a00; + intcall(0x10, &ireg, &oreg); + + if (oreg.al == 0x1a) { + adapter = ADAPTER_VGA; +#ifndef _WAKEUP + boot_params.screen_info.orig_video_isVGA = 1; +#endif + } else { + adapter = ADAPTER_EGA; + } + } else { + adapter = ADAPTER_CGA; + } + + video_vga.modes = mode_lists[adapter]; + video_vga.card_name = card_name[adapter]; + return mode_count[adapter]; +} + +static __videocard video_vga = { + .card_name = "VGA", + .probe = vga_probe, + .set_mode = vga_set_mode, +}; diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c new file mode 100644 index 00000000..43eda284 --- /dev/null +++ b/arch/x86/boot/video.c @@ -0,0 +1,341 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * Select video mode + */ + +#include "boot.h" +#include "video.h" +#include "vesa.h" + +static void store_cursor_position(void) +{ + struct biosregs ireg, oreg; + + initregs(&ireg); + ireg.ah = 0x03; + intcall(0x10, &ireg, &oreg); + + boot_params.screen_info.orig_x = oreg.dl; + boot_params.screen_info.orig_y = oreg.dh; + + if (oreg.ch & 0x20) + boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR; + + if ((oreg.ch & 0x1f) > (oreg.cl & 0x1f)) + boot_params.screen_info.flags |= VIDEO_FLAGS_NOCURSOR; +} + +static void store_video_mode(void) +{ + struct biosregs ireg, oreg; + + /* N.B.: the saving of the video page here is a bit silly, + since we pretty much assume page 0 everywhere. */ + initregs(&ireg); + ireg.ah = 0x0f; + intcall(0x10, &ireg, &oreg); + + /* Not all BIOSes are clean with respect to the top bit */ + boot_params.screen_info.orig_video_mode = oreg.al & 0x7f; + boot_params.screen_info.orig_video_page = oreg.bh; +} + +/* + * Store the video mode parameters for later usage by the kernel. + * This is done by asking the BIOS except for the rows/columns + * parameters in the default 80x25 mode -- these are set directly, + * because some very obscure BIOSes supply insane values. 
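+ * The force_x/force_y overrides set by the mode-setting drivers take
+ * precedence over anything read back from the BIOS data area.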
+ */
+static void store_mode_params(void)
+{
+	u16 font_size;
+	int x, y;
+
+	/* For graphics mode, it is up to the mode-setting driver
+	   (currently only video-vesa.c) to store the parameters */
+	if (graphic_mode)
+		return;
+
+	store_cursor_position();
+	store_video_mode();
+
+	if (boot_params.screen_info.orig_video_mode == 0x07) {
+		/* MDA, HGC, or VGA in monochrome mode */
+		video_segment = 0xb000;
+	} else {
+		/* CGA, EGA, VGA and so forth */
+		video_segment = 0xb800;
+	}
+
+	set_fs(0);
+	font_size = rdfs16(0x485); /* Font size, BIOS area */
+	boot_params.screen_info.orig_video_points = font_size;
+
+	x = rdfs16(0x44a);
+	y = (adapter == ADAPTER_CGA) ? 25 : rdfs8(0x484)+1;
+
+	if (force_x)
+		x = force_x;
+	if (force_y)
+		y = force_y;
+
+	boot_params.screen_info.orig_video_cols = x;
+	boot_params.screen_info.orig_video_lines = y;
+}
+
+static unsigned int get_entry(void)
+{
+	char entry_buf[4];
+	int i, len = 0;
+	int key;
+	unsigned int v;
+
+	do {
+		key = getchar();
+
+		if (key == '\b') {
+			if (len > 0) {
+				puts("\b \b");
+				len--;
+			}
+		} else if ((key >= '0' && key <= '9') ||
+			   (key >= 'A' && key <= 'Z') ||
+			   (key >= 'a' && key <= 'z')) {
+			if (len < sizeof entry_buf) {
+				entry_buf[len++] = key;
+				putchar(key);
+			}
+		}
+	} while (key != '\r');
+	putchar('\n');
+
+	if (len == 0)
+		return VIDEO_CURRENT_MODE; /* Default */
+
+	v = 0;
+	for (i = 0; i < len; i++) {
+		v <<= 4;
+		key = entry_buf[i] | 0x20;
+		v += (key > '9') ? key-'a'+10 : key-'0';
+	}
+
+	return v;
+}
+
+static void display_menu(void)
+{
+	struct card_info *card;
+	struct mode_info *mi;
+	char ch;
+	int i;
+	int nmodes;
+	int modes_per_line;
+	int col;
+
+	nmodes = 0;
+	for (card = video_cards; card < video_cards_end; card++)
+		nmodes += card->nmodes;
+
+	modes_per_line = 1;
+	if (nmodes >= 20)
+		modes_per_line = 3;
+
+	for (col = 0; col < modes_per_line; col++)
+		puts("Mode: Resolution: Type: ");
+	putchar('\n');
+
+	col = 0;
+	ch = '0';
+	for (card = video_cards; card < video_cards_end; card++) {
+		mi = card->modes;
+		for (i = 0; i < card->nmodes; i++, mi++) {
+			char resbuf[32];
+			int visible = mi->x && mi->y;
+			u16 mode_id = mi->mode ? mi->mode :
+				(mi->y << 8)+mi->x;
+
+			if (!visible)
+				continue; /* Hidden mode */
+
+			if (mi->depth)
+				sprintf(resbuf, "%dx%d", mi->y, mi->depth);
+			else
+				sprintf(resbuf, "%d", mi->y);
+
+			printf("%c %03X %4dx%-7s %-6s",
+			       ch, mode_id, mi->x, resbuf, card->card_name);
+			col++;
+			if (col >= modes_per_line) {
+				putchar('\n');
+				col = 0;
+			}
+
+			if (ch == '9')
+				ch = 'a';
+			else if (ch == 'z' || ch == ' ')
+				ch = ' '; /* Out of keys... */
+			else
+				ch++;
+		}
+	}
+	if (col)
+		putchar('\n');
+}
+
+#define H(x) ((x)-'a'+10)
+#define SCAN ((H('s')<<12)+(H('c')<<8)+(H('a')<<4)+H('n'))
+
+static unsigned int mode_menu(void)
+{
+	int key;
+	unsigned int sel;
+
+	puts("Press <ENTER> to see video modes available, "
+	     "<SPACE> to continue, or wait 30 sec\n");
+
+	kbd_flush();
+	while (1) {
+		key = getchar_timeout();
+		if (key == ' ' || key == 0)
+			return VIDEO_CURRENT_MODE; /* Default */
+		if (key == '\r')
+			break;
+		putchar('\a'); /* Beep! */
+	}
+
+	for (;;) {
+		display_menu();
+
+		puts("Enter a video mode or \"scan\" to scan for "
+		     "additional modes: ");
+		sel = get_entry();
+		if (sel != SCAN)
+			return sel;
+
+		probe_cards(1);
+	}
+}
+
+/* Save screen content to the heap */
+static struct saved_screen {
+	int x, y;
+	int curx, cury;
+	u16 *data;
+} saved;
+
+static void save_screen(void)
+{
+	/* Should be called after store_mode_params() */
+	saved.x = boot_params.screen_info.orig_video_cols;
+	saved.y = boot_params.screen_info.orig_video_lines;
+	saved.curx = boot_params.screen_info.orig_x;
+	saved.cury = boot_params.screen_info.orig_y;
+
+	if (!heap_free(saved.x*saved.y*sizeof(u16)+512))
+		return; /* Not enough heap to save the screen */
+
+	saved.data = GET_HEAP(u16, saved.x*saved.y);
+
+	set_fs(video_segment);
+	copy_from_fs(saved.data, 0, saved.x*saved.y*sizeof(u16));
+}
+
+static void restore_screen(void)
+{
+	/* Should be called after store_mode_params() */
+	int xs = boot_params.screen_info.orig_video_cols;
+	int ys = boot_params.screen_info.orig_video_lines;
+	int y;
+	addr_t dst = 0;
+	u16 *src = saved.data;
+	struct biosregs ireg;
+
+	if (graphic_mode)
+		return; /* Can't restore onto a graphic mode */
+
+	if (!src)
+		return; /* No saved screen contents */
+
+	/* Restore screen contents */
+
+	set_fs(video_segment);
+	for (y = 0; y < ys; y++) {
+		int npad;
+
+		if (y < saved.y) {
+			int copy = (xs < saved.x) ? xs : saved.x;
+			copy_to_fs(dst, src, copy*sizeof(u16));
+			dst += copy*sizeof(u16);
+			src += saved.x;
+			npad = (xs < saved.x) ? 0 : xs-saved.x;
+		} else {
+			npad = xs;
+		}
+
+		/* Writes "npad" blank characters to
+		   video_segment:dst and advances dst */
+		asm volatile("pushw %%es ; "
+			     "movw %2,%%es ; "
+			     "shrw %%cx ; "
+			     "jnc 1f ; "
+			     "stosw \n\t"
+			     "1: rep;stosl ; "
+			     "popw %%es"
+			     : "+D" (dst), "+c" (npad)
+			     : "bdS" (video_segment),
+			       "a" (0x07200720));
+	}
+
+	/* Restore cursor position */
+	if (saved.curx >= xs)
+		saved.curx = xs-1;
+	if (saved.cury >= ys)
+		saved.cury = ys-1;
+
+	initregs(&ireg);
+	ireg.ah = 0x02;	/* Set cursor position */
+	ireg.dh = saved.cury;
+	ireg.dl = saved.curx;
+	intcall(0x10, &ireg, NULL);
+
+	store_cursor_position();
+}
+
+void set_video(void)
+{
+	u16 mode = boot_params.hdr.vid_mode;
+
+	RESET_HEAP();
+
+	store_mode_params();
+	save_screen();
+	probe_cards(0);
+
+	for (;;) {
+		if (mode == ASK_VGA)
+			mode = mode_menu();
+
+		if (!set_mode(mode))
+			break;
+
+		printf("Undefined video mode number: %x\n", mode);
+		mode = ASK_VGA;
+	}
+	boot_params.hdr.vid_mode = mode;
+	vesa_store_edid();
+	store_mode_params();
+
+	if (do_restore)
+		restore_screen();
+}
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
new file mode 100644
index 00000000..ff339c5d
--- /dev/null
+++ b/arch/x86/boot/video.h
@@ -0,0 +1,121 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright (C) 1991, 1992 Linus Torvalds
+ *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Header file for the real-mode video probing code
+ */
+
+#ifndef BOOT_VIDEO_H
+#define BOOT_VIDEO_H
+
+#include <linux/types.h>
+
+/*
+ * This code uses an extended set of video mode numbers. These include:
+ * Aliases for standard modes
+ *	NORMAL_VGA (-1)
+ *	EXTENDED_VGA (-2)
+ *	ASK_VGA (-3)
+ * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
+ * of compatibility when extending the table. These are between 0x00 and 0xff.
+ */
+#define VIDEO_FIRST_MENU 0x0000
+
+/* Standard BIOS video modes (BIOS number + 0x0100) */
+#define VIDEO_FIRST_BIOS 0x0100
+
+/* VESA BIOS video modes (VESA number + 0x0200) */
+#define VIDEO_FIRST_VESA 0x0200
+
+/* Video7 special modes (BIOS number + 0x0900) */
+#define VIDEO_FIRST_V7 0x0900
+
+/* Special video modes */
+#define VIDEO_FIRST_SPECIAL 0x0f00
+#define VIDEO_80x25 0x0f00
+#define VIDEO_8POINT 0x0f01
+#define VIDEO_80x43 0x0f02
+#define VIDEO_80x28 0x0f03
+#define VIDEO_CURRENT_MODE 0x0f04
+#define VIDEO_80x30 0x0f05
+#define VIDEO_80x34 0x0f06
+#define VIDEO_80x60 0x0f07
+#define VIDEO_GFX_HACK 0x0f08
+#define VIDEO_LAST_SPECIAL 0x0f09
+
+/* Video modes given by resolution */
+#define VIDEO_FIRST_RESOLUTION 0x1000
+
+/* The "recalculate timings" flag */
+#define VIDEO_RECALC 0x8000
+
+void store_screen(void);
+#define DO_STORE() store_screen()
+
+/*
+ * Mode table structures
+ */
+
+struct mode_info {
+	u16 mode;	/* Mode number (vga= style) */
+	u16 x, y;	/* Width, height */
+	u16 depth;	/* Bits per pixel, 0 for text mode */
+};
+
+struct card_info {
+	const char *card_name;
+	int (*set_mode)(struct mode_info *mode);
+	int (*probe)(void);
+	struct mode_info *modes;
+	int nmodes;	/* Number of probed modes so far */
+	int unsafe;	/* Probing is unsafe, only do after "scan" */
+	u16 xmode_first;	/* Unprobed modes to try to call anyway */
+	u16 xmode_n;	/* Size of unprobed mode range */
+};
+
+#define __videocard struct card_info __attribute__((section(".videocards")))
+extern struct card_info video_cards[], video_cards_end[];
+
+int mode_defined(u16 mode);	/* video.c */
+
+/* Basic video information */
+#define ADAPTER_CGA 0	/* CGA/MDA/HGC */
+#define ADAPTER_EGA 1
+#define ADAPTER_VGA 2
+
+extern int adapter;
+extern u16 video_segment;
+extern int force_x, force_y;	/* Don't query the BIOS for cols/rows */
+extern int do_restore;		/* Restore screen contents */
+extern int graphic_mode;	/* Graphics mode with linear frame buffer */
+
+/* Accessing VGA indexed registers */
+static inline u8 in_idx(u16 port, u8 index)
+{
+	outb(index, port);
+	return inb(port+1);
+}
+
+static inline void out_idx(u8 v, u16 port, u8 index)
+{
+	outw(index+(v << 8), port);
+}
+
+/* Writes a value to an indexed port and then reads the port again */
+static inline u8 tst_idx(u8 v, u16 port, u8 index)
+{
+	out_idx(v, port, index);
+	return in_idx(port, index);
+}
+
+/* Get the I/O port of the VGA CRTC */
+u16 vga_crtc(void);	/* video-vga.c */
+
+#endif /* BOOT_VIDEO_H */
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
new file mode 100644
index 00000000..119db67d
--- /dev/null
+++ b/arch/x86/configs/i386_defconfig
@@ -0,0 +1,319 @@
+CONFIG_EXPERIMENTAL=y
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_AUDIT=y
+CONFIG_LOG_BUF_SHIFT=18
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_SMP=y +CONFIG_X86_GENERIC=y +CONFIG_HPET_TIMER=y +CONFIG_SCHED_SMT=y +CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y +CONFIG_X86_MCE=y +CONFIG_X86_REBOOTFIXUPS=y +CONFIG_MICROCODE=y +CONFIG_MICROCODE_AMD=y +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +CONFIG_HIGHPTE=y +CONFIG_X86_CHECK_BIOS_CORRUPTION=y +# CONFIG_MTRR_SANITIZER is not set +CONFIG_EFI=y +CONFIG_HZ_1000=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +# CONFIG_COMPAT_VDSO is not set +CONFIG_HIBERNATION=y +CONFIG_PM_DEBUG=y +CONFIG_PM_TRACE_RTC=y +CONFIG_ACPI_PROCFS=y +CONFIG_ACPI_DOCK=y +CONFIG_CPU_FREQ=y +# CONFIG_CPU_FREQ_STAT is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_X86_ACPI_CPUFREQ=y +CONFIG_PCIEPORTBUS=y +CONFIG_PCI_MSI=y +CONFIG_PCCARD=y +CONFIG_YENTA=y +CONFIG_HOTPLUG_PCI=y +CONFIG_BINFMT_MISC=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_DIAG is not set +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +CONFIG_TCP_MD5SIG=y +CONFIG_IPV6=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_ADVANCED is not set +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_SIP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_ULOG=y +CONFIG_NF_NAT=y +CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_NET_SCHED=y +CONFIG_NET_EMATCH=y +CONFIG_NET_CLS_ACT=y +CONFIG_HAMRADIO=y +CONFIG_CFG80211=y +CONFIG_MAC80211=y +CONFIG_MAC80211_LEDS=y +CONFIG_RFKILL=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEBUG_DEVRES=y +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_SPI_ATTRS=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_ATA_PIIX=y +CONFIG_PATA_AMD=y +CONFIG_PATA_OLDPIIX=y +CONFIG_PATA_SCH=y +CONFIG_PATA_MPIIX=y +CONFIG_ATA_GENERIC=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_BLK_DEV_DM=y 
+CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_MACINTOSH_DRIVERS=y +CONFIG_MAC_EMUMOUSEBTN=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_BNX2=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_SKY2=y +CONFIG_NE2K_PCI=y +CONFIG_FORCEDETH=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +CONFIG_R8169=y +CONFIG_FDDI=y +CONFIG_INPUT_POLLDEV=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_TABLET=y +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_INPUT_MISC=y +CONFIG_VT_HW_CONSOLE_BINDING=y +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=32 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y +CONFIG_HW_RANDOM=y +CONFIG_NVRAM=y +CONFIG_HPET=y +# CONFIG_HPET_MMAP is not set +CONFIG_I2C_I801=y +CONFIG_WATCHDOG=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_DRM=y +CONFIG_DRM_I915=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_EFI=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_VGACON_SOFT_SCROLLBACK=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_SEQ_DUMMY=y +CONFIG_SND_MIXER_OSS=y +CONFIG_SND_PCM_OSS=y +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_HRTIMER=y +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_HWDEP=y +CONFIG_HIDRAW=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y +CONFIG_HID_GYRATION=y +CONFIG_LOGITECH_FF=y +CONFIG_HID_NTRIG=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_TOPSEED=y +CONFIG_USB=y +CONFIG_USB_DEBUG=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_DEVICE_CLASS is not set +CONFIG_USB_MON=y +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_PRINTER=y +CONFIG_USB_STORAGE=y +CONFIG_USB_LIBUSUAL=y +CONFIG_EDAC=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +CONFIG_DMADEVICES=y +CONFIG_EEEPC_LAPTOP=y +CONFIG_EFI_VARS=y +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QFMT_V2=y +CONFIG_AUTOFS4_FS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_PRINTK_TIME=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +CONFIG_FRAME_WARN=2048 +CONFIG_MAGIC_SYSRQ=y +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +CONFIG_EARLY_PRINTK_DBGP=y +CONFIG_DEBUG_STACKOVERFLOW=y +# CONFIG_DEBUG_RODATA_TEST is not set +CONFIG_DEBUG_NX_TEST=m +CONFIG_DEBUG_BOOT_PARAMS=y +CONFIG_OPTIMIZE_INLINING=y +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y 
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_CRYPTO_AES_586=y +# CONFIG_CRYPTO_ANSI_CPRNG is not set +CONFIG_CRC_T10DIF=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig new file mode 100644 index 00000000..76eb2903 --- /dev/null +++ b/arch/x86/configs/x86_64_defconfig @@ -0,0 +1,317 @@ +CONFIG_EXPERIMENTAL=y +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_AUDIT=y +CONFIG_LOG_BUF_SHIFT=18 +CONFIG_CGROUPS=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_RESOURCE_COUNTERS=y +CONFIG_CGROUP_SCHED=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_KPROBES=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y +CONFIG_EFI_PARTITION=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_SMP=y +CONFIG_CALGARY_IOMMU=y +CONFIG_NR_CPUS=64 +CONFIG_SCHED_SMT=y +CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y +CONFIG_X86_MCE=y +CONFIG_MICROCODE=y +CONFIG_MICROCODE_AMD=y +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +CONFIG_NUMA=y +CONFIG_X86_CHECK_BIOS_CORRUPTION=y +# CONFIG_MTRR_SANITIZER is not set +CONFIG_EFI=y +CONFIG_HZ_1000=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +# CONFIG_COMPAT_VDSO is not set +CONFIG_HIBERNATION=y +CONFIG_PM_DEBUG=y +CONFIG_PM_TRACE_RTC=y +CONFIG_ACPI_PROCFS=y +CONFIG_ACPI_DOCK=y +CONFIG_CPU_FREQ=y +# CONFIG_CPU_FREQ_STAT is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_X86_ACPI_CPUFREQ=y +CONFIG_PCI_MMCONFIG=y +CONFIG_PCIEPORTBUS=y +CONFIG_PCCARD=y +CONFIG_YENTA=y +CONFIG_HOTPLUG_PCI=y +CONFIG_BINFMT_MISC=y +CONFIG_IA32_EMULATION=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_DIAG is not set +CONFIG_TCP_CONG_ADVANCED=y +# CONFIG_TCP_CONG_BIC is not set +# CONFIG_TCP_CONG_WESTWOOD is not set +# CONFIG_TCP_CONG_HTCP is not set +CONFIG_TCP_MD5SIG=y +CONFIG_IPV6=y +CONFIG_INET6_AH=y +CONFIG_INET6_ESP=y +CONFIG_NETLABEL=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_ADVANCED is not set +CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_FTP=y +CONFIG_NF_CONNTRACK_IRC=y +CONFIG_NF_CONNTRACK_SIP=y +CONFIG_NF_CT_NETLINK=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y +CONFIG_NETFILTER_XT_TARGET_NFLOG=y +CONFIG_NETFILTER_XT_TARGET_SECMARK=y +CONFIG_NETFILTER_XT_TARGET_TCPMSS=y +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y +CONFIG_NETFILTER_XT_MATCH_POLICY=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP_NF_TARGET_ULOG=y +CONFIG_NF_NAT=y 
+CONFIG_IP_NF_TARGET_MASQUERADE=y +CONFIG_IP_NF_MANGLE=y +CONFIG_NF_CONNTRACK_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_IPV6HEADER=y +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP6_NF_MANGLE=y +CONFIG_NET_SCHED=y +CONFIG_NET_EMATCH=y +CONFIG_NET_CLS_ACT=y +CONFIG_HAMRADIO=y +CONFIG_CFG80211=y +CONFIG_MAC80211=y +CONFIG_MAC80211_LEDS=y +CONFIG_RFKILL=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEBUG_DEVRES=y +CONFIG_CONNECTOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_SPI_ATTRS=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_ATA_PIIX=y +CONFIG_PATA_AMD=y +CONFIG_PATA_OLDPIIX=y +CONFIG_PATA_SCH=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_BLK_DEV_DM=y +CONFIG_DM_MIRROR=y +CONFIG_DM_ZERO=y +CONFIG_MACINTOSH_DRIVERS=y +CONFIG_MAC_EMUMOUSEBTN=y +CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_SKY2=y +CONFIG_FORCEDETH=y +CONFIG_8139TOO=y +CONFIG_FDDI=y +CONFIG_INPUT_POLLDEV=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_TABLET=y +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_INPUT_MISC=y +CONFIG_VT_HW_CONSOLE_BINDING=y +# CONFIG_LEGACY_PTYS is not set +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=32 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y +CONFIG_HW_RANDOM=y +# CONFIG_HW_RANDOM_INTEL is not set +# CONFIG_HW_RANDOM_AMD is not set +CONFIG_NVRAM=y +CONFIG_HPET=y +# CONFIG_HPET_MMAP is not set +CONFIG_I2C_I801=y +CONFIG_WATCHDOG=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_DRM=y +CONFIG_DRM_I915=y +CONFIG_DRM_I915_KMS=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_EFI=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_VGACON_SOFT_SCROLLBACK=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_SEQ_DUMMY=y +CONFIG_SND_MIXER_OSS=y +CONFIG_SND_PCM_OSS=y +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_HRTIMER=y +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_HWDEP=y +CONFIG_HIDRAW=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y +CONFIG_HID_GYRATION=y +CONFIG_LOGITECH_FF=y +CONFIG_HID_NTRIG=y +CONFIG_HID_PANTHERLORD=y +CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_TOPSEED=y +CONFIG_USB=y +CONFIG_USB_DEBUG=y +CONFIG_USB_ANNOUNCE_NEW_DEVICES=y +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_DEVICE_CLASS is not set +CONFIG_USB_MON=y +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_PRINTER=y +CONFIG_USB_STORAGE=y +CONFIG_USB_LIBUSUAL=y +CONFIG_EDAC=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_HCTOSYS is not set +CONFIG_DMADEVICES=y +CONFIG_EEEPC_LAPTOP=y +CONFIG_AMD_IOMMU=y +CONFIG_AMD_IOMMU_STATS=y +CONFIG_INTEL_IOMMU=y +# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set +CONFIG_EFI_VARS=y +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_QUOTA=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QFMT_V2=y +CONFIG_AUTOFS4_FS=y +CONFIG_ISO9660_FS=y 
+CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_PRINTK_TIME=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set +CONFIG_MAGIC_SYSRQ=y +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +CONFIG_EARLY_PRINTK_DBGP=y +CONFIG_DEBUG_STACKOVERFLOW=y +# CONFIG_DEBUG_RODATA_TEST is not set +CONFIG_DEBUG_NX_TEST=m +CONFIG_DEBUG_BOOT_PARAMS=y +CONFIG_OPTIMIZE_INLINING=y +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_DISABLE=y +# CONFIG_CRYPTO_ANSI_CPRNG is not set +CONFIG_CRC_T10DIF=y diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile new file mode 100644 index 00000000..e191ac04 --- /dev/null +++ b/arch/x86/crypto/Makefile @@ -0,0 +1,45 @@ +# +# Arch-specific CryptoAPI modules. +# + +obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o +obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o +obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o +obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o + +obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o +obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o +obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o +obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o +obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o +obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o +obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o +obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o +obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o + +obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o +obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o + +aes-i586-y := aes-i586-asm_32.o aes_glue.o +twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o +salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o +serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o + +aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o +camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o +blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o +twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o +twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o +salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o +serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o + +aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o + +ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o + +# enable AVX support only when $(AS) can actually assemble the instructions +ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes) +AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT +CFLAGS_sha1_ssse3_glue.o += -DSHA1_ENABLE_AVX_SUPPORT +endif +sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S new file mode 100644 index 00000000..b949ec2f --- /dev/null +++ b/arch/x86/crypto/aes-i586-asm_32.S @@ -0,0 +1,367 @@ +// 
------------------------------------------------------------------------- +// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK. +// All rights reserved. +// +// LICENSE TERMS +// +// The free distribution and use of this software in both source and binary +// form is allowed (with or without changes) provided that: +// +// 1. distributions of this source code include the above copyright +// notice, this list of conditions and the following disclaimer// +// +// 2. distributions in binary form include the above copyright +// notice, this list of conditions and the following disclaimer +// in the documentation and/or other associated materials// +// +// 3. the copyright holder's name is not used to endorse products +// built using this software without specific written permission. +// +// +// ALTERNATIVELY, provided that this notice is retained in full, this product +// may be distributed under the terms of the GNU General Public License (GPL), +// in which case the provisions of the GPL apply INSTEAD OF those given above. +// +// Copyright (c) 2004 Linus Torvalds +// Copyright (c) 2004 Red Hat, Inc., James Morris + +// DISCLAIMER +// +// This software is provided 'as is' with no explicit or implied warranties +// in respect of its properties including, but not limited to, correctness +// and fitness for purpose. +// ------------------------------------------------------------------------- +// Issue Date: 29/07/2002 + +.file "aes-i586-asm.S" +.text + +#include + +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) + +/* offsets to parameters with one register pushed onto stack */ +#define ctx 8 +#define out_blk 12 +#define in_blk 16 + +/* offsets in crypto_aes_ctx structure */ +#define klen (480) +#define ekey (0) +#define dkey (240) + +// register mapping for encrypt and decrypt subroutines + +#define r0 eax +#define r1 ebx +#define r2 ecx +#define r3 edx +#define r4 esi +#define r5 edi + +#define eaxl al +#define eaxh ah +#define ebxl bl +#define ebxh bh +#define ecxl cl +#define ecxh ch +#define edxl dl +#define edxh dh + +#define _h(reg) reg##h +#define h(reg) _h(reg) + +#define _l(reg) reg##l +#define l(reg) _l(reg) + +// This macro takes a 32-bit word representing a column and uses +// each of its four bytes to index into four tables of 256 32-bit +// words to obtain values that are then xored into the appropriate +// output registers r0, r1, r4 or r5. 
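+// Sketch of one do_col() expansion in C, assuming tab[k] names the
+// 256-entry sub-table at table + k*tlen:
+//
+//	a1 ^= tab[0][idx & 0xff];
+//	a2 ^= tab[1][(idx >> 8) & 0xff];
+//	a3 ^= tab[2][(idx >> 16) & 0xff];
+//	a4 ^= tab[3][idx >> 24];
+//
+// i.e. the standard 32-bit T-table formulation of an AES round, with
+// the four lookups of one input column scattered over four output words.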
+ +// Parameters: +// table table base address +// %1 out_state[0] +// %2 out_state[1] +// %3 out_state[2] +// %4 out_state[3] +// idx input register for the round (destroyed) +// tmp scratch register for the round +// sched key schedule + +#define do_col(table, a1,a2,a3,a4, idx, tmp) \ + movzx %l(idx),%tmp; \ + xor table(,%tmp,4),%a1; \ + movzx %h(idx),%tmp; \ + shr $16,%idx; \ + xor table+tlen(,%tmp,4),%a2; \ + movzx %l(idx),%tmp; \ + movzx %h(idx),%idx; \ + xor table+2*tlen(,%tmp,4),%a3; \ + xor table+3*tlen(,%idx,4),%a4; + +// initialise output registers from the key schedule +// NB1: original value of a3 is in idx on exit +// NB2: original values of a1,a2,a4 aren't used +#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \ + mov 0 sched,%a1; \ + movzx %l(idx),%tmp; \ + mov 12 sched,%a2; \ + xor table(,%tmp,4),%a1; \ + mov 4 sched,%a4; \ + movzx %h(idx),%tmp; \ + shr $16,%idx; \ + xor table+tlen(,%tmp,4),%a2; \ + movzx %l(idx),%tmp; \ + movzx %h(idx),%idx; \ + xor table+3*tlen(,%idx,4),%a4; \ + mov %a3,%idx; \ + mov 8 sched,%a3; \ + xor table+2*tlen(,%tmp,4),%a3; + +// initialise output registers from the key schedule +// NB1: original value of a3 is in idx on exit +// NB2: original values of a1,a2,a4 aren't used +#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \ + mov 0 sched,%a1; \ + movzx %l(idx),%tmp; \ + mov 4 sched,%a2; \ + xor table(,%tmp,4),%a1; \ + mov 12 sched,%a4; \ + movzx %h(idx),%tmp; \ + shr $16,%idx; \ + xor table+tlen(,%tmp,4),%a2; \ + movzx %l(idx),%tmp; \ + movzx %h(idx),%idx; \ + xor table+3*tlen(,%idx,4),%a4; \ + mov %a3,%idx; \ + mov 8 sched,%a3; \ + xor table+2*tlen(,%tmp,4),%a3; + + +// original Gladman had conditional saves to MMX regs. +#define save(a1, a2) \ + mov %a2,4*a1(%esp) + +#define restore(a1, a2) \ + mov 4*a2(%esp),%a1 + +// These macros perform a forward encryption cycle. They are entered with +// the first previous round column values in r0,r1,r4,r5 and +// exit with the final values in the same registers, using stack +// for temporary storage. + +// round column values +// on entry: r0,r1,r4,r5 +// on exit: r2,r1,r4,r5 +#define fwd_rnd1(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \ + do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \ + restore(r0,0); \ + do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \ + restore(r0,1); \ + do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */ + +// round column values +// on entry: r2,r1,r4,r5 +// on exit: r0,r1,r4,r5 +#define fwd_rnd2(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \ + do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \ + restore(r2,0); \ + do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \ + restore(r2,1); \ + do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */ + +// These macros performs an inverse encryption cycle. 
They are entered with +// the first previous round column values in r0,r1,r4,r5 and +// exit with the final values in the same registers, using stack +// for temporary storage + +// round column values +// on entry: r0,r1,r4,r5 +// on exit: r2,r1,r4,r5 +#define inv_rnd1(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \ + do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \ + restore(r0,0); \ + do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \ + restore(r0,1); \ + do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */ + +// round column values +// on entry: r2,r1,r4,r5 +// on exit: r0,r1,r4,r5 +#define inv_rnd2(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \ + do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \ + restore(r2,0); \ + do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \ + restore(r2,1); \ + do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */ + +// AES (Rijndael) Encryption Subroutine +/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ + +.global aes_enc_blk + +.extern crypto_ft_tab +.extern crypto_fl_tab + +.align 4 + +aes_enc_blk: + push %ebp + mov ctx(%esp),%ebp + +// CAUTION: the order and the values used in these assigns +// rely on the register mappings + +1: push %ebx + mov in_blk+4(%esp),%r2 + push %esi + mov klen(%ebp),%r3 // key size + push %edi +#if ekey != 0 + lea ekey(%ebp),%ebp // key pointer +#endif + +// input four columns and xor in first round key + + mov (%r2),%r0 + mov 4(%r2),%r1 + mov 8(%r2),%r4 + mov 12(%r2),%r5 + xor (%ebp),%r0 + xor 4(%ebp),%r1 + xor 8(%ebp),%r4 + xor 12(%ebp),%r5 + + sub $8,%esp // space for register saves on stack + add $16,%ebp // increment to next round key + cmp $24,%r3 + jb 4f // 10 rounds for 128-bit key + lea 32(%ebp),%ebp + je 3f // 12 rounds for 192-bit key + lea 32(%ebp),%ebp + +2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key + fwd_rnd2( -48(%ebp), crypto_ft_tab) +3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key + fwd_rnd2( -16(%ebp), crypto_ft_tab) +4: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key + fwd_rnd2( +16(%ebp), crypto_ft_tab) + fwd_rnd1( +32(%ebp), crypto_ft_tab) + fwd_rnd2( +48(%ebp), crypto_ft_tab) + fwd_rnd1( +64(%ebp), crypto_ft_tab) + fwd_rnd2( +80(%ebp), crypto_ft_tab) + fwd_rnd1( +96(%ebp), crypto_ft_tab) + fwd_rnd2(+112(%ebp), crypto_ft_tab) + fwd_rnd1(+128(%ebp), crypto_ft_tab) + fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table + +// move final values to the output array. 
CAUTION: the +// order of these assigns rely on the register mappings + + add $8,%esp + mov out_blk+12(%esp),%ebp + mov %r5,12(%ebp) + pop %edi + mov %r4,8(%ebp) + pop %esi + mov %r1,4(%ebp) + pop %ebx + mov %r0,(%ebp) + pop %ebp + ret + +// AES (Rijndael) Decryption Subroutine +/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ + +.global aes_dec_blk + +.extern crypto_it_tab +.extern crypto_il_tab + +.align 4 + +aes_dec_blk: + push %ebp + mov ctx(%esp),%ebp + +// CAUTION: the order and the values used in these assigns +// rely on the register mappings + +1: push %ebx + mov in_blk+4(%esp),%r2 + push %esi + mov klen(%ebp),%r3 // key size + push %edi +#if dkey != 0 + lea dkey(%ebp),%ebp // key pointer +#endif + +// input four columns and xor in first round key + + mov (%r2),%r0 + mov 4(%r2),%r1 + mov 8(%r2),%r4 + mov 12(%r2),%r5 + xor (%ebp),%r0 + xor 4(%ebp),%r1 + xor 8(%ebp),%r4 + xor 12(%ebp),%r5 + + sub $8,%esp // space for register saves on stack + add $16,%ebp // increment to next round key + cmp $24,%r3 + jb 4f // 10 rounds for 128-bit key + lea 32(%ebp),%ebp + je 3f // 12 rounds for 192-bit key + lea 32(%ebp),%ebp + +2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key + inv_rnd2( -48(%ebp), crypto_it_tab) +3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key + inv_rnd2( -16(%ebp), crypto_it_tab) +4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key + inv_rnd2( +16(%ebp), crypto_it_tab) + inv_rnd1( +32(%ebp), crypto_it_tab) + inv_rnd2( +48(%ebp), crypto_it_tab) + inv_rnd1( +64(%ebp), crypto_it_tab) + inv_rnd2( +80(%ebp), crypto_it_tab) + inv_rnd1( +96(%ebp), crypto_it_tab) + inv_rnd2(+112(%ebp), crypto_it_tab) + inv_rnd1(+128(%ebp), crypto_it_tab) + inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table + +// move final values to the output array. CAUTION: the +// order of these assigns rely on the register mappings + + add $8,%esp + mov out_blk+12(%esp),%ebp + mov %r5,12(%ebp) + pop %edi + mov %r4,8(%ebp) + pop %esi + mov %r1,4(%ebp) + pop %ebx + mov %r0,(%ebp) + pop %ebp + ret diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S new file mode 100644 index 00000000..5b577d5a --- /dev/null +++ b/arch/x86/crypto/aes-x86_64-asm_64.S @@ -0,0 +1,188 @@ +/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64 + * + * Copyright (C) 2005 Andreas Steinmetz, + * + * License: + * This code can be distributed under the terms of the GNU General Public + * License (GPL) Version 2 provided that the above header down to and + * including this sentence is retained in full. 
+ */
+
+.extern crypto_ft_tab
+.extern crypto_it_tab
+.extern crypto_fl_tab
+.extern crypto_il_tab
+
+.text
+
+#include
+
+#define R1	%rax
+#define R1E	%eax
+#define R1X	%ax
+#define R1H	%ah
+#define R1L	%al
+#define R2	%rbx
+#define R2E	%ebx
+#define R2X	%bx
+#define R2H	%bh
+#define R2L	%bl
+#define R3	%rcx
+#define R3E	%ecx
+#define R3X	%cx
+#define R3H	%ch
+#define R3L	%cl
+#define R4	%rdx
+#define R4E	%edx
+#define R4X	%dx
+#define R4H	%dh
+#define R4L	%dl
+#define R5	%rsi
+#define R5E	%esi
+#define R6	%rdi
+#define R6E	%edi
+#define R7	%rbp
+#define R7E	%ebp
+#define R8	%r8
+#define R9	%r9
+#define R10	%r10
+#define R11	%r11
+
+#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
+	.global	FUNC;			\
+	.type	FUNC,@function;		\
+	.align	8;			\
+FUNC:	movq	r1,r2;			\
+	movq	r3,r4;			\
+	leaq	KEY+48(r8),r9;		\
+	movq	r10,r11;		\
+	movl	(r7),r5 ## E;		\
+	movl	4(r7),r1 ## E;		\
+	movl	8(r7),r6 ## E;		\
+	movl	12(r7),r7 ## E;		\
+	movl	480(r8),r10 ## E;	\
+	xorl	-48(r9),r5 ## E;	\
+	xorl	-44(r9),r1 ## E;	\
+	xorl	-40(r9),r6 ## E;	\
+	xorl	-36(r9),r7 ## E;	\
+	cmpl	$24,r10 ## E;		\
+	jb	B128;			\
+	leaq	32(r9),r9;		\
+	je	B192;			\
+	leaq	32(r9),r9;
+
+#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
+	movq	r1,r2;			\
+	movq	r3,r4;			\
+	movl	r5 ## E,(r9);		\
+	movl	r6 ## E,4(r9);		\
+	movl	r7 ## E,8(r9);		\
+	movl	r8 ## E,12(r9);		\
+	ret;
+
+#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
+	movzbl	r2 ## H,r5 ## E;	\
+	movzbl	r2 ## L,r6 ## E;	\
+	movl	TAB+1024(,r5,4),r5 ## E;\
+	movw	r4 ## X,r2 ## X;	\
+	movl	TAB(,r6,4),r6 ## E;	\
+	roll	$16,r2 ## E;		\
+	shrl	$16,r4 ## E;		\
+	movzbl	r4 ## H,r7 ## E;	\
+	movzbl	r4 ## L,r4 ## E;	\
+	xorl	OFFSET(r8),ra ## E;	\
+	xorl	OFFSET+4(r8),rb ## E;	\
+	xorl	TAB+3072(,r7,4),r5 ## E;\
+	xorl	TAB+2048(,r4,4),r6 ## E;\
+	movzbl	r1 ## L,r7 ## E;	\
+	movzbl	r1 ## H,r4 ## E;	\
+	movl	TAB+1024(,r4,4),r4 ## E;\
+	movw	r3 ## X,r1 ## X;	\
+	roll	$16,r1 ## E;		\
+	shrl	$16,r3 ## E;		\
+	xorl	TAB(,r7,4),r5 ## E;	\
+	movzbl	r3 ## H,r7 ## E;	\
+	movzbl	r3 ## L,r3 ## E;	\
+	xorl	TAB+3072(,r7,4),r4 ## E;\
+	xorl	TAB+2048(,r3,4),r5 ## E;\
+	movzbl	r1 ## H,r7 ## E;	\
+	movzbl	r1 ## L,r3 ## E;	\
+	shrl	$16,r1 ## E;		\
+	xorl	TAB+3072(,r7,4),r6 ## E;\
+	movl	TAB+2048(,r3,4),r3 ## E;\
+	movzbl	r1 ## H,r7 ## E;	\
+	movzbl	r1 ## L,r1 ## E;	\
+	xorl	TAB+1024(,r7,4),r6 ## E;\
+	xorl	TAB(,r1,4),r3 ## E;	\
+	movzbl	r2 ## H,r1 ## E;	\
+	movzbl	r2 ## L,r7 ## E;	\
+	shrl	$16,r2 ## E;		\
+	xorl	TAB+3072(,r1,4),r3 ## E;\
+	xorl	TAB+2048(,r7,4),r4 ## E;\
+	movzbl	r2 ## H,r1 ## E;	\
+	movzbl	r2 ## L,r2 ## E;	\
+	xorl	OFFSET+8(r8),rc ## E;	\
+	xorl	OFFSET+12(r8),rd ## E;	\
+	xorl	TAB+1024(,r1,4),r3 ## E;\
+	xorl	TAB(,r2,4),r4 ## E;
+
+#define move_regs(r1,r2,r3,r4) \
+	movl	r3 ## E,r1 ## E;	\
+	movl	r4 ## E,r2 ## E;
+
+#define entry(FUNC,KEY,B128,B192) \
+	prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
+
+#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
+
+#define encrypt_round(TAB,OFFSET) \
+	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
+	move_regs(R1,R2,R5,R6)
+
+#define encrypt_final(TAB,OFFSET) \
+	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
+
+#define decrypt_round(TAB,OFFSET) \
+	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
+	move_regs(R1,R2,R5,R6)
+
+#define decrypt_final(TAB,OFFSET) \
+	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
+
+/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
+
+	entry(aes_enc_blk,0,enc128,enc192)
+	encrypt_round(crypto_ft_tab,-96)
+	encrypt_round(crypto_ft_tab,-80)
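+/*
+ * Key-size dispatch (our annotation, not in the original): prologue()
+ * above compares the key length and jumps to the enc128/enc192 labels
+ * below, so a 256-bit key runs all 14 rounds, a 192-bit key enters at
+ * enc192 (12 rounds) and a 128-bit key enters at enc128 (10 rounds).
+ * Roughly, in C:
+ *
+ *	int rounds = klen == 32 ? 14 : klen == 24 ? 12 : 10;
+ *	for (i = 0; i < rounds - 1; i++)
+ *		state = ft_round(state, rk[i]);	// hypothetical helpers
+ *	state = fl_round(state, rk[rounds - 1]);	// final round, fl table
+ */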
+enc192:	encrypt_round(crypto_ft_tab,-64)
+	encrypt_round(crypto_ft_tab,-48)
+enc128:	encrypt_round(crypto_ft_tab,-32)
+	encrypt_round(crypto_ft_tab,-16)
+	encrypt_round(crypto_ft_tab,  0)
+	encrypt_round(crypto_ft_tab, 16)
+	encrypt_round(crypto_ft_tab, 32)
+	encrypt_round(crypto_ft_tab, 48)
+	encrypt_round(crypto_ft_tab, 64)
+	encrypt_round(crypto_ft_tab, 80)
+	encrypt_round(crypto_ft_tab, 96)
+	encrypt_final(crypto_fl_tab,112)
+	return
+
+/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
+
+	entry(aes_dec_blk,240,dec128,dec192)
+	decrypt_round(crypto_it_tab,-96)
+	decrypt_round(crypto_it_tab,-80)
+dec192:	decrypt_round(crypto_it_tab,-64)
+	decrypt_round(crypto_it_tab,-48)
+dec128:	decrypt_round(crypto_it_tab,-32)
+	decrypt_round(crypto_it_tab,-16)
+	decrypt_round(crypto_it_tab,  0)
+	decrypt_round(crypto_it_tab, 16)
+	decrypt_round(crypto_it_tab, 32)
+	decrypt_round(crypto_it_tab, 48)
+	decrypt_round(crypto_it_tab, 64)
+	decrypt_round(crypto_it_tab, 80)
+	decrypt_round(crypto_it_tab, 96)
+	decrypt_final(crypto_il_tab,112)
+	return
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
new file mode 100644
index 00000000..8efcf42a
--- /dev/null
+++ b/arch/x86/crypto/aes_glue.c
@@ -0,0 +1,71 @@
+/*
+ * Glue Code for the asm optimized version of the AES Cipher Algorithm
+ *
+ */
+
+#include <linux/module.h>
+#include <crypto/aes.h>
+#include <asm/aes.h>
+
+asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
+asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
+
+void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+{
+	aes_enc_blk(ctx, dst, src);
+}
+EXPORT_SYMBOL_GPL(crypto_aes_encrypt_x86);
+
+void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
+{
+	aes_dec_blk(ctx, dst, src);
+}
+EXPORT_SYMBOL_GPL(crypto_aes_decrypt_x86);
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	aes_enc_blk(crypto_tfm_ctx(tfm), dst, src);
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	aes_dec_blk(crypto_tfm_ctx(tfm), dst, src);
+}
+
+static struct crypto_alg aes_alg = {
+	.cra_name		= "aes",
+	.cra_driver_name	= "aes-asm",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(aes_alg.cra_list),
+	.cra_u	= {
+		.cipher	= {
+			.cia_min_keysize	= AES_MIN_KEY_SIZE,
+			.cia_max_keysize	= AES_MAX_KEY_SIZE,
+			.cia_setkey		= crypto_aes_set_key,
+			.cia_encrypt		= aes_encrypt,
+			.cia_decrypt		= aes_decrypt
+		}
+	}
+};
+
+static int __init aes_init(void)
+{
+	return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_fini(void)
+{
+	crypto_unregister_alg(&aes_alg);
+}
+
+module_init(aes_init);
+module_exit(aes_fini);
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("aes");
+MODULE_ALIAS("aes-asm");
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
new file mode 100644
index 00000000..3470624d
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -0,0 +1,2618 @@
+/*
+ * Implement AES algorithm in Intel AES-NI instructions.
+ *
+ * The white paper of AES-NI instructions can be downloaded from:
+ *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
+ *
+ * Copyright (C) 2008, Intel Corp.
+ * Author: Huang Ying
+ *         Vinodh Gopal
+ *         Kahraman Akdemir
+ *
+ * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
+ * interface for 64-bit kernels.
+ * Authors: Erdinc Ozturk (erdinc.ozturk@intel.com)
+ *          Aidan O'Mahony (aidan.o.mahony@intel.com)
+ *          Adrian Hoban
+ *          James Guilford (james.guilford@intel.com)
+ *          Gabriele Paoloni
+ *          Tadeusz Struk (tadeusz.struk@intel.com)
+ *          Wajdi Feghali (wajdi.k.feghali@intel.com)
+ * Copyright (c) 2010, Intel Corporation.
+ *
+ * Ported x86_64 version to x86:
+ * Author: Mathias Krause
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+#include <asm/inst.h>
+
+#ifdef __x86_64__
+.data
+POLY:   .octa 0xC2000000000000000000000000000001
+TWOONE: .octa 0x00000001000000000000000000000001
+
+# order of these constants should not change.
+# more specifically, ALL_F should follow SHIFT_MASK,
+# and ZERO should follow ALL_F
+
+SHUF_MASK:  .octa 0x000102030405060708090A0B0C0D0E0F
+MASK1:      .octa 0x0000000000000000ffffffffffffffff
+MASK2:      .octa 0xffffffffffffffff0000000000000000
+SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
+ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
+ZERO:       .octa 0x00000000000000000000000000000000
+ONE:        .octa 0x00000000000000000000000000000001
+F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
+dec:        .octa 0x1
+enc:        .octa 0x2
+
+
+.text
+
+
+#define STACK_OFFSET    8*3
+#define HashKey         16*0	// store HashKey <<1 mod poly here
+#define HashKey_2       16*1	// store HashKey^2 <<1 mod poly here
+#define HashKey_3       16*2	// store HashKey^3 <<1 mod poly here
+#define HashKey_4       16*3	// store HashKey^4 <<1 mod poly here
+#define HashKey_k       16*4	// store XOR of High 64 bits and Low 64
+				// bits of  HashKey <<1 mod poly here
+				// (for Karatsuba purposes)
+#define HashKey_2_k     16*5	// store XOR of High 64 bits and Low 64
+				// bits of  HashKey^2 <<1 mod poly here
+				// (for Karatsuba purposes)
+#define HashKey_3_k     16*6	// store XOR of High 64 bits and Low 64
+				// bits of  HashKey^3 <<1 mod poly here
+				// (for Karatsuba purposes)
+#define HashKey_4_k     16*7	// store XOR of High 64 bits and Low 64
+				// bits of  HashKey^4 <<1 mod poly here
+				// (for Karatsuba purposes)
+#define VARIABLE_OFFSET 16*8
+
+#define arg1 rdi
+#define arg2 rsi
+#define arg3 rdx
+#define arg4 rcx
+#define arg5 r8
+#define arg6 r9
+#define arg7 STACK_OFFSET+8(%r14)
+#define arg8 STACK_OFFSET+16(%r14)
+#define arg9 STACK_OFFSET+24(%r14)
+#define arg10 STACK_OFFSET+32(%r14)
+#endif
+
+
+#define STATE1  %xmm0
+#define STATE2  %xmm4
+#define STATE3  %xmm5
+#define STATE4  %xmm6
+#define STATE   STATE1
+#define IN1     %xmm1
+#define IN2     %xmm7
+#define IN3     %xmm8
+#define IN4     %xmm9
+#define IN      IN1
+#define KEY     %xmm2
+#define IV      %xmm3
+
+#define BSWAP_MASK %xmm10
+#define CTR     %xmm11
+#define INC     %xmm12
+
+#ifdef __x86_64__
+#define AREG    %rax
+#define KEYP    %rdi
+#define OUTP    %rsi
+#define UKEYP   OUTP
+#define INP     %rdx
+#define LEN     %rcx
+#define IVP     %r8
+#define KLEN    %r9d
+#define T1      %r10
+#define TKEYP   T1
+#define T2      %r11
+#define TCTR_LOW T2
+#else
+#define AREG    %eax
+#define KEYP    %edi
+#define OUTP    AREG
+#define UKEYP   OUTP
+#define INP     %edx
+#define LEN     %esi
+#define IVP     %ebp
+#define KLEN    %ebx
+#define T1      %ecx
+#define TKEYP   T1
+#endif
+
+
+#ifdef __x86_64__
+/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
+*
+*
+* Input: A and B (128-bits each, bit-reflected)
+* Output: C = A*B*x mod poly, (i.e. >>1 )
+* To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
+* GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
+*
+*/
+.macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5
+	movdqa	  \GH, \TMP1
+	pshufd	  $78, \GH, \TMP2
+	pshufd	  $78, \HK, \TMP3
+	pxor	  \GH, \TMP2		# TMP2 = a1+a0
+	pxor	  \HK, \TMP3		# TMP3 = b1+b0
+	PCLMULQDQ 0x11, \HK, \TMP1	# TMP1 = a1*b1
+	PCLMULQDQ 0x00, \HK, \GH	# GH = a0*b0
+	PCLMULQDQ 0x00, \TMP3, \TMP2	# TMP2 = (a0+a1)*(b1+b0)
+	pxor	  \GH, \TMP2
+	pxor	  \TMP1, \TMP2		# TMP2 = (a1*b0)+(a0*b1)
+	movdqa	  \TMP2, \TMP3
+	pslldq	  $8, \TMP3		# left shift TMP3 2 DWs
+	psrldq	  $8, \TMP2		# right shift TMP2 2 DWs
+	pxor	  \TMP3, \GH
+	pxor	  \TMP2, \TMP1		# TMP1:GH holds the result of GH*HK
+
+	# first phase of the reduction
+
+	movdqa    \GH, \TMP2
+	movdqa    \GH, \TMP3
+	movdqa    \GH, \TMP4		# copy GH into TMP2,TMP3 and TMP4
+					# in order to perform
+					# independent shifts
+	pslld     $31, \TMP2		# packed left shift <<31
+	pslld     $30, \TMP3		# packed left shift <<30
+	pslld     $25, \TMP4		# packed left shift <<25
+	pxor      \TMP3, \TMP2		# xor the shifted versions
+	pxor      \TMP4, \TMP2
+	movdqa    \TMP2, \TMP5
+	psrldq    $4, \TMP5		# right shift TMP5 1 DW
+	pslldq    $12, \TMP2		# left shift TMP2 3 DWs
+	pxor      \TMP2, \GH
+
+	# second phase of the reduction
+
+	movdqa    \GH,\TMP2		# copy GH into TMP2,TMP3 and TMP4
+					# in order to perform
+					# independent shifts
+	movdqa    \GH,\TMP3
+	movdqa    \GH,\TMP4
+	psrld     $1,\TMP2		# packed right shift >>1
+	psrld     $2,\TMP3		# packed right shift >>2
+	psrld     $7,\TMP4		# packed right shift >>7
+	pxor      \TMP3,\TMP2		# xor the shifted versions
+	pxor      \TMP4,\TMP2
+	pxor      \TMP5, \TMP2
+	pxor      \TMP2, \GH
+	pxor      \TMP1, \GH		# result is in GH
+.endm
+
+/*
+* if a = number of total plaintext bytes
+* b = floor(a/16)
+* num_initial_blocks = b mod 4
+* encrypt the initial num_initial_blocks blocks and apply ghash on
+* the ciphertext
+* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
+* are clobbered
+* arg1, %arg2, %arg3, %r14 are used as pointers only, not modified
+*/
+
+
+.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
+XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
+	mov	   arg7, %r10		# %r10 = AAD
+	mov	   arg8, %r12		# %r12 = aadLen
+	mov	   %r12, %r11
+	pxor	   %xmm\i, %xmm\i
+_get_AAD_loop\num_initial_blocks\operation:
+	movd	   (%r10), \TMP1
+	pslldq	   $12, \TMP1
+	psrldq	   $4, %xmm\i
+	pxor	   \TMP1, %xmm\i
+	add	   $4, %r10
+	sub	   $4, %r12
+	jne	   _get_AAD_loop\num_initial_blocks\operation
+	cmp	   $16, %r11
+	je	   _get_AAD_loop2_done\num_initial_blocks\operation
+	mov	   $16, %r12
+_get_AAD_loop2\num_initial_blocks\operation:
+	psrldq	   $4, %xmm\i
+	sub	   $4, %r12
+	cmp	   %r11, %r12
+	jne	   _get_AAD_loop2\num_initial_blocks\operation
+_get_AAD_loop2_done\num_initial_blocks\operation:
+	movdqa	   SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, %xmm\i	# byte-reflect the AAD data
+
+	xor	   %r11, %r11		# initialise the data pointer offset to zero
+
+	# start AES for num_initial_blocks blocks
+
+	mov	   %arg5, %rax		# %rax = *Y0
+	movdqu	   (%rax), \XMM0	# XMM0 = Y0
+	movdqa	   SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM0
+
+.if (\i == 5) || (\i == 6) || (\i == 7)
+.irpc index, \i_seq
+	paddd	   ONE(%rip), \XMM0	# INCR Y0
+	movdqa	   \XMM0, %xmm\index
+	movdqa	   SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, %xmm\index	# perform a 16 byte swap
+
+.endr
+.irpc index, \i_seq
+	pxor	   16*0(%arg1), %xmm\index
+.endr
+.irpc index, \i_seq
+	movaps 0x10(%rdi), \TMP1
+	AESENC     \TMP1, %xmm\index	# Round 1
+.endr
+.irpc index, \i_seq
+	movaps 0x20(%arg1), \TMP1
+	AESENC     \TMP1, %xmm\index	# Round 2
+.endr
+.irpc index, \i_seq
+	movaps 0x30(%arg1), \TMP1
+	AESENC     \TMP1, %xmm\index	# Round 3
+.endr
+.irpc index, \i_seq
+	movaps 0x40(%arg1), \TMP1
+	AESENC     \TMP1, %xmm\index	# Round 4
+.endr
+.irpc index, \i_seq
+	movaps 0x50(%arg1), \TMP1
+	AESENC     \TMP1, %xmm\index	# Round 5
+.endr
+.irpc index, \i_seq
+	movaps 0x60(%arg1), \TMP1
+	AESENC     \TMP1, %xmm\index	# Round 6
+.endr
+.irpc index, \i_seq
+	movaps 0x70(%arg1), \TMP1
+	AESENC     \TMP1, %xmm\index	# Round 7
+.endr
+.irpc index, \i_seq
+	movaps 0x80(%arg1), \TMP1
+	AESENC     \TMP1, %xmm\index	# Round 8
+.endr
+.irpc index, \i_seq
+	movaps 0x90(%arg1), \TMP1
+	AESENC     \TMP1, %xmm\index	# Round 9
+.endr
+.irpc index, \i_seq
+	movaps 0xa0(%arg1), \TMP1
+	AESENCLAST \TMP1, %xmm\index	# Round 10
+.endr
+.irpc index, \i_seq
+	movdqu	   (%arg3 , %r11, 1), \TMP1
+	pxor	   \TMP1, %xmm\index
+	movdqu	   %xmm\index, (%arg2 , %r11, 1)
+	# write back plaintext/ciphertext for num_initial_blocks
+	add	   $16, %r11
+
+	movdqa     \TMP1, %xmm\index
+	movdqa	   SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, %xmm\index
+
+	# prepare plaintext/ciphertext for GHASH computation
+.endr
+.endif
+	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	# apply GHASH on num_initial_blocks blocks
+
+.if \i == 5
+	pxor	   %xmm5, %xmm6
+	GHASH_MUL  %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	pxor	   %xmm6, %xmm7
+	GHASH_MUL  %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	pxor	   %xmm7, %xmm8
+	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.elseif \i == 6
+	pxor	   %xmm6, %xmm7
+	GHASH_MUL  %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	pxor	   %xmm7, %xmm8
+	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.elseif \i == 7
+	pxor	   %xmm7, %xmm8
+	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+.endif
+	cmp	   $64, %r13
+	jl	_initial_blocks_done\num_initial_blocks\operation
+	# no need for precomputed values
+/*
+*
+* Precomputations for HashKey parallel with encryption of first 4 blocks.
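+*
+* (Annotation, not in the original: four powers of the hash key are kept
+* because the main loop folds four blocks into the GHASH state per pass:
+*     X' = ((((X + C1)*H + C2)*H + C3)*H + C4)*H
+*        = (X + C1)*H^4 + C2*H^3 + C3*H^2 + C4*H    (+ is XOR in GF(2^128))
+* so the four multiplications are independent and can be interleaved with
+* the AES rounds of the next four counter blocks.)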
+* HashKey_i_k holds XORed values of the low and high parts of the HashKey_i
+*/
+	paddd	   ONE(%rip), \XMM0	# INCR Y0
+	movdqa	   \XMM0, \XMM1
+	movdqa	   SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM1	# perform a 16 byte swap
+
+	paddd	   ONE(%rip), \XMM0	# INCR Y0
+	movdqa	   \XMM0, \XMM2
+	movdqa	   SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM2	# perform a 16 byte swap
+
+	paddd	   ONE(%rip), \XMM0	# INCR Y0
+	movdqa	   \XMM0, \XMM3
+	movdqa	   SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM3	# perform a 16 byte swap
+
+	paddd	   ONE(%rip), \XMM0	# INCR Y0
+	movdqa	   \XMM0, \XMM4
+	movdqa	   SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM4	# perform a 16 byte swap
+
+	pxor	   16*0(%arg1), \XMM1
+	pxor	   16*0(%arg1), \XMM2
+	pxor	   16*0(%arg1), \XMM3
+	pxor	   16*0(%arg1), \XMM4
+	movdqa	   \TMP3, \TMP5
+	pshufd	   $78, \TMP3, \TMP1
+	pxor	   \TMP3, \TMP1
+	movdqa	   \TMP1, HashKey_k(%rsp)
+	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^2<<1 (mod poly)
+	movdqa	   \TMP5, HashKey_2(%rsp)
+# HashKey_2 = HashKey^2<<1 (mod poly)
+	pshufd	   $78, \TMP5, \TMP1
+	pxor	   \TMP5, \TMP1
+	movdqa	   \TMP1, HashKey_2_k(%rsp)
+.irpc index, 1234 # do 4 rounds
+	movaps 0x10*\index(%arg1), \TMP1
+	AESENC	   \TMP1, \XMM1
+	AESENC	   \TMP1, \XMM2
+	AESENC	   \TMP1, \XMM3
+	AESENC	   \TMP1, \XMM4
+.endr
+	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^3<<1 (mod poly)
+	movdqa	   \TMP5, HashKey_3(%rsp)
+	pshufd	   $78, \TMP5, \TMP1
+	pxor	   \TMP5, \TMP1
+	movdqa	   \TMP1, HashKey_3_k(%rsp)
+.irpc index, 56789 # do next 5 rounds
+	movaps 0x10*\index(%arg1), \TMP1
+	AESENC	   \TMP1, \XMM1
+	AESENC	   \TMP1, \XMM2
+	AESENC	   \TMP1, \XMM3
+	AESENC	   \TMP1, \XMM4
+.endr
+	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^4<<1 (mod poly)
+	movdqa	   \TMP5, HashKey_4(%rsp)
+	pshufd	   $78, \TMP5, \TMP1
+	pxor	   \TMP5, \TMP1
+	movdqa	   \TMP1, HashKey_4_k(%rsp)
+	movaps	   0xa0(%arg1), \TMP2
+	AESENCLAST \TMP2, \XMM1
+	AESENCLAST \TMP2, \XMM2
+	AESENCLAST \TMP2, \XMM3
+	AESENCLAST \TMP2, \XMM4
+	movdqu	   16*0(%arg3 , %r11 , 1), \TMP1
+	pxor	   \TMP1, \XMM1
+	movdqu	   \XMM1, 16*0(%arg2 , %r11 , 1)
+	movdqa     \TMP1, \XMM1
+	movdqu	   16*1(%arg3 , %r11 , 1), \TMP1
+	pxor	   \TMP1, \XMM2
+	movdqu	   \XMM2, 16*1(%arg2 , %r11 , 1)
+	movdqa     \TMP1, \XMM2
+	movdqu	   16*2(%arg3 , %r11 , 1), \TMP1
+	pxor	   \TMP1, \XMM3
+	movdqu	   \XMM3, 16*2(%arg2 , %r11 , 1)
+	movdqa     \TMP1, \XMM3
+	movdqu	   16*3(%arg3 , %r11 , 1), \TMP1
+	pxor	   \TMP1, \XMM4
+	movdqu	   \XMM4, 16*3(%arg2 , %r11 , 1)
+	movdqa     \TMP1, \XMM4
+	add	   $64, %r11
+	movdqa	   SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM1	# perform a 16 byte swap
+	pxor	   \XMMDst, \XMM1
+# combine GHASHed value with the corresponding ciphertext
+	movdqa	   SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM2	# perform a 16 byte swap
+	movdqa	   SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM3	# perform a 16 byte swap
+	movdqa	   SHUF_MASK(%rip), %xmm14
+	PSHUFB_XMM %xmm14, \XMM4	# perform a 16 byte swap
+
+_initial_blocks_done\num_initial_blocks\operation:
+
+.endm
+
+
+/*
+* if a = number of total plaintext bytes
+* b = floor(a/16)
+* num_initial_blocks = b mod 4
+* encrypt the initial num_initial_blocks blocks and apply ghash on
+* the ciphertext
+* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
+* are clobbered
+* arg1, %arg2, %arg3, %r14 are used as pointers only, not modified
+*/
+
+
+.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
+XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
+	mov	   arg7, %r10		# %r10 = AAD
+	mov	   arg8, %r12		# %r12 = aadLen
+	mov	   %r12, %r11
+	pxor	   %xmm\i, %xmm\i
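+	# The loop below (same pattern as in INITIAL_BLOCKS_DEC) gathers the
+	# AAD four bytes at a time into %xmm\i; as a C sketch (our
+	# illustration only, names not in this file):
+	#	__uint128_t acc = 0;
+	#	while (aadlen) {
+	#		acc = (acc >> 32) | ((__uint128_t)load32(aad) << 96);
+	#		aad += 4; aadlen -= 4;
+	#	}
+	# a second loop right-aligns AAD shorter than 16 bytes, and the
+	# result is then byte-reflected for GHASH.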
+_get_AAD_loop\num_initial_blocks\operation: + movd (%r10), \TMP1 + pslldq $12, \TMP1 + psrldq $4, %xmm\i + pxor \TMP1, %xmm\i + add $4, %r10 + sub $4, %r12 + jne _get_AAD_loop\num_initial_blocks\operation + cmp $16, %r11 + je _get_AAD_loop2_done\num_initial_blocks\operation + mov $16, %r12 +_get_AAD_loop2\num_initial_blocks\operation: + psrldq $4, %xmm\i + sub $4, %r12 + cmp %r11, %r12 + jne _get_AAD_loop2\num_initial_blocks\operation +_get_AAD_loop2_done\num_initial_blocks\operation: + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data + + xor %r11, %r11 # initialise the data pointer offset as zero + + # start AES for num_initial_blocks blocks + + mov %arg5, %rax # %rax = *Y0 + movdqu (%rax), \XMM0 # XMM0 = Y0 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM0 + +.if (\i == 5) || (\i == 6) || (\i == 7) +.irpc index, \i_seq + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, %xmm\index + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap + +.endr +.irpc index, \i_seq + pxor 16*0(%arg1), %xmm\index +.endr +.irpc index, \i_seq + movaps 0x10(%rdi), \TMP1 + AESENC \TMP1, %xmm\index # Round 1 +.endr +.irpc index, \i_seq + movaps 0x20(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x30(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x40(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x50(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x60(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x70(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x80(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x90(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0xa0(%arg1), \TMP1 + AESENCLAST \TMP1, %xmm\index # Round 10 +.endr +.irpc index, \i_seq + movdqu (%arg3 , %r11, 1), \TMP1 + pxor \TMP1, %xmm\index + movdqu %xmm\index, (%arg2 , %r11, 1) + # write back plaintext/ciphertext for num_initial_blocks + add $16, %r11 + + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\index + + # prepare plaintext/ciphertext for GHASH computation +.endr +.endif + GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + # apply GHASH on num_initial_blocks blocks + +.if \i == 5 + pxor %xmm5, %xmm6 + GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm6, %xmm7 + GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.elseif \i == 6 + pxor %xmm6, %xmm7 + GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.elseif \i == 7 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.endif + cmp $64, %r13 + jl _initial_blocks_done\num_initial_blocks\operation + # no need for precomputed values +/* +* +* Precomputations for HashKey parallel with encryption of first 4 blocks. 
+* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i +*/ + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM1 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap + + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM2 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap + + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM3 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap + + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM4 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap + + pxor 16*0(%arg1), \XMM1 + pxor 16*0(%arg1), \XMM2 + pxor 16*0(%arg1), \XMM3 + pxor 16*0(%arg1), \XMM4 + movdqa \TMP3, \TMP5 + pshufd $78, \TMP3, \TMP1 + pxor \TMP3, \TMP1 + movdqa \TMP1, HashKey_k(%rsp) + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^2<<1 (mod poly) + movdqa \TMP5, HashKey_2(%rsp) +# HashKey_2 = HashKey^2<<1 (mod poly) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_2_k(%rsp) +.irpc index, 1234 # do 4 rounds + movaps 0x10*\index(%arg1), \TMP1 + AESENC \TMP1, \XMM1 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 +.endr + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqa \TMP5, HashKey_3(%rsp) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_3_k(%rsp) +.irpc index, 56789 # do next 5 rounds + movaps 0x10*\index(%arg1), \TMP1 + AESENC \TMP1, \XMM1 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 +.endr + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqa \TMP5, HashKey_4(%rsp) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_4_k(%rsp) + movaps 0xa0(%arg1), \TMP2 + AESENCLAST \TMP2, \XMM1 + AESENCLAST \TMP2, \XMM2 + AESENCLAST \TMP2, \XMM3 + AESENCLAST \TMP2, \XMM4 + movdqu 16*0(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM1 + movdqu 16*1(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM2 + movdqu 16*2(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM3 + movdqu 16*3(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM4 + movdqu \XMM1, 16*0(%arg2 , %r11 , 1) + movdqu \XMM2, 16*1(%arg2 , %r11 , 1) + movdqu \XMM3, 16*2(%arg2 , %r11 , 1) + movdqu \XMM4, 16*3(%arg2 , %r11 , 1) + + add $64, %r11 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap + pxor \XMMDst, \XMM1 +# combine GHASHed value with the corresponding ciphertext + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap + +_initial_blocks_done\num_initial_blocks\operation: + +.endm + +/* +* encrypt 4 blocks at a time +* ghash the 4 previously encrypted ciphertext blocks +* arg1, %arg2, %arg3 are used as pointers only, not modified +* %r11 is the data offset value +*/ +.macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \ +TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation + + movdqa \XMM1, \XMM5 + movdqa \XMM2, \XMM6 + movdqa \XMM3, \XMM7 + movdqa \XMM4, \XMM8 + + movdqa SHUF_MASK(%rip), %xmm15 + # multiply TMP5 * HashKey using karatsuba + + movdqa \XMM5, \TMP4 + pshufd $78, \XMM5, \TMP6 + pxor \XMM5, \TMP6 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa HashKey_4(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 + 
movdqa \XMM0, \XMM1 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM2 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM3 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM4 + PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap + PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 + PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap + + pxor (%arg1), \XMM1 + pxor (%arg1), \XMM2 + pxor (%arg1), \XMM3 + pxor (%arg1), \XMM4 + movdqa HashKey_4_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) + movaps 0x10(%arg1), \TMP1 + AESENC \TMP1, \XMM1 # Round 1 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 + movaps 0x20(%arg1), \TMP1 + AESENC \TMP1, \XMM1 # Round 2 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 + movdqa \XMM6, \TMP1 + pshufd $78, \XMM6, \TMP2 + pxor \XMM6, \TMP2 + movdqa HashKey_3(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 + movaps 0x30(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 3 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0 + movaps 0x40(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 4 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + movdqa HashKey_3_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movaps 0x50(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 5 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + pxor \TMP1, \TMP4 +# accumulate the results in TMP4:XMM5, TMP6 holds the middle part + pxor \XMM6, \XMM5 + pxor \TMP2, \TMP6 + movdqa \XMM7, \TMP1 + pshufd $78, \XMM7, \TMP2 + pxor \XMM7, \TMP2 + movdqa HashKey_2(%rsp ), \TMP5 + + # Multiply TMP5 * HashKey using karatsuba + + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + movaps 0x60(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 6 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0 + movaps 0x70(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 7 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + movdqa HashKey_2_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movaps 0x80(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 8 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + pxor \TMP1, \TMP4 +# accumulate the results in TMP4:XMM5, TMP6 holds the middle part + pxor \XMM7, \XMM5 + pxor \TMP2, \TMP6 + + # Multiply XMM8 * HashKey + # XMM8 and TMP5 hold the values for the two operands + + movdqa \XMM8, \TMP1 + pshufd $78, \XMM8, \TMP2 + pxor \XMM8, \TMP2 + movdqa HashKey(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + movaps 0x90(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 9 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 + movaps 0xa0(%arg1), \TMP3 + AESENCLAST \TMP3, \XMM1 # Round 10 + AESENCLAST \TMP3, \XMM2 + AESENCLAST \TMP3, \XMM3 + AESENCLAST \TMP3, \XMM4 + movdqa HashKey_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movdqu (%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK + movdqu 16(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK + movdqu 32(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK + movdqu 48(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK + movdqu \XMM1, (%arg2,%r11,1) # Write to the ciphertext buffer + movdqu \XMM2, 
16(%arg2,%r11,1) # Write to the ciphertext buffer + movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer + movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer + PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap + + pxor \TMP4, \TMP1 + pxor \XMM8, \XMM5 + pxor \TMP6, \TMP2 + pxor \TMP1, \TMP2 + pxor \XMM5, \TMP2 + movdqa \TMP2, \TMP3 + pslldq $8, \TMP3 # left shift TMP3 2 DWs + psrldq $8, \TMP2 # right shift TMP2 2 DWs + pxor \TMP3, \XMM5 + pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 + + # first phase of reduction + + movdqa \XMM5, \TMP2 + movdqa \XMM5, \TMP3 + movdqa \XMM5, \TMP4 +# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently + pslld $31, \TMP2 # packed right shift << 31 + pslld $30, \TMP3 # packed right shift << 30 + pslld $25, \TMP4 # packed right shift << 25 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + movdqa \TMP2, \TMP5 + psrldq $4, \TMP5 # right shift T5 1 DW + pslldq $12, \TMP2 # left shift T2 3 DWs + pxor \TMP2, \XMM5 + + # second phase of reduction + + movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 + movdqa \XMM5,\TMP3 + movdqa \XMM5,\TMP4 + psrld $1, \TMP2 # packed left shift >>1 + psrld $2, \TMP3 # packed left shift >>2 + psrld $7, \TMP4 # packed left shift >>7 + pxor \TMP3,\TMP2 # xor the shifted versions + pxor \TMP4,\TMP2 + pxor \TMP5, \TMP2 + pxor \TMP2, \XMM5 + pxor \TMP1, \XMM5 # result is in TMP1 + + pxor \XMM5, \XMM1 +.endm + +/* +* decrypt 4 blocks at a time +* ghash the 4 previously decrypted ciphertext blocks +* arg1, %arg2, %arg3 are used as pointers only, not modified +* %r11 is the data offset value +*/ +.macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \ +TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation + + movdqa \XMM1, \XMM5 + movdqa \XMM2, \XMM6 + movdqa \XMM3, \XMM7 + movdqa \XMM4, \XMM8 + + movdqa SHUF_MASK(%rip), %xmm15 + # multiply TMP5 * HashKey using karatsuba + + movdqa \XMM5, \TMP4 + pshufd $78, \XMM5, \TMP6 + pxor \XMM5, \TMP6 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa HashKey_4(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 + movdqa \XMM0, \XMM1 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM2 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM3 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM4 + PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap + PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 + PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap + + pxor (%arg1), \XMM1 + pxor (%arg1), \XMM2 + pxor (%arg1), \XMM3 + pxor (%arg1), \XMM4 + movdqa HashKey_4_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) + movaps 0x10(%arg1), \TMP1 + AESENC \TMP1, \XMM1 # Round 1 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 + movaps 0x20(%arg1), \TMP1 + AESENC \TMP1, \XMM1 # Round 2 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 + movdqa \XMM6, \TMP1 + pshufd $78, \XMM6, \TMP2 + pxor \XMM6, \TMP2 + movdqa HashKey_3(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 + movaps 0x30(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 3 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0 + movaps 0x40(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # 
Round 4 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + movdqa HashKey_3_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movaps 0x50(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 5 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + pxor \TMP1, \TMP4 +# accumulate the results in TMP4:XMM5, TMP6 holds the middle part + pxor \XMM6, \XMM5 + pxor \TMP2, \TMP6 + movdqa \XMM7, \TMP1 + pshufd $78, \XMM7, \TMP2 + pxor \XMM7, \TMP2 + movdqa HashKey_2(%rsp ), \TMP5 + + # Multiply TMP5 * HashKey using karatsuba + + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + movaps 0x60(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 6 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0 + movaps 0x70(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 7 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + movdqa HashKey_2_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movaps 0x80(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 8 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + pxor \TMP1, \TMP4 +# accumulate the results in TMP4:XMM5, TMP6 holds the middle part + pxor \XMM7, \XMM5 + pxor \TMP2, \TMP6 + + # Multiply XMM8 * HashKey + # XMM8 and TMP5 hold the values for the two operands + + movdqa \XMM8, \TMP1 + pshufd $78, \XMM8, \TMP2 + pxor \XMM8, \TMP2 + movdqa HashKey(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + movaps 0x90(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 9 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 + movaps 0xa0(%arg1), \TMP3 + AESENCLAST \TMP3, \XMM1 # Round 10 + AESENCLAST \TMP3, \XMM2 + AESENCLAST \TMP3, \XMM3 + AESENCLAST \TMP3, \XMM4 + movdqa HashKey_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movdqu (%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK + movdqu \XMM1, (%arg2,%r11,1) # Write to plaintext buffer + movdqa \TMP3, \XMM1 + movdqu 16(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK + movdqu \XMM2, 16(%arg2,%r11,1) # Write to plaintext buffer + movdqa \TMP3, \XMM2 + movdqu 32(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK + movdqu \XMM3, 32(%arg2,%r11,1) # Write to plaintext buffer + movdqa \TMP3, \XMM3 + movdqu 48(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK + movdqu \XMM4, 48(%arg2,%r11,1) # Write to plaintext buffer + movdqa \TMP3, \XMM4 + PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap + + pxor \TMP4, \TMP1 + pxor \XMM8, \XMM5 + pxor \TMP6, \TMP2 + pxor \TMP1, \TMP2 + pxor \XMM5, \TMP2 + movdqa \TMP2, \TMP3 + pslldq $8, \TMP3 # left shift TMP3 2 DWs + psrldq $8, \TMP2 # right shift TMP2 2 DWs + pxor \TMP3, \XMM5 + pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 + + # first phase of reduction + + movdqa \XMM5, \TMP2 + movdqa \XMM5, \TMP3 + movdqa \XMM5, \TMP4 +# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently + pslld $31, \TMP2 # packed right shift << 31 + pslld $30, \TMP3 # packed right shift << 30 + pslld $25, \TMP4 # packed right shift << 25 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + movdqa \TMP2, \TMP5 + psrldq $4, \TMP5 # right shift T5 1 DW + pslldq $12, \TMP2 # left shift T2 3 DWs + pxor \TMP2, \XMM5 + + 
# second phase of reduction + + movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 + movdqa \XMM5,\TMP3 + movdqa \XMM5,\TMP4 + psrld $1, \TMP2 # packed left shift >>1 + psrld $2, \TMP3 # packed left shift >>2 + psrld $7, \TMP4 # packed left shift >>7 + pxor \TMP3,\TMP2 # xor the shifted versions + pxor \TMP4,\TMP2 + pxor \TMP5, \TMP2 + pxor \TMP2, \XMM5 + pxor \TMP1, \XMM5 # result is in TMP1 + + pxor \XMM5, \XMM1 +.endm + +/* GHASH the last 4 ciphertext blocks. */ +.macro GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \ +TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst + + # Multiply TMP6 * HashKey (using Karatsuba) + + movdqa \XMM1, \TMP6 + pshufd $78, \XMM1, \TMP2 + pxor \XMM1, \TMP2 + movdqa HashKey_4(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1 + PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0 + movdqa HashKey_4_k(%rsp), \TMP4 + PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movdqa \XMM1, \XMMDst + movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1 + + # Multiply TMP1 * HashKey (using Karatsuba) + + movdqa \XMM2, \TMP1 + pshufd $78, \XMM2, \TMP2 + pxor \XMM2, \TMP2 + movdqa HashKey_3(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0 + movdqa HashKey_3_k(%rsp), \TMP4 + PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + pxor \TMP1, \TMP6 + pxor \XMM2, \XMMDst + pxor \TMP2, \XMM1 +# results accumulated in TMP6, XMMDst, XMM1 + + # Multiply TMP1 * HashKey (using Karatsuba) + + movdqa \XMM3, \TMP1 + pshufd $78, \XMM3, \TMP2 + pxor \XMM3, \TMP2 + movdqa HashKey_2(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0 + movdqa HashKey_2_k(%rsp), \TMP4 + PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + pxor \TMP1, \TMP6 + pxor \XMM3, \XMMDst + pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1 + + # Multiply TMP1 * HashKey (using Karatsuba) + movdqa \XMM4, \TMP1 + pshufd $78, \XMM4, \TMP2 + pxor \XMM4, \TMP2 + movdqa HashKey(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0 + movdqa HashKey_k(%rsp), \TMP4 + PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + pxor \TMP1, \TMP6 + pxor \XMM4, \XMMDst + pxor \XMM1, \TMP2 + pxor \TMP6, \TMP2 + pxor \XMMDst, \TMP2 + # middle section of the temp results combined as in karatsuba algorithm + movdqa \TMP2, \TMP4 + pslldq $8, \TMP4 # left shift TMP4 2 DWs + psrldq $8, \TMP2 # right shift TMP2 2 DWs + pxor \TMP4, \XMMDst + pxor \TMP2, \TMP6 +# TMP6:XMMDst holds the result of the accumulated carry-less multiplications + # first phase of the reduction + movdqa \XMMDst, \TMP2 + movdqa \XMMDst, \TMP3 + movdqa \XMMDst, \TMP4 +# move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently + pslld $31, \TMP2 # packed right shifting << 31 + pslld $30, \TMP3 # packed right shifting << 30 + pslld $25, \TMP4 # packed right shifting << 25 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + movdqa \TMP2, \TMP7 + psrldq $4, \TMP7 # right shift TMP7 1 DW + pslldq $12, \TMP2 # left shift TMP2 3 DWs + pxor \TMP2, \XMMDst + + # second phase of the reduction + movdqa \XMMDst, \TMP2 + # make 3 copies of XMMDst for doing 3 shift operations + movdqa \XMMDst, \TMP3 + movdqa \XMMDst, \TMP4 + psrld $1, \TMP2 # packed left shift >> 1 + psrld $2, \TMP3 # packed left shift >> 2 + psrld $7, \TMP4 # packed left shift >> 7 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + pxor \TMP7, \TMP2 + pxor \TMP2, \XMMDst + pxor \TMP6, \XMMDst # 
reduced result is in XMMDst +.endm + +/* Encryption of a single block done*/ +.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1 + + pxor (%arg1), \XMM0 + movaps 16(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 32(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 48(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 64(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 80(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 96(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 112(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 128(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 144(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 160(%arg1), \TMP1 + AESENCLAST \TMP1, \XMM0 +.endm + + +/***************************************************************************** +* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* u8 *out, // Plaintext output. Encrypt in-place is allowed. +* const u8 *in, // Ciphertext input +* u64 plaintext_len, // Length of data in bytes for decryption. +* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) +* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) +* // concatenated with 0x00000001. 16-byte aligned pointer. +* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. +* const u8 *aad, // Additional Authentication Data (AAD) +* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes +* u8 *auth_tag, // Authenticated Tag output. The driver will compare this to the +* // given authentication tag and only return the plaintext if they match. +* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 +* // (most likely), 12 or 8. +* +* Assumptions: +* +* keys: +* keys are pre-expanded and aligned to 16 bytes. we are using the first +* set of 11 keys in the data structure void *aes_ctx +* +* iv: +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | Salt (From the SA) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | Initialization Vector | +* | (This is the sequence number from IPSec header) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x1 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* +* +* AAD: +* AAD padded to 128 bits with 0 +* for example, assume AAD is a u32 vector +* +* if AAD is 8 bytes: +* AAD[3] = {A0, A1}; +* padded AAD in xmm register = {A1 A0 0 0} +* +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | SPI (A1) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 32-bit Sequence Number (A0) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x0 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* AAD Format with 32-bit Sequence Number +* +* if AAD is 12 bytes: +* AAD[3] = {A0, A1, A2}; +* padded AAD in xmm register = {A2 A1 A0 0} +* +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | SPI (A2) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 64-bit Extended Sequence Number {A1,A0} | +* | | +* 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x0 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* AAD Format with 64-bit Extended Sequence Number +* +* aadLen: +* from the definition of the spec, aadLen can only be 8 or 12 bytes. +* The code supports 16 too but for other sizes, the code will fail. +* +* TLen: +* from the definition of the spec, TLen can only be 8, 12 or 16 bytes. +* For other sizes, the code will fail. +* +* poly = x^128 + x^127 + x^126 + x^121 + 1 +* +*****************************************************************************/ + +ENTRY(aesni_gcm_dec) + push %r12 + push %r13 + push %r14 + mov %rsp, %r14 +/* +* states of %xmm registers %xmm6:%xmm15 not saved +* all %xmm registers are clobbered +*/ + sub $VARIABLE_OFFSET, %rsp + and $~63, %rsp # align rsp to 64 bytes + mov %arg6, %r12 + movdqu (%r12), %xmm13 # %xmm13 = HashKey + movdqa SHUF_MASK(%rip), %xmm2 + PSHUFB_XMM %xmm2, %xmm13 + + +# Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH) + + movdqa %xmm13, %xmm2 + psllq $1, %xmm13 + psrlq $63, %xmm2 + movdqa %xmm2, %xmm1 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + por %xmm2, %xmm13 + + # Reduction + + pshufd $0x24, %xmm1, %xmm2 + pcmpeqd TWOONE(%rip), %xmm2 + pand POLY(%rip), %xmm2 + pxor %xmm2, %xmm13 # %xmm13 holds the HashKey<<1 (mod poly) + + + # Decrypt first few blocks + + movdqa %xmm13, HashKey(%rsp) # store HashKey<<1 (mod poly) + mov %arg4, %r13 # save the number of bytes of plaintext/ciphertext + and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) + mov %r13, %r12 + and $(3<<4), %r12 + jz _initial_num_blocks_is_0_decrypt + cmp $(2<<4), %r12 + jb _initial_num_blocks_is_1_decrypt + je _initial_num_blocks_is_2_decrypt +_initial_num_blocks_is_3_decrypt: + INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec + sub $48, %r13 + jmp _initial_blocks_decrypted +_initial_num_blocks_is_2_decrypt: + INITIAL_BLOCKS_DEC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec + sub $32, %r13 + jmp _initial_blocks_decrypted +_initial_num_blocks_is_1_decrypt: + INITIAL_BLOCKS_DEC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec + sub $16, %r13 + jmp _initial_blocks_decrypted +_initial_num_blocks_is_0_decrypt: + INITIAL_BLOCKS_DEC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec +_initial_blocks_decrypted: + cmp $0, %r13 + je _zero_cipher_left_decrypt + sub $64, %r13 + je _four_cipher_left_decrypt +_decrypt_by_4: + GHASH_4_ENCRYPT_4_PARALLEL_DEC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ +%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec + add $64, %r11 + sub $64, %r13 + jne _decrypt_by_4 +_four_cipher_left_decrypt: + GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ +%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 +_zero_cipher_left_decrypt: + mov %arg4, %r13 + and $15, %r13 # %r13 = arg4 (mod 16) + je _multiple_of_16_bytes_decrypt + + # Handle the last <16 byte block separately + + paddd ONE(%rip), %xmm0 # increment CNT to get Yn + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm0 + + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) + sub $16, %r11 + add %r13, %r11 + movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte block + lea SHIFT_MASK+16(%rip), %r12 + sub %r13, %r12 +# adjust the shuffle mask pointer to be able to 
shift 16-%r13 bytes +# (%r13 is the number of bytes in plaintext mod 16) + movdqu (%r12), %xmm2 # get the appropriate shuffle mask + PSHUFB_XMM %xmm2, %xmm1 # right shift 16-%r13 butes + + movdqa %xmm1, %xmm2 + pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn) + movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0 + pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0 + pand %xmm1, %xmm2 + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10 ,%xmm2 + + pxor %xmm2, %xmm8 + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + # GHASH computation for the last <16 byte block + sub %r13, %r11 + add $16, %r11 + + # output %r13 bytes + MOVQ_R64_XMM %xmm0, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left_decrypt + mov %rax, (%arg2 , %r11, 1) + add $8, %r11 + psrldq $8, %xmm0 + MOVQ_R64_XMM %xmm0, %rax + sub $8, %r13 +_less_than_8_bytes_left_decrypt: + mov %al, (%arg2, %r11, 1) + add $1, %r11 + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left_decrypt +_multiple_of_16_bytes_decrypt: + mov arg8, %r12 # %r13 = aadLen (number of bytes) + shl $3, %r12 # convert into number of bits + movd %r12d, %xmm15 # len(A) in %xmm15 + shl $3, %arg4 # len(C) in bits (*128) + MOVQ_R64_XMM %arg4, %xmm1 + pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 + pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) + pxor %xmm15, %xmm8 + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + # final GHASH computation + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm8 + + mov %arg5, %rax # %rax = *Y0 + movdqu (%rax), %xmm0 # %xmm0 = Y0 + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) + pxor %xmm8, %xmm0 +_return_T_decrypt: + mov arg9, %r10 # %r10 = authTag + mov arg10, %r11 # %r11 = auth_tag_len + cmp $16, %r11 + je _T_16_decrypt + cmp $12, %r11 + je _T_12_decrypt +_T_8_decrypt: + MOVQ_R64_XMM %xmm0, %rax + mov %rax, (%r10) + jmp _return_T_done_decrypt +_T_12_decrypt: + MOVQ_R64_XMM %xmm0, %rax + mov %rax, (%r10) + psrldq $8, %xmm0 + movd %xmm0, %eax + mov %eax, 8(%r10) + jmp _return_T_done_decrypt +_T_16_decrypt: + movdqu %xmm0, (%r10) +_return_T_done_decrypt: + mov %r14, %rsp + pop %r14 + pop %r13 + pop %r12 + ret + + +/***************************************************************************** +* void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* u8 *out, // Ciphertext output. Encrypt in-place is allowed. +* const u8 *in, // Plaintext input +* u64 plaintext_len, // Length of data in bytes for encryption. +* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) +* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) +* // concatenated with 0x00000001. 16-byte aligned pointer. +* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. +* const u8 *aad, // Additional Authentication Data (AAD) +* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes +* u8 *auth_tag, // Authenticated Tag output. +* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), +* // 12 or 8. +* +* Assumptions: +* +* keys: +* keys are pre-expanded and aligned to 16 bytes. 
we are using the +* first set of 11 keys in the data structure void *aes_ctx +* +* +* iv: +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | Salt (From the SA) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | Initialization Vector | +* | (This is the sequence number from IPSec header) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x1 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* +* +* AAD: +* AAD padded to 128 bits with 0 +* for example, assume AAD is a u32 vector +* +* if AAD is 8 bytes: +* AAD[3] = {A0, A1}; +* padded AAD in xmm register = {A1 A0 0 0} +* +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | SPI (A1) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 32-bit Sequence Number (A0) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x0 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* AAD Format with 32-bit Sequence Number +* +* if AAD is 12 bytes: +* AAD[3] = {A0, A1, A2}; +* padded AAD in xmm register = {A2 A1 A0 0} +* +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | SPI (A2) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 64-bit Extended Sequence Number {A1,A0} | +* | | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x0 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* AAD Format with 64-bit Extended Sequence Number +* +* aadLen: +* from the definition of the spec, aadLen can only be 8 or 12 bytes. +* The code supports 16 too but for other sizes, the code will fail. +* +* TLen: +* from the definition of the spec, TLen can only be 8, 12 or 16 bytes. +* For other sizes, the code will fail. 
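+*
+* For illustration, the way the RFC4106 glue code in
+* aesni-intel_glue.c assembles the pre-counter block j0 and calls
+* this routine can be sketched in C (nonce and esp_iv are
+* illustrative names for the 4-byte SA salt and the 8-byte ESP IV;
+* the remaining names match the prototype above):
+*
+*	u8 j0[16];                          (16-byte aligned in practice)
+*	memcpy(j0, nonce, 4);               (4-byte salt from the SA)
+*	memcpy(j0 + 4, esp_iv, 8);          (8-byte IV from the ESP payload)
+*	*(__be32 *)(j0 + 12) = cpu_to_be32(1);
+*	aesni_gcm_enc(aes_ctx, out, in, plaintext_len, j0, hash_subkey,
+*	              aad, aad_len, auth_tag, auth_tag_len);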
+* +* poly = x^128 + x^127 + x^126 + x^121 + 1 +***************************************************************************/ +ENTRY(aesni_gcm_enc) + push %r12 + push %r13 + push %r14 + mov %rsp, %r14 +# +# states of %xmm registers %xmm6:%xmm15 not saved +# all %xmm registers are clobbered +# + sub $VARIABLE_OFFSET, %rsp + and $~63, %rsp + mov %arg6, %r12 + movdqu (%r12), %xmm13 + movdqa SHUF_MASK(%rip), %xmm2 + PSHUFB_XMM %xmm2, %xmm13 + + +# precompute HashKey<<1 mod poly from the HashKey (required for GHASH) + + movdqa %xmm13, %xmm2 + psllq $1, %xmm13 + psrlq $63, %xmm2 + movdqa %xmm2, %xmm1 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + por %xmm2, %xmm13 + + # reduce HashKey<<1 + + pshufd $0x24, %xmm1, %xmm2 + pcmpeqd TWOONE(%rip), %xmm2 + pand POLY(%rip), %xmm2 + pxor %xmm2, %xmm13 + movdqa %xmm13, HashKey(%rsp) + mov %arg4, %r13 # %xmm13 holds HashKey<<1 (mod poly) + and $-16, %r13 + mov %r13, %r12 + + # Encrypt first few blocks + + and $(3<<4), %r12 + jz _initial_num_blocks_is_0_encrypt + cmp $(2<<4), %r12 + jb _initial_num_blocks_is_1_encrypt + je _initial_num_blocks_is_2_encrypt +_initial_num_blocks_is_3_encrypt: + INITIAL_BLOCKS_ENC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc + sub $48, %r13 + jmp _initial_blocks_encrypted +_initial_num_blocks_is_2_encrypt: + INITIAL_BLOCKS_ENC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc + sub $32, %r13 + jmp _initial_blocks_encrypted +_initial_num_blocks_is_1_encrypt: + INITIAL_BLOCKS_ENC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc + sub $16, %r13 + jmp _initial_blocks_encrypted +_initial_num_blocks_is_0_encrypt: + INITIAL_BLOCKS_ENC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc +_initial_blocks_encrypted: + + # Main loop - Encrypt remaining blocks + + cmp $0, %r13 + je _zero_cipher_left_encrypt + sub $64, %r13 + je _four_cipher_left_encrypt +_encrypt_by_4_encrypt: + GHASH_4_ENCRYPT_4_PARALLEL_ENC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ +%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc + add $64, %r11 + sub $64, %r13 + jne _encrypt_by_4_encrypt +_four_cipher_left_encrypt: + GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ +%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 +_zero_cipher_left_encrypt: + mov %arg4, %r13 + and $15, %r13 # %r13 = arg4 (mod 16) + je _multiple_of_16_bytes_encrypt + + # Handle the last <16 Byte block separately + paddd ONE(%rip), %xmm0 # INCR CNT to get Yn + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm0 + + + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) + sub $16, %r11 + add %r13, %r11 + movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte blocks + lea SHIFT_MASK+16(%rip), %r12 + sub %r13, %r12 + # adjust the shuffle mask pointer to be able to shift 16-r13 bytes + # (%r13 is the number of bytes in plaintext mod 16) + movdqu (%r12), %xmm2 # get the appropriate shuffle mask + PSHUFB_XMM %xmm2, %xmm1 # shift right 16-r13 byte + pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn) + movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out top 16-r13 bytes of xmm0 + pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10,%xmm0 + + pxor %xmm0, %xmm8 + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + # GHASH computation for the last <16 
byte block + sub %r13, %r11 + add $16, %r11 + + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm0 + + # shuffle xmm0 back to output as ciphertext + + # Output %r13 bytes + MOVQ_R64_XMM %xmm0, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left_encrypt + mov %rax, (%arg2 , %r11, 1) + add $8, %r11 + psrldq $8, %xmm0 + MOVQ_R64_XMM %xmm0, %rax + sub $8, %r13 +_less_than_8_bytes_left_encrypt: + mov %al, (%arg2, %r11, 1) + add $1, %r11 + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left_encrypt +_multiple_of_16_bytes_encrypt: + mov arg8, %r12 # %r12 = addLen (number of bytes) + shl $3, %r12 + movd %r12d, %xmm15 # len(A) in %xmm15 + shl $3, %arg4 # len(C) in bits (*128) + MOVQ_R64_XMM %arg4, %xmm1 + pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 + pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) + pxor %xmm15, %xmm8 + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + # final GHASH computation + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm8 # perform a 16 byte swap + + mov %arg5, %rax # %rax = *Y0 + movdqu (%rax), %xmm0 # %xmm0 = Y0 + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0) + pxor %xmm8, %xmm0 +_return_T_encrypt: + mov arg9, %r10 # %r10 = authTag + mov arg10, %r11 # %r11 = auth_tag_len + cmp $16, %r11 + je _T_16_encrypt + cmp $12, %r11 + je _T_12_encrypt +_T_8_encrypt: + MOVQ_R64_XMM %xmm0, %rax + mov %rax, (%r10) + jmp _return_T_done_encrypt +_T_12_encrypt: + MOVQ_R64_XMM %xmm0, %rax + mov %rax, (%r10) + psrldq $8, %xmm0 + movd %xmm0, %eax + mov %eax, 8(%r10) + jmp _return_T_done_encrypt +_T_16_encrypt: + movdqu %xmm0, (%r10) +_return_T_done_encrypt: + mov %r14, %rsp + pop %r14 + pop %r13 + pop %r12 + ret + +#endif + + +_key_expansion_128: +_key_expansion_256a: + pshufd $0b11111111, %xmm1, %xmm1 + shufps $0b00010000, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + shufps $0b10001100, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + pxor %xmm1, %xmm0 + movaps %xmm0, (TKEYP) + add $0x10, TKEYP + ret + +.align 4 +_key_expansion_192a: + pshufd $0b01010101, %xmm1, %xmm1 + shufps $0b00010000, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + shufps $0b10001100, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + pxor %xmm1, %xmm0 + + movaps %xmm2, %xmm5 + movaps %xmm2, %xmm6 + pslldq $4, %xmm5 + pshufd $0b11111111, %xmm0, %xmm3 + pxor %xmm3, %xmm2 + pxor %xmm5, %xmm2 + + movaps %xmm0, %xmm1 + shufps $0b01000100, %xmm0, %xmm6 + movaps %xmm6, (TKEYP) + shufps $0b01001110, %xmm2, %xmm1 + movaps %xmm1, 0x10(TKEYP) + add $0x20, TKEYP + ret + +.align 4 +_key_expansion_192b: + pshufd $0b01010101, %xmm1, %xmm1 + shufps $0b00010000, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + shufps $0b10001100, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + pxor %xmm1, %xmm0 + + movaps %xmm2, %xmm5 + pslldq $4, %xmm5 + pshufd $0b11111111, %xmm0, %xmm3 + pxor %xmm3, %xmm2 + pxor %xmm5, %xmm2 + + movaps %xmm0, (TKEYP) + add $0x10, TKEYP + ret + +.align 4 +_key_expansion_256b: + pshufd $0b10101010, %xmm1, %xmm1 + shufps $0b00010000, %xmm2, %xmm4 + pxor %xmm4, %xmm2 + shufps $0b10001100, %xmm2, %xmm4 + pxor %xmm4, %xmm2 + pxor %xmm1, %xmm2 + movaps %xmm2, (TKEYP) + add $0x10, TKEYP + ret + +/* + * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, + * unsigned int key_len) + */ +ENTRY(aesni_set_key) +#ifndef __x86_64__ + pushl KEYP + movl 8(%esp), KEYP # ctx + movl 12(%esp), UKEYP # in_key + movl 16(%esp), %edx # key_len +#endif + movups (UKEYP), %xmm0 # user key (first 16 bytes) + movaps %xmm0, (KEYP) + lea 0x10(KEYP), TKEYP # key addr + movl %edx, 480(KEYP) + pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x + cmp $24, %dl + jb 
.Lenc_key128 + je .Lenc_key192 + movups 0x10(UKEYP), %xmm2 # other user key + movaps %xmm2, (TKEYP) + add $0x10, TKEYP + AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 + call _key_expansion_256a + AESKEYGENASSIST 0x1 %xmm0 %xmm1 + call _key_expansion_256b + AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 + call _key_expansion_256a + AESKEYGENASSIST 0x2 %xmm0 %xmm1 + call _key_expansion_256b + AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 + call _key_expansion_256a + AESKEYGENASSIST 0x4 %xmm0 %xmm1 + call _key_expansion_256b + AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 + call _key_expansion_256a + AESKEYGENASSIST 0x8 %xmm0 %xmm1 + call _key_expansion_256b + AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 + call _key_expansion_256a + AESKEYGENASSIST 0x10 %xmm0 %xmm1 + call _key_expansion_256b + AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 + call _key_expansion_256a + AESKEYGENASSIST 0x20 %xmm0 %xmm1 + call _key_expansion_256b + AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 + call _key_expansion_256a + jmp .Ldec_key +.Lenc_key192: + movq 0x10(UKEYP), %xmm2 # other user key + AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 + call _key_expansion_192a + AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 + call _key_expansion_192b + AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 + call _key_expansion_192a + AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 + call _key_expansion_192b + AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 + call _key_expansion_192a + AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 + call _key_expansion_192b + AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 + call _key_expansion_192a + AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8 + call _key_expansion_192b + jmp .Ldec_key +.Lenc_key128: + AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1 + call _key_expansion_128 + AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2 + call _key_expansion_128 + AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3 + call _key_expansion_128 + AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4 + call _key_expansion_128 + AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5 + call _key_expansion_128 + AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6 + call _key_expansion_128 + AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7 + call _key_expansion_128 + AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8 + call _key_expansion_128 + AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9 + call _key_expansion_128 + AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 + call _key_expansion_128 +.Ldec_key: + sub $0x10, TKEYP + movaps (KEYP), %xmm0 + movaps (TKEYP), %xmm1 + movaps %xmm0, 240(TKEYP) + movaps %xmm1, 240(KEYP) + add $0x10, KEYP + lea 240-16(TKEYP), UKEYP +.align 4 +.Ldec_key_loop: + movaps (KEYP), %xmm0 + AESIMC %xmm0 %xmm1 + movaps %xmm1, (UKEYP) + add $0x10, KEYP + sub $0x10, UKEYP + cmp TKEYP, KEYP + jb .Ldec_key_loop + xor AREG, AREG +#ifndef __x86_64__ + popl KEYP +#endif + ret + +/* + * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) + */ +ENTRY(aesni_enc) +#ifndef __x86_64__ + pushl KEYP + pushl KLEN + movl 12(%esp), KEYP + movl 16(%esp), OUTP + movl 20(%esp), INP +#endif + movl 480(KEYP), KLEN # key length + movups (INP), STATE # input + call _aesni_enc1 + movups STATE, (OUTP) # output +#ifndef __x86_64__ + popl KLEN + popl KEYP +#endif + ret + +/* + * _aesni_enc1: internal ABI + * input: + * KEYP: key struct pointer + * KLEN: round count + * STATE: initial state (input) + * output: + * STATE: finial state (output) + * changed: + * KEY + * TKEYP (T1) + */ +.align 4 +_aesni_enc1: + movaps (KEYP), KEY # key + mov KEYP, TKEYP + pxor KEY, STATE # round 0 + add $0x30, TKEYP + cmp $24, KLEN + jb .Lenc128 + lea 
0x20(TKEYP), TKEYP + je .Lenc192 + add $0x20, TKEYP + movaps -0x60(TKEYP), KEY + AESENC KEY STATE + movaps -0x50(TKEYP), KEY + AESENC KEY STATE +.align 4 +.Lenc192: + movaps -0x40(TKEYP), KEY + AESENC KEY STATE + movaps -0x30(TKEYP), KEY + AESENC KEY STATE +.align 4 +.Lenc128: + movaps -0x20(TKEYP), KEY + AESENC KEY STATE + movaps -0x10(TKEYP), KEY + AESENC KEY STATE + movaps (TKEYP), KEY + AESENC KEY STATE + movaps 0x10(TKEYP), KEY + AESENC KEY STATE + movaps 0x20(TKEYP), KEY + AESENC KEY STATE + movaps 0x30(TKEYP), KEY + AESENC KEY STATE + movaps 0x40(TKEYP), KEY + AESENC KEY STATE + movaps 0x50(TKEYP), KEY + AESENC KEY STATE + movaps 0x60(TKEYP), KEY + AESENC KEY STATE + movaps 0x70(TKEYP), KEY + AESENCLAST KEY STATE + ret + +/* + * _aesni_enc4: internal ABI + * input: + * KEYP: key struct pointer + * KLEN: round count + * STATE1: initial state (input) + * STATE2 + * STATE3 + * STATE4 + * output: + * STATE1: finial state (output) + * STATE2 + * STATE3 + * STATE4 + * changed: + * KEY + * TKEYP (T1) + */ +.align 4 +_aesni_enc4: + movaps (KEYP), KEY # key + mov KEYP, TKEYP + pxor KEY, STATE1 # round 0 + pxor KEY, STATE2 + pxor KEY, STATE3 + pxor KEY, STATE4 + add $0x30, TKEYP + cmp $24, KLEN + jb .L4enc128 + lea 0x20(TKEYP), TKEYP + je .L4enc192 + add $0x20, TKEYP + movaps -0x60(TKEYP), KEY + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 + movaps -0x50(TKEYP), KEY + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 +#.align 4 +.L4enc192: + movaps -0x40(TKEYP), KEY + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 + movaps -0x30(TKEYP), KEY + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 +#.align 4 +.L4enc128: + movaps -0x20(TKEYP), KEY + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 + movaps -0x10(TKEYP), KEY + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 + movaps (TKEYP), KEY + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 + movaps 0x10(TKEYP), KEY + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 + movaps 0x20(TKEYP), KEY + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 + movaps 0x30(TKEYP), KEY + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 + movaps 0x40(TKEYP), KEY + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 + movaps 0x50(TKEYP), KEY + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 + movaps 0x60(TKEYP), KEY + AESENC KEY STATE1 + AESENC KEY STATE2 + AESENC KEY STATE3 + AESENC KEY STATE4 + movaps 0x70(TKEYP), KEY + AESENCLAST KEY STATE1 # last round + AESENCLAST KEY STATE2 + AESENCLAST KEY STATE3 + AESENCLAST KEY STATE4 + ret + +/* + * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) + */ +ENTRY(aesni_dec) +#ifndef __x86_64__ + pushl KEYP + pushl KLEN + movl 12(%esp), KEYP + movl 16(%esp), OUTP + movl 20(%esp), INP +#endif + mov 480(KEYP), KLEN # key length + add $240, KEYP + movups (INP), STATE # input + call _aesni_dec1 + movups STATE, (OUTP) #output +#ifndef __x86_64__ + popl KLEN + popl KEYP +#endif + ret + +/* + * _aesni_dec1: internal ABI + * input: + * KEYP: key struct pointer + * KLEN: key length + * STATE: initial state (input) + * output: + * STATE: finial state (output) + * changed: + * KEY + * TKEYP (T1) + */ +.align 4 +_aesni_dec1: + movaps (KEYP), KEY # key + mov KEYP, TKEYP + pxor 
KEY, STATE # round 0 + add $0x30, TKEYP + cmp $24, KLEN + jb .Ldec128 + lea 0x20(TKEYP), TKEYP + je .Ldec192 + add $0x20, TKEYP + movaps -0x60(TKEYP), KEY + AESDEC KEY STATE + movaps -0x50(TKEYP), KEY + AESDEC KEY STATE +.align 4 +.Ldec192: + movaps -0x40(TKEYP), KEY + AESDEC KEY STATE + movaps -0x30(TKEYP), KEY + AESDEC KEY STATE +.align 4 +.Ldec128: + movaps -0x20(TKEYP), KEY + AESDEC KEY STATE + movaps -0x10(TKEYP), KEY + AESDEC KEY STATE + movaps (TKEYP), KEY + AESDEC KEY STATE + movaps 0x10(TKEYP), KEY + AESDEC KEY STATE + movaps 0x20(TKEYP), KEY + AESDEC KEY STATE + movaps 0x30(TKEYP), KEY + AESDEC KEY STATE + movaps 0x40(TKEYP), KEY + AESDEC KEY STATE + movaps 0x50(TKEYP), KEY + AESDEC KEY STATE + movaps 0x60(TKEYP), KEY + AESDEC KEY STATE + movaps 0x70(TKEYP), KEY + AESDECLAST KEY STATE + ret + +/* + * _aesni_dec4: internal ABI + * input: + * KEYP: key struct pointer + * KLEN: key length + * STATE1: initial state (input) + * STATE2 + * STATE3 + * STATE4 + * output: + * STATE1: finial state (output) + * STATE2 + * STATE3 + * STATE4 + * changed: + * KEY + * TKEYP (T1) + */ +.align 4 +_aesni_dec4: + movaps (KEYP), KEY # key + mov KEYP, TKEYP + pxor KEY, STATE1 # round 0 + pxor KEY, STATE2 + pxor KEY, STATE3 + pxor KEY, STATE4 + add $0x30, TKEYP + cmp $24, KLEN + jb .L4dec128 + lea 0x20(TKEYP), TKEYP + je .L4dec192 + add $0x20, TKEYP + movaps -0x60(TKEYP), KEY + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 + movaps -0x50(TKEYP), KEY + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 +.align 4 +.L4dec192: + movaps -0x40(TKEYP), KEY + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 + movaps -0x30(TKEYP), KEY + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 +.align 4 +.L4dec128: + movaps -0x20(TKEYP), KEY + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 + movaps -0x10(TKEYP), KEY + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 + movaps (TKEYP), KEY + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 + movaps 0x10(TKEYP), KEY + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 + movaps 0x20(TKEYP), KEY + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 + movaps 0x30(TKEYP), KEY + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 + movaps 0x40(TKEYP), KEY + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 + movaps 0x50(TKEYP), KEY + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 + movaps 0x60(TKEYP), KEY + AESDEC KEY STATE1 + AESDEC KEY STATE2 + AESDEC KEY STATE3 + AESDEC KEY STATE4 + movaps 0x70(TKEYP), KEY + AESDECLAST KEY STATE1 # last round + AESDECLAST KEY STATE2 + AESDECLAST KEY STATE3 + AESDECLAST KEY STATE4 + ret + +/* + * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len) + */ +ENTRY(aesni_ecb_enc) +#ifndef __x86_64__ + pushl LEN + pushl KEYP + pushl KLEN + movl 16(%esp), KEYP + movl 20(%esp), OUTP + movl 24(%esp), INP + movl 28(%esp), LEN +#endif + test LEN, LEN # check length + jz .Lecb_enc_ret + mov 480(KEYP), KLEN + cmp $16, LEN + jb .Lecb_enc_ret + cmp $64, LEN + jb .Lecb_enc_loop1 +.align 4 +.Lecb_enc_loop4: + movups (INP), STATE1 + movups 0x10(INP), STATE2 + movups 0x20(INP), STATE3 + movups 0x30(INP), STATE4 + call _aesni_enc4 + movups STATE1, (OUTP) + movups STATE2, 0x10(OUTP) + movups 
STATE3, 0x20(OUTP) + movups STATE4, 0x30(OUTP) + sub $64, LEN + add $64, INP + add $64, OUTP + cmp $64, LEN + jge .Lecb_enc_loop4 + cmp $16, LEN + jb .Lecb_enc_ret +.align 4 +.Lecb_enc_loop1: + movups (INP), STATE1 + call _aesni_enc1 + movups STATE1, (OUTP) + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lecb_enc_loop1 +.Lecb_enc_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN +#endif + ret + +/* + * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len); + */ +ENTRY(aesni_ecb_dec) +#ifndef __x86_64__ + pushl LEN + pushl KEYP + pushl KLEN + movl 16(%esp), KEYP + movl 20(%esp), OUTP + movl 24(%esp), INP + movl 28(%esp), LEN +#endif + test LEN, LEN + jz .Lecb_dec_ret + mov 480(KEYP), KLEN + add $240, KEYP + cmp $16, LEN + jb .Lecb_dec_ret + cmp $64, LEN + jb .Lecb_dec_loop1 +.align 4 +.Lecb_dec_loop4: + movups (INP), STATE1 + movups 0x10(INP), STATE2 + movups 0x20(INP), STATE3 + movups 0x30(INP), STATE4 + call _aesni_dec4 + movups STATE1, (OUTP) + movups STATE2, 0x10(OUTP) + movups STATE3, 0x20(OUTP) + movups STATE4, 0x30(OUTP) + sub $64, LEN + add $64, INP + add $64, OUTP + cmp $64, LEN + jge .Lecb_dec_loop4 + cmp $16, LEN + jb .Lecb_dec_ret +.align 4 +.Lecb_dec_loop1: + movups (INP), STATE1 + call _aesni_dec1 + movups STATE1, (OUTP) + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lecb_dec_loop1 +.Lecb_dec_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN +#endif + ret + +/* + * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len, u8 *iv) + */ +ENTRY(aesni_cbc_enc) +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl 20(%esp), KEYP + movl 24(%esp), OUTP + movl 28(%esp), INP + movl 32(%esp), LEN + movl 36(%esp), IVP +#endif + cmp $16, LEN + jb .Lcbc_enc_ret + mov 480(KEYP), KLEN + movups (IVP), STATE # load iv as initial state +.align 4 +.Lcbc_enc_loop: + movups (INP), IN # load input + pxor IN, STATE + call _aesni_enc1 + movups STATE, (OUTP) # store output + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lcbc_enc_loop + movups STATE, (IVP) +.Lcbc_enc_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif + ret + +/* + * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len, u8 *iv) + */ +ENTRY(aesni_cbc_dec) +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl 20(%esp), KEYP + movl 24(%esp), OUTP + movl 28(%esp), INP + movl 32(%esp), LEN + movl 36(%esp), IVP +#endif + cmp $16, LEN + jb .Lcbc_dec_just_ret + mov 480(KEYP), KLEN + add $240, KEYP + movups (IVP), IV + cmp $64, LEN + jb .Lcbc_dec_loop1 +.align 4 +.Lcbc_dec_loop4: + movups (INP), IN1 + movaps IN1, STATE1 + movups 0x10(INP), IN2 + movaps IN2, STATE2 +#ifdef __x86_64__ + movups 0x20(INP), IN3 + movaps IN3, STATE3 + movups 0x30(INP), IN4 + movaps IN4, STATE4 +#else + movups 0x20(INP), IN1 + movaps IN1, STATE3 + movups 0x30(INP), IN2 + movaps IN2, STATE4 +#endif + call _aesni_dec4 + pxor IV, STATE1 +#ifdef __x86_64__ + pxor IN1, STATE2 + pxor IN2, STATE3 + pxor IN3, STATE4 + movaps IN4, IV +#else + pxor IN1, STATE4 + movaps IN2, IV + movups (INP), IN1 + pxor IN1, STATE2 + movups 0x10(INP), IN2 + pxor IN2, STATE3 +#endif + movups STATE1, (OUTP) + movups STATE2, 0x10(OUTP) + movups STATE3, 0x20(OUTP) + movups STATE4, 0x30(OUTP) + sub $64, LEN + add $64, INP + add $64, OUTP + cmp $64, LEN + jge .Lcbc_dec_loop4 + cmp $16, LEN + jb .Lcbc_dec_ret +.align 4 +.Lcbc_dec_loop1: + movups (INP), IN + 
movaps IN, STATE + call _aesni_dec1 + pxor IV, STATE + movups STATE, (OUTP) + movaps IN, IV + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lcbc_dec_loop1 +.Lcbc_dec_ret: + movups IV, (IVP) +.Lcbc_dec_just_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif + ret + +#ifdef __x86_64__ +.align 16 +.Lbswap_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* + * _aesni_inc_init: internal ABI + * setup registers used by _aesni_inc + * input: + * IV + * output: + * CTR: == IV, in little endian + * TCTR_LOW: == lower qword of CTR + * INC: == 1, in little endian + * BSWAP_MASK == endian swapping mask + */ +.align 4 +_aesni_inc_init: + movaps .Lbswap_mask, BSWAP_MASK + movaps IV, CTR + PSHUFB_XMM BSWAP_MASK CTR + mov $1, TCTR_LOW + MOVQ_R64_XMM TCTR_LOW INC + MOVQ_R64_XMM CTR TCTR_LOW + ret + +/* + * _aesni_inc: internal ABI + * Increase IV by 1, IV is in big endian + * input: + * IV + * CTR: == IV, in little endian + * TCTR_LOW: == lower qword of CTR + * INC: == 1, in little endian + * BSWAP_MASK == endian swapping mask + * output: + * IV: Increase by 1 + * changed: + * CTR: == output IV, in little endian + * TCTR_LOW: == lower qword of CTR + */ +.align 4 +_aesni_inc: + paddq INC, CTR + add $1, TCTR_LOW + jnc .Linc_low + pslldq $8, INC + paddq INC, CTR + psrldq $8, INC +.Linc_low: + movaps CTR, IV + PSHUFB_XMM BSWAP_MASK IV + ret + +/* + * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len, u8 *iv) + */ +ENTRY(aesni_ctr_enc) + cmp $16, LEN + jb .Lctr_enc_just_ret + mov 480(KEYP), KLEN + movups (IVP), IV + call _aesni_inc_init + cmp $64, LEN + jb .Lctr_enc_loop1 +.align 4 +.Lctr_enc_loop4: + movaps IV, STATE1 + call _aesni_inc + movups (INP), IN1 + movaps IV, STATE2 + call _aesni_inc + movups 0x10(INP), IN2 + movaps IV, STATE3 + call _aesni_inc + movups 0x20(INP), IN3 + movaps IV, STATE4 + call _aesni_inc + movups 0x30(INP), IN4 + call _aesni_enc4 + pxor IN1, STATE1 + movups STATE1, (OUTP) + pxor IN2, STATE2 + movups STATE2, 0x10(OUTP) + pxor IN3, STATE3 + movups STATE3, 0x20(OUTP) + pxor IN4, STATE4 + movups STATE4, 0x30(OUTP) + sub $64, LEN + add $64, INP + add $64, OUTP + cmp $64, LEN + jge .Lctr_enc_loop4 + cmp $16, LEN + jb .Lctr_enc_ret +.align 4 +.Lctr_enc_loop1: + movaps IV, STATE + call _aesni_inc + movups (INP), IN + call _aesni_enc1 + pxor IN, STATE + movups STATE, (OUTP) + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lctr_enc_loop1 +.Lctr_enc_ret: + movups IV, (IVP) +.Lctr_enc_just_ret: + ret +#endif diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c new file mode 100644 index 00000000..c799352e --- /dev/null +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -0,0 +1,1389 @@ +/* + * Support for Intel AES-NI instructions. This file contains glue + * code, the real AES implementation is in intel-aes_asm.S. + * + * Copyright (C) 2008, Intel Corp. + * Author: Huang Ying + * + * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD + * interface for 64-bit kernels. + * Authors: Adrian Hoban + * Gabriele Paoloni + * Tadeusz Struk (tadeusz.struk@intel.com) + * Aidan O'Mahony (aidan.o.mahony@intel.com) + * Copyright (c) 2010, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE) +#define HAS_CTR +#endif + +#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE) +#define HAS_LRW +#endif + +#if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) +#define HAS_PCBC +#endif + +#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE) +#define HAS_XTS +#endif + +struct async_aes_ctx { + struct cryptd_ablkcipher *cryptd_tfm; +}; + +/* This data is stored at the end of the crypto_tfm struct. + * It's a type of per "session" data storage location. + * This needs to be 16 byte aligned. + */ +struct aesni_rfc4106_gcm_ctx { + u8 hash_subkey[16]; + struct crypto_aes_ctx aes_key_expanded; + u8 nonce[4]; + struct cryptd_aead *cryptd_tfm; +}; + +struct aesni_gcm_set_hash_subkey_result { + int err; + struct completion completion; +}; + +struct aesni_hash_subkey_req_data { + u8 iv[16]; + struct aesni_gcm_set_hash_subkey_result result; + struct scatterlist sg; +}; + +#define AESNI_ALIGN (16) +#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) +#define RFC4106_HASH_SUBKEY_SIZE 16 + +asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, + unsigned int key_len); +asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in); +asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in); +asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len); +asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len); +asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); +asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); + +int crypto_fpu_init(void); +void crypto_fpu_exit(void); + +#ifdef CONFIG_X86_64 +asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); + +/* asmlinkage void aesni_gcm_enc() + * void *ctx, AES Key schedule. Starts on a 16 byte boundary. + * u8 *out, Ciphertext output. Encrypt in-place is allowed. + * const u8 *in, Plaintext input + * unsigned long plaintext_len, Length of data in bytes for encryption. + * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association) + * concatenated with 8 byte Initialisation Vector (from IPSec ESP + * Payload) concatenated with 0x00000001. 16-byte aligned pointer. + * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. + * const u8 *aad, Additional Authentication Data (AAD) + * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this + * is going to be 8 or 12 bytes + * u8 *auth_tag, Authenticated Tag output. + * unsigned long auth_tag_len), Authenticated Tag Length in bytes. + * Valid values are 16 (most likely), 12 or 8. + */ +asmlinkage void aesni_gcm_enc(void *ctx, u8 *out, + const u8 *in, unsigned long plaintext_len, u8 *iv, + u8 *hash_subkey, const u8 *aad, unsigned long aad_len, + u8 *auth_tag, unsigned long auth_tag_len); + +/* asmlinkage void aesni_gcm_dec() + * void *ctx, AES Key schedule. Starts on a 16 byte boundary. + * u8 *out, Plaintext output. Decrypt in-place is allowed. + * const u8 *in, Ciphertext input + * unsigned long ciphertext_len, Length of data in bytes for decryption. 
+ * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association) + * concatenated with 8 byte Initialisation Vector (from IPSec ESP + * Payload) concatenated with 0x00000001. 16-byte aligned pointer. + * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. + * const u8 *aad, Additional Authentication Data (AAD) + * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going + * to be 8 or 12 bytes + * u8 *auth_tag, Authenticated Tag output. + * unsigned long auth_tag_len) Authenticated Tag Length in bytes. + * Valid values are 16 (most likely), 12 or 8. + */ +asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, + const u8 *in, unsigned long ciphertext_len, u8 *iv, + u8 *hash_subkey, const u8 *aad, unsigned long aad_len, + u8 *auth_tag, unsigned long auth_tag_len); + +static inline struct +aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) +{ + return + (struct aesni_rfc4106_gcm_ctx *) + PTR_ALIGN((u8 *) + crypto_tfm_ctx(crypto_aead_tfm(tfm)), AESNI_ALIGN); +} +#endif + +static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) +{ + unsigned long addr = (unsigned long)raw_ctx; + unsigned long align = AESNI_ALIGN; + + if (align <= crypto_tfm_ctx_alignment()) + align = 1; + return (struct crypto_aes_ctx *)ALIGN(addr, align); +} + +static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, + const u8 *in_key, unsigned int key_len) +{ + struct crypto_aes_ctx *ctx = aes_ctx(raw_ctx); + u32 *flags = &tfm->crt_flags; + int err; + + if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 && + key_len != AES_KEYSIZE_256) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + if (!irq_fpu_usable()) + err = crypto_aes_expand_key(ctx, in_key, key_len); + else { + kernel_fpu_begin(); + err = aesni_set_key(ctx, in_key, key_len); + kernel_fpu_end(); + } + + return err; +} + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len); +} + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); + + if (!irq_fpu_usable()) + crypto_aes_encrypt_x86(ctx, dst, src); + else { + kernel_fpu_begin(); + aesni_enc(ctx, dst, src); + kernel_fpu_end(); + } +} + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); + + if (!irq_fpu_usable()) + crypto_aes_decrypt_x86(ctx, dst, src); + else { + kernel_fpu_begin(); + aesni_dec(ctx, dst, src); + kernel_fpu_end(); + } +} + +static struct crypto_alg aesni_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-aesni", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(aesni_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = aes_encrypt, + .cia_decrypt = aes_decrypt + } + } +}; + +static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); + + aesni_enc(ctx, dst, src); +} + +static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); + + aesni_dec(ctx, dst, src); +} + 
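+/*
+ * The two helpers above, together with aes_encrypt()/aes_decrypt(),
+ * show the guard every wrapper in this file follows: AES-NI clobbers
+ * XMM state, so it may only run between kernel_fpu_begin() and
+ * kernel_fpu_end(), and never when irq_fpu_usable() reports false.
+ * A minimal sketch of the shape (do_fallback/do_simd are illustrative
+ * names, not real helpers):
+ *
+ *	if (!irq_fpu_usable())
+ *		do_fallback();		(generic C implementation)
+ *	else {
+ *		kernel_fpu_begin();
+ *		do_simd();		(AES-NI instructions are safe here)
+ *		kernel_fpu_end();
+ *	}
+ *
+ * __aes_encrypt()/__aes_decrypt() deliberately skip the guard; the
+ * "__aes-aesni" cipher below only reaches them with the FPU already
+ * claimed, e.g. under the fpu() template used for lrw/pcbc/xts.
+ */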
+static struct crypto_alg __aesni_alg = { + .cra_name = "__aes-aesni", + .cra_driver_name = "__driver-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(__aesni_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = __aes_encrypt, + .cia_decrypt = __aes_decrypt + } + } +}; + +static int ecb_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + while ((nbytes = walk.nbytes)) { + aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + kernel_fpu_end(); + + return err; +} + +static int ecb_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + while ((nbytes = walk.nbytes)) { + aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + kernel_fpu_end(); + + return err; +} + +static struct crypto_alg blk_ecb_alg = { + .cra_name = "__ecb-aes-aesni", + .cra_driver_name = "__driver-ecb-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}; + +static int cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + while ((nbytes = walk.nbytes)) { + aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, walk.iv); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + kernel_fpu_end(); + + return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + desc->flags &= 
~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + while ((nbytes = walk.nbytes)) { + aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, walk.iv); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + kernel_fpu_end(); + + return err; +} + +static struct crypto_alg blk_cbc_alg = { + .cra_name = "__cbc-aes-aesni", + .cra_driver_name = "__driver-cbc-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}; + +#ifdef CONFIG_X86_64 +static void ctr_crypt_final(struct crypto_aes_ctx *ctx, + struct blkcipher_walk *walk) +{ + u8 *ctrblk = walk->iv; + u8 keystream[AES_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + aesni_enc(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + crypto_inc(ctrblk, AES_BLOCK_SIZE); +} + +static int ctr_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { + aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, walk.iv); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + if (walk.nbytes) { + ctr_crypt_final(ctx, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_fpu_end(); + + return err; +} + +static struct crypto_alg blk_ctr_alg = { + .cra_name = "__ctr-aes-aesni", + .cra_driver_name = "__driver-ctr-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aes_set_key, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}; +#endif + +static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; + int err; + + crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + err = crypto_ablkcipher_setkey(child, key, key_len); + crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) + & CRYPTO_TFM_RES_MASK); + return err; +} + +static int ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if 
(!irq_fpu_usable()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + return crypto_ablkcipher_encrypt(cryptd_req); + } else { + struct blkcipher_desc desc; + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + return crypto_blkcipher_crt(desc.tfm)->encrypt( + &desc, req->dst, req->src, req->nbytes); + } +} + +static int ablk_decrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (!irq_fpu_usable()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + return crypto_ablkcipher_decrypt(cryptd_req); + } else { + struct blkcipher_desc desc; + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + return crypto_blkcipher_crt(desc.tfm)->decrypt( + &desc, req->dst, req->src, req->nbytes); + } +} + +static void ablk_exit(struct crypto_tfm *tfm) +{ + struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_free_ablkcipher(ctx->cryptd_tfm); +} + +static void ablk_init_common(struct crypto_tfm *tfm, + struct cryptd_ablkcipher *cryptd_tfm) +{ + struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->cryptd_tfm = cryptd_tfm; + tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + + crypto_ablkcipher_reqsize(&cryptd_tfm->base); +} + +static int ablk_ecb_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-aes-aesni", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_ecb_alg = { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list), + .cra_init = ablk_ecb_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}; + +static int ablk_cbc_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-aes-aesni", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_cbc_alg = { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list), + .cra_init = ablk_cbc_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}; + +#ifdef 
CONFIG_X86_64 +static int ablk_ctr_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-aes-aesni", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_ctr_alg = { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list), + .cra_init = ablk_ctr_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_encrypt, + .geniv = "chainiv", + }, + }, +}; + +#ifdef HAS_CTR +static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher( + "rfc3686(__driver-ctr-aes-aesni)", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_rfc3686_ctr_alg = { + .cra_name = "rfc3686(ctr(aes))", + .cra_driver_name = "rfc3686-ctr-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_rfc3686_ctr_alg.cra_list), + .cra_init = ablk_rfc3686_ctr_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE+CTR_RFC3686_NONCE_SIZE, + .max_keysize = AES_MAX_KEY_SIZE+CTR_RFC3686_NONCE_SIZE, + .ivsize = CTR_RFC3686_IV_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + .geniv = "seqiv", + }, + }, +}; +#endif +#endif + +#ifdef HAS_LRW +static int ablk_lrw_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher("fpu(lrw(__driver-aes-aesni))", + 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_lrw_alg = { + .cra_name = "lrw(aes)", + .cra_driver_name = "lrw-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list), + .cra_init = ablk_lrw_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, + .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}; +#endif + +#ifdef HAS_PCBC +static int ablk_pcbc_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher("fpu(pcbc(__driver-aes-aesni))", + 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_pcbc_alg = { + .cra_name = "pcbc(aes)", + .cra_driver_name = 
"pcbc-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_pcbc_alg.cra_list), + .cra_init = ablk_pcbc_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}; +#endif + +#ifdef HAS_XTS +static int ablk_xts_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher("fpu(xts(__driver-aes-aesni))", + 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_xts_alg = { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list), + .cra_init = ablk_xts_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}; +#endif + +#ifdef CONFIG_X86_64 +static int rfc4106_init(struct crypto_tfm *tfm) +{ + struct cryptd_aead *cryptd_tfm; + struct aesni_rfc4106_gcm_ctx *ctx = (struct aesni_rfc4106_gcm_ctx *) + PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); + struct crypto_aead *cryptd_child; + struct aesni_rfc4106_gcm_ctx *child_ctx; + cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + cryptd_child = cryptd_aead_child(cryptd_tfm); + child_ctx = aesni_rfc4106_gcm_ctx_get(cryptd_child); + memcpy(child_ctx, ctx, sizeof(*ctx)); + ctx->cryptd_tfm = cryptd_tfm; + tfm->crt_aead.reqsize = sizeof(struct aead_request) + + crypto_aead_reqsize(&cryptd_tfm->base); + return 0; +} + +static void rfc4106_exit(struct crypto_tfm *tfm) +{ + struct aesni_rfc4106_gcm_ctx *ctx = + (struct aesni_rfc4106_gcm_ctx *) + PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); + if (!IS_ERR(ctx->cryptd_tfm)) + cryptd_free_aead(ctx->cryptd_tfm); + return; +} + +static void +rfc4106_set_hash_subkey_done(struct crypto_async_request *req, int err) +{ + struct aesni_gcm_set_hash_subkey_result *result = req->data; + + if (err == -EINPROGRESS) + return; + result->err = err; + complete(&result->completion); +} + +static int +rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) +{ + struct crypto_ablkcipher *ctr_tfm; + struct ablkcipher_request *req; + int ret = -EINVAL; + struct aesni_hash_subkey_req_data *req_data; + + ctr_tfm = crypto_alloc_ablkcipher("ctr(aes)", 0, 0); + if (IS_ERR(ctr_tfm)) + return PTR_ERR(ctr_tfm); + + crypto_ablkcipher_clear_flags(ctr_tfm, ~0); + + ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len); + if (ret) + goto out_free_ablkcipher; + + ret = -ENOMEM; + req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL); + if (!req) + goto out_free_ablkcipher; + + req_data = kmalloc(sizeof(*req_data), GFP_KERNEL); + if (!req_data) + goto 
out_free_request; + + memset(req_data->iv, 0, sizeof(req_data->iv)); + + /* Clear the data in the hash sub key container to zero.*/ + /* We want to cipher all zeros to create the hash sub key. */ + memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE); + + init_completion(&req_data->result.completion); + sg_init_one(&req_data->sg, hash_subkey, RFC4106_HASH_SUBKEY_SIZE); + ablkcipher_request_set_tfm(req, ctr_tfm); + ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + rfc4106_set_hash_subkey_done, + &req_data->result); + + ablkcipher_request_set_crypt(req, &req_data->sg, + &req_data->sg, RFC4106_HASH_SUBKEY_SIZE, req_data->iv); + + ret = crypto_ablkcipher_encrypt(req); + if (ret == -EINPROGRESS || ret == -EBUSY) { + ret = wait_for_completion_interruptible + (&req_data->result.completion); + if (!ret) + ret = req_data->result.err; + } + kfree(req_data); +out_free_request: + ablkcipher_request_free(req); +out_free_ablkcipher: + crypto_free_ablkcipher(ctr_tfm); + return ret; +} + +static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, + unsigned int key_len) +{ + int ret = 0; + struct crypto_tfm *tfm = crypto_aead_tfm(parent); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); + struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); + struct aesni_rfc4106_gcm_ctx *child_ctx = + aesni_rfc4106_gcm_ctx_get(cryptd_child); + u8 *new_key_mem = NULL; + + if (key_len < 4) { + crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + /*Account for 4 byte nonce at the end.*/ + key_len -= 4; + if (key_len != AES_KEYSIZE_128) { + crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce)); + /*This must be on a 16 byte boundary!*/ + if ((unsigned long)(&(ctx->aes_key_expanded.key_enc[0])) % AESNI_ALIGN) + return -EINVAL; + + if ((unsigned long)key % AESNI_ALIGN) { + /*key is not aligned: use an auxuliar aligned pointer*/ + new_key_mem = kmalloc(key_len+AESNI_ALIGN, GFP_KERNEL); + if (!new_key_mem) + return -ENOMEM; + + new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN); + memcpy(new_key_mem, key, key_len); + key = new_key_mem; + } + + if (!irq_fpu_usable()) + ret = crypto_aes_expand_key(&(ctx->aes_key_expanded), + key, key_len); + else { + kernel_fpu_begin(); + ret = aesni_set_key(&(ctx->aes_key_expanded), key, key_len); + kernel_fpu_end(); + } + /*This must be on a 16 byte boundary!*/ + if ((unsigned long)(&(ctx->hash_subkey[0])) % AESNI_ALIGN) { + ret = -EINVAL; + goto exit; + } + ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); + memcpy(child_ctx, ctx, sizeof(*ctx)); +exit: + kfree(new_key_mem); + return ret; +} + +/* This is the Integrity Check Value (aka the authentication tag length and can + * be 8, 12 or 16 bytes long. 
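+ * Callers negotiate the ICV length per security association and
+ * install it through crypto_aead_setauthsize(), which lands in
+ * rfc4106_set_authsize() below; every other value is rejected.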
*/ +static int rfc4106_set_authsize(struct crypto_aead *parent, + unsigned int authsize) +{ + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); + struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); + + switch (authsize) { + case 8: + case 12: + case 16: + break; + default: + return -EINVAL; + } + crypto_aead_crt(parent)->authsize = authsize; + crypto_aead_crt(cryptd_child)->authsize = authsize; + return 0; +} + +static int rfc4106_encrypt(struct aead_request *req) +{ + int ret; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + + if (!irq_fpu_usable()) { + struct aead_request *cryptd_req = + (struct aead_request *) aead_request_ctx(req); + memcpy(cryptd_req, req, sizeof(*req)); + aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + return crypto_aead_encrypt(cryptd_req); + } else { + struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); + kernel_fpu_begin(); + ret = cryptd_child->base.crt_aead.encrypt(req); + kernel_fpu_end(); + return ret; + } +} + +static int rfc4106_decrypt(struct aead_request *req) +{ + int ret; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + + if (!irq_fpu_usable()) { + struct aead_request *cryptd_req = + (struct aead_request *) aead_request_ctx(req); + memcpy(cryptd_req, req, sizeof(*req)); + aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + return crypto_aead_decrypt(cryptd_req); + } else { + struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); + kernel_fpu_begin(); + ret = cryptd_child->base.crt_aead.decrypt(req); + kernel_fpu_end(); + return ret; + } +} + +static struct crypto_alg rfc4106_alg = { + .cra_name = "rfc4106(gcm(aes))", + .cra_driver_name = "rfc4106-gcm-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN, + .cra_alignmask = 0, + .cra_type = &crypto_nivaead_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(rfc4106_alg.cra_list), + .cra_init = rfc4106_init, + .cra_exit = rfc4106_exit, + .cra_u = { + .aead = { + .setkey = rfc4106_set_key, + .setauthsize = rfc4106_set_authsize, + .encrypt = rfc4106_encrypt, + .decrypt = rfc4106_decrypt, + .geniv = "seqiv", + .ivsize = 8, + .maxauthsize = 16, + }, + }, +}; + +static int __driver_rfc4106_encrypt(struct aead_request *req) +{ + u8 one_entry_in_sg = 0; + u8 *src, *dst, *assoc; + __be32 counter = cpu_to_be32(1); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + void *aes_ctx = &(ctx->aes_key_expanded); + unsigned long auth_tag_len = crypto_aead_authsize(tfm); + u8 iv_tab[16+AESNI_ALIGN]; + u8* iv = (u8 *) PTR_ALIGN((u8 *)iv_tab, AESNI_ALIGN); + struct scatter_walk src_sg_walk; + struct scatter_walk assoc_sg_walk; + struct scatter_walk dst_sg_walk; + unsigned int i; + + /* Assuming we are supporting rfc4106 64-bit extended */ + /* sequence numbers We need to have the AAD length equal */ + /* to 8 or 12 bytes */ + if (unlikely(req->assoclen != 8 && req->assoclen != 12)) + return -EINVAL; + /* IV below built */ + for (i = 0; i < 4; i++) + *(iv+i) = ctx->nonce[i]; + for (i = 0; i < 8; i++) + *(iv+4+i) = req->iv[i]; + *((__be32 *)(iv+12)) = counter; + + if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) { + one_entry_in_sg = 1; + scatterwalk_start(&src_sg_walk, 
req->src); + scatterwalk_start(&assoc_sg_walk, req->assoc); + src = scatterwalk_map(&src_sg_walk); + assoc = scatterwalk_map(&assoc_sg_walk); + dst = src; + if (unlikely(req->src != req->dst)) { + scatterwalk_start(&dst_sg_walk, req->dst); + dst = scatterwalk_map(&dst_sg_walk); + } + + } else { + /* Allocate memory for src, dst, assoc */ + src = kmalloc(req->cryptlen + auth_tag_len + req->assoclen, + GFP_ATOMIC); + if (unlikely(!src)) + return -ENOMEM; + assoc = (src + req->cryptlen + auth_tag_len); + scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); + scatterwalk_map_and_copy(assoc, req->assoc, 0, + req->assoclen, 0); + dst = src; + } + + aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, + ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst + + ((unsigned long)req->cryptlen), auth_tag_len); + + /* The authTag (aka the Integrity Check Value) needs to be written + * back to the packet. */ + if (one_entry_in_sg) { + if (unlikely(req->src != req->dst)) { + scatterwalk_unmap(dst); + scatterwalk_done(&dst_sg_walk, 0, 0); + } + scatterwalk_unmap(src); + scatterwalk_unmap(assoc); + scatterwalk_done(&src_sg_walk, 0, 0); + scatterwalk_done(&assoc_sg_walk, 0, 0); + } else { + scatterwalk_map_and_copy(dst, req->dst, 0, + req->cryptlen + auth_tag_len, 1); + kfree(src); + } + return 0; +} + +static int __driver_rfc4106_decrypt(struct aead_request *req) +{ + u8 one_entry_in_sg = 0; + u8 *src, *dst, *assoc; + unsigned long tempCipherLen = 0; + __be32 counter = cpu_to_be32(1); + int retval = 0; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + void *aes_ctx = &(ctx->aes_key_expanded); + unsigned long auth_tag_len = crypto_aead_authsize(tfm); + u8 iv_and_authTag[32+AESNI_ALIGN]; + u8 *iv = (u8 *) PTR_ALIGN((u8 *)iv_and_authTag, AESNI_ALIGN); + u8 *authTag = iv + 16; + struct scatter_walk src_sg_walk; + struct scatter_walk assoc_sg_walk; + struct scatter_walk dst_sg_walk; + unsigned int i; + + if (unlikely((req->cryptlen < auth_tag_len) || + (req->assoclen != 8 && req->assoclen != 12))) + return -EINVAL; + /* Assuming we are supporting rfc4106 64-bit extended */ + /* sequence numbers We need to have the AAD length */ + /* equal to 8 or 12 bytes */ + + tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len); + /* IV below built */ + for (i = 0; i < 4; i++) + *(iv+i) = ctx->nonce[i]; + for (i = 0; i < 8; i++) + *(iv+4+i) = req->iv[i]; + *((__be32 *)(iv+12)) = counter; + + if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) { + one_entry_in_sg = 1; + scatterwalk_start(&src_sg_walk, req->src); + scatterwalk_start(&assoc_sg_walk, req->assoc); + src = scatterwalk_map(&src_sg_walk); + assoc = scatterwalk_map(&assoc_sg_walk); + dst = src; + if (unlikely(req->src != req->dst)) { + scatterwalk_start(&dst_sg_walk, req->dst); + dst = scatterwalk_map(&dst_sg_walk); + } + + } else { + /* Allocate memory for src, dst, assoc */ + src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC); + if (!src) + return -ENOMEM; + assoc = (src + req->cryptlen + auth_tag_len); + scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); + scatterwalk_map_and_copy(assoc, req->assoc, 0, + req->assoclen, 0); + dst = src; + } + + aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv, + ctx->hash_subkey, assoc, (unsigned long)req->assoclen, + authTag, auth_tag_len); + + /* Compare generated tag with passed in tag. */ + retval = memcmp(src + tempCipherLen, authTag, auth_tag_len) ? 
+ -EBADMSG : 0; + + if (one_entry_in_sg) { + if (unlikely(req->src != req->dst)) { + scatterwalk_unmap(dst); + scatterwalk_done(&dst_sg_walk, 0, 0); + } + scatterwalk_unmap(src); + scatterwalk_unmap(assoc); + scatterwalk_done(&src_sg_walk, 0, 0); + scatterwalk_done(&assoc_sg_walk, 0, 0); + } else { + scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1); + kfree(src); + } + return retval; +} + +static struct crypto_alg __rfc4106_alg = { + .cra_name = "__gcm-aes-aesni", + .cra_driver_name = "__driver-gcm-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_AEAD, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN, + .cra_alignmask = 0, + .cra_type = &crypto_aead_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(__rfc4106_alg.cra_list), + .cra_u = { + .aead = { + .encrypt = __driver_rfc4106_encrypt, + .decrypt = __driver_rfc4106_decrypt, + }, + }, +}; +#endif + + +static const struct x86_cpu_id aesni_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_AES), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); + +static int __init aesni_init(void) +{ + int err; + + if (!x86_match_cpu(aesni_cpu_id)) + return -ENODEV; + + if ((err = crypto_fpu_init())) + goto fpu_err; + if ((err = crypto_register_alg(&aesni_alg))) + goto aes_err; + if ((err = crypto_register_alg(&__aesni_alg))) + goto __aes_err; + if ((err = crypto_register_alg(&blk_ecb_alg))) + goto blk_ecb_err; + if ((err = crypto_register_alg(&blk_cbc_alg))) + goto blk_cbc_err; + if ((err = crypto_register_alg(&ablk_ecb_alg))) + goto ablk_ecb_err; + if ((err = crypto_register_alg(&ablk_cbc_alg))) + goto ablk_cbc_err; +#ifdef CONFIG_X86_64 + if ((err = crypto_register_alg(&blk_ctr_alg))) + goto blk_ctr_err; + if ((err = crypto_register_alg(&ablk_ctr_alg))) + goto ablk_ctr_err; + if ((err = crypto_register_alg(&__rfc4106_alg))) + goto __aead_gcm_err; + if ((err = crypto_register_alg(&rfc4106_alg))) + goto aead_gcm_err; +#ifdef HAS_CTR + if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) + goto ablk_rfc3686_ctr_err; +#endif +#endif +#ifdef HAS_LRW + if ((err = crypto_register_alg(&ablk_lrw_alg))) + goto ablk_lrw_err; +#endif +#ifdef HAS_PCBC + if ((err = crypto_register_alg(&ablk_pcbc_alg))) + goto ablk_pcbc_err; +#endif +#ifdef HAS_XTS + if ((err = crypto_register_alg(&ablk_xts_alg))) + goto ablk_xts_err; +#endif + return err; + +#ifdef HAS_XTS +ablk_xts_err: +#endif +#ifdef HAS_PCBC + crypto_unregister_alg(&ablk_pcbc_alg); +ablk_pcbc_err: +#endif +#ifdef HAS_LRW + crypto_unregister_alg(&ablk_lrw_alg); +ablk_lrw_err: +#endif +#ifdef CONFIG_X86_64 +#ifdef HAS_CTR + crypto_unregister_alg(&ablk_rfc3686_ctr_alg); +ablk_rfc3686_ctr_err: +#endif + crypto_unregister_alg(&rfc4106_alg); +aead_gcm_err: + crypto_unregister_alg(&__rfc4106_alg); +__aead_gcm_err: + crypto_unregister_alg(&ablk_ctr_alg); +ablk_ctr_err: + crypto_unregister_alg(&blk_ctr_alg); +blk_ctr_err: +#endif + crypto_unregister_alg(&ablk_cbc_alg); +ablk_cbc_err: + crypto_unregister_alg(&ablk_ecb_alg); +ablk_ecb_err: + crypto_unregister_alg(&blk_cbc_alg); +blk_cbc_err: + crypto_unregister_alg(&blk_ecb_alg); +blk_ecb_err: + crypto_unregister_alg(&__aesni_alg); +__aes_err: + crypto_unregister_alg(&aesni_alg); +aes_err: +fpu_err: + return err; +} + +static void __exit aesni_exit(void) +{ +#ifdef HAS_XTS + crypto_unregister_alg(&ablk_xts_alg); +#endif +#ifdef HAS_PCBC + crypto_unregister_alg(&ablk_pcbc_alg); +#endif +#ifdef HAS_LRW + crypto_unregister_alg(&ablk_lrw_alg); +#endif +#ifdef CONFIG_X86_64 +#ifdef HAS_CTR + 
crypto_unregister_alg(&ablk_rfc3686_ctr_alg); +#endif + crypto_unregister_alg(&rfc4106_alg); + crypto_unregister_alg(&__rfc4106_alg); + crypto_unregister_alg(&ablk_ctr_alg); + crypto_unregister_alg(&blk_ctr_alg); +#endif + crypto_unregister_alg(&ablk_cbc_alg); + crypto_unregister_alg(&ablk_ecb_alg); + crypto_unregister_alg(&blk_cbc_alg); + crypto_unregister_alg(&blk_ecb_alg); + crypto_unregister_alg(&__aesni_alg); + crypto_unregister_alg(&aesni_alg); + + crypto_fpu_exit(); +} + +module_init(aesni_init); +module_exit(aesni_exit); + +MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("aes"); diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S new file mode 100644 index 00000000..391d245d --- /dev/null +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S @@ -0,0 +1,390 @@ +/* + * Blowfish Cipher Algorithm (x86_64) + * + * Copyright (C) 2011 Jussi Kivilinna + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "blowfish-x86_64-asm.S" +.text + +/* structure of crypto context */ +#define p 0 +#define s0 ((16 + 2) * 4) +#define s1 ((16 + 2 + (1 * 256)) * 4) +#define s2 ((16 + 2 + (2 * 256)) * 4) +#define s3 ((16 + 2 + (3 * 256)) * 4) + +/* register macros */ +#define CTX %rdi +#define RIO %rsi + +#define RX0 %rax +#define RX1 %rbx +#define RX2 %rcx +#define RX3 %rdx + +#define RX0d %eax +#define RX1d %ebx +#define RX2d %ecx +#define RX3d %edx + +#define RX0bl %al +#define RX1bl %bl +#define RX2bl %cl +#define RX3bl %dl + +#define RX0bh %ah +#define RX1bh %bh +#define RX2bh %ch +#define RX3bh %dh + +#define RT0 %rbp +#define RT1 %rsi +#define RT2 %r8 +#define RT3 %r9 + +#define RT0d %ebp +#define RT1d %esi +#define RT2d %r8d +#define RT3d %r9d + +#define RKEY %r10 + +/*********************************************************************** + * 1-way blowfish + ***********************************************************************/ +#define F() \ + rorq $16, RX0; \ + movzbl RX0bh, RT0d; \ + movzbl RX0bl, RT1d; \ + rolq $16, RX0; \ + movl s0(CTX,RT0,4), RT0d; \ + addl s1(CTX,RT1,4), RT0d; \ + movzbl RX0bh, RT1d; \ + movzbl RX0bl, RT2d; \ + rolq $32, RX0; \ + xorl s2(CTX,RT1,4), RT0d; \ + addl s3(CTX,RT2,4), RT0d; \ + xorq RT0, RX0; + +#define add_roundkey_enc(n) \ + xorq p+4*(n)(CTX), RX0; + +#define round_enc(n) \ + add_roundkey_enc(n); \ + \ + F(); \ + F(); + +#define add_roundkey_dec(n) \ + movq p+4*(n-1)(CTX), RT0; \ + rorq $32, RT0; \ + xorq RT0, RX0; + +#define round_dec(n) \ + add_roundkey_dec(n); \ + \ + F(); \ + F(); \ + +#define read_block() \ + movq (RIO), RX0; \ + rorq $32, RX0; \ + bswapq RX0; + +#define write_block() \ + bswapq RX0; \ + movq RX0, (RIO); + +#define xor_block() \ + bswapq RX0; \ + xorq RX0, (RIO); + +.align 8 +.global __blowfish_enc_blk +.type __blowfish_enc_blk,@function; 
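+
+/*
+ * Reference semantics of the F() macro above, as plain C (an editor's
+ * sketch, assuming the p[18]/s[4][256] key-schedule layout implied by the
+ * p and s0..s3 offsets defined at the top of this file):
+ *
+ *	static u32 bf_F(const u32 s[4][256], u32 x)
+ *	{
+ *		u8 a = x >> 24, b = x >> 16, c = x >> 8, d = x;
+ *
+ *		return ((s[0][a] + s[1][b]) ^ s[2][c]) + s[3][d];
+ *	}
+ *
+ * Each round_enc(n) XORs the subkey pair P[n], P[n+1] into the packed
+ * 64-bit block and applies F() twice, so the eight round_enc() calls below
+ * cover all sixteen Feistel rounds; add_roundkey_enc(16) is the final
+ * whitening with P[16], P[17].
+ */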
+ +__blowfish_enc_blk: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + movq %rbp, %r11; + + movq %rsi, %r10; + movq %rdx, RIO; + + read_block(); + + round_enc(0); + round_enc(2); + round_enc(4); + round_enc(6); + round_enc(8); + round_enc(10); + round_enc(12); + round_enc(14); + add_roundkey_enc(16); + + movq %r11, %rbp; + + movq %r10, RIO; + test %cl, %cl; + jnz __enc_xor; + + write_block(); + ret; +__enc_xor: + xor_block(); + ret; + +.align 8 +.global blowfish_dec_blk +.type blowfish_dec_blk,@function; + +blowfish_dec_blk: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + movq %rbp, %r11; + + movq %rsi, %r10; + movq %rdx, RIO; + + read_block(); + + round_dec(17); + round_dec(15); + round_dec(13); + round_dec(11); + round_dec(9); + round_dec(7); + round_dec(5); + round_dec(3); + add_roundkey_dec(1); + + movq %r10, RIO; + write_block(); + + movq %r11, %rbp; + + ret; + +/********************************************************************** + 4-way blowfish, four blocks parallel + **********************************************************************/ + +/* F() for 4-way. Slower when used alone/1-way, but faster when used + * parallel/4-way (tested on AMD Phenom II & Intel Xeon E7330). + */ +#define F4(x) \ + movzbl x ## bh, RT1d; \ + movzbl x ## bl, RT3d; \ + rorq $16, x; \ + movzbl x ## bh, RT0d; \ + movzbl x ## bl, RT2d; \ + rorq $16, x; \ + movl s0(CTX,RT0,4), RT0d; \ + addl s1(CTX,RT2,4), RT0d; \ + xorl s2(CTX,RT1,4), RT0d; \ + addl s3(CTX,RT3,4), RT0d; \ + xorq RT0, x; + +#define add_preloaded_roundkey4() \ + xorq RKEY, RX0; \ + xorq RKEY, RX1; \ + xorq RKEY, RX2; \ + xorq RKEY, RX3; + +#define preload_roundkey_enc(n) \ + movq p+4*(n)(CTX), RKEY; + +#define add_roundkey_enc4(n) \ + add_preloaded_roundkey4(); \ + preload_roundkey_enc(n + 2); + +#define round_enc4(n) \ + add_roundkey_enc4(n); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); + +#define preload_roundkey_dec(n) \ + movq p+4*((n)-1)(CTX), RKEY; \ + rorq $32, RKEY; + +#define add_roundkey_dec4(n) \ + add_preloaded_roundkey4(); \ + preload_roundkey_dec(n - 2); + +#define round_dec4(n) \ + add_roundkey_dec4(n); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); + +#define read_block4() \ + movq (RIO), RX0; \ + rorq $32, RX0; \ + bswapq RX0; \ + \ + movq 8(RIO), RX1; \ + rorq $32, RX1; \ + bswapq RX1; \ + \ + movq 16(RIO), RX2; \ + rorq $32, RX2; \ + bswapq RX2; \ + \ + movq 24(RIO), RX3; \ + rorq $32, RX3; \ + bswapq RX3; + +#define write_block4() \ + bswapq RX0; \ + movq RX0, (RIO); \ + \ + bswapq RX1; \ + movq RX1, 8(RIO); \ + \ + bswapq RX2; \ + movq RX2, 16(RIO); \ + \ + bswapq RX3; \ + movq RX3, 24(RIO); + +#define xor_block4() \ + bswapq RX0; \ + xorq RX0, (RIO); \ + \ + bswapq RX1; \ + xorq RX1, 8(RIO); \ + \ + bswapq RX2; \ + xorq RX2, 16(RIO); \ + \ + bswapq RX3; \ + xorq RX3, 24(RIO); + +.align 8 +.global __blowfish_enc_blk_4way +.type __blowfish_enc_blk_4way,@function; + +__blowfish_enc_blk_4way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + pushq %rbp; + pushq %rbx; + pushq %rcx; + + preload_roundkey_enc(0); + + movq %rsi, %r11; + movq %rdx, RIO; + + read_block4(); + + round_enc4(0); + round_enc4(2); + round_enc4(4); + round_enc4(6); + round_enc4(8); + round_enc4(10); + round_enc4(12); + round_enc4(14); + add_preloaded_roundkey4(); + + popq %rbp; + movq %r11, RIO; + + 
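+	/*
+	 * The popq %rbp above popped the xor flag (pushed as %rcx in the
+	 * prologue) into %rbp, so %bpl now holds the bool argument; the
+	 * caller's %rbp is restored by the final popq %rbp before ret.
+	 */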
test %bpl, %bpl;
+	jnz __enc_xor4;
+
+	write_block4();
+
+	popq %rbx;
+	popq %rbp;
+	ret;
+
+__enc_xor4:
+	xor_block4();
+
+	popq %rbx;
+	popq %rbp;
+	ret;
+
+.align 8
+.global blowfish_dec_blk_4way
+.type blowfish_dec_blk_4way,@function;
+
+blowfish_dec_blk_4way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 */
+	pushq %rbp;
+	pushq %rbx;
+	preload_roundkey_dec(17);
+
+	movq %rsi, %r11;
+	movq %rdx, RIO;
+
+	read_block4();
+
+	round_dec4(17);
+	round_dec4(15);
+	round_dec4(13);
+	round_dec4(11);
+	round_dec4(9);
+	round_dec4(7);
+	round_dec4(5);
+	round_dec4(3);
+	add_preloaded_roundkey4();
+
+	movq %r11, RIO;
+	write_block4();
+
+	popq %rbx;
+	popq %rbp;
+
+	ret;
+
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
new file mode 100644
index 00000000..7967474d
--- /dev/null
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -0,0 +1,489 @@
+/*
+ * Glue Code for assembler optimized version of Blowfish
+ *
+ * Copyright (c) 2011 Jussi Kivilinna
+ *
+ * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
+ *   Copyright (c) 2006 Herbert Xu
+ * CTR part based on code (crypto/ctr.c) by:
+ *   (C) Copyright IBM Corp. 2007 - Joy Latten
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <asm/processor.h>
+#include <crypto/blowfish.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <crypto/algapi.h>
+
+/* regular block cipher functions */
+asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
+				   bool xor);
+asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
+
+/* 4-way parallel cipher functions */
+asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
+					const u8 *src, bool xor);
+asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
+				      const u8 *src);
+
+static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
+{
+	__blowfish_enc_blk(ctx, dst, src, false);
+}
+
+static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
+					const u8 *src)
+{
+	__blowfish_enc_blk(ctx, dst, src, true);
+}
+
+static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
+					 const u8 *src)
+{
+	__blowfish_enc_blk_4way(ctx, dst, src, false);
+}
+
+static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
+					     const u8 *src)
+{
+	__blowfish_enc_blk_4way(ctx, dst, src, true);
+}
+
+static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src);
+}
+
+static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
+}
+
+static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
+		     void (*fn)(struct bf_ctx *, u8 *, const u8 *),
+		     void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
+{
+	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int bsize = BF_BLOCK_SIZE;
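+	/*
+	 * fn and fn_4way are the one-block and four-block variants of the
+	 * same cipher direction; the walk below drains four-block batches
+	 * through fn_4way and finishes the tail one block at a time with fn.
+	 */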
unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + /* Process four block batch */ + if (nbytes >= bsize * 4) { + do { + fn_4way(ctx, wdst, wsrc); + + wsrc += bsize * 4; + wdst += bsize * 4; + nbytes -= bsize * 4; + } while (nbytes >= bsize * 4); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + fn(ctx, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way); +} + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = BF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 *iv = (u64 *)walk->iv; + + do { + *dst = *src ^ *iv; + blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + *(u64 *)walk->iv = *iv; + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = BF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 ivs[4 - 1]; + u64 last_iv; + + /* Start of the last block. 
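+	 * CBC decryption is done back to front: the last block is decrypted
+	 * first, each plaintext is XORed in place with the preceding
+	 * ciphertext, and the saved last_iv becomes the chaining value for
+	 * the next walk chunk.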
*/ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process four block batch */ + if (nbytes >= bsize * 4) { + do { + nbytes -= bsize * 4 - bsize; + src -= 4 - 1; + dst -= 4 - 1; + + ivs[0] = src[0]; + ivs[1] = src[1]; + ivs[2] = src[2]; + + blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src); + + dst[1] ^= ivs[0]; + dst[2] ^= ivs[1]; + dst[3] ^= ivs[2]; + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * 4); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + for (;;) { + blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } + +done: + *dst ^= *(u64 *)walk->iv; + *(u64 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk) +{ + u8 *ctrblk = walk->iv; + u8 keystream[BF_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + blowfish_enc_blk(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + + crypto_inc(ctrblk, BF_BLOCK_SIZE); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = BF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); + __be64 ctrblocks[4]; + + /* Process four block batch */ + if (nbytes >= bsize * 4) { + do { + if (dst != src) { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; + } + + /* create ctrblks for parallel encrypt */ + ctrblocks[0] = cpu_to_be64(ctrblk++); + ctrblocks[1] = cpu_to_be64(ctrblk++); + ctrblocks[2] = cpu_to_be64(ctrblk++); + ctrblocks[3] = cpu_to_be64(ctrblk++); + + blowfish_enc_blk_xor_4way(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += 4; + dst += 4; + } while ((nbytes -= bsize * 4) >= bsize * 4); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (dst != src) + *dst = *src; + + ctrblocks[0] = cpu_to_be64(ctrblk++); + + blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks); + + src += 1; + dst += 1; + } while ((nbytes -= bsize) >= bsize); + +done: + *(__be64 *)walk->iv = cpu_to_be64(ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE); + + while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) { + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + if (walk.nbytes) { + ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +static struct crypto_alg bf_algs[4] = { { + .cra_name = 
"blowfish", + .cra_driver_name = "blowfish-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = BF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct bf_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(bf_algs[0].cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = BF_MIN_KEY_SIZE, + .cia_max_keysize = BF_MAX_KEY_SIZE, + .cia_setkey = blowfish_setkey, + .cia_encrypt = blowfish_encrypt, + .cia_decrypt = blowfish_decrypt, + } + } +}, { + .cra_name = "ecb(blowfish)", + .cra_driver_name = "ecb-blowfish-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = BF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct bf_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(bf_algs[1].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .setkey = blowfish_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(blowfish)", + .cra_driver_name = "cbc-blowfish-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = BF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct bf_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(bf_algs[2].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .ivsize = BF_BLOCK_SIZE, + .setkey = blowfish_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "ctr(blowfish)", + .cra_driver_name = "ctr-blowfish-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct bf_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(bf_algs[3].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .ivsize = BF_BLOCK_SIZE, + .setkey = blowfish_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +} }; + +static bool is_blacklisted_cpu(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + if (boot_cpu_data.x86 == 0x0f) { + /* + * On Pentium 4, blowfish-x86_64 is slower than generic C + * implementation because use of 64bit rotates (which are really + * slow on P4). Therefore blacklist P4s. 
+ */ + return true; + } + + return false; +} + +static int force; +module_param(force, int, 0); +MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); + +static int __init init(void) +{ + if (!force && is_blacklisted_cpu()) { + printk(KERN_INFO + "blowfish-x86_64: performance on this CPU " + "would be suboptimal: disabling " + "blowfish-x86_64.\n"); + return -ENODEV; + } + + return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs)); +} + +static void __exit fini(void) +{ + crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs)); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized"); +MODULE_ALIAS("blowfish"); +MODULE_ALIAS("blowfish-asm"); diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S new file mode 100644 index 00000000..0b337433 --- /dev/null +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S @@ -0,0 +1,520 @@ +/* + * Camellia Cipher Algorithm (x86_64) + * + * Copyright (C) 2012 Jussi Kivilinna + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "camellia-x86_64-asm_64.S" +.text + +.extern camellia_sp10011110; +.extern camellia_sp22000222; +.extern camellia_sp03303033; +.extern camellia_sp00444404; +.extern camellia_sp02220222; +.extern camellia_sp30333033; +.extern camellia_sp44044404; +.extern camellia_sp11101110; + +#define sp10011110 camellia_sp10011110 +#define sp22000222 camellia_sp22000222 +#define sp03303033 camellia_sp03303033 +#define sp00444404 camellia_sp00444404 +#define sp02220222 camellia_sp02220222 +#define sp30333033 camellia_sp30333033 +#define sp44044404 camellia_sp44044404 +#define sp11101110 camellia_sp11101110 + +#define CAMELLIA_TABLE_BYTE_LEN 272 + +/* struct camellia_ctx: */ +#define key_table 0 +#define key_length CAMELLIA_TABLE_BYTE_LEN + +/* register macros */ +#define CTX %rdi +#define RIO %rsi +#define RIOd %esi + +#define RAB0 %rax +#define RCD0 %rcx +#define RAB1 %rbx +#define RCD1 %rdx + +#define RAB0d %eax +#define RCD0d %ecx +#define RAB1d %ebx +#define RCD1d %edx + +#define RAB0bl %al +#define RCD0bl %cl +#define RAB1bl %bl +#define RCD1bl %dl + +#define RAB0bh %ah +#define RCD0bh %ch +#define RAB1bh %bh +#define RCD1bh %dh + +#define RT0 %rsi +#define RT1 %rbp +#define RT2 %r8 + +#define RT0d %esi +#define RT1d %ebp +#define RT2d %r8d + +#define RT2bl %r8b + +#define RXOR %r9 +#define RRBP %r10 +#define RDST %r11 + +#define RXORd %r9d +#define RXORbl %r9b + +#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \ + movzbl ab ## bl, tmp2 ## d; \ + movzbl ab ## bh, tmp1 ## d; \ + rorq $16, ab; \ + xorq T0(, tmp2, 8), dst; \ + xorq T1(, tmp1, 8), dst; + +/********************************************************************** + 1-way camellia + **********************************************************************/ +#define roundsm(ab, subkey, 
cd) \ + movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \ + \ + xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \ + xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \ + xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \ + xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \ + \ + xorq RT2, cd ## 0; + +#define fls(l, r, kl, kr) \ + movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \ + andl l ## 0d, RT0d; \ + roll $1, RT0d; \ + shlq $32, RT0; \ + xorq RT0, l ## 0; \ + movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \ + orq r ## 0, RT1; \ + shrq $32, RT1; \ + xorq RT1, r ## 0; \ + \ + movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \ + orq l ## 0, RT2; \ + shrq $32, RT2; \ + xorq RT2, l ## 0; \ + movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \ + andl r ## 0d, RT0d; \ + roll $1, RT0d; \ + shlq $32, RT0; \ + xorq RT0, r ## 0; + +#define enc_rounds(i) \ + roundsm(RAB, i + 2, RCD); \ + roundsm(RCD, i + 3, RAB); \ + roundsm(RAB, i + 4, RCD); \ + roundsm(RCD, i + 5, RAB); \ + roundsm(RAB, i + 6, RCD); \ + roundsm(RCD, i + 7, RAB); + +#define enc_fls(i) \ + fls(RAB, RCD, i + 0, i + 1); + +#define enc_inpack() \ + movq (RIO), RAB0; \ + bswapq RAB0; \ + rolq $32, RAB0; \ + movq 4*2(RIO), RCD0; \ + bswapq RCD0; \ + rorq $32, RCD0; \ + xorq key_table(CTX), RAB0; + +#define enc_outunpack(op, max) \ + xorq key_table(CTX, max, 8), RCD0; \ + rorq $32, RCD0; \ + bswapq RCD0; \ + op ## q RCD0, (RIO); \ + rolq $32, RAB0; \ + bswapq RAB0; \ + op ## q RAB0, 4*2(RIO); + +#define dec_rounds(i) \ + roundsm(RAB, i + 7, RCD); \ + roundsm(RCD, i + 6, RAB); \ + roundsm(RAB, i + 5, RCD); \ + roundsm(RCD, i + 4, RAB); \ + roundsm(RAB, i + 3, RCD); \ + roundsm(RCD, i + 2, RAB); + +#define dec_fls(i) \ + fls(RAB, RCD, i + 1, i + 0); + +#define dec_inpack(max) \ + movq (RIO), RAB0; \ + bswapq RAB0; \ + rolq $32, RAB0; \ + movq 4*2(RIO), RCD0; \ + bswapq RCD0; \ + rorq $32, RCD0; \ + xorq key_table(CTX, max, 8), RAB0; + +#define dec_outunpack() \ + xorq key_table(CTX), RCD0; \ + rorq $32, RCD0; \ + bswapq RCD0; \ + movq RCD0, (RIO); \ + rolq $32, RAB0; \ + bswapq RAB0; \ + movq RAB0, 4*2(RIO); + +.global __camellia_enc_blk; +.type __camellia_enc_blk,@function; + +__camellia_enc_blk: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool xor + */ + movq %rbp, RRBP; + + movq %rcx, RXOR; + movq %rsi, RDST; + movq %rdx, RIO; + + enc_inpack(); + + enc_rounds(0); + enc_fls(8); + enc_rounds(8); + enc_fls(16); + enc_rounds(16); + movl $24, RT1d; /* max */ + + cmpb $16, key_length(CTX); + je __enc_done; + + enc_fls(24); + enc_rounds(24); + movl $32, RT1d; /* max */ + +__enc_done: + testb RXORbl, RXORbl; + movq RDST, RIO; + + jnz __enc_xor; + + enc_outunpack(mov, RT1); + + movq RRBP, %rbp; + ret; + +__enc_xor: + enc_outunpack(xor, RT1); + + movq RRBP, %rbp; + ret; + +.global camellia_dec_blk; +.type camellia_dec_blk,@function; + +camellia_dec_blk: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + cmpl $16, key_length(CTX); + movl $32, RT2d; + movl $24, RXORd; + cmovel RXORd, RT2d; /* max */ + + movq %rbp, RRBP; + movq %rsi, RDST; + movq %rdx, RIO; + + dec_inpack(RT2); + + cmpb $24, RT2bl; + je __dec_rounds16; + + dec_rounds(24); + dec_fls(24); + +__dec_rounds16: + dec_rounds(16); + dec_fls(16); + dec_rounds(8); + dec_fls(8); + dec_rounds(0); + + movq RDST, RIO; + + dec_outunpack(); + + movq RRBP, %rbp; + ret; + +/********************************************************************** + 2-way camellia + 
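+ (two blocks interleaved: RAB0/RCD0 carry block 0 and RAB1/RCD1 block 1,
+  giving two independent dependency chains for the pipeline)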
**********************************************************************/ +#define roundsm2(ab, subkey, cd) \ + movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \ + xorq RT2, cd ## 1; \ + \ + xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \ + xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \ + xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \ + xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \ + \ + xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \ + xorq RT2, cd ## 0; \ + xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \ + xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \ + xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1); + +#define fls2(l, r, kl, kr) \ + movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \ + andl l ## 0d, RT0d; \ + roll $1, RT0d; \ + shlq $32, RT0; \ + xorq RT0, l ## 0; \ + movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \ + orq r ## 0, RT1; \ + shrq $32, RT1; \ + xorq RT1, r ## 0; \ + \ + movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \ + andl l ## 1d, RT2d; \ + roll $1, RT2d; \ + shlq $32, RT2; \ + xorq RT2, l ## 1; \ + movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \ + orq r ## 1, RT0; \ + shrq $32, RT0; \ + xorq RT0, r ## 1; \ + \ + movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \ + orq l ## 0, RT1; \ + shrq $32, RT1; \ + xorq RT1, l ## 0; \ + movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \ + andl r ## 0d, RT2d; \ + roll $1, RT2d; \ + shlq $32, RT2; \ + xorq RT2, r ## 0; \ + \ + movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \ + orq l ## 1, RT0; \ + shrq $32, RT0; \ + xorq RT0, l ## 1; \ + movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \ + andl r ## 1d, RT1d; \ + roll $1, RT1d; \ + shlq $32, RT1; \ + xorq RT1, r ## 1; + +#define enc_rounds2(i) \ + roundsm2(RAB, i + 2, RCD); \ + roundsm2(RCD, i + 3, RAB); \ + roundsm2(RAB, i + 4, RCD); \ + roundsm2(RCD, i + 5, RAB); \ + roundsm2(RAB, i + 6, RCD); \ + roundsm2(RCD, i + 7, RAB); + +#define enc_fls2(i) \ + fls2(RAB, RCD, i + 0, i + 1); + +#define enc_inpack2() \ + movq (RIO), RAB0; \ + bswapq RAB0; \ + rorq $32, RAB0; \ + movq 4*2(RIO), RCD0; \ + bswapq RCD0; \ + rolq $32, RCD0; \ + xorq key_table(CTX), RAB0; \ + \ + movq 8*2(RIO), RAB1; \ + bswapq RAB1; \ + rorq $32, RAB1; \ + movq 12*2(RIO), RCD1; \ + bswapq RCD1; \ + rolq $32, RCD1; \ + xorq key_table(CTX), RAB1; + +#define enc_outunpack2(op, max) \ + xorq key_table(CTX, max, 8), RCD0; \ + rolq $32, RCD0; \ + bswapq RCD0; \ + op ## q RCD0, (RIO); \ + rorq $32, RAB0; \ + bswapq RAB0; \ + op ## q RAB0, 4*2(RIO); \ + \ + xorq key_table(CTX, max, 8), RCD1; \ + rolq $32, RCD1; \ + bswapq RCD1; \ + op ## q RCD1, 8*2(RIO); \ + rorq $32, RAB1; \ + bswapq RAB1; \ + op ## q RAB1, 12*2(RIO); + +#define dec_rounds2(i) \ + roundsm2(RAB, i + 7, RCD); \ + roundsm2(RCD, i + 6, RAB); \ + roundsm2(RAB, i + 5, RCD); \ + roundsm2(RCD, i + 4, RAB); \ + roundsm2(RAB, i + 3, RCD); \ + roundsm2(RCD, i + 2, RAB); + +#define dec_fls2(i) \ + fls2(RAB, RCD, i + 1, i + 0); + +#define dec_inpack2(max) \ + movq (RIO), RAB0; \ + bswapq RAB0; \ + rorq $32, RAB0; \ + movq 4*2(RIO), RCD0; \ + bswapq RCD0; \ + rolq $32, RCD0; \ + xorq key_table(CTX, max, 8), RAB0; \ + \ + movq 8*2(RIO), RAB1; \ + bswapq RAB1; \ + rorq $32, RAB1; \ + movq 12*2(RIO), RCD1; \ + bswapq RCD1; \ + rolq $32, RCD1; \ + xorq key_table(CTX, max, 8), RAB1; + +#define dec_outunpack2() \ + xorq key_table(CTX), RCD0; \ + rolq $32, RCD0; \ + bswapq RCD0; \ + movq RCD0, (RIO); \ + rorq $32, RAB0; \ + bswapq RAB0; \ + movq RAB0, 4*2(RIO); \ + \ 
+ xorq key_table(CTX), RCD1; \ + rolq $32, RCD1; \ + bswapq RCD1; \ + movq RCD1, 8*2(RIO); \ + rorq $32, RAB1; \ + bswapq RAB1; \ + movq RAB1, 12*2(RIO); + +.global __camellia_enc_blk_2way; +.type __camellia_enc_blk_2way,@function; + +__camellia_enc_blk_2way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool xor + */ + pushq %rbx; + + movq %rbp, RRBP; + movq %rcx, RXOR; + movq %rsi, RDST; + movq %rdx, RIO; + + enc_inpack2(); + + enc_rounds2(0); + enc_fls2(8); + enc_rounds2(8); + enc_fls2(16); + enc_rounds2(16); + movl $24, RT2d; /* max */ + + cmpb $16, key_length(CTX); + je __enc2_done; + + enc_fls2(24); + enc_rounds2(24); + movl $32, RT2d; /* max */ + +__enc2_done: + test RXORbl, RXORbl; + movq RDST, RIO; + jnz __enc2_xor; + + enc_outunpack2(mov, RT2); + + movq RRBP, %rbp; + popq %rbx; + ret; + +__enc2_xor: + enc_outunpack2(xor, RT2); + + movq RRBP, %rbp; + popq %rbx; + ret; + +.global camellia_dec_blk_2way; +.type camellia_dec_blk_2way,@function; + +camellia_dec_blk_2way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + cmpl $16, key_length(CTX); + movl $32, RT2d; + movl $24, RXORd; + cmovel RXORd, RT2d; /* max */ + + movq %rbx, RXOR; + movq %rbp, RRBP; + movq %rsi, RDST; + movq %rdx, RIO; + + dec_inpack2(RT2); + + cmpb $24, RT2bl; + je __dec2_rounds16; + + dec_rounds2(24); + dec_fls2(24); + +__dec2_rounds16: + dec_rounds2(16); + dec_fls2(16); + dec_rounds2(8); + dec_fls2(8); + dec_rounds2(0); + + movq RDST, RIO; + + dec_outunpack2(); + + movq RRBP, %rbp; + movq RXOR, %rbx; + ret; diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c new file mode 100644 index 00000000..3306dc0b --- /dev/null +++ b/arch/x86/crypto/camellia_glue.c @@ -0,0 +1,1952 @@ +/* + * Glue Code for assembler optimized version of Camellia + * + * Copyright (c) 2012 Jussi Kivilinna + * + * Camellia parts based on code by: + * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation) + * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: + * Copyright (c) 2006 Herbert Xu + * CTR part based on code (crypto/ctr.c) by: + * (C) Copyright IBM Corp. 2007 - Joy Latten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/unaligned.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+
+#define CAMELLIA_MIN_KEY_SIZE	16
+#define CAMELLIA_MAX_KEY_SIZE	32
+#define CAMELLIA_BLOCK_SIZE	16
+#define CAMELLIA_TABLE_BYTE_LEN	272
+
+struct camellia_ctx {
+	u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
+	u32 key_length;
+};
+
+/* regular block cipher functions */
+asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
+				   const u8 *src, bool xor);
+asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
+				 const u8 *src);
+
+/* 2-way parallel cipher functions */
+asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+					const u8 *src, bool xor);
+asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+				      const u8 *src);
+
+static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
+				    const u8 *src)
+{
+	__camellia_enc_blk(ctx, dst, src, false);
+}
+
+static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
+					const u8 *src)
+{
+	__camellia_enc_blk(ctx, dst, src, true);
+}
+
+static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+					 const u8 *src)
+{
+	__camellia_enc_blk_2way(ctx, dst, src, false);
+}
+
+static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
+					     const u8 *src)
+{
+	__camellia_enc_blk_2way(ctx, dst, src, true);
+}
+
+static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	camellia_enc_blk(crypto_tfm_ctx(tfm), dst, src);
+}
+
+static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	camellia_dec_blk(crypto_tfm_ctx(tfm), dst, src);
+}
+
+/* camellia sboxes */
+const u64 camellia_sp10011110[256] = {
+	0x7000007070707000, 0x8200008282828200, 0x2c00002c2c2c2c00,
+	0xec0000ecececec00, 0xb30000b3b3b3b300, 0x2700002727272700,
+	0xc00000c0c0c0c000, 0xe50000e5e5e5e500, 0xe40000e4e4e4e400,
+	0x8500008585858500, 0x5700005757575700, 0x3500003535353500,
+	0xea0000eaeaeaea00, 0x0c00000c0c0c0c00, 0xae0000aeaeaeae00,
+	0x4100004141414100, 0x2300002323232300, 0xef0000efefefef00,
+	0x6b00006b6b6b6b00, 0x9300009393939300, 0x4500004545454500,
+	0x1900001919191900, 0xa50000a5a5a5a500, 0x2100002121212100,
+	0xed0000edededed00, 0x0e00000e0e0e0e00, 0x4f00004f4f4f4f00,
+	0x4e00004e4e4e4e00, 0x1d00001d1d1d1d00, 0x6500006565656500,
+	0x9200009292929200, 0xbd0000bdbdbdbd00, 0x8600008686868600,
+	0xb80000b8b8b8b800, 0xaf0000afafafaf00, 0x8f00008f8f8f8f00,
+	0x7c00007c7c7c7c00, 0xeb0000ebebebeb00, 0x1f00001f1f1f1f00,
+	0xce0000cececece00, 0x3e00003e3e3e3e00, 0x3000003030303000,
+	0xdc0000dcdcdcdc00, 0x5f00005f5f5f5f00, 0x5e00005e5e5e5e00,
+	0xc50000c5c5c5c500, 0x0b00000b0b0b0b00, 0x1a00001a1a1a1a00,
+	0xa60000a6a6a6a600, 0xe10000e1e1e1e100, 0x3900003939393900,
+	0xca0000cacacaca00, 0xd50000d5d5d5d500, 0x4700004747474700,
+	0x5d00005d5d5d5d00, 0x3d00003d3d3d3d00, 0xd90000d9d9d9d900,
+	0x0100000101010100, 0x5a00005a5a5a5a00, 0xd60000d6d6d6d600,
+	0x5100005151515100, 0x5600005656565600, 0x6c00006c6c6c6c00,
+	0x4d00004d4d4d4d00, 0x8b00008b8b8b8b00, 0x0d00000d0d0d0d00,
+	0x9a00009a9a9a9a00, 0x6600006666666600, 0xfb0000fbfbfbfb00,
+	0xcc0000cccccccc00, 0xb00000b0b0b0b000, 0x2d00002d2d2d2d00,
+	0x7400007474747400, 0x1200001212121200, 0x2b00002b2b2b2b00,
+	0x2000002020202000, 0xf00000f0f0f0f000,
0xb10000b1b1b1b100, + 0x8400008484848400, 0x9900009999999900, 0xdf0000dfdfdfdf00, + 0x4c00004c4c4c4c00, 0xcb0000cbcbcbcb00, 0xc20000c2c2c2c200, + 0x3400003434343400, 0x7e00007e7e7e7e00, 0x7600007676767600, + 0x0500000505050500, 0x6d00006d6d6d6d00, 0xb70000b7b7b7b700, + 0xa90000a9a9a9a900, 0x3100003131313100, 0xd10000d1d1d1d100, + 0x1700001717171700, 0x0400000404040400, 0xd70000d7d7d7d700, + 0x1400001414141400, 0x5800005858585800, 0x3a00003a3a3a3a00, + 0x6100006161616100, 0xde0000dededede00, 0x1b00001b1b1b1b00, + 0x1100001111111100, 0x1c00001c1c1c1c00, 0x3200003232323200, + 0x0f00000f0f0f0f00, 0x9c00009c9c9c9c00, 0x1600001616161600, + 0x5300005353535300, 0x1800001818181800, 0xf20000f2f2f2f200, + 0x2200002222222200, 0xfe0000fefefefe00, 0x4400004444444400, + 0xcf0000cfcfcfcf00, 0xb20000b2b2b2b200, 0xc30000c3c3c3c300, + 0xb50000b5b5b5b500, 0x7a00007a7a7a7a00, 0x9100009191919100, + 0x2400002424242400, 0x0800000808080800, 0xe80000e8e8e8e800, + 0xa80000a8a8a8a800, 0x6000006060606000, 0xfc0000fcfcfcfc00, + 0x6900006969696900, 0x5000005050505000, 0xaa0000aaaaaaaa00, + 0xd00000d0d0d0d000, 0xa00000a0a0a0a000, 0x7d00007d7d7d7d00, + 0xa10000a1a1a1a100, 0x8900008989898900, 0x6200006262626200, + 0x9700009797979700, 0x5400005454545400, 0x5b00005b5b5b5b00, + 0x1e00001e1e1e1e00, 0x9500009595959500, 0xe00000e0e0e0e000, + 0xff0000ffffffff00, 0x6400006464646400, 0xd20000d2d2d2d200, + 0x1000001010101000, 0xc40000c4c4c4c400, 0x0000000000000000, + 0x4800004848484800, 0xa30000a3a3a3a300, 0xf70000f7f7f7f700, + 0x7500007575757500, 0xdb0000dbdbdbdb00, 0x8a00008a8a8a8a00, + 0x0300000303030300, 0xe60000e6e6e6e600, 0xda0000dadadada00, + 0x0900000909090900, 0x3f00003f3f3f3f00, 0xdd0000dddddddd00, + 0x9400009494949400, 0x8700008787878700, 0x5c00005c5c5c5c00, + 0x8300008383838300, 0x0200000202020200, 0xcd0000cdcdcdcd00, + 0x4a00004a4a4a4a00, 0x9000009090909000, 0x3300003333333300, + 0x7300007373737300, 0x6700006767676700, 0xf60000f6f6f6f600, + 0xf30000f3f3f3f300, 0x9d00009d9d9d9d00, 0x7f00007f7f7f7f00, + 0xbf0000bfbfbfbf00, 0xe20000e2e2e2e200, 0x5200005252525200, + 0x9b00009b9b9b9b00, 0xd80000d8d8d8d800, 0x2600002626262600, + 0xc80000c8c8c8c800, 0x3700003737373700, 0xc60000c6c6c6c600, + 0x3b00003b3b3b3b00, 0x8100008181818100, 0x9600009696969600, + 0x6f00006f6f6f6f00, 0x4b00004b4b4b4b00, 0x1300001313131300, + 0xbe0000bebebebe00, 0x6300006363636300, 0x2e00002e2e2e2e00, + 0xe90000e9e9e9e900, 0x7900007979797900, 0xa70000a7a7a7a700, + 0x8c00008c8c8c8c00, 0x9f00009f9f9f9f00, 0x6e00006e6e6e6e00, + 0xbc0000bcbcbcbc00, 0x8e00008e8e8e8e00, 0x2900002929292900, + 0xf50000f5f5f5f500, 0xf90000f9f9f9f900, 0xb60000b6b6b6b600, + 0x2f00002f2f2f2f00, 0xfd0000fdfdfdfd00, 0xb40000b4b4b4b400, + 0x5900005959595900, 0x7800007878787800, 0x9800009898989800, + 0x0600000606060600, 0x6a00006a6a6a6a00, 0xe70000e7e7e7e700, + 0x4600004646464600, 0x7100007171717100, 0xba0000babababa00, + 0xd40000d4d4d4d400, 0x2500002525252500, 0xab0000abababab00, + 0x4200004242424200, 0x8800008888888800, 0xa20000a2a2a2a200, + 0x8d00008d8d8d8d00, 0xfa0000fafafafa00, 0x7200007272727200, + 0x0700000707070700, 0xb90000b9b9b9b900, 0x5500005555555500, + 0xf80000f8f8f8f800, 0xee0000eeeeeeee00, 0xac0000acacacac00, + 0x0a00000a0a0a0a00, 0x3600003636363600, 0x4900004949494900, + 0x2a00002a2a2a2a00, 0x6800006868686800, 0x3c00003c3c3c3c00, + 0x3800003838383800, 0xf10000f1f1f1f100, 0xa40000a4a4a4a400, + 0x4000004040404000, 0x2800002828282800, 0xd30000d3d3d3d300, + 0x7b00007b7b7b7b00, 0xbb0000bbbbbbbb00, 0xc90000c9c9c9c900, + 0x4300004343434300, 0xc10000c1c1c1c100, 0x1500001515151500, 
+ 0xe30000e3e3e3e300, 0xad0000adadadad00, 0xf40000f4f4f4f400, + 0x7700007777777700, 0xc70000c7c7c7c700, 0x8000008080808000, + 0x9e00009e9e9e9e00, +}; + +const u64 camellia_sp22000222[256] = { + 0xe0e0000000e0e0e0, 0x0505000000050505, 0x5858000000585858, + 0xd9d9000000d9d9d9, 0x6767000000676767, 0x4e4e0000004e4e4e, + 0x8181000000818181, 0xcbcb000000cbcbcb, 0xc9c9000000c9c9c9, + 0x0b0b0000000b0b0b, 0xaeae000000aeaeae, 0x6a6a0000006a6a6a, + 0xd5d5000000d5d5d5, 0x1818000000181818, 0x5d5d0000005d5d5d, + 0x8282000000828282, 0x4646000000464646, 0xdfdf000000dfdfdf, + 0xd6d6000000d6d6d6, 0x2727000000272727, 0x8a8a0000008a8a8a, + 0x3232000000323232, 0x4b4b0000004b4b4b, 0x4242000000424242, + 0xdbdb000000dbdbdb, 0x1c1c0000001c1c1c, 0x9e9e0000009e9e9e, + 0x9c9c0000009c9c9c, 0x3a3a0000003a3a3a, 0xcaca000000cacaca, + 0x2525000000252525, 0x7b7b0000007b7b7b, 0x0d0d0000000d0d0d, + 0x7171000000717171, 0x5f5f0000005f5f5f, 0x1f1f0000001f1f1f, + 0xf8f8000000f8f8f8, 0xd7d7000000d7d7d7, 0x3e3e0000003e3e3e, + 0x9d9d0000009d9d9d, 0x7c7c0000007c7c7c, 0x6060000000606060, + 0xb9b9000000b9b9b9, 0xbebe000000bebebe, 0xbcbc000000bcbcbc, + 0x8b8b0000008b8b8b, 0x1616000000161616, 0x3434000000343434, + 0x4d4d0000004d4d4d, 0xc3c3000000c3c3c3, 0x7272000000727272, + 0x9595000000959595, 0xabab000000ababab, 0x8e8e0000008e8e8e, + 0xbaba000000bababa, 0x7a7a0000007a7a7a, 0xb3b3000000b3b3b3, + 0x0202000000020202, 0xb4b4000000b4b4b4, 0xadad000000adadad, + 0xa2a2000000a2a2a2, 0xacac000000acacac, 0xd8d8000000d8d8d8, + 0x9a9a0000009a9a9a, 0x1717000000171717, 0x1a1a0000001a1a1a, + 0x3535000000353535, 0xcccc000000cccccc, 0xf7f7000000f7f7f7, + 0x9999000000999999, 0x6161000000616161, 0x5a5a0000005a5a5a, + 0xe8e8000000e8e8e8, 0x2424000000242424, 0x5656000000565656, + 0x4040000000404040, 0xe1e1000000e1e1e1, 0x6363000000636363, + 0x0909000000090909, 0x3333000000333333, 0xbfbf000000bfbfbf, + 0x9898000000989898, 0x9797000000979797, 0x8585000000858585, + 0x6868000000686868, 0xfcfc000000fcfcfc, 0xecec000000ececec, + 0x0a0a0000000a0a0a, 0xdada000000dadada, 0x6f6f0000006f6f6f, + 0x5353000000535353, 0x6262000000626262, 0xa3a3000000a3a3a3, + 0x2e2e0000002e2e2e, 0x0808000000080808, 0xafaf000000afafaf, + 0x2828000000282828, 0xb0b0000000b0b0b0, 0x7474000000747474, + 0xc2c2000000c2c2c2, 0xbdbd000000bdbdbd, 0x3636000000363636, + 0x2222000000222222, 0x3838000000383838, 0x6464000000646464, + 0x1e1e0000001e1e1e, 0x3939000000393939, 0x2c2c0000002c2c2c, + 0xa6a6000000a6a6a6, 0x3030000000303030, 0xe5e5000000e5e5e5, + 0x4444000000444444, 0xfdfd000000fdfdfd, 0x8888000000888888, + 0x9f9f0000009f9f9f, 0x6565000000656565, 0x8787000000878787, + 0x6b6b0000006b6b6b, 0xf4f4000000f4f4f4, 0x2323000000232323, + 0x4848000000484848, 0x1010000000101010, 0xd1d1000000d1d1d1, + 0x5151000000515151, 0xc0c0000000c0c0c0, 0xf9f9000000f9f9f9, + 0xd2d2000000d2d2d2, 0xa0a0000000a0a0a0, 0x5555000000555555, + 0xa1a1000000a1a1a1, 0x4141000000414141, 0xfafa000000fafafa, + 0x4343000000434343, 0x1313000000131313, 0xc4c4000000c4c4c4, + 0x2f2f0000002f2f2f, 0xa8a8000000a8a8a8, 0xb6b6000000b6b6b6, + 0x3c3c0000003c3c3c, 0x2b2b0000002b2b2b, 0xc1c1000000c1c1c1, + 0xffff000000ffffff, 0xc8c8000000c8c8c8, 0xa5a5000000a5a5a5, + 0x2020000000202020, 0x8989000000898989, 0x0000000000000000, + 0x9090000000909090, 0x4747000000474747, 0xefef000000efefef, + 0xeaea000000eaeaea, 0xb7b7000000b7b7b7, 0x1515000000151515, + 0x0606000000060606, 0xcdcd000000cdcdcd, 0xb5b5000000b5b5b5, + 0x1212000000121212, 0x7e7e0000007e7e7e, 0xbbbb000000bbbbbb, + 0x2929000000292929, 0x0f0f0000000f0f0f, 0xb8b8000000b8b8b8, + 
0x0707000000070707, 0x0404000000040404, 0x9b9b0000009b9b9b, + 0x9494000000949494, 0x2121000000212121, 0x6666000000666666, + 0xe6e6000000e6e6e6, 0xcece000000cecece, 0xeded000000ededed, + 0xe7e7000000e7e7e7, 0x3b3b0000003b3b3b, 0xfefe000000fefefe, + 0x7f7f0000007f7f7f, 0xc5c5000000c5c5c5, 0xa4a4000000a4a4a4, + 0x3737000000373737, 0xb1b1000000b1b1b1, 0x4c4c0000004c4c4c, + 0x9191000000919191, 0x6e6e0000006e6e6e, 0x8d8d0000008d8d8d, + 0x7676000000767676, 0x0303000000030303, 0x2d2d0000002d2d2d, + 0xdede000000dedede, 0x9696000000969696, 0x2626000000262626, + 0x7d7d0000007d7d7d, 0xc6c6000000c6c6c6, 0x5c5c0000005c5c5c, + 0xd3d3000000d3d3d3, 0xf2f2000000f2f2f2, 0x4f4f0000004f4f4f, + 0x1919000000191919, 0x3f3f0000003f3f3f, 0xdcdc000000dcdcdc, + 0x7979000000797979, 0x1d1d0000001d1d1d, 0x5252000000525252, + 0xebeb000000ebebeb, 0xf3f3000000f3f3f3, 0x6d6d0000006d6d6d, + 0x5e5e0000005e5e5e, 0xfbfb000000fbfbfb, 0x6969000000696969, + 0xb2b2000000b2b2b2, 0xf0f0000000f0f0f0, 0x3131000000313131, + 0x0c0c0000000c0c0c, 0xd4d4000000d4d4d4, 0xcfcf000000cfcfcf, + 0x8c8c0000008c8c8c, 0xe2e2000000e2e2e2, 0x7575000000757575, + 0xa9a9000000a9a9a9, 0x4a4a0000004a4a4a, 0x5757000000575757, + 0x8484000000848484, 0x1111000000111111, 0x4545000000454545, + 0x1b1b0000001b1b1b, 0xf5f5000000f5f5f5, 0xe4e4000000e4e4e4, + 0x0e0e0000000e0e0e, 0x7373000000737373, 0xaaaa000000aaaaaa, + 0xf1f1000000f1f1f1, 0xdddd000000dddddd, 0x5959000000595959, + 0x1414000000141414, 0x6c6c0000006c6c6c, 0x9292000000929292, + 0x5454000000545454, 0xd0d0000000d0d0d0, 0x7878000000787878, + 0x7070000000707070, 0xe3e3000000e3e3e3, 0x4949000000494949, + 0x8080000000808080, 0x5050000000505050, 0xa7a7000000a7a7a7, + 0xf6f6000000f6f6f6, 0x7777000000777777, 0x9393000000939393, + 0x8686000000868686, 0x8383000000838383, 0x2a2a0000002a2a2a, + 0xc7c7000000c7c7c7, 0x5b5b0000005b5b5b, 0xe9e9000000e9e9e9, + 0xeeee000000eeeeee, 0x8f8f0000008f8f8f, 0x0101000000010101, + 0x3d3d0000003d3d3d, +}; + +const u64 camellia_sp03303033[256] = { + 0x0038380038003838, 0x0041410041004141, 0x0016160016001616, + 0x0076760076007676, 0x00d9d900d900d9d9, 0x0093930093009393, + 0x0060600060006060, 0x00f2f200f200f2f2, 0x0072720072007272, + 0x00c2c200c200c2c2, 0x00abab00ab00abab, 0x009a9a009a009a9a, + 0x0075750075007575, 0x0006060006000606, 0x0057570057005757, + 0x00a0a000a000a0a0, 0x0091910091009191, 0x00f7f700f700f7f7, + 0x00b5b500b500b5b5, 0x00c9c900c900c9c9, 0x00a2a200a200a2a2, + 0x008c8c008c008c8c, 0x00d2d200d200d2d2, 0x0090900090009090, + 0x00f6f600f600f6f6, 0x0007070007000707, 0x00a7a700a700a7a7, + 0x0027270027002727, 0x008e8e008e008e8e, 0x00b2b200b200b2b2, + 0x0049490049004949, 0x00dede00de00dede, 0x0043430043004343, + 0x005c5c005c005c5c, 0x00d7d700d700d7d7, 0x00c7c700c700c7c7, + 0x003e3e003e003e3e, 0x00f5f500f500f5f5, 0x008f8f008f008f8f, + 0x0067670067006767, 0x001f1f001f001f1f, 0x0018180018001818, + 0x006e6e006e006e6e, 0x00afaf00af00afaf, 0x002f2f002f002f2f, + 0x00e2e200e200e2e2, 0x0085850085008585, 0x000d0d000d000d0d, + 0x0053530053005353, 0x00f0f000f000f0f0, 0x009c9c009c009c9c, + 0x0065650065006565, 0x00eaea00ea00eaea, 0x00a3a300a300a3a3, + 0x00aeae00ae00aeae, 0x009e9e009e009e9e, 0x00ecec00ec00ecec, + 0x0080800080008080, 0x002d2d002d002d2d, 0x006b6b006b006b6b, + 0x00a8a800a800a8a8, 0x002b2b002b002b2b, 0x0036360036003636, + 0x00a6a600a600a6a6, 0x00c5c500c500c5c5, 0x0086860086008686, + 0x004d4d004d004d4d, 0x0033330033003333, 0x00fdfd00fd00fdfd, + 0x0066660066006666, 0x0058580058005858, 0x0096960096009696, + 0x003a3a003a003a3a, 0x0009090009000909, 0x0095950095009595, + 
0x0010100010001010, 0x0078780078007878, 0x00d8d800d800d8d8, + 0x0042420042004242, 0x00cccc00cc00cccc, 0x00efef00ef00efef, + 0x0026260026002626, 0x00e5e500e500e5e5, 0x0061610061006161, + 0x001a1a001a001a1a, 0x003f3f003f003f3f, 0x003b3b003b003b3b, + 0x0082820082008282, 0x00b6b600b600b6b6, 0x00dbdb00db00dbdb, + 0x00d4d400d400d4d4, 0x0098980098009898, 0x00e8e800e800e8e8, + 0x008b8b008b008b8b, 0x0002020002000202, 0x00ebeb00eb00ebeb, + 0x000a0a000a000a0a, 0x002c2c002c002c2c, 0x001d1d001d001d1d, + 0x00b0b000b000b0b0, 0x006f6f006f006f6f, 0x008d8d008d008d8d, + 0x0088880088008888, 0x000e0e000e000e0e, 0x0019190019001919, + 0x0087870087008787, 0x004e4e004e004e4e, 0x000b0b000b000b0b, + 0x00a9a900a900a9a9, 0x000c0c000c000c0c, 0x0079790079007979, + 0x0011110011001111, 0x007f7f007f007f7f, 0x0022220022002222, + 0x00e7e700e700e7e7, 0x0059590059005959, 0x00e1e100e100e1e1, + 0x00dada00da00dada, 0x003d3d003d003d3d, 0x00c8c800c800c8c8, + 0x0012120012001212, 0x0004040004000404, 0x0074740074007474, + 0x0054540054005454, 0x0030300030003030, 0x007e7e007e007e7e, + 0x00b4b400b400b4b4, 0x0028280028002828, 0x0055550055005555, + 0x0068680068006868, 0x0050500050005050, 0x00bebe00be00bebe, + 0x00d0d000d000d0d0, 0x00c4c400c400c4c4, 0x0031310031003131, + 0x00cbcb00cb00cbcb, 0x002a2a002a002a2a, 0x00adad00ad00adad, + 0x000f0f000f000f0f, 0x00caca00ca00caca, 0x0070700070007070, + 0x00ffff00ff00ffff, 0x0032320032003232, 0x0069690069006969, + 0x0008080008000808, 0x0062620062006262, 0x0000000000000000, + 0x0024240024002424, 0x00d1d100d100d1d1, 0x00fbfb00fb00fbfb, + 0x00baba00ba00baba, 0x00eded00ed00eded, 0x0045450045004545, + 0x0081810081008181, 0x0073730073007373, 0x006d6d006d006d6d, + 0x0084840084008484, 0x009f9f009f009f9f, 0x00eeee00ee00eeee, + 0x004a4a004a004a4a, 0x00c3c300c300c3c3, 0x002e2e002e002e2e, + 0x00c1c100c100c1c1, 0x0001010001000101, 0x00e6e600e600e6e6, + 0x0025250025002525, 0x0048480048004848, 0x0099990099009999, + 0x00b9b900b900b9b9, 0x00b3b300b300b3b3, 0x007b7b007b007b7b, + 0x00f9f900f900f9f9, 0x00cece00ce00cece, 0x00bfbf00bf00bfbf, + 0x00dfdf00df00dfdf, 0x0071710071007171, 0x0029290029002929, + 0x00cdcd00cd00cdcd, 0x006c6c006c006c6c, 0x0013130013001313, + 0x0064640064006464, 0x009b9b009b009b9b, 0x0063630063006363, + 0x009d9d009d009d9d, 0x00c0c000c000c0c0, 0x004b4b004b004b4b, + 0x00b7b700b700b7b7, 0x00a5a500a500a5a5, 0x0089890089008989, + 0x005f5f005f005f5f, 0x00b1b100b100b1b1, 0x0017170017001717, + 0x00f4f400f400f4f4, 0x00bcbc00bc00bcbc, 0x00d3d300d300d3d3, + 0x0046460046004646, 0x00cfcf00cf00cfcf, 0x0037370037003737, + 0x005e5e005e005e5e, 0x0047470047004747, 0x0094940094009494, + 0x00fafa00fa00fafa, 0x00fcfc00fc00fcfc, 0x005b5b005b005b5b, + 0x0097970097009797, 0x00fefe00fe00fefe, 0x005a5a005a005a5a, + 0x00acac00ac00acac, 0x003c3c003c003c3c, 0x004c4c004c004c4c, + 0x0003030003000303, 0x0035350035003535, 0x00f3f300f300f3f3, + 0x0023230023002323, 0x00b8b800b800b8b8, 0x005d5d005d005d5d, + 0x006a6a006a006a6a, 0x0092920092009292, 0x00d5d500d500d5d5, + 0x0021210021002121, 0x0044440044004444, 0x0051510051005151, + 0x00c6c600c600c6c6, 0x007d7d007d007d7d, 0x0039390039003939, + 0x0083830083008383, 0x00dcdc00dc00dcdc, 0x00aaaa00aa00aaaa, + 0x007c7c007c007c7c, 0x0077770077007777, 0x0056560056005656, + 0x0005050005000505, 0x001b1b001b001b1b, 0x00a4a400a400a4a4, + 0x0015150015001515, 0x0034340034003434, 0x001e1e001e001e1e, + 0x001c1c001c001c1c, 0x00f8f800f800f8f8, 0x0052520052005252, + 0x0020200020002020, 0x0014140014001414, 0x00e9e900e900e9e9, + 0x00bdbd00bd00bdbd, 0x00dddd00dd00dddd, 0x00e4e400e400e4e4, + 0x00a1a100a100a1a1, 
0x00e0e000e000e0e0, 0x008a8a008a008a8a, + 0x00f1f100f100f1f1, 0x00d6d600d600d6d6, 0x007a7a007a007a7a, + 0x00bbbb00bb00bbbb, 0x00e3e300e300e3e3, 0x0040400040004040, + 0x004f4f004f004f4f, +}; + +const u64 camellia_sp00444404[256] = { + 0x0000707070700070, 0x00002c2c2c2c002c, 0x0000b3b3b3b300b3, + 0x0000c0c0c0c000c0, 0x0000e4e4e4e400e4, 0x0000575757570057, + 0x0000eaeaeaea00ea, 0x0000aeaeaeae00ae, 0x0000232323230023, + 0x00006b6b6b6b006b, 0x0000454545450045, 0x0000a5a5a5a500a5, + 0x0000edededed00ed, 0x00004f4f4f4f004f, 0x00001d1d1d1d001d, + 0x0000929292920092, 0x0000868686860086, 0x0000afafafaf00af, + 0x00007c7c7c7c007c, 0x00001f1f1f1f001f, 0x00003e3e3e3e003e, + 0x0000dcdcdcdc00dc, 0x00005e5e5e5e005e, 0x00000b0b0b0b000b, + 0x0000a6a6a6a600a6, 0x0000393939390039, 0x0000d5d5d5d500d5, + 0x00005d5d5d5d005d, 0x0000d9d9d9d900d9, 0x00005a5a5a5a005a, + 0x0000515151510051, 0x00006c6c6c6c006c, 0x00008b8b8b8b008b, + 0x00009a9a9a9a009a, 0x0000fbfbfbfb00fb, 0x0000b0b0b0b000b0, + 0x0000747474740074, 0x00002b2b2b2b002b, 0x0000f0f0f0f000f0, + 0x0000848484840084, 0x0000dfdfdfdf00df, 0x0000cbcbcbcb00cb, + 0x0000343434340034, 0x0000767676760076, 0x00006d6d6d6d006d, + 0x0000a9a9a9a900a9, 0x0000d1d1d1d100d1, 0x0000040404040004, + 0x0000141414140014, 0x00003a3a3a3a003a, 0x0000dededede00de, + 0x0000111111110011, 0x0000323232320032, 0x00009c9c9c9c009c, + 0x0000535353530053, 0x0000f2f2f2f200f2, 0x0000fefefefe00fe, + 0x0000cfcfcfcf00cf, 0x0000c3c3c3c300c3, 0x00007a7a7a7a007a, + 0x0000242424240024, 0x0000e8e8e8e800e8, 0x0000606060600060, + 0x0000696969690069, 0x0000aaaaaaaa00aa, 0x0000a0a0a0a000a0, + 0x0000a1a1a1a100a1, 0x0000626262620062, 0x0000545454540054, + 0x00001e1e1e1e001e, 0x0000e0e0e0e000e0, 0x0000646464640064, + 0x0000101010100010, 0x0000000000000000, 0x0000a3a3a3a300a3, + 0x0000757575750075, 0x00008a8a8a8a008a, 0x0000e6e6e6e600e6, + 0x0000090909090009, 0x0000dddddddd00dd, 0x0000878787870087, + 0x0000838383830083, 0x0000cdcdcdcd00cd, 0x0000909090900090, + 0x0000737373730073, 0x0000f6f6f6f600f6, 0x00009d9d9d9d009d, + 0x0000bfbfbfbf00bf, 0x0000525252520052, 0x0000d8d8d8d800d8, + 0x0000c8c8c8c800c8, 0x0000c6c6c6c600c6, 0x0000818181810081, + 0x00006f6f6f6f006f, 0x0000131313130013, 0x0000636363630063, + 0x0000e9e9e9e900e9, 0x0000a7a7a7a700a7, 0x00009f9f9f9f009f, + 0x0000bcbcbcbc00bc, 0x0000292929290029, 0x0000f9f9f9f900f9, + 0x00002f2f2f2f002f, 0x0000b4b4b4b400b4, 0x0000787878780078, + 0x0000060606060006, 0x0000e7e7e7e700e7, 0x0000717171710071, + 0x0000d4d4d4d400d4, 0x0000abababab00ab, 0x0000888888880088, + 0x00008d8d8d8d008d, 0x0000727272720072, 0x0000b9b9b9b900b9, + 0x0000f8f8f8f800f8, 0x0000acacacac00ac, 0x0000363636360036, + 0x00002a2a2a2a002a, 0x00003c3c3c3c003c, 0x0000f1f1f1f100f1, + 0x0000404040400040, 0x0000d3d3d3d300d3, 0x0000bbbbbbbb00bb, + 0x0000434343430043, 0x0000151515150015, 0x0000adadadad00ad, + 0x0000777777770077, 0x0000808080800080, 0x0000828282820082, + 0x0000ecececec00ec, 0x0000272727270027, 0x0000e5e5e5e500e5, + 0x0000858585850085, 0x0000353535350035, 0x00000c0c0c0c000c, + 0x0000414141410041, 0x0000efefefef00ef, 0x0000939393930093, + 0x0000191919190019, 0x0000212121210021, 0x00000e0e0e0e000e, + 0x00004e4e4e4e004e, 0x0000656565650065, 0x0000bdbdbdbd00bd, + 0x0000b8b8b8b800b8, 0x00008f8f8f8f008f, 0x0000ebebebeb00eb, + 0x0000cececece00ce, 0x0000303030300030, 0x00005f5f5f5f005f, + 0x0000c5c5c5c500c5, 0x00001a1a1a1a001a, 0x0000e1e1e1e100e1, + 0x0000cacacaca00ca, 0x0000474747470047, 0x00003d3d3d3d003d, + 0x0000010101010001, 0x0000d6d6d6d600d6, 0x0000565656560056, + 0x00004d4d4d4d004d, 
0x00000d0d0d0d000d, 0x0000666666660066, + 0x0000cccccccc00cc, 0x00002d2d2d2d002d, 0x0000121212120012, + 0x0000202020200020, 0x0000b1b1b1b100b1, 0x0000999999990099, + 0x00004c4c4c4c004c, 0x0000c2c2c2c200c2, 0x00007e7e7e7e007e, + 0x0000050505050005, 0x0000b7b7b7b700b7, 0x0000313131310031, + 0x0000171717170017, 0x0000d7d7d7d700d7, 0x0000585858580058, + 0x0000616161610061, 0x00001b1b1b1b001b, 0x00001c1c1c1c001c, + 0x00000f0f0f0f000f, 0x0000161616160016, 0x0000181818180018, + 0x0000222222220022, 0x0000444444440044, 0x0000b2b2b2b200b2, + 0x0000b5b5b5b500b5, 0x0000919191910091, 0x0000080808080008, + 0x0000a8a8a8a800a8, 0x0000fcfcfcfc00fc, 0x0000505050500050, + 0x0000d0d0d0d000d0, 0x00007d7d7d7d007d, 0x0000898989890089, + 0x0000979797970097, 0x00005b5b5b5b005b, 0x0000959595950095, + 0x0000ffffffff00ff, 0x0000d2d2d2d200d2, 0x0000c4c4c4c400c4, + 0x0000484848480048, 0x0000f7f7f7f700f7, 0x0000dbdbdbdb00db, + 0x0000030303030003, 0x0000dadadada00da, 0x00003f3f3f3f003f, + 0x0000949494940094, 0x00005c5c5c5c005c, 0x0000020202020002, + 0x00004a4a4a4a004a, 0x0000333333330033, 0x0000676767670067, + 0x0000f3f3f3f300f3, 0x00007f7f7f7f007f, 0x0000e2e2e2e200e2, + 0x00009b9b9b9b009b, 0x0000262626260026, 0x0000373737370037, + 0x00003b3b3b3b003b, 0x0000969696960096, 0x00004b4b4b4b004b, + 0x0000bebebebe00be, 0x00002e2e2e2e002e, 0x0000797979790079, + 0x00008c8c8c8c008c, 0x00006e6e6e6e006e, 0x00008e8e8e8e008e, + 0x0000f5f5f5f500f5, 0x0000b6b6b6b600b6, 0x0000fdfdfdfd00fd, + 0x0000595959590059, 0x0000989898980098, 0x00006a6a6a6a006a, + 0x0000464646460046, 0x0000babababa00ba, 0x0000252525250025, + 0x0000424242420042, 0x0000a2a2a2a200a2, 0x0000fafafafa00fa, + 0x0000070707070007, 0x0000555555550055, 0x0000eeeeeeee00ee, + 0x00000a0a0a0a000a, 0x0000494949490049, 0x0000686868680068, + 0x0000383838380038, 0x0000a4a4a4a400a4, 0x0000282828280028, + 0x00007b7b7b7b007b, 0x0000c9c9c9c900c9, 0x0000c1c1c1c100c1, + 0x0000e3e3e3e300e3, 0x0000f4f4f4f400f4, 0x0000c7c7c7c700c7, + 0x00009e9e9e9e009e, +}; + +const u64 camellia_sp02220222[256] = { + 0x00e0e0e000e0e0e0, 0x0005050500050505, 0x0058585800585858, + 0x00d9d9d900d9d9d9, 0x0067676700676767, 0x004e4e4e004e4e4e, + 0x0081818100818181, 0x00cbcbcb00cbcbcb, 0x00c9c9c900c9c9c9, + 0x000b0b0b000b0b0b, 0x00aeaeae00aeaeae, 0x006a6a6a006a6a6a, + 0x00d5d5d500d5d5d5, 0x0018181800181818, 0x005d5d5d005d5d5d, + 0x0082828200828282, 0x0046464600464646, 0x00dfdfdf00dfdfdf, + 0x00d6d6d600d6d6d6, 0x0027272700272727, 0x008a8a8a008a8a8a, + 0x0032323200323232, 0x004b4b4b004b4b4b, 0x0042424200424242, + 0x00dbdbdb00dbdbdb, 0x001c1c1c001c1c1c, 0x009e9e9e009e9e9e, + 0x009c9c9c009c9c9c, 0x003a3a3a003a3a3a, 0x00cacaca00cacaca, + 0x0025252500252525, 0x007b7b7b007b7b7b, 0x000d0d0d000d0d0d, + 0x0071717100717171, 0x005f5f5f005f5f5f, 0x001f1f1f001f1f1f, + 0x00f8f8f800f8f8f8, 0x00d7d7d700d7d7d7, 0x003e3e3e003e3e3e, + 0x009d9d9d009d9d9d, 0x007c7c7c007c7c7c, 0x0060606000606060, + 0x00b9b9b900b9b9b9, 0x00bebebe00bebebe, 0x00bcbcbc00bcbcbc, + 0x008b8b8b008b8b8b, 0x0016161600161616, 0x0034343400343434, + 0x004d4d4d004d4d4d, 0x00c3c3c300c3c3c3, 0x0072727200727272, + 0x0095959500959595, 0x00ababab00ababab, 0x008e8e8e008e8e8e, + 0x00bababa00bababa, 0x007a7a7a007a7a7a, 0x00b3b3b300b3b3b3, + 0x0002020200020202, 0x00b4b4b400b4b4b4, 0x00adadad00adadad, + 0x00a2a2a200a2a2a2, 0x00acacac00acacac, 0x00d8d8d800d8d8d8, + 0x009a9a9a009a9a9a, 0x0017171700171717, 0x001a1a1a001a1a1a, + 0x0035353500353535, 0x00cccccc00cccccc, 0x00f7f7f700f7f7f7, + 0x0099999900999999, 0x0061616100616161, 0x005a5a5a005a5a5a, + 0x00e8e8e800e8e8e8, 
0x0024242400242424, 0x0056565600565656, + 0x0040404000404040, 0x00e1e1e100e1e1e1, 0x0063636300636363, + 0x0009090900090909, 0x0033333300333333, 0x00bfbfbf00bfbfbf, + 0x0098989800989898, 0x0097979700979797, 0x0085858500858585, + 0x0068686800686868, 0x00fcfcfc00fcfcfc, 0x00ececec00ececec, + 0x000a0a0a000a0a0a, 0x00dadada00dadada, 0x006f6f6f006f6f6f, + 0x0053535300535353, 0x0062626200626262, 0x00a3a3a300a3a3a3, + 0x002e2e2e002e2e2e, 0x0008080800080808, 0x00afafaf00afafaf, + 0x0028282800282828, 0x00b0b0b000b0b0b0, 0x0074747400747474, + 0x00c2c2c200c2c2c2, 0x00bdbdbd00bdbdbd, 0x0036363600363636, + 0x0022222200222222, 0x0038383800383838, 0x0064646400646464, + 0x001e1e1e001e1e1e, 0x0039393900393939, 0x002c2c2c002c2c2c, + 0x00a6a6a600a6a6a6, 0x0030303000303030, 0x00e5e5e500e5e5e5, + 0x0044444400444444, 0x00fdfdfd00fdfdfd, 0x0088888800888888, + 0x009f9f9f009f9f9f, 0x0065656500656565, 0x0087878700878787, + 0x006b6b6b006b6b6b, 0x00f4f4f400f4f4f4, 0x0023232300232323, + 0x0048484800484848, 0x0010101000101010, 0x00d1d1d100d1d1d1, + 0x0051515100515151, 0x00c0c0c000c0c0c0, 0x00f9f9f900f9f9f9, + 0x00d2d2d200d2d2d2, 0x00a0a0a000a0a0a0, 0x0055555500555555, + 0x00a1a1a100a1a1a1, 0x0041414100414141, 0x00fafafa00fafafa, + 0x0043434300434343, 0x0013131300131313, 0x00c4c4c400c4c4c4, + 0x002f2f2f002f2f2f, 0x00a8a8a800a8a8a8, 0x00b6b6b600b6b6b6, + 0x003c3c3c003c3c3c, 0x002b2b2b002b2b2b, 0x00c1c1c100c1c1c1, + 0x00ffffff00ffffff, 0x00c8c8c800c8c8c8, 0x00a5a5a500a5a5a5, + 0x0020202000202020, 0x0089898900898989, 0x0000000000000000, + 0x0090909000909090, 0x0047474700474747, 0x00efefef00efefef, + 0x00eaeaea00eaeaea, 0x00b7b7b700b7b7b7, 0x0015151500151515, + 0x0006060600060606, 0x00cdcdcd00cdcdcd, 0x00b5b5b500b5b5b5, + 0x0012121200121212, 0x007e7e7e007e7e7e, 0x00bbbbbb00bbbbbb, + 0x0029292900292929, 0x000f0f0f000f0f0f, 0x00b8b8b800b8b8b8, + 0x0007070700070707, 0x0004040400040404, 0x009b9b9b009b9b9b, + 0x0094949400949494, 0x0021212100212121, 0x0066666600666666, + 0x00e6e6e600e6e6e6, 0x00cecece00cecece, 0x00ededed00ededed, + 0x00e7e7e700e7e7e7, 0x003b3b3b003b3b3b, 0x00fefefe00fefefe, + 0x007f7f7f007f7f7f, 0x00c5c5c500c5c5c5, 0x00a4a4a400a4a4a4, + 0x0037373700373737, 0x00b1b1b100b1b1b1, 0x004c4c4c004c4c4c, + 0x0091919100919191, 0x006e6e6e006e6e6e, 0x008d8d8d008d8d8d, + 0x0076767600767676, 0x0003030300030303, 0x002d2d2d002d2d2d, + 0x00dedede00dedede, 0x0096969600969696, 0x0026262600262626, + 0x007d7d7d007d7d7d, 0x00c6c6c600c6c6c6, 0x005c5c5c005c5c5c, + 0x00d3d3d300d3d3d3, 0x00f2f2f200f2f2f2, 0x004f4f4f004f4f4f, + 0x0019191900191919, 0x003f3f3f003f3f3f, 0x00dcdcdc00dcdcdc, + 0x0079797900797979, 0x001d1d1d001d1d1d, 0x0052525200525252, + 0x00ebebeb00ebebeb, 0x00f3f3f300f3f3f3, 0x006d6d6d006d6d6d, + 0x005e5e5e005e5e5e, 0x00fbfbfb00fbfbfb, 0x0069696900696969, + 0x00b2b2b200b2b2b2, 0x00f0f0f000f0f0f0, 0x0031313100313131, + 0x000c0c0c000c0c0c, 0x00d4d4d400d4d4d4, 0x00cfcfcf00cfcfcf, + 0x008c8c8c008c8c8c, 0x00e2e2e200e2e2e2, 0x0075757500757575, + 0x00a9a9a900a9a9a9, 0x004a4a4a004a4a4a, 0x0057575700575757, + 0x0084848400848484, 0x0011111100111111, 0x0045454500454545, + 0x001b1b1b001b1b1b, 0x00f5f5f500f5f5f5, 0x00e4e4e400e4e4e4, + 0x000e0e0e000e0e0e, 0x0073737300737373, 0x00aaaaaa00aaaaaa, + 0x00f1f1f100f1f1f1, 0x00dddddd00dddddd, 0x0059595900595959, + 0x0014141400141414, 0x006c6c6c006c6c6c, 0x0092929200929292, + 0x0054545400545454, 0x00d0d0d000d0d0d0, 0x0078787800787878, + 0x0070707000707070, 0x00e3e3e300e3e3e3, 0x0049494900494949, + 0x0080808000808080, 0x0050505000505050, 0x00a7a7a700a7a7a7, + 0x00f6f6f600f6f6f6, 0x0077777700777777, 
0x0093939300939393, + 0x0086868600868686, 0x0083838300838383, 0x002a2a2a002a2a2a, + 0x00c7c7c700c7c7c7, 0x005b5b5b005b5b5b, 0x00e9e9e900e9e9e9, + 0x00eeeeee00eeeeee, 0x008f8f8f008f8f8f, 0x0001010100010101, + 0x003d3d3d003d3d3d, +}; + +const u64 camellia_sp30333033[256] = { + 0x3800383838003838, 0x4100414141004141, 0x1600161616001616, + 0x7600767676007676, 0xd900d9d9d900d9d9, 0x9300939393009393, + 0x6000606060006060, 0xf200f2f2f200f2f2, 0x7200727272007272, + 0xc200c2c2c200c2c2, 0xab00ababab00abab, 0x9a009a9a9a009a9a, + 0x7500757575007575, 0x0600060606000606, 0x5700575757005757, + 0xa000a0a0a000a0a0, 0x9100919191009191, 0xf700f7f7f700f7f7, + 0xb500b5b5b500b5b5, 0xc900c9c9c900c9c9, 0xa200a2a2a200a2a2, + 0x8c008c8c8c008c8c, 0xd200d2d2d200d2d2, 0x9000909090009090, + 0xf600f6f6f600f6f6, 0x0700070707000707, 0xa700a7a7a700a7a7, + 0x2700272727002727, 0x8e008e8e8e008e8e, 0xb200b2b2b200b2b2, + 0x4900494949004949, 0xde00dedede00dede, 0x4300434343004343, + 0x5c005c5c5c005c5c, 0xd700d7d7d700d7d7, 0xc700c7c7c700c7c7, + 0x3e003e3e3e003e3e, 0xf500f5f5f500f5f5, 0x8f008f8f8f008f8f, + 0x6700676767006767, 0x1f001f1f1f001f1f, 0x1800181818001818, + 0x6e006e6e6e006e6e, 0xaf00afafaf00afaf, 0x2f002f2f2f002f2f, + 0xe200e2e2e200e2e2, 0x8500858585008585, 0x0d000d0d0d000d0d, + 0x5300535353005353, 0xf000f0f0f000f0f0, 0x9c009c9c9c009c9c, + 0x6500656565006565, 0xea00eaeaea00eaea, 0xa300a3a3a300a3a3, + 0xae00aeaeae00aeae, 0x9e009e9e9e009e9e, 0xec00ececec00ecec, + 0x8000808080008080, 0x2d002d2d2d002d2d, 0x6b006b6b6b006b6b, + 0xa800a8a8a800a8a8, 0x2b002b2b2b002b2b, 0x3600363636003636, + 0xa600a6a6a600a6a6, 0xc500c5c5c500c5c5, 0x8600868686008686, + 0x4d004d4d4d004d4d, 0x3300333333003333, 0xfd00fdfdfd00fdfd, + 0x6600666666006666, 0x5800585858005858, 0x9600969696009696, + 0x3a003a3a3a003a3a, 0x0900090909000909, 0x9500959595009595, + 0x1000101010001010, 0x7800787878007878, 0xd800d8d8d800d8d8, + 0x4200424242004242, 0xcc00cccccc00cccc, 0xef00efefef00efef, + 0x2600262626002626, 0xe500e5e5e500e5e5, 0x6100616161006161, + 0x1a001a1a1a001a1a, 0x3f003f3f3f003f3f, 0x3b003b3b3b003b3b, + 0x8200828282008282, 0xb600b6b6b600b6b6, 0xdb00dbdbdb00dbdb, + 0xd400d4d4d400d4d4, 0x9800989898009898, 0xe800e8e8e800e8e8, + 0x8b008b8b8b008b8b, 0x0200020202000202, 0xeb00ebebeb00ebeb, + 0x0a000a0a0a000a0a, 0x2c002c2c2c002c2c, 0x1d001d1d1d001d1d, + 0xb000b0b0b000b0b0, 0x6f006f6f6f006f6f, 0x8d008d8d8d008d8d, + 0x8800888888008888, 0x0e000e0e0e000e0e, 0x1900191919001919, + 0x8700878787008787, 0x4e004e4e4e004e4e, 0x0b000b0b0b000b0b, + 0xa900a9a9a900a9a9, 0x0c000c0c0c000c0c, 0x7900797979007979, + 0x1100111111001111, 0x7f007f7f7f007f7f, 0x2200222222002222, + 0xe700e7e7e700e7e7, 0x5900595959005959, 0xe100e1e1e100e1e1, + 0xda00dadada00dada, 0x3d003d3d3d003d3d, 0xc800c8c8c800c8c8, + 0x1200121212001212, 0x0400040404000404, 0x7400747474007474, + 0x5400545454005454, 0x3000303030003030, 0x7e007e7e7e007e7e, + 0xb400b4b4b400b4b4, 0x2800282828002828, 0x5500555555005555, + 0x6800686868006868, 0x5000505050005050, 0xbe00bebebe00bebe, + 0xd000d0d0d000d0d0, 0xc400c4c4c400c4c4, 0x3100313131003131, + 0xcb00cbcbcb00cbcb, 0x2a002a2a2a002a2a, 0xad00adadad00adad, + 0x0f000f0f0f000f0f, 0xca00cacaca00caca, 0x7000707070007070, + 0xff00ffffff00ffff, 0x3200323232003232, 0x6900696969006969, + 0x0800080808000808, 0x6200626262006262, 0x0000000000000000, + 0x2400242424002424, 0xd100d1d1d100d1d1, 0xfb00fbfbfb00fbfb, + 0xba00bababa00baba, 0xed00ededed00eded, 0x4500454545004545, + 0x8100818181008181, 0x7300737373007373, 0x6d006d6d6d006d6d, + 0x8400848484008484, 0x9f009f9f9f009f9f, 
0xee00eeeeee00eeee, + 0x4a004a4a4a004a4a, 0xc300c3c3c300c3c3, 0x2e002e2e2e002e2e, + 0xc100c1c1c100c1c1, 0x0100010101000101, 0xe600e6e6e600e6e6, + 0x2500252525002525, 0x4800484848004848, 0x9900999999009999, + 0xb900b9b9b900b9b9, 0xb300b3b3b300b3b3, 0x7b007b7b7b007b7b, + 0xf900f9f9f900f9f9, 0xce00cecece00cece, 0xbf00bfbfbf00bfbf, + 0xdf00dfdfdf00dfdf, 0x7100717171007171, 0x2900292929002929, + 0xcd00cdcdcd00cdcd, 0x6c006c6c6c006c6c, 0x1300131313001313, + 0x6400646464006464, 0x9b009b9b9b009b9b, 0x6300636363006363, + 0x9d009d9d9d009d9d, 0xc000c0c0c000c0c0, 0x4b004b4b4b004b4b, + 0xb700b7b7b700b7b7, 0xa500a5a5a500a5a5, 0x8900898989008989, + 0x5f005f5f5f005f5f, 0xb100b1b1b100b1b1, 0x1700171717001717, + 0xf400f4f4f400f4f4, 0xbc00bcbcbc00bcbc, 0xd300d3d3d300d3d3, + 0x4600464646004646, 0xcf00cfcfcf00cfcf, 0x3700373737003737, + 0x5e005e5e5e005e5e, 0x4700474747004747, 0x9400949494009494, + 0xfa00fafafa00fafa, 0xfc00fcfcfc00fcfc, 0x5b005b5b5b005b5b, + 0x9700979797009797, 0xfe00fefefe00fefe, 0x5a005a5a5a005a5a, + 0xac00acacac00acac, 0x3c003c3c3c003c3c, 0x4c004c4c4c004c4c, + 0x0300030303000303, 0x3500353535003535, 0xf300f3f3f300f3f3, + 0x2300232323002323, 0xb800b8b8b800b8b8, 0x5d005d5d5d005d5d, + 0x6a006a6a6a006a6a, 0x9200929292009292, 0xd500d5d5d500d5d5, + 0x2100212121002121, 0x4400444444004444, 0x5100515151005151, + 0xc600c6c6c600c6c6, 0x7d007d7d7d007d7d, 0x3900393939003939, + 0x8300838383008383, 0xdc00dcdcdc00dcdc, 0xaa00aaaaaa00aaaa, + 0x7c007c7c7c007c7c, 0x7700777777007777, 0x5600565656005656, + 0x0500050505000505, 0x1b001b1b1b001b1b, 0xa400a4a4a400a4a4, + 0x1500151515001515, 0x3400343434003434, 0x1e001e1e1e001e1e, + 0x1c001c1c1c001c1c, 0xf800f8f8f800f8f8, 0x5200525252005252, + 0x2000202020002020, 0x1400141414001414, 0xe900e9e9e900e9e9, + 0xbd00bdbdbd00bdbd, 0xdd00dddddd00dddd, 0xe400e4e4e400e4e4, + 0xa100a1a1a100a1a1, 0xe000e0e0e000e0e0, 0x8a008a8a8a008a8a, + 0xf100f1f1f100f1f1, 0xd600d6d6d600d6d6, 0x7a007a7a7a007a7a, + 0xbb00bbbbbb00bbbb, 0xe300e3e3e300e3e3, 0x4000404040004040, + 0x4f004f4f4f004f4f, +}; + +const u64 camellia_sp44044404[256] = { + 0x7070007070700070, 0x2c2c002c2c2c002c, 0xb3b300b3b3b300b3, + 0xc0c000c0c0c000c0, 0xe4e400e4e4e400e4, 0x5757005757570057, + 0xeaea00eaeaea00ea, 0xaeae00aeaeae00ae, 0x2323002323230023, + 0x6b6b006b6b6b006b, 0x4545004545450045, 0xa5a500a5a5a500a5, + 0xeded00ededed00ed, 0x4f4f004f4f4f004f, 0x1d1d001d1d1d001d, + 0x9292009292920092, 0x8686008686860086, 0xafaf00afafaf00af, + 0x7c7c007c7c7c007c, 0x1f1f001f1f1f001f, 0x3e3e003e3e3e003e, + 0xdcdc00dcdcdc00dc, 0x5e5e005e5e5e005e, 0x0b0b000b0b0b000b, + 0xa6a600a6a6a600a6, 0x3939003939390039, 0xd5d500d5d5d500d5, + 0x5d5d005d5d5d005d, 0xd9d900d9d9d900d9, 0x5a5a005a5a5a005a, + 0x5151005151510051, 0x6c6c006c6c6c006c, 0x8b8b008b8b8b008b, + 0x9a9a009a9a9a009a, 0xfbfb00fbfbfb00fb, 0xb0b000b0b0b000b0, + 0x7474007474740074, 0x2b2b002b2b2b002b, 0xf0f000f0f0f000f0, + 0x8484008484840084, 0xdfdf00dfdfdf00df, 0xcbcb00cbcbcb00cb, + 0x3434003434340034, 0x7676007676760076, 0x6d6d006d6d6d006d, + 0xa9a900a9a9a900a9, 0xd1d100d1d1d100d1, 0x0404000404040004, + 0x1414001414140014, 0x3a3a003a3a3a003a, 0xdede00dedede00de, + 0x1111001111110011, 0x3232003232320032, 0x9c9c009c9c9c009c, + 0x5353005353530053, 0xf2f200f2f2f200f2, 0xfefe00fefefe00fe, + 0xcfcf00cfcfcf00cf, 0xc3c300c3c3c300c3, 0x7a7a007a7a7a007a, + 0x2424002424240024, 0xe8e800e8e8e800e8, 0x6060006060600060, + 0x6969006969690069, 0xaaaa00aaaaaa00aa, 0xa0a000a0a0a000a0, + 0xa1a100a1a1a100a1, 0x6262006262620062, 0x5454005454540054, + 0x1e1e001e1e1e001e, 0xe0e000e0e0e000e0, 
0x6464006464640064, + 0x1010001010100010, 0x0000000000000000, 0xa3a300a3a3a300a3, + 0x7575007575750075, 0x8a8a008a8a8a008a, 0xe6e600e6e6e600e6, + 0x0909000909090009, 0xdddd00dddddd00dd, 0x8787008787870087, + 0x8383008383830083, 0xcdcd00cdcdcd00cd, 0x9090009090900090, + 0x7373007373730073, 0xf6f600f6f6f600f6, 0x9d9d009d9d9d009d, + 0xbfbf00bfbfbf00bf, 0x5252005252520052, 0xd8d800d8d8d800d8, + 0xc8c800c8c8c800c8, 0xc6c600c6c6c600c6, 0x8181008181810081, + 0x6f6f006f6f6f006f, 0x1313001313130013, 0x6363006363630063, + 0xe9e900e9e9e900e9, 0xa7a700a7a7a700a7, 0x9f9f009f9f9f009f, + 0xbcbc00bcbcbc00bc, 0x2929002929290029, 0xf9f900f9f9f900f9, + 0x2f2f002f2f2f002f, 0xb4b400b4b4b400b4, 0x7878007878780078, + 0x0606000606060006, 0xe7e700e7e7e700e7, 0x7171007171710071, + 0xd4d400d4d4d400d4, 0xabab00ababab00ab, 0x8888008888880088, + 0x8d8d008d8d8d008d, 0x7272007272720072, 0xb9b900b9b9b900b9, + 0xf8f800f8f8f800f8, 0xacac00acacac00ac, 0x3636003636360036, + 0x2a2a002a2a2a002a, 0x3c3c003c3c3c003c, 0xf1f100f1f1f100f1, + 0x4040004040400040, 0xd3d300d3d3d300d3, 0xbbbb00bbbbbb00bb, + 0x4343004343430043, 0x1515001515150015, 0xadad00adadad00ad, + 0x7777007777770077, 0x8080008080800080, 0x8282008282820082, + 0xecec00ececec00ec, 0x2727002727270027, 0xe5e500e5e5e500e5, + 0x8585008585850085, 0x3535003535350035, 0x0c0c000c0c0c000c, + 0x4141004141410041, 0xefef00efefef00ef, 0x9393009393930093, + 0x1919001919190019, 0x2121002121210021, 0x0e0e000e0e0e000e, + 0x4e4e004e4e4e004e, 0x6565006565650065, 0xbdbd00bdbdbd00bd, + 0xb8b800b8b8b800b8, 0x8f8f008f8f8f008f, 0xebeb00ebebeb00eb, + 0xcece00cecece00ce, 0x3030003030300030, 0x5f5f005f5f5f005f, + 0xc5c500c5c5c500c5, 0x1a1a001a1a1a001a, 0xe1e100e1e1e100e1, + 0xcaca00cacaca00ca, 0x4747004747470047, 0x3d3d003d3d3d003d, + 0x0101000101010001, 0xd6d600d6d6d600d6, 0x5656005656560056, + 0x4d4d004d4d4d004d, 0x0d0d000d0d0d000d, 0x6666006666660066, + 0xcccc00cccccc00cc, 0x2d2d002d2d2d002d, 0x1212001212120012, + 0x2020002020200020, 0xb1b100b1b1b100b1, 0x9999009999990099, + 0x4c4c004c4c4c004c, 0xc2c200c2c2c200c2, 0x7e7e007e7e7e007e, + 0x0505000505050005, 0xb7b700b7b7b700b7, 0x3131003131310031, + 0x1717001717170017, 0xd7d700d7d7d700d7, 0x5858005858580058, + 0x6161006161610061, 0x1b1b001b1b1b001b, 0x1c1c001c1c1c001c, + 0x0f0f000f0f0f000f, 0x1616001616160016, 0x1818001818180018, + 0x2222002222220022, 0x4444004444440044, 0xb2b200b2b2b200b2, + 0xb5b500b5b5b500b5, 0x9191009191910091, 0x0808000808080008, + 0xa8a800a8a8a800a8, 0xfcfc00fcfcfc00fc, 0x5050005050500050, + 0xd0d000d0d0d000d0, 0x7d7d007d7d7d007d, 0x8989008989890089, + 0x9797009797970097, 0x5b5b005b5b5b005b, 0x9595009595950095, + 0xffff00ffffff00ff, 0xd2d200d2d2d200d2, 0xc4c400c4c4c400c4, + 0x4848004848480048, 0xf7f700f7f7f700f7, 0xdbdb00dbdbdb00db, + 0x0303000303030003, 0xdada00dadada00da, 0x3f3f003f3f3f003f, + 0x9494009494940094, 0x5c5c005c5c5c005c, 0x0202000202020002, + 0x4a4a004a4a4a004a, 0x3333003333330033, 0x6767006767670067, + 0xf3f300f3f3f300f3, 0x7f7f007f7f7f007f, 0xe2e200e2e2e200e2, + 0x9b9b009b9b9b009b, 0x2626002626260026, 0x3737003737370037, + 0x3b3b003b3b3b003b, 0x9696009696960096, 0x4b4b004b4b4b004b, + 0xbebe00bebebe00be, 0x2e2e002e2e2e002e, 0x7979007979790079, + 0x8c8c008c8c8c008c, 0x6e6e006e6e6e006e, 0x8e8e008e8e8e008e, + 0xf5f500f5f5f500f5, 0xb6b600b6b6b600b6, 0xfdfd00fdfdfd00fd, + 0x5959005959590059, 0x9898009898980098, 0x6a6a006a6a6a006a, + 0x4646004646460046, 0xbaba00bababa00ba, 0x2525002525250025, + 0x4242004242420042, 0xa2a200a2a2a200a2, 0xfafa00fafafa00fa, + 0x0707000707070007, 0x5555005555550055, 0xeeee00eeeeee00ee, 
+ 0x0a0a000a0a0a000a, 0x4949004949490049, 0x6868006868680068, + 0x3838003838380038, 0xa4a400a4a4a400a4, 0x2828002828280028, + 0x7b7b007b7b7b007b, 0xc9c900c9c9c900c9, 0xc1c100c1c1c100c1, + 0xe3e300e3e3e300e3, 0xf4f400f4f4f400f4, 0xc7c700c7c7c700c7, + 0x9e9e009e9e9e009e, +}; + +const u64 camellia_sp11101110[256] = { + 0x7070700070707000, 0x8282820082828200, 0x2c2c2c002c2c2c00, + 0xececec00ececec00, 0xb3b3b300b3b3b300, 0x2727270027272700, + 0xc0c0c000c0c0c000, 0xe5e5e500e5e5e500, 0xe4e4e400e4e4e400, + 0x8585850085858500, 0x5757570057575700, 0x3535350035353500, + 0xeaeaea00eaeaea00, 0x0c0c0c000c0c0c00, 0xaeaeae00aeaeae00, + 0x4141410041414100, 0x2323230023232300, 0xefefef00efefef00, + 0x6b6b6b006b6b6b00, 0x9393930093939300, 0x4545450045454500, + 0x1919190019191900, 0xa5a5a500a5a5a500, 0x2121210021212100, + 0xededed00ededed00, 0x0e0e0e000e0e0e00, 0x4f4f4f004f4f4f00, + 0x4e4e4e004e4e4e00, 0x1d1d1d001d1d1d00, 0x6565650065656500, + 0x9292920092929200, 0xbdbdbd00bdbdbd00, 0x8686860086868600, + 0xb8b8b800b8b8b800, 0xafafaf00afafaf00, 0x8f8f8f008f8f8f00, + 0x7c7c7c007c7c7c00, 0xebebeb00ebebeb00, 0x1f1f1f001f1f1f00, + 0xcecece00cecece00, 0x3e3e3e003e3e3e00, 0x3030300030303000, + 0xdcdcdc00dcdcdc00, 0x5f5f5f005f5f5f00, 0x5e5e5e005e5e5e00, + 0xc5c5c500c5c5c500, 0x0b0b0b000b0b0b00, 0x1a1a1a001a1a1a00, + 0xa6a6a600a6a6a600, 0xe1e1e100e1e1e100, 0x3939390039393900, + 0xcacaca00cacaca00, 0xd5d5d500d5d5d500, 0x4747470047474700, + 0x5d5d5d005d5d5d00, 0x3d3d3d003d3d3d00, 0xd9d9d900d9d9d900, + 0x0101010001010100, 0x5a5a5a005a5a5a00, 0xd6d6d600d6d6d600, + 0x5151510051515100, 0x5656560056565600, 0x6c6c6c006c6c6c00, + 0x4d4d4d004d4d4d00, 0x8b8b8b008b8b8b00, 0x0d0d0d000d0d0d00, + 0x9a9a9a009a9a9a00, 0x6666660066666600, 0xfbfbfb00fbfbfb00, + 0xcccccc00cccccc00, 0xb0b0b000b0b0b000, 0x2d2d2d002d2d2d00, + 0x7474740074747400, 0x1212120012121200, 0x2b2b2b002b2b2b00, + 0x2020200020202000, 0xf0f0f000f0f0f000, 0xb1b1b100b1b1b100, + 0x8484840084848400, 0x9999990099999900, 0xdfdfdf00dfdfdf00, + 0x4c4c4c004c4c4c00, 0xcbcbcb00cbcbcb00, 0xc2c2c200c2c2c200, + 0x3434340034343400, 0x7e7e7e007e7e7e00, 0x7676760076767600, + 0x0505050005050500, 0x6d6d6d006d6d6d00, 0xb7b7b700b7b7b700, + 0xa9a9a900a9a9a900, 0x3131310031313100, 0xd1d1d100d1d1d100, + 0x1717170017171700, 0x0404040004040400, 0xd7d7d700d7d7d700, + 0x1414140014141400, 0x5858580058585800, 0x3a3a3a003a3a3a00, + 0x6161610061616100, 0xdedede00dedede00, 0x1b1b1b001b1b1b00, + 0x1111110011111100, 0x1c1c1c001c1c1c00, 0x3232320032323200, + 0x0f0f0f000f0f0f00, 0x9c9c9c009c9c9c00, 0x1616160016161600, + 0x5353530053535300, 0x1818180018181800, 0xf2f2f200f2f2f200, + 0x2222220022222200, 0xfefefe00fefefe00, 0x4444440044444400, + 0xcfcfcf00cfcfcf00, 0xb2b2b200b2b2b200, 0xc3c3c300c3c3c300, + 0xb5b5b500b5b5b500, 0x7a7a7a007a7a7a00, 0x9191910091919100, + 0x2424240024242400, 0x0808080008080800, 0xe8e8e800e8e8e800, + 0xa8a8a800a8a8a800, 0x6060600060606000, 0xfcfcfc00fcfcfc00, + 0x6969690069696900, 0x5050500050505000, 0xaaaaaa00aaaaaa00, + 0xd0d0d000d0d0d000, 0xa0a0a000a0a0a000, 0x7d7d7d007d7d7d00, + 0xa1a1a100a1a1a100, 0x8989890089898900, 0x6262620062626200, + 0x9797970097979700, 0x5454540054545400, 0x5b5b5b005b5b5b00, + 0x1e1e1e001e1e1e00, 0x9595950095959500, 0xe0e0e000e0e0e000, + 0xffffff00ffffff00, 0x6464640064646400, 0xd2d2d200d2d2d200, + 0x1010100010101000, 0xc4c4c400c4c4c400, 0x0000000000000000, + 0x4848480048484800, 0xa3a3a300a3a3a300, 0xf7f7f700f7f7f700, + 0x7575750075757500, 0xdbdbdb00dbdbdb00, 0x8a8a8a008a8a8a00, + 0x0303030003030300, 0xe6e6e600e6e6e600, 0xdadada00dadada00, + 
0x0909090009090900, 0x3f3f3f003f3f3f00, 0xdddddd00dddddd00, + 0x9494940094949400, 0x8787870087878700, 0x5c5c5c005c5c5c00, + 0x8383830083838300, 0x0202020002020200, 0xcdcdcd00cdcdcd00, + 0x4a4a4a004a4a4a00, 0x9090900090909000, 0x3333330033333300, + 0x7373730073737300, 0x6767670067676700, 0xf6f6f600f6f6f600, + 0xf3f3f300f3f3f300, 0x9d9d9d009d9d9d00, 0x7f7f7f007f7f7f00, + 0xbfbfbf00bfbfbf00, 0xe2e2e200e2e2e200, 0x5252520052525200, + 0x9b9b9b009b9b9b00, 0xd8d8d800d8d8d800, 0x2626260026262600, + 0xc8c8c800c8c8c800, 0x3737370037373700, 0xc6c6c600c6c6c600, + 0x3b3b3b003b3b3b00, 0x8181810081818100, 0x9696960096969600, + 0x6f6f6f006f6f6f00, 0x4b4b4b004b4b4b00, 0x1313130013131300, + 0xbebebe00bebebe00, 0x6363630063636300, 0x2e2e2e002e2e2e00, + 0xe9e9e900e9e9e900, 0x7979790079797900, 0xa7a7a700a7a7a700, + 0x8c8c8c008c8c8c00, 0x9f9f9f009f9f9f00, 0x6e6e6e006e6e6e00, + 0xbcbcbc00bcbcbc00, 0x8e8e8e008e8e8e00, 0x2929290029292900, + 0xf5f5f500f5f5f500, 0xf9f9f900f9f9f900, 0xb6b6b600b6b6b600, + 0x2f2f2f002f2f2f00, 0xfdfdfd00fdfdfd00, 0xb4b4b400b4b4b400, + 0x5959590059595900, 0x7878780078787800, 0x9898980098989800, + 0x0606060006060600, 0x6a6a6a006a6a6a00, 0xe7e7e700e7e7e700, + 0x4646460046464600, 0x7171710071717100, 0xbababa00bababa00, + 0xd4d4d400d4d4d400, 0x2525250025252500, 0xababab00ababab00, + 0x4242420042424200, 0x8888880088888800, 0xa2a2a200a2a2a200, + 0x8d8d8d008d8d8d00, 0xfafafa00fafafa00, 0x7272720072727200, + 0x0707070007070700, 0xb9b9b900b9b9b900, 0x5555550055555500, + 0xf8f8f800f8f8f800, 0xeeeeee00eeeeee00, 0xacacac00acacac00, + 0x0a0a0a000a0a0a00, 0x3636360036363600, 0x4949490049494900, + 0x2a2a2a002a2a2a00, 0x6868680068686800, 0x3c3c3c003c3c3c00, + 0x3838380038383800, 0xf1f1f100f1f1f100, 0xa4a4a400a4a4a400, + 0x4040400040404000, 0x2828280028282800, 0xd3d3d300d3d3d300, + 0x7b7b7b007b7b7b00, 0xbbbbbb00bbbbbb00, 0xc9c9c900c9c9c900, + 0x4343430043434300, 0xc1c1c100c1c1c100, 0x1515150015151500, + 0xe3e3e300e3e3e300, 0xadadad00adadad00, 0xf4f4f400f4f4f400, + 0x7777770077777700, 0xc7c7c700c7c7c700, 0x8080800080808000, + 0x9e9e9e009e9e9e00, +}; + +/* key constants */ +#define CAMELLIA_SIGMA1L (0xA09E667FL) +#define CAMELLIA_SIGMA1R (0x3BCC908BL) +#define CAMELLIA_SIGMA2L (0xB67AE858L) +#define CAMELLIA_SIGMA2R (0x4CAA73B2L) +#define CAMELLIA_SIGMA3L (0xC6EF372FL) +#define CAMELLIA_SIGMA3R (0xE94F82BEL) +#define CAMELLIA_SIGMA4L (0x54FF53A5L) +#define CAMELLIA_SIGMA4R (0xF1D36F1CL) +#define CAMELLIA_SIGMA5L (0x10E527FAL) +#define CAMELLIA_SIGMA5R (0xDE682D1DL) +#define CAMELLIA_SIGMA6L (0xB05688C2L) +#define CAMELLIA_SIGMA6R (0xB3E6C1FDL) + +/* macros */ +#define ROLDQ(l, r, bits) ({ \ + u64 t = l; \ + l = (l << bits) | (r >> (64 - bits)); \ + r = (r << bits) | (t >> (64 - bits)); \ +}) + +#define CAMELLIA_F(x, kl, kr, y) ({ \ + u64 ii = x ^ (((u64)kl << 32) | kr); \ + y = camellia_sp11101110[(uint8_t)ii]; \ + y ^= camellia_sp44044404[(uint8_t)(ii >> 8)]; \ + ii >>= 16; \ + y ^= camellia_sp30333033[(uint8_t)ii]; \ + y ^= camellia_sp02220222[(uint8_t)(ii >> 8)]; \ + ii >>= 16; \ + y ^= camellia_sp00444404[(uint8_t)ii]; \ + y ^= camellia_sp03303033[(uint8_t)(ii >> 8)]; \ + ii >>= 16; \ + y ^= camellia_sp22000222[(uint8_t)ii]; \ + y ^= camellia_sp10011110[(uint8_t)(ii >> 8)]; \ + y = ror64(y, 32); \ +}) + +#define SET_SUBKEY_LR(INDEX, sRL) (subkey[(INDEX)] = ror64((sRL), 32)) + +static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max) +{ + u64 kw4, tt; + u32 dw, tl, tr; + + /* absorb kw2 to other subkeys */ + /* round 2 */ + subRL[3] ^= subRL[1]; + /* round 4 */ + subRL[5] ^= subRL[1]; + /* 
round 6 */ + subRL[7] ^= subRL[1]; + + subRL[1] ^= (subRL[1] & ~subRL[9]) << 32; + /* modified for FLinv(kl2) */ + dw = (subRL[1] & subRL[9]) >> 32, + subRL[1] ^= rol32(dw, 1); + + /* round 8 */ + subRL[11] ^= subRL[1]; + /* round 10 */ + subRL[13] ^= subRL[1]; + /* round 12 */ + subRL[15] ^= subRL[1]; + + subRL[1] ^= (subRL[1] & ~subRL[17]) << 32; + /* modified for FLinv(kl4) */ + dw = (subRL[1] & subRL[17]) >> 32, + subRL[1] ^= rol32(dw, 1); + + /* round 14 */ + subRL[19] ^= subRL[1]; + /* round 16 */ + subRL[21] ^= subRL[1]; + /* round 18 */ + subRL[23] ^= subRL[1]; + + if (max == 24) { + /* kw3 */ + subRL[24] ^= subRL[1]; + + /* absorb kw4 to other subkeys */ + kw4 = subRL[25]; + } else { + subRL[1] ^= (subRL[1] & ~subRL[25]) << 32; + /* modified for FLinv(kl6) */ + dw = (subRL[1] & subRL[25]) >> 32, + subRL[1] ^= rol32(dw, 1); + + /* round 20 */ + subRL[27] ^= subRL[1]; + /* round 22 */ + subRL[29] ^= subRL[1]; + /* round 24 */ + subRL[31] ^= subRL[1]; + /* kw3 */ + subRL[32] ^= subRL[1]; + + /* absorb kw4 to other subkeys */ + kw4 = subRL[33]; + /* round 23 */ + subRL[30] ^= kw4; + /* round 21 */ + subRL[28] ^= kw4; + /* round 19 */ + subRL[26] ^= kw4; + + kw4 ^= (kw4 & ~subRL[24]) << 32; + /* modified for FL(kl5) */ + dw = (kw4 & subRL[24]) >> 32, + kw4 ^= rol32(dw, 1); + } + + /* round 17 */ + subRL[22] ^= kw4; + /* round 15 */ + subRL[20] ^= kw4; + /* round 13 */ + subRL[18] ^= kw4; + + kw4 ^= (kw4 & ~subRL[16]) << 32; + /* modified for FL(kl3) */ + dw = (kw4 & subRL[16]) >> 32, + kw4 ^= rol32(dw, 1); + + /* round 11 */ + subRL[14] ^= kw4; + /* round 9 */ + subRL[12] ^= kw4; + /* round 7 */ + subRL[10] ^= kw4; + + kw4 ^= (kw4 & ~subRL[8]) << 32; + /* modified for FL(kl1) */ + dw = (kw4 & subRL[8]) >> 32, + kw4 ^= rol32(dw, 1); + + /* round 5 */ + subRL[6] ^= kw4; + /* round 3 */ + subRL[4] ^= kw4; + /* round 1 */ + subRL[2] ^= kw4; + /* kw1 */ + subRL[0] ^= kw4; + + /* key XOR is end of F-function */ + SET_SUBKEY_LR(0, subRL[0] ^ subRL[2]); /* kw1 */ + SET_SUBKEY_LR(2, subRL[3]); /* round 1 */ + SET_SUBKEY_LR(3, subRL[2] ^ subRL[4]); /* round 2 */ + SET_SUBKEY_LR(4, subRL[3] ^ subRL[5]); /* round 3 */ + SET_SUBKEY_LR(5, subRL[4] ^ subRL[6]); /* round 4 */ + SET_SUBKEY_LR(6, subRL[5] ^ subRL[7]); /* round 5 */ + + tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]); + dw = tl & (subRL[8] >> 32), /* FL(kl1) */ + tr = subRL[10] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(7, subRL[6] ^ tt); /* round 6 */ + SET_SUBKEY_LR(8, subRL[8]); /* FL(kl1) */ + SET_SUBKEY_LR(9, subRL[9]); /* FLinv(kl2) */ + + tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]); + dw = tl & (subRL[9] >> 32), /* FLinv(kl2) */ + tr = subRL[7] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(10, subRL[11] ^ tt); /* round 7 */ + SET_SUBKEY_LR(11, subRL[10] ^ subRL[12]); /* round 8 */ + SET_SUBKEY_LR(12, subRL[11] ^ subRL[13]); /* round 9 */ + SET_SUBKEY_LR(13, subRL[12] ^ subRL[14]); /* round 10 */ + SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]); /* round 11 */ + + tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]); + dw = tl & (subRL[16] >> 32), /* FL(kl3) */ + tr = subRL[18] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(15, subRL[14] ^ tt); /* round 12 */ + SET_SUBKEY_LR(16, subRL[16]); /* FL(kl3) */ + SET_SUBKEY_LR(17, subRL[17]); /* FLinv(kl4) */ + + tl = (subRL[15] >> 32) ^ (subRL[15] & ~subRL[17]); + dw = tl & (subRL[17] >> 32), /* FLinv(kl4) */ + tr = subRL[15] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(18, subRL[19] ^ tt); /* round 13 */ + 
SET_SUBKEY_LR(19, subRL[18] ^ subRL[20]); /* round 14 */ + SET_SUBKEY_LR(20, subRL[19] ^ subRL[21]); /* round 15 */ + SET_SUBKEY_LR(21, subRL[20] ^ subRL[22]); /* round 16 */ + SET_SUBKEY_LR(22, subRL[21] ^ subRL[23]); /* round 17 */ + + if (max == 24) { + SET_SUBKEY_LR(23, subRL[22]); /* round 18 */ + SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]); /* kw3 */ + } else { + tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]); + dw = tl & (subRL[24] >> 32), /* FL(kl5) */ + tr = subRL[26] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(23, subRL[22] ^ tt); /* round 18 */ + SET_SUBKEY_LR(24, subRL[24]); /* FL(kl5) */ + SET_SUBKEY_LR(25, subRL[25]); /* FLinv(kl6) */ + + tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]); + dw = tl & (subRL[25] >> 32), /* FLinv(kl6) */ + tr = subRL[23] ^ rol32(dw, 1); + tt = (tr | ((u64)tl << 32)); + + SET_SUBKEY_LR(26, subRL[27] ^ tt); /* round 19 */ + SET_SUBKEY_LR(27, subRL[26] ^ subRL[28]); /* round 20 */ + SET_SUBKEY_LR(28, subRL[27] ^ subRL[29]); /* round 21 */ + SET_SUBKEY_LR(29, subRL[28] ^ subRL[30]); /* round 22 */ + SET_SUBKEY_LR(30, subRL[29] ^ subRL[31]); /* round 23 */ + SET_SUBKEY_LR(31, subRL[30]); /* round 24 */ + SET_SUBKEY_LR(32, subRL[32] ^ subRL[31]); /* kw3 */ + } +} + +static void camellia_setup128(const unsigned char *key, u64 *subkey) +{ + u64 kl, kr, ww; + u64 subRL[26]; + + /** + * k == kl || kr (|| is concatenation) + */ + kl = get_unaligned_be64(key); + kr = get_unaligned_be64(key + 8); + + /* generate KL dependent subkeys */ + /* kw1 */ + subRL[0] = kl; + /* kw2 */ + subRL[1] = kr; + + /* rotation left shift 15bit */ + ROLDQ(kl, kr, 15); + + /* k3 */ + subRL[4] = kl; + /* k4 */ + subRL[5] = kr; + + /* rotation left shift 15+30bit */ + ROLDQ(kl, kr, 30); + + /* k7 */ + subRL[10] = kl; + /* k8 */ + subRL[11] = kr; + + /* rotation left shift 15+30+15bit */ + ROLDQ(kl, kr, 15); + + /* k10 */ + subRL[13] = kr; + /* rotation left shift 15+30+15+17 bit */ + ROLDQ(kl, kr, 17); + + /* kl3 */ + subRL[16] = kl; + /* kl4 */ + subRL[17] = kr; + + /* rotation left shift 15+30+15+17+17 bit */ + ROLDQ(kl, kr, 17); + + /* k13 */ + subRL[18] = kl; + /* k14 */ + subRL[19] = kr; + + /* rotation left shift 15+30+15+17+17+17 bit */ + ROLDQ(kl, kr, 17); + + /* k17 */ + subRL[22] = kl; + /* k18 */ + subRL[23] = kr; + + /* generate KA */ + kl = subRL[0]; + kr = subRL[1]; + CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww); + kr ^= ww; + CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl); + + /* current status == (kll, klr, w0, w1) */ + CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr); + kr ^= ww; + CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww); + kl ^= ww; + + /* generate KA dependent subkeys */ + /* k1, k2 */ + subRL[2] = kl; + subRL[3] = kr; + ROLDQ(kl, kr, 15); + /* k5,k6 */ + subRL[6] = kl; + subRL[7] = kr; + ROLDQ(kl, kr, 15); + /* kl1, kl2 */ + subRL[8] = kl; + subRL[9] = kr; + ROLDQ(kl, kr, 15); + /* k9 */ + subRL[12] = kl; + ROLDQ(kl, kr, 15); + /* k11, k12 */ + subRL[14] = kl; + subRL[15] = kr; + ROLDQ(kl, kr, 34); + /* k15, k16 */ + subRL[20] = kl; + subRL[21] = kr; + ROLDQ(kl, kr, 17); + /* kw3, kw4 */ + subRL[24] = kl; + subRL[25] = kr; + + camellia_setup_tail(subkey, subRL, 24); +} + +static void camellia_setup256(const unsigned char *key, u64 *subkey) +{ + u64 kl, kr; /* left half of key */ + u64 krl, krr; /* right half of key */ + u64 ww; /* temporary variables */ + u64 subRL[34]; + + /** + * key = (kl || kr || krl || krr) (|| is concatenation) + */ + kl = get_unaligned_be64(key); + kr = get_unaligned_be64(key + 
8); + krl = get_unaligned_be64(key + 16); + krr = get_unaligned_be64(key + 24); + + /* generate KL dependent subkeys */ + /* kw1 */ + subRL[0] = kl; + /* kw2 */ + subRL[1] = kr; + ROLDQ(kl, kr, 45); + /* k9 */ + subRL[12] = kl; + /* k10 */ + subRL[13] = kr; + ROLDQ(kl, kr, 15); + /* kl3 */ + subRL[16] = kl; + /* kl4 */ + subRL[17] = kr; + ROLDQ(kl, kr, 17); + /* k17 */ + subRL[22] = kl; + /* k18 */ + subRL[23] = kr; + ROLDQ(kl, kr, 34); + /* k23 */ + subRL[30] = kl; + /* k24 */ + subRL[31] = kr; + + /* generate KR dependent subkeys */ + ROLDQ(krl, krr, 15); + /* k3 */ + subRL[4] = krl; + /* k4 */ + subRL[5] = krr; + ROLDQ(krl, krr, 15); + /* kl1 */ + subRL[8] = krl; + /* kl2 */ + subRL[9] = krr; + ROLDQ(krl, krr, 30); + /* k13 */ + subRL[18] = krl; + /* k14 */ + subRL[19] = krr; + ROLDQ(krl, krr, 34); + /* k19 */ + subRL[26] = krl; + /* k20 */ + subRL[27] = krr; + ROLDQ(krl, krr, 34); + + /* generate KA */ + kl = subRL[0] ^ krl; + kr = subRL[1] ^ krr; + + CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww); + kr ^= ww; + CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl); + kl ^= krl; + CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr); + kr ^= ww ^ krr; + CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww); + kl ^= ww; + + /* generate KB */ + krl ^= kl; + krr ^= kr; + CAMELLIA_F(krl, CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, ww); + krr ^= ww; + CAMELLIA_F(krr, CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, ww); + krl ^= ww; + + /* generate KA dependent subkeys */ + ROLDQ(kl, kr, 15); + /* k5 */ + subRL[6] = kl; + /* k6 */ + subRL[7] = kr; + ROLDQ(kl, kr, 30); + /* k11 */ + subRL[14] = kl; + /* k12 */ + subRL[15] = kr; + /* rotation left shift 32bit */ + ROLDQ(kl, kr, 32); + /* kl5 */ + subRL[24] = kl; + /* kl6 */ + subRL[25] = kr; + /* rotation left shift 17 from k11,k12 -> k21,k22 */ + ROLDQ(kl, kr, 17); + /* k21 */ + subRL[28] = kl; + /* k22 */ + subRL[29] = kr; + + /* generate KB dependent subkeys */ + /* k1 */ + subRL[2] = krl; + /* k2 */ + subRL[3] = krr; + ROLDQ(krl, krr, 30); + /* k7 */ + subRL[10] = krl; + /* k8 */ + subRL[11] = krr; + ROLDQ(krl, krr, 30); + /* k15 */ + subRL[20] = krl; + /* k16 */ + subRL[21] = krr; + ROLDQ(krl, krr, 51); + /* kw3 */ + subRL[32] = krl; + /* kw4 */ + subRL[33] = krr; + + camellia_setup_tail(subkey, subRL, 32); +} + +static void camellia_setup192(const unsigned char *key, u64 *subkey) +{ + unsigned char kk[32]; + u64 krl, krr; + + memcpy(kk, key, 24); + memcpy((unsigned char *)&krl, key+16, 8); + krr = ~krl; + memcpy(kk+24, (unsigned char *)&krr, 8); + camellia_setup256(kk, subkey); +} + +static int __camellia_setkey(struct camellia_ctx *cctx, + const unsigned char *key, + unsigned int key_len, u32 *flags) +{ + if (key_len != 16 && key_len != 24 && key_len != 32) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + cctx->key_length = key_len; + + switch (key_len) { + case 16: + camellia_setup128(key, cctx->key_table); + break; + case 24: + camellia_setup192(key, cctx->key_table); + break; + case 32: + camellia_setup256(key, cctx->key_table); + break; + } + + return 0; +} + +static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len, + &tfm->crt_flags); +} + +static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, + void (*fn)(struct camellia_ctx *, u8 *, const u8 *), + void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *)) +{ + struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = 
CAMELLIA_BLOCK_SIZE; + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + /* Process two block batch */ + if (nbytes >= bsize * 2) { + do { + fn_2way(ctx, wdst, wsrc); + + wsrc += bsize * 2; + wdst += bsize * 2; + nbytes -= bsize * 2; + } while (nbytes >= bsize * 2); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + fn(ctx, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way); +} + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = CAMELLIA_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 *iv = (u128 *)walk->iv; + + do { + u128_xor(dst, src, iv); + camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = CAMELLIA_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ivs[2 - 1]; + u128 last_iv; + + /* Start of the last block. 
*/ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process two block batch */ + if (nbytes >= bsize * 2) { + do { + nbytes -= bsize * (2 - 1); + src -= 2 - 1; + dst -= 2 - 1; + + ivs[0] = src[0]; + + camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src); + + u128_xor(dst + 1, dst + 1, ivs + 0); + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * 2); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + for (;;) { + camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } + +done: + u128_xor(dst, dst, (u128 *)walk->iv); + *(u128 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static inline void u128_to_be128(be128 *dst, const u128 *src) +{ + dst->a = cpu_to_be64(src->a); + dst->b = cpu_to_be64(src->b); +} + +static inline void be128_to_u128(u128 *dst, const be128 *src) +{ + dst->a = be64_to_cpu(src->a); + dst->b = be64_to_cpu(src->b); +} + +static inline void u128_inc(u128 *i) +{ + i->b++; + if (!i->b) + i->a++; +} + +static void ctr_crypt_final(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + u8 keystream[CAMELLIA_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + u128 ctrblk; + + memcpy(keystream, src, nbytes); + camellia_enc_blk_xor(ctx, keystream, walk->iv); + memcpy(dst, keystream, nbytes); + + be128_to_u128(&ctrblk, (be128 *)walk->iv); + u128_inc(&ctrblk); + u128_to_be128((be128 *)walk->iv, &ctrblk); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = CAMELLIA_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ctrblk; + be128 ctrblocks[2]; + + be128_to_u128(&ctrblk, (be128 *)walk->iv); + + /* Process two block batch */ + if (nbytes >= bsize * 2) { + do { + if (dst != src) { + dst[0] = src[0]; + dst[1] = src[1]; + } + + /* create ctrblks for parallel encrypt */ + u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + u128_to_be128(&ctrblocks[1], &ctrblk); + u128_inc(&ctrblk); + + camellia_enc_blk_xor_2way(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += 2; + dst += 2; + nbytes -= bsize * 2; + } while (nbytes >= bsize * 2); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (dst != src) + *dst = *src; + + u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + + camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks); + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + u128_to_be128((be128 *)walk->iv, &ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + 
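/* + * CTR needs no padding: __ctr_crypt() consumes all full blocks, two at + * a time while it can, and ctr_crypt_final() then XORs any trailing + * partial block with a single encrypted counter block. Initializing the + * walk with blkcipher_walk_virt_block() below guarantees the walk hands + * back less than one full block only at the very end. + */ +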
blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE); + + while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) { + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + if (walk.nbytes) { + ctr_crypt_final(desc, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = CAMELLIA_BLOCK_SIZE; + struct camellia_ctx *ctx = priv; + int i; + + while (nbytes >= 2 * bsize) { + camellia_enc_blk_2way(ctx, srcdst, srcdst); + srcdst += bsize * 2; + nbytes -= bsize * 2; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + camellia_enc_blk(ctx, srcdst, srcdst); +} + +static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = CAMELLIA_BLOCK_SIZE; + struct camellia_ctx *ctx = priv; + int i; + + while (nbytes >= 2 * bsize) { + camellia_dec_blk_2way(ctx, srcdst, srcdst); + srcdst += bsize * 2; + nbytes -= bsize * 2; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + camellia_dec_blk(ctx, srcdst, srcdst); +} + +struct camellia_lrw_ctx { + struct lrw_table_ctx lrw_table; + struct camellia_ctx camellia_ctx; +}; + +static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = __camellia_setkey(&ctx->camellia_ctx, key, + keylen - CAMELLIA_BLOCK_SIZE, + &tfm->crt_flags); + if (err) + return err; + + return lrw_init_table(&ctx->lrw_table, + key + keylen - CAMELLIA_BLOCK_SIZE); +} + +static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[2 * 4]; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &ctx->camellia_ctx, + .crypt_fn = encrypt_callback, + }; + + return lrw_crypt(desc, dst, src, nbytes, &req); +} + +static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[2 * 4]; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &ctx->camellia_ctx, + .crypt_fn = decrypt_callback, + }; + + return lrw_crypt(desc, dst, src, nbytes, &req); +} + +static void lrw_exit_tfm(struct crypto_tfm *tfm) +{ + struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + + lrw_free_table(&ctx->lrw_table); +} + +struct camellia_xts_ctx { + struct camellia_ctx tweak_ctx; + struct camellia_ctx crypt_ctx; +}; + +static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int err; + + /* key consists of keys of equal size concatenated, therefore + * the length must be even + */ + if (keylen % 2) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + /* first half of xts-key is for crypt */ + err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, + flags); +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist 
*dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[2 * 4]; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), + .crypt_ctx = &ctx->crypt_ctx, + .crypt_fn = encrypt_callback, + }; + + return xts_crypt(desc, dst, src, nbytes, &req); +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[2 * 4]; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), + .crypt_ctx = &ctx->crypt_ctx, + .crypt_fn = decrypt_callback, + }; + + return xts_crypt(desc, dst, src, nbytes, &req); +} + +static struct crypto_alg camellia_algs[6] = { { + .cra_name = "camellia", + .cra_driver_name = "camellia-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(camellia_algs[0].cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE, + .cia_max_keysize = CAMELLIA_MAX_KEY_SIZE, + .cia_setkey = camellia_setkey, + .cia_encrypt = camellia_encrypt, + .cia_decrypt = camellia_decrypt + } + } +}, { + .cra_name = "ecb(camellia)", + .cra_driver_name = "ecb-camellia-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(camellia_algs[1].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(camellia)", + .cra_driver_name = "cbc-camellia-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(camellia_algs[2].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "ctr(camellia)", + .cra_driver_name = "ctr-camellia-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct camellia_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(camellia_algs[3].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}, { + .cra_name = "lrw(camellia)", + .cra_driver_name = "lrw-camellia-asm", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAMELLIA_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct camellia_lrw_ctx), + .cra_alignmask = 0, + .cra_type = 
&crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_list = LIST_HEAD_INIT(camellia_algs[4].cra_list),
+	.cra_exit = lrw_exit_tfm,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize = CAMELLIA_MIN_KEY_SIZE +
+				       CAMELLIA_BLOCK_SIZE,
+			.max_keysize = CAMELLIA_MAX_KEY_SIZE +
+				       CAMELLIA_BLOCK_SIZE,
+			.ivsize = CAMELLIA_BLOCK_SIZE,
+			.setkey = lrw_camellia_setkey,
+			.encrypt = lrw_encrypt,
+			.decrypt = lrw_decrypt,
+		},
+	},
+}, {
+	.cra_name = "xts(camellia)",
+	.cra_driver_name = "xts-camellia-asm",
+	.cra_priority = 300,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize = CAMELLIA_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct camellia_xts_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_list = LIST_HEAD_INIT(camellia_algs[5].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
+			.max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
+			.ivsize = CAMELLIA_BLOCK_SIZE,
+			.setkey = xts_camellia_setkey,
+			.encrypt = xts_encrypt,
+			.decrypt = xts_decrypt,
+		},
+	},
+} };
+
+static bool is_blacklisted_cpu(void)
+{
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return false;
+
+	if (boot_cpu_data.x86 == 0x0f) {
+		/*
+		 * On Pentium 4, camellia-asm is slower than the original
+		 * assembler implementation because of the excessive use of
+		 * 64-bit rotates and left-shifts (which are really slow on
+		 * P4) needed to store and handle a 128-bit block in two
+		 * 64-bit registers.
+		 */
+		return true;
+	}
+
+	return false;
+}
+
+static int force;
+module_param(force, int, 0);
+MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
+
+static int __init init(void)
+{
+	if (!force && is_blacklisted_cpu()) {
+		printk(KERN_INFO
+		       "camellia-x86_64: performance on this CPU "
+		       "would be suboptimal: disabling "
+		       "camellia-x86_64.\n");
+		return -ENODEV;
+	}
+
+	return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
+}
+
+static void __exit fini(void)
+{
+	crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized");
+MODULE_ALIAS("camellia");
+MODULE_ALIAS("camellia-asm");
diff --git a/arch/x86/crypto/crc32c-intel.c b/arch/x86/crypto/crc32c-intel.c
new file mode 100644
index 00000000..493f9592
--- /dev/null
+++ b/arch/x86/crypto/crc32c-intel.c
@@ -0,0 +1,203 @@
+/*
+ * Using the hardware-provided CRC32 instruction to accelerate CRC32C
+ * calculation.
+ * CRC32C polynomial: 0x1EDC6F41(BE)/0x82F63B78(LE)
+ * CRC32 is a new instruction in Intel SSE4.2; the reference can be found at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2A: Instruction Set Reference, A-M
+ *
+ * Copyright (C) 2008 Intel Corporation
+ * Authors: Austin Zhang
+ *          Kent Liu
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <crypto/internal/hash.h>
+
+#include <asm/cpufeature.h>
+#include <asm/cpu_device_id.h>
+
+#define CHKSUM_BLOCK_SIZE 1
+#define CHKSUM_DIGEST_SIZE 4
+
+#define SCALE_F sizeof(unsigned long)
+
+#ifdef CONFIG_X86_64
+#define REX_PRE "0x48, "
+#else
+#define REX_PRE
+#endif
+
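+/*
+ * The helpers below emit the CRC32 instruction as raw opcode bytes,
+ * presumably so the file also builds with assemblers that lack the
+ * crc32 mnemonic: 0xf2 0x0f 0x38 0xf0 /r is CRC32 r32, r/m8, and
+ * 0xf2 0x0f 0x38 0xf1 /r is CRC32 r32, r/m32; the REX.W prefix (0x48,
+ * added via REX_PRE on x86-64) widens the latter to 64-bit operands.
+ */
+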
+static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
+{
+	while (length--) {
+		__asm__ __volatile__(
+			".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
+			:"=S"(crc)
+			:"0"(crc), "c"(*data)
+		);
+		data++;
+	}
+
+	return crc;
+}
+
+static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len)
+{
+	unsigned int iquotient = len / SCALE_F;
+	unsigned int iremainder = len % SCALE_F;
+	unsigned long *ptmp = (unsigned long *)p;
+
+	while (iquotient--) {
+		__asm__ __volatile__(
+			".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
+			:"=S"(crc)
+			:"0"(crc), "c"(*ptmp)
+		);
+		ptmp++;
+	}
+
+	if (iremainder)
+		crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
+					      iremainder);
+
+	return crc;
+}
+
+/*
+ * Setting the seed allows arbitrary accumulators and flexible XOR policy.
+ * If your algorithm starts with ~0, then XOR with ~0 before you set
+ * the seed.
+ */
+static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key,
+			       unsigned int keylen)
+{
+	u32 *mctx = crypto_shash_ctx(hash);
+
+	if (keylen != sizeof(u32)) {
+		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	*mctx = le32_to_cpup((__le32 *)key);
+	return 0;
+}
+
+static int crc32c_intel_init(struct shash_desc *desc)
+{
+	u32 *mctx = crypto_shash_ctx(desc->tfm);
+	u32 *crcp = shash_desc_ctx(desc);
+
+	*crcp = *mctx;
+
+	return 0;
+}
+
+static int crc32c_intel_update(struct shash_desc *desc, const u8 *data,
+			       unsigned int len)
+{
+	u32 *crcp = shash_desc_ctx(desc);
+
+	*crcp = crc32c_intel_le_hw(*crcp, data, len);
+	return 0;
+}
+
+static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
+				u8 *out)
+{
+	*(__le32 *)out = ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
+	return 0;
+}
+
+static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data,
+			      unsigned int len, u8 *out)
+{
+	return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out);
+}
+
+static int crc32c_intel_final(struct shash_desc *desc, u8 *out)
+{
+	u32 *crcp = shash_desc_ctx(desc);
+
+	*(__le32 *)out = ~cpu_to_le32p(crcp);
+	return 0;
+}
+
+static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data,
+			       unsigned int len, u8 *out)
+{
+	return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
+				    out);
+}
+
+static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
+{
+	u32 *key = crypto_tfm_ctx(tfm);
+
+	*key = ~0;
+
+	return 0;
+}
+
+static struct shash_alg alg = {
+	.setkey = crc32c_intel_setkey,
+	.init = crc32c_intel_init,
+	.update = crc32c_intel_update,
+	.final = crc32c_intel_final,
+	.finup = crc32c_intel_finup,
+	.digest = crc32c_intel_digest,
+	.descsize = sizeof(u32),
+	.digestsize = CHKSUM_DIGEST_SIZE,
+	.base = {
+		.cra_name = "crc32c",
+		.cra_driver_name = "crc32c-intel",
+		.cra_priority = 200,
+		.cra_blocksize = CHKSUM_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(u32),
+		.cra_module = THIS_MODULE,
+		.cra_init = crc32c_intel_cra_init,
+	}
+};
+
+static const struct x86_cpu_id crc32c_cpu_id[] = {
+	X86_FEATURE_MATCH(X86_FEATURE_XMM4_2),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
+
+static int __init crc32c_intel_mod_init(void)
+{
+	if (!x86_match_cpu(crc32c_cpu_id))
+		return -ENODEV;
+	return crypto_register_shash(&alg);
+}
+
+static void __exit crc32c_intel_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
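+/*
+ * A minimal usage sketch, assuming only the generic kernel shash API
+ * (error handling omitted; the on-stack descriptor layout is an
+ * illustrative assumption, sized for .descsize == sizeof(u32)):
+ *
+ *	struct crypto_shash *tfm = crypto_alloc_shash("crc32c", 0, 0);
+ *	struct { struct shash_desc shash; u32 ctx; } desc;
+ *
+ *	desc.shash.tfm = tfm;
+ *	crypto_shash_init(&desc.shash);
+ *	crypto_shash_update(&desc.shash, data, len);
+ *	crypto_shash_final(&desc.shash, out);	(out gets the 4-byte LE digest)
+ *	crypto_free_shash(tfm);
+ */
+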
crc32c_intel_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crc32c_intel_mod_init); +module_exit(crc32c_intel_mod_fini); + +MODULE_AUTHOR("Austin Zhang, Kent Liu"); +MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware."); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS("crc32c"); +MODULE_ALIAS("crc32c-intel"); diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c new file mode 100644 index 00000000..98d7a188 --- /dev/null +++ b/arch/x86/crypto/fpu.c @@ -0,0 +1,161 @@ +/* + * FPU: Wrapper for blkcipher touching fpu + * + * Copyright (c) Intel Corp. + * Author: Huang Ying + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/algapi.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <asm/i387.h> + +struct crypto_fpu_ctx { + struct crypto_blkcipher *child; +}; + +static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key, + unsigned int keylen) +{ + struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent); + struct crypto_blkcipher *child = ctx->child; + int err; + + crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_blkcipher_setkey(child, key, keylen); + crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); + return err; +} + +static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + int err; + struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm); + struct crypto_blkcipher *child = ctx->child; + struct blkcipher_desc desc = { + .tfm = child, + .info = desc_in->info, + .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP, + }; + + kernel_fpu_begin(); + err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes); + kernel_fpu_end(); + return err; +} + +static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + int err; + struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm); + struct crypto_blkcipher *child = ctx->child; + struct blkcipher_desc desc = { + .tfm = child, + .info = desc_in->info, + .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP, + }; + + kernel_fpu_begin(); + err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes); + kernel_fpu_end(); + return err; +} + +static int crypto_fpu_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); + struct crypto_spawn *spawn = crypto_instance_ctx(inst); + struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_blkcipher *cipher; + + cipher = crypto_spawn_blkcipher(spawn); + if (IS_ERR(cipher)) + return PTR_ERR(cipher); + + ctx->child = cipher; + return 0; +} + +static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm) +{ + struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm); + crypto_free_blkcipher(ctx->child); +} + +static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb) +{ + struct crypto_instance *inst; + struct crypto_alg *alg; + int err; + + err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); + if (err) + return ERR_PTR(err); + + alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER, + CRYPTO_ALG_TYPE_MASK); + if (IS_ERR(alg)) + return ERR_CAST(alg); + 
+ inst = crypto_alloc_instance("fpu", alg); + if (IS_ERR(inst)) + goto out_put_alg; + + inst->alg.cra_flags = alg->cra_flags; + inst->alg.cra_priority = alg->cra_priority; + inst->alg.cra_blocksize = alg->cra_blocksize; + inst->alg.cra_alignmask = alg->cra_alignmask; + inst->alg.cra_type = alg->cra_type; + inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize; + inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize; + inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize; + inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx); + inst->alg.cra_init = crypto_fpu_init_tfm; + inst->alg.cra_exit = crypto_fpu_exit_tfm; + inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey; + inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt; + inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt; + +out_put_alg: + crypto_mod_put(alg); + return inst; +} + +static void crypto_fpu_free(struct crypto_instance *inst) +{ + crypto_drop_spawn(crypto_instance_ctx(inst)); + kfree(inst); +} + +static struct crypto_template crypto_fpu_tmpl = { + .name = "fpu", + .alloc = crypto_fpu_alloc, + .free = crypto_fpu_free, + .module = THIS_MODULE, +}; + +int __init crypto_fpu_init(void) +{ + return crypto_register_template(&crypto_fpu_tmpl); +} + +void __exit crypto_fpu_exit(void) +{ + crypto_unregister_template(&crypto_fpu_tmpl); +} diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S new file mode 100644 index 00000000..1eb7f90c --- /dev/null +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -0,0 +1,157 @@ +/* + * Accelerated GHASH implementation with Intel PCLMULQDQ-NI + * instructions. This file contains accelerated part of ghash + * implementation. More information about PCLMULQDQ can be found at: + * + * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ + * + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + * Vinodh Gopal + * Erdinc Ozturk + * Deniz Karakoyunlu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
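The template just defined exists so that SIMD ciphers pay one FPU save/restore per request rather than per block: on x86, any kernel use of the vector registers must sit between kernel_fpu_begin() and kernel_fpu_end(), because the kernel does not save FPU state on entry. The convention the wrapper (and the PCLMULQDQ glue further below) relies on, as a minimal kernel-style sketch with hypothetical my_simd_*() helpers standing in for real vector code:

#include <linux/kernel.h>
#include <asm/i387.h>           /* kernel_fpu_begin/end(), irq_fpu_usable() */

void my_simd_transform(u8 *buf, unsigned int len);      /* hypothetical */
void my_scalar_transform(u8 *buf, unsigned int len);    /* hypothetical */

static void my_transform(u8 *buf, unsigned int len)
{
        if (!irq_fpu_usable()) {
                /* interrupt context may not clobber FPU state: slow path */
                my_scalar_transform(buf, len);
                return;
        }

        kernel_fpu_begin();     /* saves user FPU state, disables preemption */
        my_simd_transform(buf, len);
        kernel_fpu_end();
}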
+ */ + +#include <linux/linkage.h> +#include <asm/inst.h> + +.data + +.align 16 +.Lbswap_mask: + .octa 0x000102030405060708090a0b0c0d0e0f +.Lpoly: + .octa 0xc2000000000000000000000000000001 +.Ltwo_one: + .octa 0x00000001000000000000000000000001 + +#define DATA %xmm0 +#define SHASH %xmm1 +#define T1 %xmm2 +#define T2 %xmm3 +#define T3 %xmm4 +#define BSWAP %xmm5 +#define IN1 %xmm6 + +.text + +/* + * __clmul_gf128mul_ble: internal ABI + * input: + * DATA: operand1 + * SHASH: operand2, hash_key << 1 mod poly + * output: + * DATA: operand1 * operand2 mod poly + * changed: + * T1 + * T2 + * T3 + */ +__clmul_gf128mul_ble: + movaps DATA, T1 + pshufd $0b01001110, DATA, T2 + pshufd $0b01001110, SHASH, T3 + pxor DATA, T2 + pxor SHASH, T3 + + PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0 + PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1 + PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0) + pxor DATA, T2 + pxor T1, T2 # T2 = a0 * b1 + a1 * b0 + + movaps T2, T3 + pslldq $8, T3 + psrldq $8, T2 + pxor T3, DATA + pxor T2, T1 # is result of + # carry-less multiplication + + # first phase of the reduction + movaps DATA, T3 + psllq $1, T3 + pxor DATA, T3 + psllq $5, T3 + pxor DATA, T3 + psllq $57, T3 + movaps T3, T2 + pslldq $8, T2 + psrldq $8, T3 + pxor T2, DATA + pxor T3, T1 + + # second phase of the reduction + movaps DATA, T2 + psrlq $5, T2 + pxor DATA, T2 + psrlq $1, T2 + pxor DATA, T2 + psrlq $1, T2 + pxor T2, T1 + pxor T1, DATA + ret + +/* void clmul_ghash_mul(char *dst, const be128 *shash) */ +ENTRY(clmul_ghash_mul) + movups (%rdi), DATA + movups (%rsi), SHASH + movaps .Lbswap_mask, BSWAP + PSHUFB_XMM BSWAP DATA + call __clmul_gf128mul_ble + PSHUFB_XMM BSWAP DATA + movups DATA, (%rdi) + ret + +/* + * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, + * const be128 *shash); + */ +ENTRY(clmul_ghash_update) + cmp $16, %rdx + jb .Lupdate_just_ret # check length + movaps .Lbswap_mask, BSWAP + movups (%rdi), DATA + movups (%rcx), SHASH + PSHUFB_XMM BSWAP DATA +.align 4 +.Lupdate_loop: + movups (%rsi), IN1 + PSHUFB_XMM BSWAP IN1 + pxor IN1, DATA + call __clmul_gf128mul_ble + sub $16, %rdx + add $16, %rsi + cmp $16, %rdx + jge .Lupdate_loop + PSHUFB_XMM BSWAP DATA + movups DATA, (%rdi) +.Lupdate_just_ret: + ret + +/* + * void clmul_ghash_setkey(be128 *shash, const u8 *key); + * + * Calculate hash_key << 1 mod poly + */ +ENTRY(clmul_ghash_setkey) + movaps .Lbswap_mask, BSWAP + movups (%rsi), %xmm0 + PSHUFB_XMM BSWAP %xmm0 + movaps %xmm0, %xmm1 + psllq $1, %xmm0 + psrlq $63, %xmm1 + movaps %xmm1, %xmm2 + pslldq $8, %xmm1 + psrldq $8, %xmm2 + por %xmm1, %xmm0 + # reduction + pshufd $0b00100100, %xmm2, %xmm1 + pcmpeqd .Ltwo_one, %xmm1 + pand .Lpoly, %xmm1 + pxor %xmm1, %xmm0 + movups %xmm0, (%rdi) + ret diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c new file mode 100644 index 00000000..b4bf0a63 --- /dev/null +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -0,0 +1,338 @@ +/* + * Accelerated GHASH implementation with Intel PCLMULQDQ-NI + * instructions. This file contains glue code. + * + * Copyright (c) 2009 Intel Corp. + * Author: Huang Ying + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
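What __clmul_gf128mul_ble above computes is a product in GF(2^128) reduced modulo x^128 + x^7 + x^2 + x + 1, assembled Karatsuba-style from the three PCLMULQDQ products a0*b0, a1*b1 and (a1+a0)*(b1+b0). Its output can be cross-checked against the bit-serial routine from the GCM specification; a reference sketch in the spec's MSB-first convention (the assembly instead works on byte-reflected operands, with the hash key pre-shifted by clmul_ghash_setkey):

#include <stdint.h>
#include <string.h>

/* bit-serial multiply in GF(2^128), per NIST SP 800-38D */
static void gf128_mul(uint8_t z[16], const uint8_t x[16], const uint8_t y[16])
{
        uint8_t v[16];
        int i, j;

        memset(z, 0, 16);
        memcpy(v, y, 16);

        for (i = 0; i < 128; i++) {
                int carry = v[15] & 1;

                /* if bit i of x (MSB first) is set, accumulate v into z */
                if (x[i / 8] & (0x80 >> (i % 8)))
                        for (j = 0; j < 16; j++)
                                z[j] ^= v[j];

                /* v = v * x, folding the carry back via the polynomial */
                for (j = 15; j > 0; j--)
                        v[j] = (v[j] >> 1) | (v[j - 1] << 7);
                v[0] >>= 1;
                if (carry)
                        v[0] ^= 0xe1;   /* x^128 = x^7 + x^2 + x + 1 */
        }
}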
+ */ + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/cryptd.h> +#include <crypto/gf128mul.h> +#include <crypto/internal/hash.h> +#include <asm/i387.h> +#include <asm/cpu_device_id.h> + +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +void clmul_ghash_mul(char *dst, const be128 *shash); + +void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, + const be128 *shash); + +void clmul_ghash_setkey(be128 *shash, const u8 *key); + +struct ghash_async_ctx { + struct cryptd_ahash *cryptd_tfm; +}; + +struct ghash_ctx { + be128 shash; +}; + +struct ghash_desc_ctx { + u8 buffer[GHASH_BLOCK_SIZE]; + u32 bytes; +}; + +static int ghash_init(struct shash_desc *desc) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + memset(dctx, 0, sizeof(*dctx)); + + return 0; +} + +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *key, unsigned int keylen) +{ + struct ghash_ctx *ctx = crypto_shash_ctx(tfm); + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + clmul_ghash_setkey(&ctx->shash, key); + + return 0; +} + +static int ghash_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + u8 *dst = dctx->buffer; + + kernel_fpu_begin(); + if (dctx->bytes) { + int n = min(srclen, dctx->bytes); + u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes); + + dctx->bytes -= n; + srclen -= n; + + while (n--) + *pos++ ^= *src++; + + if (!dctx->bytes) + clmul_ghash_mul(dst, &ctx->shash); + } + + clmul_ghash_update(dst, src, srclen, &ctx->shash); + kernel_fpu_end(); + + if (srclen & 0xf) { + src += srclen - (srclen & 0xf); + srclen &= 0xf; + dctx->bytes = GHASH_BLOCK_SIZE - srclen; + while (srclen--) + *dst++ ^= *src++; + } + + return 0; +} + +static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx) +{ + u8 *dst = dctx->buffer; + + if (dctx->bytes) { + u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes); + + while (dctx->bytes--) + *tmp++ ^= 0; + + kernel_fpu_begin(); + clmul_ghash_mul(dst, &ctx->shash); + kernel_fpu_end(); + } + + dctx->bytes = 0; +} + +static int ghash_final(struct shash_desc *desc, u8 *dst) +{ + struct ghash_desc_ctx *dctx = shash_desc_ctx(desc); + struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm); + u8 *buf = dctx->buffer; + + ghash_flush(ctx, dctx); + memcpy(dst, buf, GHASH_BLOCK_SIZE); + + return 0; +} + +static struct shash_alg ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "__ghash", + .cra_driver_name = "__ghash-pclmulqdqni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ghash_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list), + }, +}; + +static int ghash_async_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + if (!irq_fpu_usable()) { + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_init(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); + + desc->tfm = 
child; + desc->flags = req->base.flags; + return crypto_shash_init(desc); + } +} + +static int ghash_async_update(struct ahash_request *req) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + + if (!irq_fpu_usable()) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_update(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + return shash_ahash_update(req, desc); + } +} + +static int ghash_async_final(struct ahash_request *req) +{ + struct ahash_request *cryptd_req = ahash_request_ctx(req); + + if (!irq_fpu_usable()) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_final(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + return crypto_shash_final(desc, req->result); + } +} + +static int ghash_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *cryptd_req = ahash_request_ctx(req); + struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; + + if (!irq_fpu_usable()) { + memcpy(cryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); + return crypto_ahash_digest(cryptd_req); + } else { + struct shash_desc *desc = cryptd_shash_desc(cryptd_req); + struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); + + desc->tfm = child; + desc->flags = req->base.flags; + return shash_ahash_digest(req, desc); + } +} + +static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); + struct crypto_ahash *child = &ctx->cryptd_tfm->base; + int err; + + crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + err = crypto_ahash_setkey(child, key, keylen); + crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child) + & CRYPTO_TFM_RES_MASK); + + return err; +} + +static int ghash_async_init_tfm(struct crypto_tfm *tfm) +{ + struct cryptd_ahash *cryptd_tfm; + struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ctx->cryptd_tfm = cryptd_tfm; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(&cryptd_tfm->base)); + + return 0; +} + +static void ghash_async_exit_tfm(struct crypto_tfm *tfm) +{ + struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_free_ahash(ctx->cryptd_tfm); +} + +static struct ahash_alg ghash_async_alg = { + .init = ghash_async_init, + .update = ghash_async_update, + .final = ghash_async_final, + .setkey = ghash_async_setkey, + .digest = ghash_async_digest, + .halg = { + .digestsize = GHASH_DIGEST_SIZE, + .base = { + .cra_name = "ghash", + .cra_driver_name = "ghash-clmulni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_type = &crypto_ahash_type, + .cra_module = 
THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list), + .cra_init = ghash_async_init_tfm, + .cra_exit = ghash_async_exit_tfm, + }, + }, +}; + +static const struct x86_cpu_id pcmul_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */ + {} +}; +MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id); + +static int __init ghash_pclmulqdqni_mod_init(void) +{ + int err; + + if (!x86_match_cpu(pcmul_cpu_id)) + return -ENODEV; + + err = crypto_register_shash(&ghash_alg); + if (err) + goto err_out; + err = crypto_register_ahash(&ghash_async_alg); + if (err) + goto err_shash; + + return 0; + +err_shash: + crypto_unregister_shash(&ghash_alg); +err_out: + return err; +} + +static void __exit ghash_pclmulqdqni_mod_exit(void) +{ + crypto_unregister_ahash(&ghash_async_alg); + crypto_unregister_shash(&ghash_alg); +} + +module_init(ghash_pclmulqdqni_mod_init); +module_exit(ghash_pclmulqdqni_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("GHASH Message Digest Algorithm, " + "acclerated by PCLMULQDQ-NI"); +MODULE_ALIAS("ghash"); diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S new file mode 100644 index 00000000..72eb3066 --- /dev/null +++ b/arch/x86/crypto/salsa20-i586-asm_32.S @@ -0,0 +1,1114 @@ +# salsa20_pm.s version 20051229 +# D. J. Bernstein +# Public domain. + +# enter ECRYPT_encrypt_bytes +.text +.p2align 5 +.globl ECRYPT_encrypt_bytes +ECRYPT_encrypt_bytes: + mov %esp,%eax + and $31,%eax + add $256,%eax + sub %eax,%esp + # eax_stack = eax + movl %eax,80(%esp) + # ebx_stack = ebx + movl %ebx,84(%esp) + # esi_stack = esi + movl %esi,88(%esp) + # edi_stack = edi + movl %edi,92(%esp) + # ebp_stack = ebp + movl %ebp,96(%esp) + # x = arg1 + movl 4(%esp,%eax),%edx + # m = arg2 + movl 8(%esp,%eax),%esi + # out = arg3 + movl 12(%esp,%eax),%edi + # bytes = arg4 + movl 16(%esp,%eax),%ebx + # bytes -= 0 + sub $0,%ebx + # goto done if unsigned<= + jbe ._done +._start: + # in0 = *(uint32 *) (x + 0) + movl 0(%edx),%eax + # in1 = *(uint32 *) (x + 4) + movl 4(%edx),%ecx + # in2 = *(uint32 *) (x + 8) + movl 8(%edx),%ebp + # j0 = in0 + movl %eax,164(%esp) + # in3 = *(uint32 *) (x + 12) + movl 12(%edx),%eax + # j1 = in1 + movl %ecx,168(%esp) + # in4 = *(uint32 *) (x + 16) + movl 16(%edx),%ecx + # j2 = in2 + movl %ebp,172(%esp) + # in5 = *(uint32 *) (x + 20) + movl 20(%edx),%ebp + # j3 = in3 + movl %eax,176(%esp) + # in6 = *(uint32 *) (x + 24) + movl 24(%edx),%eax + # j4 = in4 + movl %ecx,180(%esp) + # in7 = *(uint32 *) (x + 28) + movl 28(%edx),%ecx + # j5 = in5 + movl %ebp,184(%esp) + # in8 = *(uint32 *) (x + 32) + movl 32(%edx),%ebp + # j6 = in6 + movl %eax,188(%esp) + # in9 = *(uint32 *) (x + 36) + movl 36(%edx),%eax + # j7 = in7 + movl %ecx,192(%esp) + # in10 = *(uint32 *) (x + 40) + movl 40(%edx),%ecx + # j8 = in8 + movl %ebp,196(%esp) + # in11 = *(uint32 *) (x + 44) + movl 44(%edx),%ebp + # j9 = in9 + movl %eax,200(%esp) + # in12 = *(uint32 *) (x + 48) + movl 48(%edx),%eax + # j10 = in10 + movl %ecx,204(%esp) + # in13 = *(uint32 *) (x + 52) + movl 52(%edx),%ecx + # j11 = in11 + movl %ebp,208(%esp) + # in14 = *(uint32 *) (x + 56) + movl 56(%edx),%ebp + # j12 = in12 + movl %eax,212(%esp) + # in15 = *(uint32 *) (x + 60) + movl 60(%edx),%eax + # j13 = in13 + movl %ecx,216(%esp) + # j14 = in14 + movl %ebp,220(%esp) + # j15 = in15 + movl %eax,224(%esp) + # x_backup = x + movl %edx,64(%esp) +._bytesatleast1: + # bytes - 64 + cmp $64,%ebx + # goto nocopy if unsigned>= + jae ._nocopy + # ctarget = out + movl %edi,228(%esp) + # out 
= &tmp + leal 0(%esp),%edi + # i = bytes + mov %ebx,%ecx + # while (i) { *out++ = *m++; --i } + rep movsb + # out = &tmp + leal 0(%esp),%edi + # m = &tmp + leal 0(%esp),%esi +._nocopy: + # out_backup = out + movl %edi,72(%esp) + # m_backup = m + movl %esi,68(%esp) + # bytes_backup = bytes + movl %ebx,76(%esp) + # in0 = j0 + movl 164(%esp),%eax + # in1 = j1 + movl 168(%esp),%ecx + # in2 = j2 + movl 172(%esp),%edx + # in3 = j3 + movl 176(%esp),%ebx + # x0 = in0 + movl %eax,100(%esp) + # x1 = in1 + movl %ecx,104(%esp) + # x2 = in2 + movl %edx,108(%esp) + # x3 = in3 + movl %ebx,112(%esp) + # in4 = j4 + movl 180(%esp),%eax + # in5 = j5 + movl 184(%esp),%ecx + # in6 = j6 + movl 188(%esp),%edx + # in7 = j7 + movl 192(%esp),%ebx + # x4 = in4 + movl %eax,116(%esp) + # x5 = in5 + movl %ecx,120(%esp) + # x6 = in6 + movl %edx,124(%esp) + # x7 = in7 + movl %ebx,128(%esp) + # in8 = j8 + movl 196(%esp),%eax + # in9 = j9 + movl 200(%esp),%ecx + # in10 = j10 + movl 204(%esp),%edx + # in11 = j11 + movl 208(%esp),%ebx + # x8 = in8 + movl %eax,132(%esp) + # x9 = in9 + movl %ecx,136(%esp) + # x10 = in10 + movl %edx,140(%esp) + # x11 = in11 + movl %ebx,144(%esp) + # in12 = j12 + movl 212(%esp),%eax + # in13 = j13 + movl 216(%esp),%ecx + # in14 = j14 + movl 220(%esp),%edx + # in15 = j15 + movl 224(%esp),%ebx + # x12 = in12 + movl %eax,148(%esp) + # x13 = in13 + movl %ecx,152(%esp) + # x14 = in14 + movl %edx,156(%esp) + # x15 = in15 + movl %ebx,160(%esp) + # i = 20 + mov $20,%ebp + # p = x0 + movl 100(%esp),%eax + # s = x5 + movl 120(%esp),%ecx + # t = x10 + movl 140(%esp),%edx + # w = x15 + movl 160(%esp),%ebx +._mainloop: + # x0 = p + movl %eax,100(%esp) + # x10 = t + movl %edx,140(%esp) + # p += x12 + addl 148(%esp),%eax + # x5 = s + movl %ecx,120(%esp) + # t += x6 + addl 124(%esp),%edx + # x15 = w + movl %ebx,160(%esp) + # r = x1 + movl 104(%esp),%esi + # r += s + add %ecx,%esi + # v = x11 + movl 144(%esp),%edi + # v += w + add %ebx,%edi + # p <<<= 7 + rol $7,%eax + # p ^= x4 + xorl 116(%esp),%eax + # t <<<= 7 + rol $7,%edx + # t ^= x14 + xorl 156(%esp),%edx + # r <<<= 7 + rol $7,%esi + # r ^= x9 + xorl 136(%esp),%esi + # v <<<= 7 + rol $7,%edi + # v ^= x3 + xorl 112(%esp),%edi + # x4 = p + movl %eax,116(%esp) + # x14 = t + movl %edx,156(%esp) + # p += x0 + addl 100(%esp),%eax + # x9 = r + movl %esi,136(%esp) + # t += x10 + addl 140(%esp),%edx + # x3 = v + movl %edi,112(%esp) + # p <<<= 9 + rol $9,%eax + # p ^= x8 + xorl 132(%esp),%eax + # t <<<= 9 + rol $9,%edx + # t ^= x2 + xorl 108(%esp),%edx + # s += r + add %esi,%ecx + # s <<<= 9 + rol $9,%ecx + # s ^= x13 + xorl 152(%esp),%ecx + # w += v + add %edi,%ebx + # w <<<= 9 + rol $9,%ebx + # w ^= x7 + xorl 128(%esp),%ebx + # x8 = p + movl %eax,132(%esp) + # x2 = t + movl %edx,108(%esp) + # p += x4 + addl 116(%esp),%eax + # x13 = s + movl %ecx,152(%esp) + # t += x14 + addl 156(%esp),%edx + # x7 = w + movl %ebx,128(%esp) + # p <<<= 13 + rol $13,%eax + # p ^= x12 + xorl 148(%esp),%eax + # t <<<= 13 + rol $13,%edx + # t ^= x6 + xorl 124(%esp),%edx + # r += s + add %ecx,%esi + # r <<<= 13 + rol $13,%esi + # r ^= x1 + xorl 104(%esp),%esi + # v += w + add %ebx,%edi + # v <<<= 13 + rol $13,%edi + # v ^= x11 + xorl 144(%esp),%edi + # x12 = p + movl %eax,148(%esp) + # x6 = t + movl %edx,124(%esp) + # p += x8 + addl 132(%esp),%eax + # x1 = r + movl %esi,104(%esp) + # t += x2 + addl 108(%esp),%edx + # x11 = v + movl %edi,144(%esp) + # p <<<= 18 + rol $18,%eax + # p ^= x0 + xorl 100(%esp),%eax + # t <<<= 18 + rol $18,%edx + # t ^= x10 + xorl 140(%esp),%edx + # s += r + add 
%esi,%ecx + # s <<<= 18 + rol $18,%ecx + # s ^= x5 + xorl 120(%esp),%ecx + # w += v + add %edi,%ebx + # w <<<= 18 + rol $18,%ebx + # w ^= x15 + xorl 160(%esp),%ebx + # x0 = p + movl %eax,100(%esp) + # x10 = t + movl %edx,140(%esp) + # p += x3 + addl 112(%esp),%eax + # p <<<= 7 + rol $7,%eax + # x5 = s + movl %ecx,120(%esp) + # t += x9 + addl 136(%esp),%edx + # x15 = w + movl %ebx,160(%esp) + # r = x4 + movl 116(%esp),%esi + # r += s + add %ecx,%esi + # v = x14 + movl 156(%esp),%edi + # v += w + add %ebx,%edi + # p ^= x1 + xorl 104(%esp),%eax + # t <<<= 7 + rol $7,%edx + # t ^= x11 + xorl 144(%esp),%edx + # r <<<= 7 + rol $7,%esi + # r ^= x6 + xorl 124(%esp),%esi + # v <<<= 7 + rol $7,%edi + # v ^= x12 + xorl 148(%esp),%edi + # x1 = p + movl %eax,104(%esp) + # x11 = t + movl %edx,144(%esp) + # p += x0 + addl 100(%esp),%eax + # x6 = r + movl %esi,124(%esp) + # t += x10 + addl 140(%esp),%edx + # x12 = v + movl %edi,148(%esp) + # p <<<= 9 + rol $9,%eax + # p ^= x2 + xorl 108(%esp),%eax + # t <<<= 9 + rol $9,%edx + # t ^= x8 + xorl 132(%esp),%edx + # s += r + add %esi,%ecx + # s <<<= 9 + rol $9,%ecx + # s ^= x7 + xorl 128(%esp),%ecx + # w += v + add %edi,%ebx + # w <<<= 9 + rol $9,%ebx + # w ^= x13 + xorl 152(%esp),%ebx + # x2 = p + movl %eax,108(%esp) + # x8 = t + movl %edx,132(%esp) + # p += x1 + addl 104(%esp),%eax + # x7 = s + movl %ecx,128(%esp) + # t += x11 + addl 144(%esp),%edx + # x13 = w + movl %ebx,152(%esp) + # p <<<= 13 + rol $13,%eax + # p ^= x3 + xorl 112(%esp),%eax + # t <<<= 13 + rol $13,%edx + # t ^= x9 + xorl 136(%esp),%edx + # r += s + add %ecx,%esi + # r <<<= 13 + rol $13,%esi + # r ^= x4 + xorl 116(%esp),%esi + # v += w + add %ebx,%edi + # v <<<= 13 + rol $13,%edi + # v ^= x14 + xorl 156(%esp),%edi + # x3 = p + movl %eax,112(%esp) + # x9 = t + movl %edx,136(%esp) + # p += x2 + addl 108(%esp),%eax + # x4 = r + movl %esi,116(%esp) + # t += x8 + addl 132(%esp),%edx + # x14 = v + movl %edi,156(%esp) + # p <<<= 18 + rol $18,%eax + # p ^= x0 + xorl 100(%esp),%eax + # t <<<= 18 + rol $18,%edx + # t ^= x10 + xorl 140(%esp),%edx + # s += r + add %esi,%ecx + # s <<<= 18 + rol $18,%ecx + # s ^= x5 + xorl 120(%esp),%ecx + # w += v + add %edi,%ebx + # w <<<= 18 + rol $18,%ebx + # w ^= x15 + xorl 160(%esp),%ebx + # x0 = p + movl %eax,100(%esp) + # x10 = t + movl %edx,140(%esp) + # p += x12 + addl 148(%esp),%eax + # x5 = s + movl %ecx,120(%esp) + # t += x6 + addl 124(%esp),%edx + # x15 = w + movl %ebx,160(%esp) + # r = x1 + movl 104(%esp),%esi + # r += s + add %ecx,%esi + # v = x11 + movl 144(%esp),%edi + # v += w + add %ebx,%edi + # p <<<= 7 + rol $7,%eax + # p ^= x4 + xorl 116(%esp),%eax + # t <<<= 7 + rol $7,%edx + # t ^= x14 + xorl 156(%esp),%edx + # r <<<= 7 + rol $7,%esi + # r ^= x9 + xorl 136(%esp),%esi + # v <<<= 7 + rol $7,%edi + # v ^= x3 + xorl 112(%esp),%edi + # x4 = p + movl %eax,116(%esp) + # x14 = t + movl %edx,156(%esp) + # p += x0 + addl 100(%esp),%eax + # x9 = r + movl %esi,136(%esp) + # t += x10 + addl 140(%esp),%edx + # x3 = v + movl %edi,112(%esp) + # p <<<= 9 + rol $9,%eax + # p ^= x8 + xorl 132(%esp),%eax + # t <<<= 9 + rol $9,%edx + # t ^= x2 + xorl 108(%esp),%edx + # s += r + add %esi,%ecx + # s <<<= 9 + rol $9,%ecx + # s ^= x13 + xorl 152(%esp),%ecx + # w += v + add %edi,%ebx + # w <<<= 9 + rol $9,%ebx + # w ^= x7 + xorl 128(%esp),%ebx + # x8 = p + movl %eax,132(%esp) + # x2 = t + movl %edx,108(%esp) + # p += x4 + addl 116(%esp),%eax + # x13 = s + movl %ecx,152(%esp) + # t += x14 + addl 156(%esp),%edx + # x7 = w + movl %ebx,128(%esp) + # p <<<= 13 + rol $13,%eax 
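Register pressure aside, the adds, rotates and xors this mainloop spells out are the Salsa20 quarter-round, applied first down the columns and then across the rows of the 4x4 word state; the loop body inlines four rounds, which is why i falls from 20 in steps of 4. A reference of the same core in C, per Bernstein's specification (the feed-forward add of the input block happens after the loop, just as the assembly adds j0..j15 back before xoring with the message):

#include <stdint.h>

#define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))

/* one quarter-round: the add/rotate/xor pattern annotated above */
#define QR(a, b, c, d) do {             \
        (b) ^= ROTL32((a) + (d), 7);    \
        (c) ^= ROTL32((b) + (a), 9);    \
        (d) ^= ROTL32((c) + (b), 13);   \
        (a) ^= ROTL32((d) + (c), 18);   \
} while (0)

static void salsa20_core(uint32_t out[16], const uint32_t in[16])
{
        uint32_t x[16];
        int i;

        for (i = 0; i < 16; i++)
                x[i] = in[i];
        for (i = 0; i < 20; i += 2) {
                QR(x[ 0], x[ 4], x[ 8], x[12]);         /* columns */
                QR(x[ 5], x[ 9], x[13], x[ 1]);
                QR(x[10], x[14], x[ 2], x[ 6]);
                QR(x[15], x[ 3], x[ 7], x[11]);
                QR(x[ 0], x[ 1], x[ 2], x[ 3]);         /* rows */
                QR(x[ 5], x[ 6], x[ 7], x[ 4]);
                QR(x[10], x[11], x[ 8], x[ 9]);
                QR(x[15], x[12], x[13], x[14]);
        }
        for (i = 0; i < 16; i++)
                out[i] = x[i] + in[i];  /* feed-forward, cf. "in0 += j0" */
}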
+ # p ^= x12 + xorl 148(%esp),%eax + # t <<<= 13 + rol $13,%edx + # t ^= x6 + xorl 124(%esp),%edx + # r += s + add %ecx,%esi + # r <<<= 13 + rol $13,%esi + # r ^= x1 + xorl 104(%esp),%esi + # v += w + add %ebx,%edi + # v <<<= 13 + rol $13,%edi + # v ^= x11 + xorl 144(%esp),%edi + # x12 = p + movl %eax,148(%esp) + # x6 = t + movl %edx,124(%esp) + # p += x8 + addl 132(%esp),%eax + # x1 = r + movl %esi,104(%esp) + # t += x2 + addl 108(%esp),%edx + # x11 = v + movl %edi,144(%esp) + # p <<<= 18 + rol $18,%eax + # p ^= x0 + xorl 100(%esp),%eax + # t <<<= 18 + rol $18,%edx + # t ^= x10 + xorl 140(%esp),%edx + # s += r + add %esi,%ecx + # s <<<= 18 + rol $18,%ecx + # s ^= x5 + xorl 120(%esp),%ecx + # w += v + add %edi,%ebx + # w <<<= 18 + rol $18,%ebx + # w ^= x15 + xorl 160(%esp),%ebx + # x0 = p + movl %eax,100(%esp) + # x10 = t + movl %edx,140(%esp) + # p += x3 + addl 112(%esp),%eax + # p <<<= 7 + rol $7,%eax + # x5 = s + movl %ecx,120(%esp) + # t += x9 + addl 136(%esp),%edx + # x15 = w + movl %ebx,160(%esp) + # r = x4 + movl 116(%esp),%esi + # r += s + add %ecx,%esi + # v = x14 + movl 156(%esp),%edi + # v += w + add %ebx,%edi + # p ^= x1 + xorl 104(%esp),%eax + # t <<<= 7 + rol $7,%edx + # t ^= x11 + xorl 144(%esp),%edx + # r <<<= 7 + rol $7,%esi + # r ^= x6 + xorl 124(%esp),%esi + # v <<<= 7 + rol $7,%edi + # v ^= x12 + xorl 148(%esp),%edi + # x1 = p + movl %eax,104(%esp) + # x11 = t + movl %edx,144(%esp) + # p += x0 + addl 100(%esp),%eax + # x6 = r + movl %esi,124(%esp) + # t += x10 + addl 140(%esp),%edx + # x12 = v + movl %edi,148(%esp) + # p <<<= 9 + rol $9,%eax + # p ^= x2 + xorl 108(%esp),%eax + # t <<<= 9 + rol $9,%edx + # t ^= x8 + xorl 132(%esp),%edx + # s += r + add %esi,%ecx + # s <<<= 9 + rol $9,%ecx + # s ^= x7 + xorl 128(%esp),%ecx + # w += v + add %edi,%ebx + # w <<<= 9 + rol $9,%ebx + # w ^= x13 + xorl 152(%esp),%ebx + # x2 = p + movl %eax,108(%esp) + # x8 = t + movl %edx,132(%esp) + # p += x1 + addl 104(%esp),%eax + # x7 = s + movl %ecx,128(%esp) + # t += x11 + addl 144(%esp),%edx + # x13 = w + movl %ebx,152(%esp) + # p <<<= 13 + rol $13,%eax + # p ^= x3 + xorl 112(%esp),%eax + # t <<<= 13 + rol $13,%edx + # t ^= x9 + xorl 136(%esp),%edx + # r += s + add %ecx,%esi + # r <<<= 13 + rol $13,%esi + # r ^= x4 + xorl 116(%esp),%esi + # v += w + add %ebx,%edi + # v <<<= 13 + rol $13,%edi + # v ^= x14 + xorl 156(%esp),%edi + # x3 = p + movl %eax,112(%esp) + # x9 = t + movl %edx,136(%esp) + # p += x2 + addl 108(%esp),%eax + # x4 = r + movl %esi,116(%esp) + # t += x8 + addl 132(%esp),%edx + # x14 = v + movl %edi,156(%esp) + # p <<<= 18 + rol $18,%eax + # p ^= x0 + xorl 100(%esp),%eax + # t <<<= 18 + rol $18,%edx + # t ^= x10 + xorl 140(%esp),%edx + # s += r + add %esi,%ecx + # s <<<= 18 + rol $18,%ecx + # s ^= x5 + xorl 120(%esp),%ecx + # w += v + add %edi,%ebx + # w <<<= 18 + rol $18,%ebx + # w ^= x15 + xorl 160(%esp),%ebx + # i -= 4 + sub $4,%ebp + # goto mainloop if unsigned > + ja ._mainloop + # x0 = p + movl %eax,100(%esp) + # x5 = s + movl %ecx,120(%esp) + # x10 = t + movl %edx,140(%esp) + # x15 = w + movl %ebx,160(%esp) + # out = out_backup + movl 72(%esp),%edi + # m = m_backup + movl 68(%esp),%esi + # in0 = x0 + movl 100(%esp),%eax + # in1 = x1 + movl 104(%esp),%ecx + # in0 += j0 + addl 164(%esp),%eax + # in1 += j1 + addl 168(%esp),%ecx + # in0 ^= *(uint32 *) (m + 0) + xorl 0(%esi),%eax + # in1 ^= *(uint32 *) (m + 4) + xorl 4(%esi),%ecx + # *(uint32 *) (out + 0) = in0 + movl %eax,0(%edi) + # *(uint32 *) (out + 4) = in1 + movl %ecx,4(%edi) + # in2 = x2 + movl 108(%esp),%eax + # 
in3 = x3 + movl 112(%esp),%ecx + # in2 += j2 + addl 172(%esp),%eax + # in3 += j3 + addl 176(%esp),%ecx + # in2 ^= *(uint32 *) (m + 8) + xorl 8(%esi),%eax + # in3 ^= *(uint32 *) (m + 12) + xorl 12(%esi),%ecx + # *(uint32 *) (out + 8) = in2 + movl %eax,8(%edi) + # *(uint32 *) (out + 12) = in3 + movl %ecx,12(%edi) + # in4 = x4 + movl 116(%esp),%eax + # in5 = x5 + movl 120(%esp),%ecx + # in4 += j4 + addl 180(%esp),%eax + # in5 += j5 + addl 184(%esp),%ecx + # in4 ^= *(uint32 *) (m + 16) + xorl 16(%esi),%eax + # in5 ^= *(uint32 *) (m + 20) + xorl 20(%esi),%ecx + # *(uint32 *) (out + 16) = in4 + movl %eax,16(%edi) + # *(uint32 *) (out + 20) = in5 + movl %ecx,20(%edi) + # in6 = x6 + movl 124(%esp),%eax + # in7 = x7 + movl 128(%esp),%ecx + # in6 += j6 + addl 188(%esp),%eax + # in7 += j7 + addl 192(%esp),%ecx + # in6 ^= *(uint32 *) (m + 24) + xorl 24(%esi),%eax + # in7 ^= *(uint32 *) (m + 28) + xorl 28(%esi),%ecx + # *(uint32 *) (out + 24) = in6 + movl %eax,24(%edi) + # *(uint32 *) (out + 28) = in7 + movl %ecx,28(%edi) + # in8 = x8 + movl 132(%esp),%eax + # in9 = x9 + movl 136(%esp),%ecx + # in8 += j8 + addl 196(%esp),%eax + # in9 += j9 + addl 200(%esp),%ecx + # in8 ^= *(uint32 *) (m + 32) + xorl 32(%esi),%eax + # in9 ^= *(uint32 *) (m + 36) + xorl 36(%esi),%ecx + # *(uint32 *) (out + 32) = in8 + movl %eax,32(%edi) + # *(uint32 *) (out + 36) = in9 + movl %ecx,36(%edi) + # in10 = x10 + movl 140(%esp),%eax + # in11 = x11 + movl 144(%esp),%ecx + # in10 += j10 + addl 204(%esp),%eax + # in11 += j11 + addl 208(%esp),%ecx + # in10 ^= *(uint32 *) (m + 40) + xorl 40(%esi),%eax + # in11 ^= *(uint32 *) (m + 44) + xorl 44(%esi),%ecx + # *(uint32 *) (out + 40) = in10 + movl %eax,40(%edi) + # *(uint32 *) (out + 44) = in11 + movl %ecx,44(%edi) + # in12 = x12 + movl 148(%esp),%eax + # in13 = x13 + movl 152(%esp),%ecx + # in12 += j12 + addl 212(%esp),%eax + # in13 += j13 + addl 216(%esp),%ecx + # in12 ^= *(uint32 *) (m + 48) + xorl 48(%esi),%eax + # in13 ^= *(uint32 *) (m + 52) + xorl 52(%esi),%ecx + # *(uint32 *) (out + 48) = in12 + movl %eax,48(%edi) + # *(uint32 *) (out + 52) = in13 + movl %ecx,52(%edi) + # in14 = x14 + movl 156(%esp),%eax + # in15 = x15 + movl 160(%esp),%ecx + # in14 += j14 + addl 220(%esp),%eax + # in15 += j15 + addl 224(%esp),%ecx + # in14 ^= *(uint32 *) (m + 56) + xorl 56(%esi),%eax + # in15 ^= *(uint32 *) (m + 60) + xorl 60(%esi),%ecx + # *(uint32 *) (out + 56) = in14 + movl %eax,56(%edi) + # *(uint32 *) (out + 60) = in15 + movl %ecx,60(%edi) + # bytes = bytes_backup + movl 76(%esp),%ebx + # in8 = j8 + movl 196(%esp),%eax + # in9 = j9 + movl 200(%esp),%ecx + # in8 += 1 + add $1,%eax + # in9 += 0 + carry + adc $0,%ecx + # j8 = in8 + movl %eax,196(%esp) + # j9 = in9 + movl %ecx,200(%esp) + # bytes - 64 + cmp $64,%ebx + # goto bytesatleast65 if unsigned> + ja ._bytesatleast65 + # goto bytesatleast64 if unsigned>= + jae ._bytesatleast64 + # m = out + mov %edi,%esi + # out = ctarget + movl 228(%esp),%edi + # i = bytes + mov %ebx,%ecx + # while (i) { *out++ = *m++; --i } + rep movsb +._bytesatleast64: + # x = x_backup + movl 64(%esp),%eax + # in8 = j8 + movl 196(%esp),%ecx + # in9 = j9 + movl 200(%esp),%edx + # *(uint32 *) (x + 32) = in8 + movl %ecx,32(%eax) + # *(uint32 *) (x + 36) = in9 + movl %edx,36(%eax) +._done: + # eax = eax_stack + movl 80(%esp),%eax + # ebx = ebx_stack + movl 84(%esp),%ebx + # esi = esi_stack + movl 88(%esp),%esi + # edi = edi_stack + movl 92(%esp),%edi + # ebp = ebp_stack + movl 96(%esp),%ebp + # leave + add %eax,%esp + ret +._bytesatleast65: + # bytes -= 64 + sub 
$64,%ebx + # out += 64 + add $64,%edi + # m += 64 + add $64,%esi + # goto bytesatleast1 + jmp ._bytesatleast1 +# enter ECRYPT_keysetup +.text +.p2align 5 +.globl ECRYPT_keysetup +ECRYPT_keysetup: + mov %esp,%eax + and $31,%eax + add $256,%eax + sub %eax,%esp + # eax_stack = eax + movl %eax,64(%esp) + # ebx_stack = ebx + movl %ebx,68(%esp) + # esi_stack = esi + movl %esi,72(%esp) + # edi_stack = edi + movl %edi,76(%esp) + # ebp_stack = ebp + movl %ebp,80(%esp) + # k = arg2 + movl 8(%esp,%eax),%ecx + # kbits = arg3 + movl 12(%esp,%eax),%edx + # x = arg1 + movl 4(%esp,%eax),%eax + # in1 = *(uint32 *) (k + 0) + movl 0(%ecx),%ebx + # in2 = *(uint32 *) (k + 4) + movl 4(%ecx),%esi + # in3 = *(uint32 *) (k + 8) + movl 8(%ecx),%edi + # in4 = *(uint32 *) (k + 12) + movl 12(%ecx),%ebp + # *(uint32 *) (x + 4) = in1 + movl %ebx,4(%eax) + # *(uint32 *) (x + 8) = in2 + movl %esi,8(%eax) + # *(uint32 *) (x + 12) = in3 + movl %edi,12(%eax) + # *(uint32 *) (x + 16) = in4 + movl %ebp,16(%eax) + # kbits - 256 + cmp $256,%edx + # goto kbits128 if unsigned< + jb ._kbits128 +._kbits256: + # in11 = *(uint32 *) (k + 16) + movl 16(%ecx),%edx + # in12 = *(uint32 *) (k + 20) + movl 20(%ecx),%ebx + # in13 = *(uint32 *) (k + 24) + movl 24(%ecx),%esi + # in14 = *(uint32 *) (k + 28) + movl 28(%ecx),%ecx + # *(uint32 *) (x + 44) = in11 + movl %edx,44(%eax) + # *(uint32 *) (x + 48) = in12 + movl %ebx,48(%eax) + # *(uint32 *) (x + 52) = in13 + movl %esi,52(%eax) + # *(uint32 *) (x + 56) = in14 + movl %ecx,56(%eax) + # in0 = 1634760805 + mov $1634760805,%ecx + # in5 = 857760878 + mov $857760878,%edx + # in10 = 2036477234 + mov $2036477234,%ebx + # in15 = 1797285236 + mov $1797285236,%esi + # *(uint32 *) (x + 0) = in0 + movl %ecx,0(%eax) + # *(uint32 *) (x + 20) = in5 + movl %edx,20(%eax) + # *(uint32 *) (x + 40) = in10 + movl %ebx,40(%eax) + # *(uint32 *) (x + 60) = in15 + movl %esi,60(%eax) + # goto keysetupdone + jmp ._keysetupdone +._kbits128: + # in11 = *(uint32 *) (k + 0) + movl 0(%ecx),%edx + # in12 = *(uint32 *) (k + 4) + movl 4(%ecx),%ebx + # in13 = *(uint32 *) (k + 8) + movl 8(%ecx),%esi + # in14 = *(uint32 *) (k + 12) + movl 12(%ecx),%ecx + # *(uint32 *) (x + 44) = in11 + movl %edx,44(%eax) + # *(uint32 *) (x + 48) = in12 + movl %ebx,48(%eax) + # *(uint32 *) (x + 52) = in13 + movl %esi,52(%eax) + # *(uint32 *) (x + 56) = in14 + movl %ecx,56(%eax) + # in0 = 1634760805 + mov $1634760805,%ecx + # in5 = 824206446 + mov $824206446,%edx + # in10 = 2036477238 + mov $2036477238,%ebx + # in15 = 1797285236 + mov $1797285236,%esi + # *(uint32 *) (x + 0) = in0 + movl %ecx,0(%eax) + # *(uint32 *) (x + 20) = in5 + movl %edx,20(%eax) + # *(uint32 *) (x + 40) = in10 + movl %ebx,40(%eax) + # *(uint32 *) (x + 60) = in15 + movl %esi,60(%eax) +._keysetupdone: + # eax = eax_stack + movl 64(%esp),%eax + # ebx = ebx_stack + movl 68(%esp),%ebx + # esi = esi_stack + movl 72(%esp),%esi + # edi = edi_stack + movl 76(%esp),%edi + # ebp = ebp_stack + movl 80(%esp),%ebp + # leave + add %eax,%esp + ret +# enter ECRYPT_ivsetup +.text +.p2align 5 +.globl ECRYPT_ivsetup +ECRYPT_ivsetup: + mov %esp,%eax + and $31,%eax + add $256,%eax + sub %eax,%esp + # eax_stack = eax + movl %eax,64(%esp) + # ebx_stack = ebx + movl %ebx,68(%esp) + # esi_stack = esi + movl %esi,72(%esp) + # edi_stack = edi + movl %edi,76(%esp) + # ebp_stack = ebp + movl %ebp,80(%esp) + # iv = arg2 + movl 8(%esp,%eax),%ecx + # x = arg1 + movl 4(%esp,%eax),%eax + # in6 = *(uint32 *) (iv + 0) + movl 0(%ecx),%edx + # in7 = *(uint32 *) (iv + 4) + movl 4(%ecx),%ecx + # in8 = 0 + mov 
$0,%ebx + # in9 = 0 + mov $0,%esi + # *(uint32 *) (x + 24) = in6 + movl %edx,24(%eax) + # *(uint32 *) (x + 28) = in7 + movl %ecx,28(%eax) + # *(uint32 *) (x + 32) = in8 + movl %ebx,32(%eax) + # *(uint32 *) (x + 36) = in9 + movl %esi,36(%eax) + # eax = eax_stack + movl 64(%esp),%eax + # ebx = ebx_stack + movl 68(%esp),%ebx + # esi = esi_stack + movl 72(%esp),%esi + # edi = edi_stack + movl 76(%esp),%edi + # ebp = ebp_stack + movl 80(%esp),%ebp + # leave + add %eax,%esp + ret diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S new file mode 100644 index 00000000..6214a9b0 --- /dev/null +++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S @@ -0,0 +1,920 @@ +# enter ECRYPT_encrypt_bytes +.text +.p2align 5 +.globl ECRYPT_encrypt_bytes +ECRYPT_encrypt_bytes: + mov %rsp,%r11 + and $31,%r11 + add $256,%r11 + sub %r11,%rsp + # x = arg1 + mov %rdi,%r8 + # m = arg2 + mov %rsi,%rsi + # out = arg3 + mov %rdx,%rdi + # bytes = arg4 + mov %rcx,%rdx + # unsigned>? bytes - 0 + cmp $0,%rdx + # comment:fp stack unchanged by jump + # goto done if !unsigned> + jbe ._done + # comment:fp stack unchanged by fallthrough +# start: +._start: + # r11_stack = r11 + movq %r11,0(%rsp) + # r12_stack = r12 + movq %r12,8(%rsp) + # r13_stack = r13 + movq %r13,16(%rsp) + # r14_stack = r14 + movq %r14,24(%rsp) + # r15_stack = r15 + movq %r15,32(%rsp) + # rbx_stack = rbx + movq %rbx,40(%rsp) + # rbp_stack = rbp + movq %rbp,48(%rsp) + # in0 = *(uint64 *) (x + 0) + movq 0(%r8),%rcx + # in2 = *(uint64 *) (x + 8) + movq 8(%r8),%r9 + # in4 = *(uint64 *) (x + 16) + movq 16(%r8),%rax + # in6 = *(uint64 *) (x + 24) + movq 24(%r8),%r10 + # in8 = *(uint64 *) (x + 32) + movq 32(%r8),%r11 + # in10 = *(uint64 *) (x + 40) + movq 40(%r8),%r12 + # in12 = *(uint64 *) (x + 48) + movq 48(%r8),%r13 + # in14 = *(uint64 *) (x + 56) + movq 56(%r8),%r14 + # j0 = in0 + movq %rcx,56(%rsp) + # j2 = in2 + movq %r9,64(%rsp) + # j4 = in4 + movq %rax,72(%rsp) + # j6 = in6 + movq %r10,80(%rsp) + # j8 = in8 + movq %r11,88(%rsp) + # j10 = in10 + movq %r12,96(%rsp) + # j12 = in12 + movq %r13,104(%rsp) + # j14 = in14 + movq %r14,112(%rsp) + # x_backup = x + movq %r8,120(%rsp) +# bytesatleast1: +._bytesatleast1: + # unsigned>= 32 + shr $32,%rdi + # x3 = j2 + movq 64(%rsp),%rsi + # x2 = x3 + mov %rsi,%rcx + # (uint64) x3 >>= 32 + shr $32,%rsi + # x5 = j4 + movq 72(%rsp),%r8 + # x4 = x5 + mov %r8,%r9 + # (uint64) x5 >>= 32 + shr $32,%r8 + # x5_stack = x5 + movq %r8,160(%rsp) + # x7 = j6 + movq 80(%rsp),%r8 + # x6 = x7 + mov %r8,%rax + # (uint64) x7 >>= 32 + shr $32,%r8 + # x9 = j8 + movq 88(%rsp),%r10 + # x8 = x9 + mov %r10,%r11 + # (uint64) x9 >>= 32 + shr $32,%r10 + # x11 = j10 + movq 96(%rsp),%r12 + # x10 = x11 + mov %r12,%r13 + # x10_stack = x10 + movq %r13,168(%rsp) + # (uint64) x11 >>= 32 + shr $32,%r12 + # x13 = j12 + movq 104(%rsp),%r13 + # x12 = x13 + mov %r13,%r14 + # (uint64) x13 >>= 32 + shr $32,%r13 + # x15 = j14 + movq 112(%rsp),%r15 + # x14 = x15 + mov %r15,%rbx + # (uint64) x15 >>= 32 + shr $32,%r15 + # x15_stack = x15 + movq %r15,176(%rsp) + # i = 20 + mov $20,%r15 +# mainloop: +._mainloop: + # i_backup = i + movq %r15,184(%rsp) + # x5 = x5_stack + movq 160(%rsp),%r15 + # a = x12 + x0 + lea (%r14,%rdx),%rbp + # (uint32) a <<<= 7 + rol $7,%ebp + # x4 ^= a + xor %rbp,%r9 + # b = x1 + x5 + lea (%rdi,%r15),%rbp + # (uint32) b <<<= 7 + rol $7,%ebp + # x9 ^= b + xor %rbp,%r10 + # a = x0 + x4 + lea (%rdx,%r9),%rbp + # (uint32) a <<<= 9 + rol $9,%ebp + # x8 ^= a + xor %rbp,%r11 + # b = x5 + x9 + lea (%r15,%r10),%rbp + # 
(uint32) b <<<= 9 + rol $9,%ebp + # x13 ^= b + xor %rbp,%r13 + # a = x4 + x8 + lea (%r9,%r11),%rbp + # (uint32) a <<<= 13 + rol $13,%ebp + # x12 ^= a + xor %rbp,%r14 + # b = x9 + x13 + lea (%r10,%r13),%rbp + # (uint32) b <<<= 13 + rol $13,%ebp + # x1 ^= b + xor %rbp,%rdi + # a = x8 + x12 + lea (%r11,%r14),%rbp + # (uint32) a <<<= 18 + rol $18,%ebp + # x0 ^= a + xor %rbp,%rdx + # b = x13 + x1 + lea (%r13,%rdi),%rbp + # (uint32) b <<<= 18 + rol $18,%ebp + # x5 ^= b + xor %rbp,%r15 + # x10 = x10_stack + movq 168(%rsp),%rbp + # x5_stack = x5 + movq %r15,160(%rsp) + # c = x6 + x10 + lea (%rax,%rbp),%r15 + # (uint32) c <<<= 7 + rol $7,%r15d + # x14 ^= c + xor %r15,%rbx + # c = x10 + x14 + lea (%rbp,%rbx),%r15 + # (uint32) c <<<= 9 + rol $9,%r15d + # x2 ^= c + xor %r15,%rcx + # c = x14 + x2 + lea (%rbx,%rcx),%r15 + # (uint32) c <<<= 13 + rol $13,%r15d + # x6 ^= c + xor %r15,%rax + # c = x2 + x6 + lea (%rcx,%rax),%r15 + # (uint32) c <<<= 18 + rol $18,%r15d + # x10 ^= c + xor %r15,%rbp + # x15 = x15_stack + movq 176(%rsp),%r15 + # x10_stack = x10 + movq %rbp,168(%rsp) + # d = x11 + x15 + lea (%r12,%r15),%rbp + # (uint32) d <<<= 7 + rol $7,%ebp + # x3 ^= d + xor %rbp,%rsi + # d = x15 + x3 + lea (%r15,%rsi),%rbp + # (uint32) d <<<= 9 + rol $9,%ebp + # x7 ^= d + xor %rbp,%r8 + # d = x3 + x7 + lea (%rsi,%r8),%rbp + # (uint32) d <<<= 13 + rol $13,%ebp + # x11 ^= d + xor %rbp,%r12 + # d = x7 + x11 + lea (%r8,%r12),%rbp + # (uint32) d <<<= 18 + rol $18,%ebp + # x15 ^= d + xor %rbp,%r15 + # x15_stack = x15 + movq %r15,176(%rsp) + # x5 = x5_stack + movq 160(%rsp),%r15 + # a = x3 + x0 + lea (%rsi,%rdx),%rbp + # (uint32) a <<<= 7 + rol $7,%ebp + # x1 ^= a + xor %rbp,%rdi + # b = x4 + x5 + lea (%r9,%r15),%rbp + # (uint32) b <<<= 7 + rol $7,%ebp + # x6 ^= b + xor %rbp,%rax + # a = x0 + x1 + lea (%rdx,%rdi),%rbp + # (uint32) a <<<= 9 + rol $9,%ebp + # x2 ^= a + xor %rbp,%rcx + # b = x5 + x6 + lea (%r15,%rax),%rbp + # (uint32) b <<<= 9 + rol $9,%ebp + # x7 ^= b + xor %rbp,%r8 + # a = x1 + x2 + lea (%rdi,%rcx),%rbp + # (uint32) a <<<= 13 + rol $13,%ebp + # x3 ^= a + xor %rbp,%rsi + # b = x6 + x7 + lea (%rax,%r8),%rbp + # (uint32) b <<<= 13 + rol $13,%ebp + # x4 ^= b + xor %rbp,%r9 + # a = x2 + x3 + lea (%rcx,%rsi),%rbp + # (uint32) a <<<= 18 + rol $18,%ebp + # x0 ^= a + xor %rbp,%rdx + # b = x7 + x4 + lea (%r8,%r9),%rbp + # (uint32) b <<<= 18 + rol $18,%ebp + # x5 ^= b + xor %rbp,%r15 + # x10 = x10_stack + movq 168(%rsp),%rbp + # x5_stack = x5 + movq %r15,160(%rsp) + # c = x9 + x10 + lea (%r10,%rbp),%r15 + # (uint32) c <<<= 7 + rol $7,%r15d + # x11 ^= c + xor %r15,%r12 + # c = x10 + x11 + lea (%rbp,%r12),%r15 + # (uint32) c <<<= 9 + rol $9,%r15d + # x8 ^= c + xor %r15,%r11 + # c = x11 + x8 + lea (%r12,%r11),%r15 + # (uint32) c <<<= 13 + rol $13,%r15d + # x9 ^= c + xor %r15,%r10 + # c = x8 + x9 + lea (%r11,%r10),%r15 + # (uint32) c <<<= 18 + rol $18,%r15d + # x10 ^= c + xor %r15,%rbp + # x15 = x15_stack + movq 176(%rsp),%r15 + # x10_stack = x10 + movq %rbp,168(%rsp) + # d = x14 + x15 + lea (%rbx,%r15),%rbp + # (uint32) d <<<= 7 + rol $7,%ebp + # x12 ^= d + xor %rbp,%r14 + # d = x15 + x12 + lea (%r15,%r14),%rbp + # (uint32) d <<<= 9 + rol $9,%ebp + # x13 ^= d + xor %rbp,%r13 + # d = x12 + x13 + lea (%r14,%r13),%rbp + # (uint32) d <<<= 13 + rol $13,%ebp + # x14 ^= d + xor %rbp,%rbx + # d = x13 + x14 + lea (%r13,%rbx),%rbp + # (uint32) d <<<= 18 + rol $18,%ebp + # x15 ^= d + xor %rbp,%r15 + # x15_stack = x15 + movq %r15,176(%rsp) + # x5 = x5_stack + movq 160(%rsp),%r15 + # a = x12 + x0 + lea (%r14,%rdx),%rbp + # 
(uint32) a <<<= 7 + rol $7,%ebp + # x4 ^= a + xor %rbp,%r9 + # b = x1 + x5 + lea (%rdi,%r15),%rbp + # (uint32) b <<<= 7 + rol $7,%ebp + # x9 ^= b + xor %rbp,%r10 + # a = x0 + x4 + lea (%rdx,%r9),%rbp + # (uint32) a <<<= 9 + rol $9,%ebp + # x8 ^= a + xor %rbp,%r11 + # b = x5 + x9 + lea (%r15,%r10),%rbp + # (uint32) b <<<= 9 + rol $9,%ebp + # x13 ^= b + xor %rbp,%r13 + # a = x4 + x8 + lea (%r9,%r11),%rbp + # (uint32) a <<<= 13 + rol $13,%ebp + # x12 ^= a + xor %rbp,%r14 + # b = x9 + x13 + lea (%r10,%r13),%rbp + # (uint32) b <<<= 13 + rol $13,%ebp + # x1 ^= b + xor %rbp,%rdi + # a = x8 + x12 + lea (%r11,%r14),%rbp + # (uint32) a <<<= 18 + rol $18,%ebp + # x0 ^= a + xor %rbp,%rdx + # b = x13 + x1 + lea (%r13,%rdi),%rbp + # (uint32) b <<<= 18 + rol $18,%ebp + # x5 ^= b + xor %rbp,%r15 + # x10 = x10_stack + movq 168(%rsp),%rbp + # x5_stack = x5 + movq %r15,160(%rsp) + # c = x6 + x10 + lea (%rax,%rbp),%r15 + # (uint32) c <<<= 7 + rol $7,%r15d + # x14 ^= c + xor %r15,%rbx + # c = x10 + x14 + lea (%rbp,%rbx),%r15 + # (uint32) c <<<= 9 + rol $9,%r15d + # x2 ^= c + xor %r15,%rcx + # c = x14 + x2 + lea (%rbx,%rcx),%r15 + # (uint32) c <<<= 13 + rol $13,%r15d + # x6 ^= c + xor %r15,%rax + # c = x2 + x6 + lea (%rcx,%rax),%r15 + # (uint32) c <<<= 18 + rol $18,%r15d + # x10 ^= c + xor %r15,%rbp + # x15 = x15_stack + movq 176(%rsp),%r15 + # x10_stack = x10 + movq %rbp,168(%rsp) + # d = x11 + x15 + lea (%r12,%r15),%rbp + # (uint32) d <<<= 7 + rol $7,%ebp + # x3 ^= d + xor %rbp,%rsi + # d = x15 + x3 + lea (%r15,%rsi),%rbp + # (uint32) d <<<= 9 + rol $9,%ebp + # x7 ^= d + xor %rbp,%r8 + # d = x3 + x7 + lea (%rsi,%r8),%rbp + # (uint32) d <<<= 13 + rol $13,%ebp + # x11 ^= d + xor %rbp,%r12 + # d = x7 + x11 + lea (%r8,%r12),%rbp + # (uint32) d <<<= 18 + rol $18,%ebp + # x15 ^= d + xor %rbp,%r15 + # x15_stack = x15 + movq %r15,176(%rsp) + # x5 = x5_stack + movq 160(%rsp),%r15 + # a = x3 + x0 + lea (%rsi,%rdx),%rbp + # (uint32) a <<<= 7 + rol $7,%ebp + # x1 ^= a + xor %rbp,%rdi + # b = x4 + x5 + lea (%r9,%r15),%rbp + # (uint32) b <<<= 7 + rol $7,%ebp + # x6 ^= b + xor %rbp,%rax + # a = x0 + x1 + lea (%rdx,%rdi),%rbp + # (uint32) a <<<= 9 + rol $9,%ebp + # x2 ^= a + xor %rbp,%rcx + # b = x5 + x6 + lea (%r15,%rax),%rbp + # (uint32) b <<<= 9 + rol $9,%ebp + # x7 ^= b + xor %rbp,%r8 + # a = x1 + x2 + lea (%rdi,%rcx),%rbp + # (uint32) a <<<= 13 + rol $13,%ebp + # x3 ^= a + xor %rbp,%rsi + # b = x6 + x7 + lea (%rax,%r8),%rbp + # (uint32) b <<<= 13 + rol $13,%ebp + # x4 ^= b + xor %rbp,%r9 + # a = x2 + x3 + lea (%rcx,%rsi),%rbp + # (uint32) a <<<= 18 + rol $18,%ebp + # x0 ^= a + xor %rbp,%rdx + # b = x7 + x4 + lea (%r8,%r9),%rbp + # (uint32) b <<<= 18 + rol $18,%ebp + # x5 ^= b + xor %rbp,%r15 + # x10 = x10_stack + movq 168(%rsp),%rbp + # x5_stack = x5 + movq %r15,160(%rsp) + # c = x9 + x10 + lea (%r10,%rbp),%r15 + # (uint32) c <<<= 7 + rol $7,%r15d + # x11 ^= c + xor %r15,%r12 + # c = x10 + x11 + lea (%rbp,%r12),%r15 + # (uint32) c <<<= 9 + rol $9,%r15d + # x8 ^= c + xor %r15,%r11 + # c = x11 + x8 + lea (%r12,%r11),%r15 + # (uint32) c <<<= 13 + rol $13,%r15d + # x9 ^= c + xor %r15,%r10 + # c = x8 + x9 + lea (%r11,%r10),%r15 + # (uint32) c <<<= 18 + rol $18,%r15d + # x10 ^= c + xor %r15,%rbp + # x15 = x15_stack + movq 176(%rsp),%r15 + # x10_stack = x10 + movq %rbp,168(%rsp) + # d = x14 + x15 + lea (%rbx,%r15),%rbp + # (uint32) d <<<= 7 + rol $7,%ebp + # x12 ^= d + xor %rbp,%r14 + # d = x15 + x12 + lea (%r15,%r14),%rbp + # (uint32) d <<<= 9 + rol $9,%ebp + # x13 ^= d + xor %rbp,%r13 + # d = x12 + x13 + lea (%r14,%r13),%rbp 
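One register-allocation trick distinguishes this 64-bit version from the i586 one above: the sixteen 32-bit state words are saved as eight 64-bit j-slots, and each load unpacks a pair with a mov/shr sequence (the "x0 = x1; (uint64) x1 >>= 32" annotations). In C the bookkeeping reduces to the sketch below; it is also why the block counter in words 8 and 9 is later bumped with a single 64-bit add where the 32-bit code needs an add/adc pair:

#include <stdint.h>

/* split one 64-bit save slot into two 32-bit state words ... */
static void unpack_pair(uint64_t pair, uint32_t *even, uint32_t *odd)
{
        *even = (uint32_t)pair;
        *odd  = (uint32_t)(pair >> 32);
}

/* ... and merge them back before the keystream is xored with the message */
static uint64_t pack_pair(uint32_t even, uint32_t odd)
{
        return (uint64_t)even | ((uint64_t)odd << 32);
}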
+ # (uint32) d <<<= 13 + rol $13,%ebp + # x14 ^= d + xor %rbp,%rbx + # d = x13 + x14 + lea (%r13,%rbx),%rbp + # (uint32) d <<<= 18 + rol $18,%ebp + # x15 ^= d + xor %rbp,%r15 + # x15_stack = x15 + movq %r15,176(%rsp) + # i = i_backup + movq 184(%rsp),%r15 + # unsigned>? i -= 4 + sub $4,%r15 + # comment:fp stack unchanged by jump + # goto mainloop if unsigned> + ja ._mainloop + # (uint32) x2 += j2 + addl 64(%rsp),%ecx + # x3 <<= 32 + shl $32,%rsi + # x3 += j2 + addq 64(%rsp),%rsi + # (uint64) x3 >>= 32 + shr $32,%rsi + # x3 <<= 32 + shl $32,%rsi + # x2 += x3 + add %rsi,%rcx + # (uint32) x6 += j6 + addl 80(%rsp),%eax + # x7 <<= 32 + shl $32,%r8 + # x7 += j6 + addq 80(%rsp),%r8 + # (uint64) x7 >>= 32 + shr $32,%r8 + # x7 <<= 32 + shl $32,%r8 + # x6 += x7 + add %r8,%rax + # (uint32) x8 += j8 + addl 88(%rsp),%r11d + # x9 <<= 32 + shl $32,%r10 + # x9 += j8 + addq 88(%rsp),%r10 + # (uint64) x9 >>= 32 + shr $32,%r10 + # x9 <<= 32 + shl $32,%r10 + # x8 += x9 + add %r10,%r11 + # (uint32) x12 += j12 + addl 104(%rsp),%r14d + # x13 <<= 32 + shl $32,%r13 + # x13 += j12 + addq 104(%rsp),%r13 + # (uint64) x13 >>= 32 + shr $32,%r13 + # x13 <<= 32 + shl $32,%r13 + # x12 += x13 + add %r13,%r14 + # (uint32) x0 += j0 + addl 56(%rsp),%edx + # x1 <<= 32 + shl $32,%rdi + # x1 += j0 + addq 56(%rsp),%rdi + # (uint64) x1 >>= 32 + shr $32,%rdi + # x1 <<= 32 + shl $32,%rdi + # x0 += x1 + add %rdi,%rdx + # x5 = x5_stack + movq 160(%rsp),%rdi + # (uint32) x4 += j4 + addl 72(%rsp),%r9d + # x5 <<= 32 + shl $32,%rdi + # x5 += j4 + addq 72(%rsp),%rdi + # (uint64) x5 >>= 32 + shr $32,%rdi + # x5 <<= 32 + shl $32,%rdi + # x4 += x5 + add %rdi,%r9 + # x10 = x10_stack + movq 168(%rsp),%r8 + # (uint32) x10 += j10 + addl 96(%rsp),%r8d + # x11 <<= 32 + shl $32,%r12 + # x11 += j10 + addq 96(%rsp),%r12 + # (uint64) x11 >>= 32 + shr $32,%r12 + # x11 <<= 32 + shl $32,%r12 + # x10 += x11 + add %r12,%r8 + # x15 = x15_stack + movq 176(%rsp),%rdi + # (uint32) x14 += j14 + addl 112(%rsp),%ebx + # x15 <<= 32 + shl $32,%rdi + # x15 += j14 + addq 112(%rsp),%rdi + # (uint64) x15 >>= 32 + shr $32,%rdi + # x15 <<= 32 + shl $32,%rdi + # x14 += x15 + add %rdi,%rbx + # out = out_backup + movq 136(%rsp),%rdi + # m = m_backup + movq 144(%rsp),%rsi + # x0 ^= *(uint64 *) (m + 0) + xorq 0(%rsi),%rdx + # *(uint64 *) (out + 0) = x0 + movq %rdx,0(%rdi) + # x2 ^= *(uint64 *) (m + 8) + xorq 8(%rsi),%rcx + # *(uint64 *) (out + 8) = x2 + movq %rcx,8(%rdi) + # x4 ^= *(uint64 *) (m + 16) + xorq 16(%rsi),%r9 + # *(uint64 *) (out + 16) = x4 + movq %r9,16(%rdi) + # x6 ^= *(uint64 *) (m + 24) + xorq 24(%rsi),%rax + # *(uint64 *) (out + 24) = x6 + movq %rax,24(%rdi) + # x8 ^= *(uint64 *) (m + 32) + xorq 32(%rsi),%r11 + # *(uint64 *) (out + 32) = x8 + movq %r11,32(%rdi) + # x10 ^= *(uint64 *) (m + 40) + xorq 40(%rsi),%r8 + # *(uint64 *) (out + 40) = x10 + movq %r8,40(%rdi) + # x12 ^= *(uint64 *) (m + 48) + xorq 48(%rsi),%r14 + # *(uint64 *) (out + 48) = x12 + movq %r14,48(%rdi) + # x14 ^= *(uint64 *) (m + 56) + xorq 56(%rsi),%rbx + # *(uint64 *) (out + 56) = x14 + movq %rbx,56(%rdi) + # bytes = bytes_backup + movq 152(%rsp),%rdx + # in8 = j8 + movq 88(%rsp),%rcx + # in8 += 1 + add $1,%rcx + # j8 = in8 + movq %rcx,88(%rsp) + # unsigned>? 
unsigned + ja ._bytesatleast65 + # comment:fp stack unchanged by jump + # goto bytesatleast64 if !unsigned< + jae ._bytesatleast64 + # m = out + mov %rdi,%rsi + # out = ctarget + movq 128(%rsp),%rdi + # i = bytes + mov %rdx,%rcx + # while (i) { *out++ = *m++; --i } + rep movsb + # comment:fp stack unchanged by fallthrough +# bytesatleast64: +._bytesatleast64: + # x = x_backup + movq 120(%rsp),%rdi + # in8 = j8 + movq 88(%rsp),%rsi + # *(uint64 *) (x + 32) = in8 + movq %rsi,32(%rdi) + # r11 = r11_stack + movq 0(%rsp),%r11 + # r12 = r12_stack + movq 8(%rsp),%r12 + # r13 = r13_stack + movq 16(%rsp),%r13 + # r14 = r14_stack + movq 24(%rsp),%r14 + # r15 = r15_stack + movq 32(%rsp),%r15 + # rbx = rbx_stack + movq 40(%rsp),%rbx + # rbp = rbp_stack + movq 48(%rsp),%rbp + # comment:fp stack unchanged by fallthrough +# done: +._done: + # leave + add %r11,%rsp + mov %rdi,%rax + mov %rsi,%rdx + ret +# bytesatleast65: +._bytesatleast65: + # bytes -= 64 + sub $64,%rdx + # out += 64 + add $64,%rdi + # m += 64 + add $64,%rsi + # comment:fp stack unchanged by jump + # goto bytesatleast1 + jmp ._bytesatleast1 +# enter ECRYPT_keysetup +.text +.p2align 5 +.globl ECRYPT_keysetup +ECRYPT_keysetup: + mov %rsp,%r11 + and $31,%r11 + add $256,%r11 + sub %r11,%rsp + # k = arg2 + mov %rsi,%rsi + # kbits = arg3 + mov %rdx,%rdx + # x = arg1 + mov %rdi,%rdi + # in0 = *(uint64 *) (k + 0) + movq 0(%rsi),%r8 + # in2 = *(uint64 *) (k + 8) + movq 8(%rsi),%r9 + # *(uint64 *) (x + 4) = in0 + movq %r8,4(%rdi) + # *(uint64 *) (x + 12) = in2 + movq %r9,12(%rdi) + # unsigned + * + * The assembly codes are public domain assembly codes written by Daniel. J. + * Bernstein . The codes are modified to include indentation + * and to remove extraneous comments and functions that are not needed. + * - i586 version, renamed as salsa20-i586-asm_32.S + * available from + * - x86-64 version, renamed as salsa20-x86_64-asm_64.S + * available from + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
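The four magic constants stored by ECRYPT_keysetup (1634760805, 857760878, 2036477234, 1797285236) are the little-endian words of "expand 32-byte k"; the 128-bit variant substitutes "expand 16-byte k". They sit on the diagonal of the 4x4 state, and the key, nonce and block counter fill the remaining words exactly as the byte offsets in the assembly indicate. A C sketch of the same layout for the 256-bit case (hypothetical helper, assuming a little-endian host as the assembly does):

#include <stdint.h>
#include <string.h>

/* "expa", "nd 3", "2-by", "te k" read as little-endian uint32_t */
static const uint32_t sigma[4] = {
        1634760805, 857760878, 2036477234, 1797285236
};

static void salsa20_init_state(uint32_t x[16], const uint8_t key[32],
                               const uint8_t iv[8])
{
        x[0] = sigma[0];                /* constants at words 0, 5, 10, 15 */
        memcpy(&x[1], key, 16);         /* key words 1..4   (x + 4..16)   */
        x[5] = sigma[1];
        memcpy(&x[6], iv, 8);           /* nonce words 6..7 (x + 24..28)  */
        x[8] = 0;                       /* block counter, words 8..9,     */
        x[9] = 0;                       /*   zeroed by ECRYPT_ivsetup     */
        x[10] = sigma[2];
        memcpy(&x[11], key + 16, 16);   /* key words 11..14 (x + 44..56)  */
        x[15] = sigma[3];
}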
+ * + */ + +#include <crypto/algapi.h> +#include <linux/module.h> +#include <linux/crypto.h> + +#define SALSA20_IV_SIZE 8U +#define SALSA20_MIN_KEY_SIZE 16U +#define SALSA20_MAX_KEY_SIZE 32U + +// use the ECRYPT_* function names +#define salsa20_keysetup ECRYPT_keysetup +#define salsa20_ivsetup ECRYPT_ivsetup +#define salsa20_encrypt_bytes ECRYPT_encrypt_bytes + +struct salsa20_ctx +{ + u32 input[16]; +}; + +asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k, + u32 keysize, u32 ivsize); +asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv); +asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx, + const u8 *src, u8 *dst, u32 bytes); + +static int setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keysize) +{ + struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm); + salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8); + return 0; +} + +static int encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct blkcipher_walk walk; + struct crypto_blkcipher *tfm = desc->tfm; + struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm); + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, 64); + + salsa20_ivsetup(ctx, walk.iv); + + if (likely(walk.nbytes == nbytes)) + { + salsa20_encrypt_bytes(ctx, walk.src.virt.addr, + walk.dst.virt.addr, nbytes); + return blkcipher_walk_done(desc, &walk, 0); + } + + while (walk.nbytes >= 64) { + salsa20_encrypt_bytes(ctx, walk.src.virt.addr, + walk.dst.virt.addr, + walk.nbytes - (walk.nbytes % 64)); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64); + } + + if (walk.nbytes) { + salsa20_encrypt_bytes(ctx, walk.src.virt.addr, + walk.dst.virt.addr, walk.nbytes); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +static struct crypto_alg alg = { + .cra_name = "salsa20", + .cra_driver_name = "salsa20-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_type = &crypto_blkcipher_type, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct salsa20_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg.cra_list), + .cra_u = { + .blkcipher = { + .setkey = setkey, + .encrypt = encrypt, + .decrypt = encrypt, + .min_keysize = SALSA20_MIN_KEY_SIZE, + .max_keysize = SALSA20_MAX_KEY_SIZE, + .ivsize = SALSA20_IV_SIZE, + } + } +}; + +static int __init init(void) +{ + return crypto_register_alg(&alg); +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)"); +MODULE_ALIAS("salsa20"); +MODULE_ALIAS("salsa20-asm"); diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S new file mode 100644 index 00000000..c00053d4 --- /dev/null +++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S @@ -0,0 +1,635 @@ +/* + * Serpent Cipher 4-way parallel algorithm (i586/SSE2) + * + * Copyright (C) 2011 Jussi Kivilinna + * + * Based on crypto/serpent.c by + * Copyright (C) 2002 Dag Arne Osvik + * 2003 Herbert Valerio Riedel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
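With the module loaded, the "salsa20" instance registered above (priority 200, ahead of the generic C implementation) is reachable from user space through the AF_ALG socket API. A sketch, not part of the patch and assuming a kernel built with CONFIG_CRYPTO_USER_API_SKCIPHER (error handling omitted for brevity):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

int main(void)
{
        struct sockaddr_alg sa = {
                .salg_family = AF_ALG,
                .salg_type   = "skcipher",
                .salg_name   = "salsa20",
        };
        unsigned char key[16] = { 0 }, buf[64] = "attack at dawn";
        char cbuf[CMSG_SPACE(4) + CMSG_SPACE(4 + 8)] = { 0 };
        struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
        struct msghdr msg = {
                .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
                .msg_iov = &iov, .msg_iovlen = 1,
        };
        struct cmsghdr *cmsg;
        struct af_alg_iv *ivp;
        int tfmfd, opfd, i;

        tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
        bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
        setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key));
        opfd = accept(tfmfd, NULL, 0);

        cmsg = CMSG_FIRSTHDR(&msg);             /* operation: encrypt */
        cmsg->cmsg_level = SOL_ALG;
        cmsg->cmsg_type  = ALG_SET_OP;
        cmsg->cmsg_len   = CMSG_LEN(4);
        *(uint32_t *)CMSG_DATA(cmsg) = ALG_OP_ENCRYPT;

        cmsg = CMSG_NXTHDR(&msg, cmsg);         /* 8-byte salsa20 nonce */
        cmsg->cmsg_level = SOL_ALG;
        cmsg->cmsg_type  = ALG_SET_IV;
        cmsg->cmsg_len   = CMSG_LEN(4 + 8);
        ivp = (struct af_alg_iv *)CMSG_DATA(cmsg);
        ivp->ivlen = 8;
        memset(ivp->iv, 0, 8);

        sendmsg(opfd, &msg, 0);
        read(opfd, buf, sizeof(buf));           /* ciphertext back in buf */

        for (i = 0; i < 16; i++)
                printf("%02x", buf[i]);
        printf("\n");
        return 0;
}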
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "serpent-sse2-i586-asm_32.S" +.text + +#define arg_ctx 4 +#define arg_dst 8 +#define arg_src 12 +#define arg_xor 16 + +/********************************************************************** + 4-way SSE2 serpent + **********************************************************************/ +#define CTX %edx + +#define RA %xmm0 +#define RB %xmm1 +#define RC %xmm2 +#define RD %xmm3 +#define RE %xmm4 + +#define RT0 %xmm5 +#define RT1 %xmm6 + +#define RNOT %xmm7 + +#define get_key(i, j, t) \ + movd (4*(i)+(j))*4(CTX), t; \ + pshufd $0, t, t; + +#define K(x0, x1, x2, x3, x4, i) \ + get_key(i, 0, x4); \ + get_key(i, 1, RT0); \ + get_key(i, 2, RT1); \ + pxor x4, x0; \ + pxor RT0, x1; \ + pxor RT1, x2; \ + get_key(i, 3, x4); \ + pxor x4, x3; + +#define LK(x0, x1, x2, x3, x4, i) \ + movdqa x0, x4; \ + pslld $13, x0; \ + psrld $(32 - 13), x4; \ + por x4, x0; \ + pxor x0, x1; \ + movdqa x2, x4; \ + pslld $3, x2; \ + psrld $(32 - 3), x4; \ + por x4, x2; \ + pxor x2, x1; \ + movdqa x1, x4; \ + pslld $1, x1; \ + psrld $(32 - 1), x4; \ + por x4, x1; \ + movdqa x0, x4; \ + pslld $3, x4; \ + pxor x2, x3; \ + pxor x4, x3; \ + movdqa x3, x4; \ + pslld $7, x3; \ + psrld $(32 - 7), x4; \ + por x4, x3; \ + movdqa x1, x4; \ + pslld $7, x4; \ + pxor x1, x0; \ + pxor x3, x0; \ + pxor x3, x2; \ + pxor x4, x2; \ + movdqa x0, x4; \ + get_key(i, 1, RT0); \ + pxor RT0, x1; \ + get_key(i, 3, RT0); \ + pxor RT0, x3; \ + pslld $5, x0; \ + psrld $(32 - 5), x4; \ + por x4, x0; \ + movdqa x2, x4; \ + pslld $22, x2; \ + psrld $(32 - 22), x4; \ + por x4, x2; \ + get_key(i, 0, RT0); \ + pxor RT0, x0; \ + get_key(i, 2, RT0); \ + pxor RT0, x2; + +#define KL(x0, x1, x2, x3, x4, i) \ + K(x0, x1, x2, x3, x4, i); \ + movdqa x0, x4; \ + psrld $5, x0; \ + pslld $(32 - 5), x4; \ + por x4, x0; \ + movdqa x2, x4; \ + psrld $22, x2; \ + pslld $(32 - 22), x4; \ + por x4, x2; \ + pxor x3, x2; \ + pxor x3, x0; \ + movdqa x1, x4; \ + pslld $7, x4; \ + pxor x1, x0; \ + pxor x4, x2; \ + movdqa x1, x4; \ + psrld $1, x1; \ + pslld $(32 - 1), x4; \ + por x4, x1; \ + movdqa x3, x4; \ + psrld $7, x3; \ + pslld $(32 - 7), x4; \ + por x4, x3; \ + pxor x0, x1; \ + movdqa x0, x4; \ + pslld $3, x4; \ + pxor x4, x3; \ + movdqa x0, x4; \ + psrld $13, x0; \ + pslld $(32 - 13), x4; \ + por x4, x0; \ + pxor x2, x1; \ + pxor x2, x3; \ + movdqa x2, x4; \ + psrld $3, x2; \ + pslld $(32 - 3), x4; \ + por x4, x2; + +#define S0(x0, x1, x2, x3, x4) \ + movdqa x3, x4; \ + por x0, x3; \ + pxor x4, x0; \ + pxor x2, x4; \ + pxor RNOT, x4; \ + pxor x1, x3; \ + pand x0, x1; \ + pxor x4, x1; \ + pxor x0, x2; \ + pxor x3, x0; \ + por x0, x4; \ + pxor x2, x0; \ + pand x1, x2; \ + pxor x2, x3; \ + pxor RNOT, x1; \ + pxor x4, x2; \ + pxor x2, x1; + +#define S1(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + pxor x0, x1; \ + pxor x3, x0; \ + pxor RNOT, x3; \ + pand x1, x4; \ + por x1, x0; \ + pxor x2, x3; \ + pxor x3, x0; \ + pxor x3, x1; \ + pxor x4, x3; \ + por x4, x1; \ + pxor x2, x4; \ + pand x0, x2; \ + pxor x1, x2; \ + por x0, x1; \ + pxor RNOT, x0; \ + pxor x2, x0; \ + pxor x1, x4; + +#define S2(x0, 
x1, x2, x3, x4) \ + pxor RNOT, x3; \ + pxor x0, x1; \ + movdqa x0, x4; \ + pand x2, x0; \ + pxor x3, x0; \ + por x4, x3; \ + pxor x1, x2; \ + pxor x1, x3; \ + pand x0, x1; \ + pxor x2, x0; \ + pand x3, x2; \ + por x1, x3; \ + pxor RNOT, x0; \ + pxor x0, x3; \ + pxor x0, x4; \ + pxor x2, x0; \ + por x2, x1; + +#define S3(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + pxor x3, x1; \ + por x0, x3; \ + pand x0, x4; \ + pxor x2, x0; \ + pxor x1, x2; \ + pand x3, x1; \ + pxor x3, x2; \ + por x4, x0; \ + pxor x3, x4; \ + pxor x0, x1; \ + pand x3, x0; \ + pand x4, x3; \ + pxor x2, x3; \ + por x1, x4; \ + pand x1, x2; \ + pxor x3, x4; \ + pxor x3, x0; \ + pxor x2, x3; + +#define S4(x0, x1, x2, x3, x4) \ + movdqa x3, x4; \ + pand x0, x3; \ + pxor x4, x0; \ + pxor x2, x3; \ + por x4, x2; \ + pxor x1, x0; \ + pxor x3, x4; \ + por x0, x2; \ + pxor x1, x2; \ + pand x0, x1; \ + pxor x4, x1; \ + pand x2, x4; \ + pxor x3, x2; \ + pxor x0, x4; \ + por x1, x3; \ + pxor RNOT, x1; \ + pxor x0, x3; + +#define S5(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + por x0, x1; \ + pxor x1, x2; \ + pxor RNOT, x3; \ + pxor x0, x4; \ + pxor x2, x0; \ + pand x4, x1; \ + por x3, x4; \ + pxor x0, x4; \ + pand x3, x0; \ + pxor x3, x1; \ + pxor x2, x3; \ + pxor x1, x0; \ + pand x4, x2; \ + pxor x2, x1; \ + pand x0, x2; \ + pxor x2, x3; + +#define S6(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + pxor x0, x3; \ + pxor x2, x1; \ + pxor x0, x2; \ + pand x3, x0; \ + por x3, x1; \ + pxor RNOT, x4; \ + pxor x1, x0; \ + pxor x2, x1; \ + pxor x4, x3; \ + pxor x0, x4; \ + pand x0, x2; \ + pxor x1, x4; \ + pxor x3, x2; \ + pand x1, x3; \ + pxor x0, x3; \ + pxor x2, x1; + +#define S7(x0, x1, x2, x3, x4) \ + pxor RNOT, x1; \ + movdqa x1, x4; \ + pxor RNOT, x0; \ + pand x2, x1; \ + pxor x3, x1; \ + por x4, x3; \ + pxor x2, x4; \ + pxor x3, x2; \ + pxor x0, x3; \ + por x1, x0; \ + pand x0, x2; \ + pxor x4, x0; \ + pxor x3, x4; \ + pand x0, x3; \ + pxor x1, x4; \ + pxor x4, x2; \ + pxor x1, x3; \ + por x0, x4; \ + pxor x1, x4; + +#define SI0(x0, x1, x2, x3, x4) \ + movdqa x3, x4; \ + pxor x0, x1; \ + por x1, x3; \ + pxor x1, x4; \ + pxor RNOT, x0; \ + pxor x3, x2; \ + pxor x0, x3; \ + pand x1, x0; \ + pxor x2, x0; \ + pand x3, x2; \ + pxor x4, x3; \ + pxor x3, x2; \ + pxor x3, x1; \ + pand x0, x3; \ + pxor x0, x1; \ + pxor x2, x0; \ + pxor x3, x4; + +#define SI1(x0, x1, x2, x3, x4) \ + pxor x3, x1; \ + movdqa x0, x4; \ + pxor x2, x0; \ + pxor RNOT, x2; \ + por x1, x4; \ + pxor x3, x4; \ + pand x1, x3; \ + pxor x2, x1; \ + pand x4, x2; \ + pxor x1, x4; \ + por x3, x1; \ + pxor x0, x3; \ + pxor x0, x2; \ + por x4, x0; \ + pxor x4, x2; \ + pxor x0, x1; \ + pxor x1, x4; + +#define SI2(x0, x1, x2, x3, x4) \ + pxor x1, x2; \ + movdqa x3, x4; \ + pxor RNOT, x3; \ + por x2, x3; \ + pxor x4, x2; \ + pxor x0, x4; \ + pxor x1, x3; \ + por x2, x1; \ + pxor x0, x2; \ + pxor x4, x1; \ + por x3, x4; \ + pxor x3, x2; \ + pxor x2, x4; \ + pand x1, x2; \ + pxor x3, x2; \ + pxor x4, x3; \ + pxor x0, x4; + +#define SI3(x0, x1, x2, x3, x4) \ + pxor x1, x2; \ + movdqa x1, x4; \ + pand x2, x1; \ + pxor x0, x1; \ + por x4, x0; \ + pxor x3, x4; \ + pxor x3, x0; \ + por x1, x3; \ + pxor x2, x1; \ + pxor x3, x1; \ + pxor x2, x0; \ + pxor x3, x2; \ + pand x1, x3; \ + pxor x0, x1; \ + pand x2, x0; \ + pxor x3, x4; \ + pxor x0, x3; \ + pxor x1, x0; + +#define SI4(x0, x1, x2, x3, x4) \ + pxor x3, x2; \ + movdqa x0, x4; \ + pand x1, x0; \ + pxor x2, x0; \ + por x3, x2; \ + pxor RNOT, x4; \ + pxor x0, x1; \ + pxor x2, x0; \ + pand x4, x2; \ + pxor x0, x2; \ + por x4, x0; \ + pxor x3, 
x0; \ + pand x2, x3; \ + pxor x3, x4; \ + pxor x1, x3; \ + pand x0, x1; \ + pxor x1, x4; \ + pxor x3, x0; + +#define SI5(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + por x2, x1; \ + pxor x4, x2; \ + pxor x3, x1; \ + pand x4, x3; \ + pxor x3, x2; \ + por x0, x3; \ + pxor RNOT, x0; \ + pxor x2, x3; \ + por x0, x2; \ + pxor x1, x4; \ + pxor x4, x2; \ + pand x0, x4; \ + pxor x1, x0; \ + pxor x3, x1; \ + pand x2, x0; \ + pxor x3, x2; \ + pxor x2, x0; \ + pxor x4, x2; \ + pxor x3, x4; + +#define SI6(x0, x1, x2, x3, x4) \ + pxor x2, x0; \ + movdqa x0, x4; \ + pand x3, x0; \ + pxor x3, x2; \ + pxor x2, x0; \ + pxor x1, x3; \ + por x4, x2; \ + pxor x3, x2; \ + pand x0, x3; \ + pxor RNOT, x0; \ + pxor x1, x3; \ + pand x2, x1; \ + pxor x0, x4; \ + pxor x4, x3; \ + pxor x2, x4; \ + pxor x1, x0; \ + pxor x0, x2; + +#define SI7(x0, x1, x2, x3, x4) \ + movdqa x3, x4; \ + pand x0, x3; \ + pxor x2, x0; \ + por x4, x2; \ + pxor x1, x4; \ + pxor RNOT, x0; \ + por x3, x1; \ + pxor x0, x4; \ + pand x2, x0; \ + pxor x1, x0; \ + pand x2, x1; \ + pxor x2, x3; \ + pxor x3, x4; \ + pand x3, x2; \ + por x0, x3; \ + pxor x4, x1; \ + pxor x4, x3; \ + pand x0, x4; \ + pxor x2, x4; + +#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + movdqa x0, t2; \ + punpckldq x1, x0; \ + punpckhdq x1, t2; \ + movdqa x2, t1; \ + punpckhdq x3, x2; \ + punpckldq x3, t1; \ + movdqa x0, x1; \ + punpcklqdq t1, x0; \ + punpckhqdq t1, x1; \ + movdqa t2, x3; \ + punpcklqdq x2, t2; \ + punpckhqdq x2, x3; \ + movdqa t2, x2; + +#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ + movdqu (0*4*4)(in), x0; \ + movdqu (1*4*4)(in), x1; \ + movdqu (2*4*4)(in), x2; \ + movdqu (3*4*4)(in), x3; \ + \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + movdqu x0, (0*4*4)(out); \ + movdqu x1, (1*4*4)(out); \ + movdqu x2, (2*4*4)(out); \ + movdqu x3, (3*4*4)(out); + +#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + movdqu (0*4*4)(out), t0; \ + pxor t0, x0; \ + movdqu x0, (0*4*4)(out); \ + movdqu (1*4*4)(out), t0; \ + pxor t0, x1; \ + movdqu x1, (1*4*4)(out); \ + movdqu (2*4*4)(out), t0; \ + pxor t0, x2; \ + movdqu x2, (2*4*4)(out); \ + movdqu (3*4*4)(out), t0; \ + pxor t0, x3; \ + movdqu x3, (3*4*4)(out); + +.align 8 +.global __serpent_enc_blk_4way +.type __serpent_enc_blk_4way,@function; + +__serpent_enc_blk_4way: + /* input: + * arg_ctx(%esp): ctx, CTX + * arg_dst(%esp): dst + * arg_src(%esp): src + * arg_xor(%esp): bool, if true: xor output + */ + + pcmpeqd RNOT, RNOT; + + movl arg_ctx(%esp), CTX; + + movl arg_src(%esp), %eax; + read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); + + K(RA, RB, RC, RD, RE, 0); + S0(RA, RB, RC, RD, RE); LK(RC, RB, RD, RA, RE, 1); + S1(RC, RB, RD, RA, RE); LK(RE, RD, RA, RC, RB, 2); + S2(RE, RD, RA, RC, RB); LK(RB, RD, RE, RC, RA, 3); + S3(RB, RD, RE, RC, RA); LK(RC, RA, RD, RB, RE, 4); + S4(RC, RA, RD, RB, RE); LK(RA, RD, RB, RE, RC, 5); + S5(RA, RD, RB, RE, RC); LK(RC, RA, RD, RE, RB, 6); + S6(RC, RA, RD, RE, RB); LK(RD, RB, RA, RE, RC, 7); + S7(RD, RB, RA, RE, RC); LK(RC, RA, RE, RD, RB, 8); + S0(RC, RA, RE, RD, RB); LK(RE, RA, RD, RC, RB, 9); + S1(RE, RA, RD, RC, RB); LK(RB, RD, RC, RE, RA, 10); + S2(RB, RD, RC, RE, RA); LK(RA, RD, RB, RE, RC, 11); + S3(RA, RD, RB, RE, RC); LK(RE, RC, RD, RA, RB, 12); + S4(RE, RC, RD, RA, RB); LK(RC, RD, RA, RB, RE, 13); + S5(RC, RD, RA, RB, RE); LK(RE, RC, RD, RB, RA, 14); + S6(RE, RC, RD, RB, RA); LK(RD, RA, RC, RB, RE, 15); 
+ S7(RD, RA, RC, RB, RE); LK(RE, RC, RB, RD, RA, 16); + S0(RE, RC, RB, RD, RA); LK(RB, RC, RD, RE, RA, 17); + S1(RB, RC, RD, RE, RA); LK(RA, RD, RE, RB, RC, 18); + S2(RA, RD, RE, RB, RC); LK(RC, RD, RA, RB, RE, 19); + S3(RC, RD, RA, RB, RE); LK(RB, RE, RD, RC, RA, 20); + S4(RB, RE, RD, RC, RA); LK(RE, RD, RC, RA, RB, 21); + S5(RE, RD, RC, RA, RB); LK(RB, RE, RD, RA, RC, 22); + S6(RB, RE, RD, RA, RC); LK(RD, RC, RE, RA, RB, 23); + S7(RD, RC, RE, RA, RB); LK(RB, RE, RA, RD, RC, 24); + S0(RB, RE, RA, RD, RC); LK(RA, RE, RD, RB, RC, 25); + S1(RA, RE, RD, RB, RC); LK(RC, RD, RB, RA, RE, 26); + S2(RC, RD, RB, RA, RE); LK(RE, RD, RC, RA, RB, 27); + S3(RE, RD, RC, RA, RB); LK(RA, RB, RD, RE, RC, 28); + S4(RA, RB, RD, RE, RC); LK(RB, RD, RE, RC, RA, 29); + S5(RB, RD, RE, RC, RA); LK(RA, RB, RD, RC, RE, 30); + S6(RA, RB, RD, RC, RE); LK(RD, RE, RB, RC, RA, 31); + S7(RD, RE, RB, RC, RA); K(RA, RB, RC, RD, RE, 32); + + movl arg_dst(%esp), %eax; + + cmpb $0, arg_xor(%esp); + jnz __enc_xor4; + + write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); + + ret; + +__enc_xor4: + xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); + + ret; + +.align 8 +.global serpent_dec_blk_4way +.type serpent_dec_blk_4way,@function; + +serpent_dec_blk_4way: + /* input: + * arg_ctx(%esp): ctx, CTX + * arg_dst(%esp): dst + * arg_src(%esp): src + */ + + pcmpeqd RNOT, RNOT; + + movl arg_ctx(%esp), CTX; + + movl arg_src(%esp), %eax; + read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); + + K(RA, RB, RC, RD, RE, 32); + SI7(RA, RB, RC, RD, RE); KL(RB, RD, RA, RE, RC, 31); + SI6(RB, RD, RA, RE, RC); KL(RA, RC, RE, RB, RD, 30); + SI5(RA, RC, RE, RB, RD); KL(RC, RD, RA, RE, RB, 29); + SI4(RC, RD, RA, RE, RB); KL(RC, RA, RB, RE, RD, 28); + SI3(RC, RA, RB, RE, RD); KL(RB, RC, RD, RE, RA, 27); + SI2(RB, RC, RD, RE, RA); KL(RC, RA, RE, RD, RB, 26); + SI1(RC, RA, RE, RD, RB); KL(RB, RA, RE, RD, RC, 25); + SI0(RB, RA, RE, RD, RC); KL(RE, RC, RA, RB, RD, 24); + SI7(RE, RC, RA, RB, RD); KL(RC, RB, RE, RD, RA, 23); + SI6(RC, RB, RE, RD, RA); KL(RE, RA, RD, RC, RB, 22); + SI5(RE, RA, RD, RC, RB); KL(RA, RB, RE, RD, RC, 21); + SI4(RA, RB, RE, RD, RC); KL(RA, RE, RC, RD, RB, 20); + SI3(RA, RE, RC, RD, RB); KL(RC, RA, RB, RD, RE, 19); + SI2(RC, RA, RB, RD, RE); KL(RA, RE, RD, RB, RC, 18); + SI1(RA, RE, RD, RB, RC); KL(RC, RE, RD, RB, RA, 17); + SI0(RC, RE, RD, RB, RA); KL(RD, RA, RE, RC, RB, 16); + SI7(RD, RA, RE, RC, RB); KL(RA, RC, RD, RB, RE, 15); + SI6(RA, RC, RD, RB, RE); KL(RD, RE, RB, RA, RC, 14); + SI5(RD, RE, RB, RA, RC); KL(RE, RC, RD, RB, RA, 13); + SI4(RE, RC, RD, RB, RA); KL(RE, RD, RA, RB, RC, 12); + SI3(RE, RD, RA, RB, RC); KL(RA, RE, RC, RB, RD, 11); + SI2(RA, RE, RC, RB, RD); KL(RE, RD, RB, RC, RA, 10); + SI1(RE, RD, RB, RC, RA); KL(RA, RD, RB, RC, RE, 9); + SI0(RA, RD, RB, RC, RE); KL(RB, RE, RD, RA, RC, 8); + SI7(RB, RE, RD, RA, RC); KL(RE, RA, RB, RC, RD, 7); + SI6(RE, RA, RB, RC, RD); KL(RB, RD, RC, RE, RA, 6); + SI5(RB, RD, RC, RE, RA); KL(RD, RA, RB, RC, RE, 5); + SI4(RD, RA, RB, RC, RE); KL(RD, RB, RE, RC, RA, 4); + SI3(RD, RB, RE, RC, RA); KL(RE, RD, RA, RC, RB, 3); + SI2(RE, RD, RA, RC, RB); KL(RD, RB, RC, RA, RE, 2); + SI1(RD, RB, RC, RA, RE); KL(RE, RB, RC, RA, RD, 1); + SI0(RE, RB, RC, RA, RD); K(RC, RD, RB, RE, RA, 0); + + movl arg_dst(%esp), %eax; + write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); + + ret; diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S new file mode 100644 index 00000000..3ee1ff04 --- /dev/null +++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S @@ 
-0,0 +1,758 @@ +/* + * Serpent Cipher 8-way parallel algorithm (x86_64/SSE2) + * + * Copyright (C) 2011 Jussi Kivilinna + * + * Based on crypto/serpent.c by + * Copyright (C) 2002 Dag Arne Osvik + * 2003 Herbert Valerio Riedel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "serpent-sse2-x86_64-asm_64.S" +.text + +#define CTX %rdi + +/********************************************************************** + 8-way SSE2 serpent + **********************************************************************/ +#define RA1 %xmm0 +#define RB1 %xmm1 +#define RC1 %xmm2 +#define RD1 %xmm3 +#define RE1 %xmm4 + +#define RA2 %xmm5 +#define RB2 %xmm6 +#define RC2 %xmm7 +#define RD2 %xmm8 +#define RE2 %xmm9 + +#define RNOT %xmm10 + +#define RK0 %xmm11 +#define RK1 %xmm12 +#define RK2 %xmm13 +#define RK3 %xmm14 + +#define S0_1(x0, x1, x2, x3, x4) \ + movdqa x3, x4; \ + por x0, x3; \ + pxor x4, x0; \ + pxor x2, x4; \ + pxor RNOT, x4; \ + pxor x1, x3; \ + pand x0, x1; \ + pxor x4, x1; \ + pxor x0, x2; +#define S0_2(x0, x1, x2, x3, x4) \ + pxor x3, x0; \ + por x0, x4; \ + pxor x2, x0; \ + pand x1, x2; \ + pxor x2, x3; \ + pxor RNOT, x1; \ + pxor x4, x2; \ + pxor x2, x1; + +#define S1_1(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + pxor x0, x1; \ + pxor x3, x0; \ + pxor RNOT, x3; \ + pand x1, x4; \ + por x1, x0; \ + pxor x2, x3; \ + pxor x3, x0; \ + pxor x3, x1; +#define S1_2(x0, x1, x2, x3, x4) \ + pxor x4, x3; \ + por x4, x1; \ + pxor x2, x4; \ + pand x0, x2; \ + pxor x1, x2; \ + por x0, x1; \ + pxor RNOT, x0; \ + pxor x2, x0; \ + pxor x1, x4; + +#define S2_1(x0, x1, x2, x3, x4) \ + pxor RNOT, x3; \ + pxor x0, x1; \ + movdqa x0, x4; \ + pand x2, x0; \ + pxor x3, x0; \ + por x4, x3; \ + pxor x1, x2; \ + pxor x1, x3; \ + pand x0, x1; +#define S2_2(x0, x1, x2, x3, x4) \ + pxor x2, x0; \ + pand x3, x2; \ + por x1, x3; \ + pxor RNOT, x0; \ + pxor x0, x3; \ + pxor x0, x4; \ + pxor x2, x0; \ + por x2, x1; + +#define S3_1(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + pxor x3, x1; \ + por x0, x3; \ + pand x0, x4; \ + pxor x2, x0; \ + pxor x1, x2; \ + pand x3, x1; \ + pxor x3, x2; \ + por x4, x0; \ + pxor x3, x4; +#define S3_2(x0, x1, x2, x3, x4) \ + pxor x0, x1; \ + pand x3, x0; \ + pand x4, x3; \ + pxor x2, x3; \ + por x1, x4; \ + pand x1, x2; \ + pxor x3, x4; \ + pxor x3, x0; \ + pxor x2, x3; + +#define S4_1(x0, x1, x2, x3, x4) \ + movdqa x3, x4; \ + pand x0, x3; \ + pxor x4, x0; \ + pxor x2, x3; \ + por x4, x2; \ + pxor x1, x0; \ + pxor x3, x4; \ + por x0, x2; \ + pxor x1, x2; +#define S4_2(x0, x1, x2, x3, x4) \ + pand x0, x1; \ + pxor x4, x1; \ + pand x2, x4; \ + pxor x3, x2; \ + pxor x0, x4; \ + por x1, x3; \ + pxor RNOT, x1; \ + pxor x0, x3; + +#define S5_1(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + por x0, x1; \ + pxor x1, x2; \ + pxor RNOT, x3; \ + pxor x0, x4; \ + pxor x2, x0; \ + pand x4, x1; \ + por x3, x4; \ + pxor x0, x4; 
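+/* Each S-box is split into a _1/_2 half-pair; the S() and SP() macros + * defined further below run both halves over the two 4-way block groups, + * and SP() interleaves the round-key loads (get_key) between them. */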
+#define S5_2(x0, x1, x2, x3, x4) \ + pand x3, x0; \ + pxor x3, x1; \ + pxor x2, x3; \ + pxor x1, x0; \ + pand x4, x2; \ + pxor x2, x1; \ + pand x0, x2; \ + pxor x2, x3; + +#define S6_1(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + pxor x0, x3; \ + pxor x2, x1; \ + pxor x0, x2; \ + pand x3, x0; \ + por x3, x1; \ + pxor RNOT, x4; \ + pxor x1, x0; \ + pxor x2, x1; +#define S6_2(x0, x1, x2, x3, x4) \ + pxor x4, x3; \ + pxor x0, x4; \ + pand x0, x2; \ + pxor x1, x4; \ + pxor x3, x2; \ + pand x1, x3; \ + pxor x0, x3; \ + pxor x2, x1; + +#define S7_1(x0, x1, x2, x3, x4) \ + pxor RNOT, x1; \ + movdqa x1, x4; \ + pxor RNOT, x0; \ + pand x2, x1; \ + pxor x3, x1; \ + por x4, x3; \ + pxor x2, x4; \ + pxor x3, x2; \ + pxor x0, x3; \ + por x1, x0; +#define S7_2(x0, x1, x2, x3, x4) \ + pand x0, x2; \ + pxor x4, x0; \ + pxor x3, x4; \ + pand x0, x3; \ + pxor x1, x4; \ + pxor x4, x2; \ + pxor x1, x3; \ + por x0, x4; \ + pxor x1, x4; + +#define SI0_1(x0, x1, x2, x3, x4) \ + movdqa x3, x4; \ + pxor x0, x1; \ + por x1, x3; \ + pxor x1, x4; \ + pxor RNOT, x0; \ + pxor x3, x2; \ + pxor x0, x3; \ + pand x1, x0; \ + pxor x2, x0; +#define SI0_2(x0, x1, x2, x3, x4) \ + pand x3, x2; \ + pxor x4, x3; \ + pxor x3, x2; \ + pxor x3, x1; \ + pand x0, x3; \ + pxor x0, x1; \ + pxor x2, x0; \ + pxor x3, x4; + +#define SI1_1(x0, x1, x2, x3, x4) \ + pxor x3, x1; \ + movdqa x0, x4; \ + pxor x2, x0; \ + pxor RNOT, x2; \ + por x1, x4; \ + pxor x3, x4; \ + pand x1, x3; \ + pxor x2, x1; \ + pand x4, x2; +#define SI1_2(x0, x1, x2, x3, x4) \ + pxor x1, x4; \ + por x3, x1; \ + pxor x0, x3; \ + pxor x0, x2; \ + por x4, x0; \ + pxor x4, x2; \ + pxor x0, x1; \ + pxor x1, x4; + +#define SI2_1(x0, x1, x2, x3, x4) \ + pxor x1, x2; \ + movdqa x3, x4; \ + pxor RNOT, x3; \ + por x2, x3; \ + pxor x4, x2; \ + pxor x0, x4; \ + pxor x1, x3; \ + por x2, x1; \ + pxor x0, x2; +#define SI2_2(x0, x1, x2, x3, x4) \ + pxor x4, x1; \ + por x3, x4; \ + pxor x3, x2; \ + pxor x2, x4; \ + pand x1, x2; \ + pxor x3, x2; \ + pxor x4, x3; \ + pxor x0, x4; + +#define SI3_1(x0, x1, x2, x3, x4) \ + pxor x1, x2; \ + movdqa x1, x4; \ + pand x2, x1; \ + pxor x0, x1; \ + por x4, x0; \ + pxor x3, x4; \ + pxor x3, x0; \ + por x1, x3; \ + pxor x2, x1; +#define SI3_2(x0, x1, x2, x3, x4) \ + pxor x3, x1; \ + pxor x2, x0; \ + pxor x3, x2; \ + pand x1, x3; \ + pxor x0, x1; \ + pand x2, x0; \ + pxor x3, x4; \ + pxor x0, x3; \ + pxor x1, x0; + +#define SI4_1(x0, x1, x2, x3, x4) \ + pxor x3, x2; \ + movdqa x0, x4; \ + pand x1, x0; \ + pxor x2, x0; \ + por x3, x2; \ + pxor RNOT, x4; \ + pxor x0, x1; \ + pxor x2, x0; \ + pand x4, x2; +#define SI4_2(x0, x1, x2, x3, x4) \ + pxor x0, x2; \ + por x4, x0; \ + pxor x3, x0; \ + pand x2, x3; \ + pxor x3, x4; \ + pxor x1, x3; \ + pand x0, x1; \ + pxor x1, x4; \ + pxor x3, x0; + +#define SI5_1(x0, x1, x2, x3, x4) \ + movdqa x1, x4; \ + por x2, x1; \ + pxor x4, x2; \ + pxor x3, x1; \ + pand x4, x3; \ + pxor x3, x2; \ + por x0, x3; \ + pxor RNOT, x0; \ + pxor x2, x3; \ + por x0, x2; +#define SI5_2(x0, x1, x2, x3, x4) \ + pxor x1, x4; \ + pxor x4, x2; \ + pand x0, x4; \ + pxor x1, x0; \ + pxor x3, x1; \ + pand x2, x0; \ + pxor x3, x2; \ + pxor x2, x0; \ + pxor x4, x2; \ + pxor x3, x4; + +#define SI6_1(x0, x1, x2, x3, x4) \ + pxor x2, x0; \ + movdqa x0, x4; \ + pand x3, x0; \ + pxor x3, x2; \ + pxor x2, x0; \ + pxor x1, x3; \ + por x4, x2; \ + pxor x3, x2; \ + pand x0, x3; +#define SI6_2(x0, x1, x2, x3, x4) \ + pxor RNOT, x0; \ + pxor x1, x3; \ + pand x2, x1; \ + pxor x0, x4; \ + pxor x4, x3; \ + pxor x2, x4; \ + pxor x1, x0; \ + pxor x0, x2; + 
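+/* The SIn_1/SIn_2 macros are the inverse S-boxes; serpent_dec_blk_8way + * applies them in reverse round order, SI7 first and SI0 last. */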
+#define SI7_1(x0, x1, x2, x3, x4) \ + movdqa x3, x4; \ + pand x0, x3; \ + pxor x2, x0; \ + por x4, x2; \ + pxor x1, x4; \ + pxor RNOT, x0; \ + por x3, x1; \ + pxor x0, x4; \ + pand x2, x0; \ + pxor x1, x0; +#define SI7_2(x0, x1, x2, x3, x4) \ + pand x2, x1; \ + pxor x2, x3; \ + pxor x3, x4; \ + pand x3, x2; \ + por x0, x3; \ + pxor x4, x1; \ + pxor x4, x3; \ + pand x0, x4; \ + pxor x2, x4; + +#define get_key(i, j, t) \ + movd (4*(i)+(j))*4(CTX), t; \ + pshufd $0, t, t; + +#define K2(x0, x1, x2, x3, x4, i) \ + get_key(i, 0, RK0); \ + get_key(i, 1, RK1); \ + get_key(i, 2, RK2); \ + get_key(i, 3, RK3); \ + pxor RK0, x0 ## 1; \ + pxor RK1, x1 ## 1; \ + pxor RK2, x2 ## 1; \ + pxor RK3, x3 ## 1; \ + pxor RK0, x0 ## 2; \ + pxor RK1, x1 ## 2; \ + pxor RK2, x2 ## 2; \ + pxor RK3, x3 ## 2; + +#define LK2(x0, x1, x2, x3, x4, i) \ + movdqa x0 ## 1, x4 ## 1; \ + pslld $13, x0 ## 1; \ + psrld $(32 - 13), x4 ## 1; \ + por x4 ## 1, x0 ## 1; \ + pxor x0 ## 1, x1 ## 1; \ + movdqa x2 ## 1, x4 ## 1; \ + pslld $3, x2 ## 1; \ + psrld $(32 - 3), x4 ## 1; \ + por x4 ## 1, x2 ## 1; \ + pxor x2 ## 1, x1 ## 1; \ + movdqa x0 ## 2, x4 ## 2; \ + pslld $13, x0 ## 2; \ + psrld $(32 - 13), x4 ## 2; \ + por x4 ## 2, x0 ## 2; \ + pxor x0 ## 2, x1 ## 2; \ + movdqa x2 ## 2, x4 ## 2; \ + pslld $3, x2 ## 2; \ + psrld $(32 - 3), x4 ## 2; \ + por x4 ## 2, x2 ## 2; \ + pxor x2 ## 2, x1 ## 2; \ + movdqa x1 ## 1, x4 ## 1; \ + pslld $1, x1 ## 1; \ + psrld $(32 - 1), x4 ## 1; \ + por x4 ## 1, x1 ## 1; \ + movdqa x0 ## 1, x4 ## 1; \ + pslld $3, x4 ## 1; \ + pxor x2 ## 1, x3 ## 1; \ + pxor x4 ## 1, x3 ## 1; \ + movdqa x3 ## 1, x4 ## 1; \ + get_key(i, 1, RK1); \ + movdqa x1 ## 2, x4 ## 2; \ + pslld $1, x1 ## 2; \ + psrld $(32 - 1), x4 ## 2; \ + por x4 ## 2, x1 ## 2; \ + movdqa x0 ## 2, x4 ## 2; \ + pslld $3, x4 ## 2; \ + pxor x2 ## 2, x3 ## 2; \ + pxor x4 ## 2, x3 ## 2; \ + movdqa x3 ## 2, x4 ## 2; \ + get_key(i, 3, RK3); \ + pslld $7, x3 ## 1; \ + psrld $(32 - 7), x4 ## 1; \ + por x4 ## 1, x3 ## 1; \ + movdqa x1 ## 1, x4 ## 1; \ + pslld $7, x4 ## 1; \ + pxor x1 ## 1, x0 ## 1; \ + pxor x3 ## 1, x0 ## 1; \ + pxor x3 ## 1, x2 ## 1; \ + pxor x4 ## 1, x2 ## 1; \ + get_key(i, 0, RK0); \ + pslld $7, x3 ## 2; \ + psrld $(32 - 7), x4 ## 2; \ + por x4 ## 2, x3 ## 2; \ + movdqa x1 ## 2, x4 ## 2; \ + pslld $7, x4 ## 2; \ + pxor x1 ## 2, x0 ## 2; \ + pxor x3 ## 2, x0 ## 2; \ + pxor x3 ## 2, x2 ## 2; \ + pxor x4 ## 2, x2 ## 2; \ + get_key(i, 2, RK2); \ + pxor RK1, x1 ## 1; \ + pxor RK3, x3 ## 1; \ + movdqa x0 ## 1, x4 ## 1; \ + pslld $5, x0 ## 1; \ + psrld $(32 - 5), x4 ## 1; \ + por x4 ## 1, x0 ## 1; \ + movdqa x2 ## 1, x4 ## 1; \ + pslld $22, x2 ## 1; \ + psrld $(32 - 22), x4 ## 1; \ + por x4 ## 1, x2 ## 1; \ + pxor RK0, x0 ## 1; \ + pxor RK2, x2 ## 1; \ + pxor RK1, x1 ## 2; \ + pxor RK3, x3 ## 2; \ + movdqa x0 ## 2, x4 ## 2; \ + pslld $5, x0 ## 2; \ + psrld $(32 - 5), x4 ## 2; \ + por x4 ## 2, x0 ## 2; \ + movdqa x2 ## 2, x4 ## 2; \ + pslld $22, x2 ## 2; \ + psrld $(32 - 22), x4 ## 2; \ + por x4 ## 2, x2 ## 2; \ + pxor RK0, x0 ## 2; \ + pxor RK2, x2 ## 2; + +#define KL2(x0, x1, x2, x3, x4, i) \ + pxor RK0, x0 ## 1; \ + pxor RK2, x2 ## 1; \ + movdqa x0 ## 1, x4 ## 1; \ + psrld $5, x0 ## 1; \ + pslld $(32 - 5), x4 ## 1; \ + por x4 ## 1, x0 ## 1; \ + pxor RK3, x3 ## 1; \ + pxor RK1, x1 ## 1; \ + movdqa x2 ## 1, x4 ## 1; \ + psrld $22, x2 ## 1; \ + pslld $(32 - 22), x4 ## 1; \ + por x4 ## 1, x2 ## 1; \ + pxor x3 ## 1, x2 ## 1; \ + pxor RK0, x0 ## 2; \ + pxor RK2, x2 ## 2; \ + movdqa x0 ## 2, x4 ## 2; \ + psrld $5, x0 ## 2; \ + pslld $(32 - 5), x4 ## 
2; \ + por x4 ## 2, x0 ## 2; \ + pxor RK3, x3 ## 2; \ + pxor RK1, x1 ## 2; \ + movdqa x2 ## 2, x4 ## 2; \ + psrld $22, x2 ## 2; \ + pslld $(32 - 22), x4 ## 2; \ + por x4 ## 2, x2 ## 2; \ + pxor x3 ## 2, x2 ## 2; \ + pxor x3 ## 1, x0 ## 1; \ + movdqa x1 ## 1, x4 ## 1; \ + pslld $7, x4 ## 1; \ + pxor x1 ## 1, x0 ## 1; \ + pxor x4 ## 1, x2 ## 1; \ + movdqa x1 ## 1, x4 ## 1; \ + psrld $1, x1 ## 1; \ + pslld $(32 - 1), x4 ## 1; \ + por x4 ## 1, x1 ## 1; \ + pxor x3 ## 2, x0 ## 2; \ + movdqa x1 ## 2, x4 ## 2; \ + pslld $7, x4 ## 2; \ + pxor x1 ## 2, x0 ## 2; \ + pxor x4 ## 2, x2 ## 2; \ + movdqa x1 ## 2, x4 ## 2; \ + psrld $1, x1 ## 2; \ + pslld $(32 - 1), x4 ## 2; \ + por x4 ## 2, x1 ## 2; \ + movdqa x3 ## 1, x4 ## 1; \ + psrld $7, x3 ## 1; \ + pslld $(32 - 7), x4 ## 1; \ + por x4 ## 1, x3 ## 1; \ + pxor x0 ## 1, x1 ## 1; \ + movdqa x0 ## 1, x4 ## 1; \ + pslld $3, x4 ## 1; \ + pxor x4 ## 1, x3 ## 1; \ + movdqa x0 ## 1, x4 ## 1; \ + movdqa x3 ## 2, x4 ## 2; \ + psrld $7, x3 ## 2; \ + pslld $(32 - 7), x4 ## 2; \ + por x4 ## 2, x3 ## 2; \ + pxor x0 ## 2, x1 ## 2; \ + movdqa x0 ## 2, x4 ## 2; \ + pslld $3, x4 ## 2; \ + pxor x4 ## 2, x3 ## 2; \ + movdqa x0 ## 2, x4 ## 2; \ + psrld $13, x0 ## 1; \ + pslld $(32 - 13), x4 ## 1; \ + por x4 ## 1, x0 ## 1; \ + pxor x2 ## 1, x1 ## 1; \ + pxor x2 ## 1, x3 ## 1; \ + movdqa x2 ## 1, x4 ## 1; \ + psrld $3, x2 ## 1; \ + pslld $(32 - 3), x4 ## 1; \ + por x4 ## 1, x2 ## 1; \ + psrld $13, x0 ## 2; \ + pslld $(32 - 13), x4 ## 2; \ + por x4 ## 2, x0 ## 2; \ + pxor x2 ## 2, x1 ## 2; \ + pxor x2 ## 2, x3 ## 2; \ + movdqa x2 ## 2, x4 ## 2; \ + psrld $3, x2 ## 2; \ + pslld $(32 - 3), x4 ## 2; \ + por x4 ## 2, x2 ## 2; + +#define S(SBOX, x0, x1, x2, x3, x4) \ + SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); + +#define SP(SBOX, x0, x1, x2, x3, x4, i) \ + get_key(i, 0, RK0); \ + SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + get_key(i, 2, RK2); \ + SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + get_key(i, 3, RK3); \ + SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ + get_key(i, 1, RK1); \ + SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ + +#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + movdqa x0, t2; \ + punpckldq x1, x0; \ + punpckhdq x1, t2; \ + movdqa x2, t1; \ + punpckhdq x3, x2; \ + punpckldq x3, t1; \ + movdqa x0, x1; \ + punpcklqdq t1, x0; \ + punpckhqdq t1, x1; \ + movdqa t2, x3; \ + punpcklqdq x2, t2; \ + punpckhqdq x2, x3; \ + movdqa t2, x2; + +#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ + movdqu (0*4*4)(in), x0; \ + movdqu (1*4*4)(in), x1; \ + movdqu (2*4*4)(in), x2; \ + movdqu (3*4*4)(in), x3; \ + \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + movdqu x0, (0*4*4)(out); \ + movdqu x1, (1*4*4)(out); \ + movdqu x2, (2*4*4)(out); \ + movdqu x3, (3*4*4)(out); + +#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + movdqu (0*4*4)(out), t0; \ + pxor t0, x0; \ + movdqu x0, (0*4*4)(out); \ + movdqu (1*4*4)(out), t0; \ + pxor t0, x1; \ + movdqu x1, (1*4*4)(out); \ + movdqu (2*4*4)(out), t0; \ + pxor t0, x2; \ + movdqu x2, (2*4*4)(out); \ + movdqu (3*4*4)(out), t0; \ + pxor t0, x3; \ + movdqu x3, (3*4*4)(out); + +.align 8 +.global __serpent_enc_blk_8way 
+.type __serpent_enc_blk_8way,@function; + +__serpent_enc_blk_8way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + + pcmpeqd RNOT, RNOT; + + leaq (4*4*4)(%rdx), %rax; + read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + K2(RA, RB, RC, RD, RE, 0); + S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); + S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); + S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); + S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); + S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); + S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); + S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7); + S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); + S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); + S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); + S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); + S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); + S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); + S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); + S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); + S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); + S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); + S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); + S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); + S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); + S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); + S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); + S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); + S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); + S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); + S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); + S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); + S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); + S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); + S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); + S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); + S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); + + leaq (4*4*4)(%rsi), %rax; + + testb %cl, %cl; + jnz __enc_xor8; + + write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + ret; + +__enc_xor8: + xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + ret; + +.align 8 +.global serpent_dec_blk_8way +.type serpent_dec_blk_8way,@function; + +serpent_dec_blk_8way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + pcmpeqd RNOT, RNOT; + + leaq (4*4*4)(%rdx), %rax; + read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); + read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); + + K2(RA, RB, RC, RD, RE, 32); + SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); + SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); + SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); + SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); + SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); + SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); + SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); + SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); + SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); + SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); + 
SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); + SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); + SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); + SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); + SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); + SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); + SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); + SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); + SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); + SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); + SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); + SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10); + SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); + SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); + SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); + SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); + SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); + SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); + SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); + SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); + SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); + S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); + + leaq (4*4*4)(%rsi), %rax; + write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2); + write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); + + ret; diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c new file mode 100644 index 00000000..4b21be85 --- /dev/null +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -0,0 +1,944 @@ +/* + * Glue Code for SSE2 assembler versions of Serpent Cipher + * + * Copyright (c) 2011 Jussi Kivilinna + * + * Glue code based on aesni-intel_glue.c by: + * Copyright (C) 2008, Intel Corp. + * Author: Huang Ying + * + * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: + * Copyright (c) 2006 Herbert Xu + * CTR part based on code (crypto/ctr.c) by: + * (C) Copyright IBM Corp. 2007 - Joy Latten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include <linux/module.h> +#include <linux/hardirq.h> +#include <linux/types.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <crypto/algapi.h> +#include <crypto/serpent.h> +#include <crypto/cryptd.h> +#include <crypto/b128ops.h> +#include <crypto/ctr.h> +#include <crypto/lrw.h> +#include <crypto/xts.h> +#include <asm/i387.h> +#include <asm/serpent.h> +#include <crypto/scatterwalk.h> +#include <linux/workqueue.h> +#include <linux/spinlock.h> + +struct async_serpent_ctx { + struct cryptd_ablkcipher *cryptd_tfm; +}; + +static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) +{ + if (fpu_enabled) + return true; + + /* SSE2 is only used when chunk to be processed is large enough, so + * do not enable FPU until it is necessary.
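+ * The xway assembler routines in this patch handle SERPENT_PARALLEL_BLOCKS + * blocks per call (4 on i586, 8 on x86-64); smaller requests fall back to + * the scalar __serpent_encrypt()/__serpent_decrypt() and never pay the + * kernel_fpu_begin()/kernel_fpu_end() overhead.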
+ */ + if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS) + return false; + + kernel_fpu_begin(); + return true; +} + +static inline void serpent_fpu_end(bool fpu_enabled) +{ + if (fpu_enabled) + kernel_fpu_end(); +} + +static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, + bool enc) +{ + bool fpu_enabled = false; + struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = SERPENT_BLOCK_SIZE; + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); + + /* Process multi-block batch */ + if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { + do { + if (enc) + serpent_enc_blk_xway(ctx, wdst, wsrc); + else + serpent_dec_blk_xway(ctx, wdst, wsrc); + + wsrc += bsize * SERPENT_PARALLEL_BLOCKS; + wdst += bsize * SERPENT_PARALLEL_BLOCKS; + nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; + } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (enc) + __serpent_encrypt(ctx, wdst, wsrc); + else + __serpent_decrypt(ctx, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + serpent_fpu_end(fpu_enabled); + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, true); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, false); +} + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = SERPENT_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 *iv = (u128 *)walk->iv; + + do { + u128_xor(dst, src, iv); + __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = SERPENT_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; + u128 last_iv; + int i; + + /* Start of the last block. 
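+ * CBC decryption is done back to front: each plaintext block needs the + * preceding ciphertext block as its chaining value, so walking backwards + * keeps that block intact until it has been consumed, even when dst == src.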
*/ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process multi-block batch */ + if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { + do { + nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1); + src -= SERPENT_PARALLEL_BLOCKS - 1; + dst -= SERPENT_PARALLEL_BLOCKS - 1; + + for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) + ivs[i] = src[i]; + + serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); + + for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) + u128_xor(dst + (i + 1), dst + (i + 1), ivs + i); + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + for (;;) { + __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } + +done: + u128_xor(dst, dst, (u128 *)walk->iv); + *(u128 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk.nbytes)) { + fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + serpent_fpu_end(fpu_enabled); + return err; +} + +static inline void u128_to_be128(be128 *dst, const u128 *src) +{ + dst->a = cpu_to_be64(src->a); + dst->b = cpu_to_be64(src->b); +} + +static inline void be128_to_u128(u128 *dst, const be128 *src) +{ + dst->a = be64_to_cpu(src->a); + dst->b = be64_to_cpu(src->b); +} + +static inline void u128_inc(u128 *i) +{ + i->b++; + if (!i->b) + i->a++; +} + +static void ctr_crypt_final(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + u8 *ctrblk = walk->iv; + u8 keystream[SERPENT_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + __serpent_encrypt(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + + crypto_inc(ctrblk, SERPENT_BLOCK_SIZE); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = SERPENT_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ctrblk; + be128 ctrblocks[SERPENT_PARALLEL_BLOCKS]; + int i; + + be128_to_u128(&ctrblk, (be128 *)walk->iv); + + /* Process multi-block batch */ + if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { + do { + /* create ctrblks for parallel encrypt */ + for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { + if (dst != src) + dst[i] = src[i]; + + u128_to_be128(&ctrblocks[i], &ctrblk); + u128_inc(&ctrblk); + } + + serpent_enc_blk_xway_xor(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += SERPENT_PARALLEL_BLOCKS; + dst += SERPENT_PARALLEL_BLOCKS; + nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; + } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (dst != src) + *dst = *src; + + 
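/* leftover tail: encrypt one counter block at a time with the scalar + * implementation and xor it into dst */ +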
u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + + __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); + u128_xor(dst, dst, (u128 *)ctrblocks); + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + u128_to_be128((be128 *)walk->iv, &ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) { + fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + serpent_fpu_end(fpu_enabled); + + if (walk.nbytes) { + ctr_crypt_final(desc, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +struct crypt_priv { + struct serpent_ctx *ctx; + bool fpu_enabled; +}; + +static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = SERPENT_BLOCK_SIZE; + struct crypt_priv *ctx = priv; + int i; + + ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); + + if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { + serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + __serpent_encrypt(ctx->ctx, srcdst, srcdst); +} + +static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = SERPENT_BLOCK_SIZE; + struct crypt_priv *ctx = priv; + int i; + + ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); + + if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { + serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + __serpent_decrypt(ctx->ctx, srcdst, srcdst); +} + +struct serpent_lrw_ctx { + struct lrw_table_ctx lrw_table; + struct serpent_ctx serpent_ctx; +}; + +static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - + SERPENT_BLOCK_SIZE); + if (err) + return err; + + return lrw_init_table(&ctx->lrw_table, key + keylen - + SERPENT_BLOCK_SIZE); +} + +static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[SERPENT_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->serpent_ctx, + .fpu_enabled = false, + }; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &crypt_ctx, + .crypt_fn = encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = lrw_crypt(desc, dst, src, nbytes, &req); + serpent_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[SERPENT_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->serpent_ctx, + .fpu_enabled = false, + }; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + 
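/* buf is scratch space in which lrw_crypt() pre-computes the per-block + * tweak values for each batch handed to decrypt_callback() */ +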
.table_ctx = &ctx->lrw_table, + .crypt_ctx = &crypt_ctx, + .crypt_fn = decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = lrw_crypt(desc, dst, src, nbytes, &req); + serpent_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static void lrw_exit_tfm(struct crypto_tfm *tfm) +{ + struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + + lrw_free_table(&ctx->lrw_table); +} + +struct serpent_xts_ctx { + struct serpent_ctx tweak_ctx; + struct serpent_ctx crypt_ctx; +}; + +static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int err; + + /* key consists of keys of equal size concatenated, therefore + * the length must be even + */ + if (keylen % 2) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + /* first half of xts-key is for crypt */ + err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[SERPENT_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->crypt_ctx, + .fpu_enabled = false, + }; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), + .crypt_ctx = &crypt_ctx, + .crypt_fn = encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = xts_crypt(desc, dst, src, nbytes, &req); + serpent_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[SERPENT_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->crypt_ctx, + .fpu_enabled = false, + }; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), + .crypt_ctx = &crypt_ctx, + .crypt_fn = decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = xts_crypt(desc, dst, src, nbytes, &req); + serpent_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; + int err; + + crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) + & CRYPTO_TFM_REQ_MASK); + err = crypto_ablkcipher_setkey(child, key, key_len); + crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) + & CRYPTO_TFM_RES_MASK); + return err; +} + +static int __ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct blkcipher_desc desc; + + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + + return crypto_blkcipher_crt(desc.tfm)->encrypt( + &desc, req->dst, req->src, req->nbytes); +} + +static int 
ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (!irq_fpu_usable()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + + return crypto_ablkcipher_encrypt(cryptd_req); + } else { + return __ablk_encrypt(req); + } +} + +static int ablk_decrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (!irq_fpu_usable()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + + return crypto_ablkcipher_decrypt(cryptd_req); + } else { + struct blkcipher_desc desc; + + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + + return crypto_blkcipher_crt(desc.tfm)->decrypt( + &desc, req->dst, req->src, req->nbytes); + } +} + +static void ablk_exit(struct crypto_tfm *tfm) +{ + struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_free_ablkcipher(ctx->cryptd_tfm); +} + +static int ablk_init(struct crypto_tfm *tfm) +{ + struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); + struct cryptd_ablkcipher *cryptd_tfm; + char drv_name[CRYPTO_MAX_ALG_NAME]; + + snprintf(drv_name, sizeof(drv_name), "__driver-%s", + crypto_tfm_alg_driver_name(tfm)); + + cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + ctx->cryptd_tfm = cryptd_tfm; + tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + + crypto_ablkcipher_reqsize(&cryptd_tfm->base); + + return 0; +} + +static struct crypto_alg serpent_algs[10] = { { + .cra_name = "__ecb-serpent-sse2", + .cra_driver_name = "__driver-ecb-serpent-sse2", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "__cbc-serpent-sse2", + .cra_driver_name = "__driver-cbc-serpent-sse2", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "__ctr-serpent-sse2", + .cra_driver_name = "__driver-ctr-serpent-sse2", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = 
SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}, { + .cra_name = "__lrw-serpent-sse2", + .cra_driver_name = "__driver-lrw-serpent-sse2", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_lrw_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list), + .cra_exit = lrw_exit_tfm, + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE + + SERPENT_BLOCK_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE + + SERPENT_BLOCK_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = lrw_serpent_setkey, + .encrypt = lrw_encrypt, + .decrypt = lrw_decrypt, + }, + }, +}, { + .cra_name = "__xts-serpent-sse2", + .cra_driver_name = "__driver-xts-serpent-sse2", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct serpent_xts_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE * 2, + .max_keysize = SERPENT_MAX_KEY_SIZE * 2, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = xts_serpent_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, + }, +}, { + .cra_name = "ecb(serpent)", + .cra_driver_name = "ecb-serpent-sse2", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "cbc(serpent)", + .cra_driver_name = "cbc-serpent-sse2", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = __ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "ctr(serpent)", + .cra_driver_name = "ctr-serpent-sse2", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_encrypt, + .geniv = "chainiv", + }, + }, +}, { + .cra_name = "lrw(serpent)", + .cra_driver_name = "lrw-serpent-sse2", 
+ .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE + + SERPENT_BLOCK_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE + + SERPENT_BLOCK_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "xts(serpent)", + .cra_driver_name = "xts-serpent-sse2", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = SERPENT_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_serpent_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list), + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = SERPENT_MIN_KEY_SIZE * 2, + .max_keysize = SERPENT_MAX_KEY_SIZE * 2, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +} }; + +static int __init serpent_sse2_init(void) +{ + if (!cpu_has_xmm2) { + printk(KERN_INFO "SSE2 instructions are not detected.\n"); + return -ENODEV; + } + + return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); +} + +static void __exit serpent_sse2_exit(void) +{ + crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); +} + +module_init(serpent_sse2_init); +module_exit(serpent_sse2_exit); + +MODULE_DESCRIPTION("Serpent Cipher Algorithm, SSE2 optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("serpent"); diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S new file mode 100644 index 00000000..b2c2f57d --- /dev/null +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -0,0 +1,558 @@ +/* + * This is a SIMD SHA-1 implementation. It requires the Intel(R) Supplemental + * SSE3 instruction set extensions introduced in Intel Core Microarchitecture + * processors. CPUs supporting Intel(R) AVX extensions will get an additional + * boost. + * + * This work was inspired by the vectorized implementation of Dean Gaudet. + * Additional information on it can be found at: + * http://www.arctic.org/~dean/crypto/sha1.html + * + * It was improved upon with more efficient vectorization of the message + * scheduling. This implementation has also been optimized for all current and + * several future generations of Intel CPUs. + * + * See this article for more information about the implementation details: + * http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1/ + * + * Copyright (C) 2010, Intel Corp. + * Authors: Maxim Locktyukhin + * Ronen Zohar + * + * Converted to AT&T syntax and adapted for inclusion in the Linux kernel: + * Author: Mathias Krause + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#define CTX %rdi // arg1 +#define BUF %rsi // arg2 +#define CNT %rdx // arg3 + +#define REG_A %ecx +#define REG_B %esi +#define REG_C %edi +#define REG_D %ebp +#define REG_E %edx + +#define REG_T1 %eax +#define REG_T2 %ebx + +#define K_BASE %r8 +#define HASH_PTR %r9 +#define BUFFER_PTR %r10 +#define BUFFER_END %r11 + +#define W_TMP1 %xmm0 +#define W_TMP2 %xmm9 + +#define W0 %xmm1 +#define W4 %xmm2 +#define W8 %xmm3 +#define W12 %xmm4 +#define W16 %xmm5 +#define W20 %xmm6 +#define W24 %xmm7 +#define W28 %xmm8 + +#define XMM_SHUFB_BSWAP %xmm10 + +/* we keep window of 64 w[i]+K pre-calculated values in a circular buffer */ +#define WK(t) (((t) & 15) * 4)(%rsp) +#define W_PRECALC_AHEAD 16 + +/* + * This macro implements the SHA-1 function's body for single 64-byte block + * param: function's name + */ +.macro SHA1_VECTOR_ASM name + .global \name + .type \name, @function + .align 32 +\name: + push %rbx + push %rbp + push %r12 + + mov %rsp, %r12 + sub $64, %rsp # allocate workspace + and $~15, %rsp # align stack + + mov CTX, HASH_PTR + mov BUF, BUFFER_PTR + + shl $6, CNT # multiply by 64 + add BUF, CNT + mov CNT, BUFFER_END + + lea K_XMM_AR(%rip), K_BASE + xmm_mov BSWAP_SHUFB_CTL(%rip), XMM_SHUFB_BSWAP + + SHA1_PIPELINED_MAIN_BODY + + # cleanup workspace + mov $8, %ecx + mov %rsp, %rdi + xor %rax, %rax + rep stosq + + mov %r12, %rsp # deallocate workspace + + pop %r12 + pop %rbp + pop %rbx + ret + + .size \name, .-\name +.endm + +/* + * This macro implements 80 rounds of SHA-1 for one 64-byte block + */ +.macro SHA1_PIPELINED_MAIN_BODY + INIT_REGALLOC + + mov (HASH_PTR), A + mov 4(HASH_PTR), B + mov 8(HASH_PTR), C + mov 12(HASH_PTR), D + mov 16(HASH_PTR), E + + .set i, 0 + .rept W_PRECALC_AHEAD + W_PRECALC i + .set i, (i+1) + .endr + +.align 4 +1: + RR F1,A,B,C,D,E,0 + RR F1,D,E,A,B,C,2 + RR F1,B,C,D,E,A,4 + RR F1,E,A,B,C,D,6 + RR F1,C,D,E,A,B,8 + + RR F1,A,B,C,D,E,10 + RR F1,D,E,A,B,C,12 + RR F1,B,C,D,E,A,14 + RR F1,E,A,B,C,D,16 + RR F1,C,D,E,A,B,18 + + RR F2,A,B,C,D,E,20 + RR F2,D,E,A,B,C,22 + RR F2,B,C,D,E,A,24 + RR F2,E,A,B,C,D,26 + RR F2,C,D,E,A,B,28 + + RR F2,A,B,C,D,E,30 + RR F2,D,E,A,B,C,32 + RR F2,B,C,D,E,A,34 + RR F2,E,A,B,C,D,36 + RR F2,C,D,E,A,B,38 + + RR F3,A,B,C,D,E,40 + RR F3,D,E,A,B,C,42 + RR F3,B,C,D,E,A,44 + RR F3,E,A,B,C,D,46 + RR F3,C,D,E,A,B,48 + + RR F3,A,B,C,D,E,50 + RR F3,D,E,A,B,C,52 + RR F3,B,C,D,E,A,54 + RR F3,E,A,B,C,D,56 + RR F3,C,D,E,A,B,58 + + add $64, BUFFER_PTR # move to the next 64-byte block + cmp BUFFER_END, BUFFER_PTR # if the current is the last one use + cmovae K_BASE, BUFFER_PTR # dummy source to avoid buffer overrun + + RR F4,A,B,C,D,E,60 + RR F4,D,E,A,B,C,62 + RR F4,B,C,D,E,A,64 + RR F4,E,A,B,C,D,66 + RR F4,C,D,E,A,B,68 + + RR F4,A,B,C,D,E,70 + RR F4,D,E,A,B,C,72 + RR F4,B,C,D,E,A,74 + RR F4,E,A,B,C,D,76 + RR F4,C,D,E,A,B,78 + + UPDATE_HASH (HASH_PTR), A + UPDATE_HASH 4(HASH_PTR), B + UPDATE_HASH 8(HASH_PTR), C + UPDATE_HASH 12(HASH_PTR), D + UPDATE_HASH 16(HASH_PTR), E + + RESTORE_RENAMED_REGS + cmp K_BASE, BUFFER_PTR # K_BASE means, we reached the end + jne 1b +.endm + +.macro INIT_REGALLOC + .set A, REG_A + .set B, REG_B + .set C, REG_C + .set D, REG_D + .set E, REG_E + .set T1, REG_T1 + .set T2, REG_T2 +.endm + +.macro RESTORE_RENAMED_REGS + # order is important (REG_C is where it should be) + mov B, REG_B + mov D, REG_D + mov A, REG_A + mov E, REG_E +.endm + +.macro SWAP_REG_NAMES a, b + .set _T, \a + .set \a, \b + .set \b, _T +.endm + +.macro F1 b, c, d + mov \c, T1 + SWAP_REG_NAMES \c, T1 + xor \d, T1 + and \b, T1 + xor \d, T1 +.endm + 
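As a reading aid between the macro definitions: F1 above and the F2, F3 and F4 macros that follow compute SHA-1's standard boolean round functions, leaving their result in T1 (SWAP_REG_NAMES only rebinds the assembler-level register aliases). Here is a C restatement of their semantics, as an illustrative sketch whose helper names are ours and not part of the patch:

#include <stdint.h>

/* illustrative C equivalents of what the F1-F3 macros leave in T1 */
static inline uint32_t sha1_f1(uint32_t b, uint32_t c, uint32_t d)
{
	return ((c ^ d) & b) ^ d;	/* "choice", rounds 0-19 */
}

static inline uint32_t sha1_f2(uint32_t b, uint32_t c, uint32_t d)
{
	return b ^ c ^ d;		/* "parity", rounds 20-39 and 60-79 */
}

static inline uint32_t sha1_f3(uint32_t b, uint32_t c, uint32_t d)
{
	return (b & c) | ((b | c) & d);	/* "majority", rounds 40-59 */
}

F4 needs no helper of its own: as the macros below show, F4 simply expands to F2, matching the specification, which applies the parity function again in the last twenty rounds.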
+.macro F2 b, c, d
+	mov	\d, T1
+	SWAP_REG_NAMES \d, T1
+	xor	\c, T1
+	xor	\b, T1
+.endm
+
+.macro F3 b, c, d
+	mov	\c, T1
+	SWAP_REG_NAMES \c, T1
+	mov	\b, T2
+	or	\b, T1
+	and	\c, T2
+	and	\d, T1
+	or	T2, T1
+.endm
+
+.macro F4 b, c, d
+	F2	\b, \c, \d
+.endm
+
+.macro UPDATE_HASH hash, val
+	add	\hash, \val
+	mov	\val, \hash
+.endm
+
+/*
+ * RR does two rounds of SHA-1 back to back with W[] pre-calc
+ *   t1 = F(b, c, d);   e += w(i)
+ *   e += t1;           b <<= 30;   d  += w(i+1);
+ *   t1 = F(a, b, c);
+ *   d += t1;           a <<= 5;
+ *   e += a;
+ *   t1 = e;            a >>= 7;
+ *   t1 <<= 5;
+ *   d += t1;
+ */
+.macro RR F, a, b, c, d, e, round
+	add	WK(\round), \e
+	\F   \b, \c, \d		# t1 = F(b, c, d);
+	W_PRECALC (\round + W_PRECALC_AHEAD)
+	rol	$30, \b
+	add	T1, \e
+	add	WK(\round + 1), \d
+
+	\F   \a, \b, \c
+	W_PRECALC (\round + W_PRECALC_AHEAD + 1)
+	rol	$5, \a
+	add	\a, \e
+	add	T1, \d
+	ror	$7, \a		# (a <<r 5) >>r 7) => a <<r 30)
+
+	mov	\e, T1
+	SWAP_REG_NAMES \e, T1
+
+	rol	$5, T1
+	add	T1, \d
+
+	# write:  \a, \b
+	# rotate: \a<=\d, \b<=\e, \c<=\a, \d<=\b, \e<=\c
+.endm
+
+.macro W_PRECALC r
+	.set i, \r
+
+	.if (i < 20)
+		.set K_XMM, 0
+	.elseif (i < 40)
+		.set K_XMM, 16
+	.elseif (i < 60)
+		.set K_XMM, 32
+	.elseif (i < 80)
+		.set K_XMM, 48
+	.endif
+
+	.if ((i < 16) || ((i >= 80) && (i < (80 + W_PRECALC_AHEAD))))
+		.set i, ((\r) % 80)	    # pre-compute for the next iteration
+		.if (i == 0)
+			W_PRECALC_RESET
+		.endif
+		W_PRECALC_00_15
+	.elseif (i < 32)
+		W_PRECALC_16_31
+	.elseif (i < 80)   // rounds 32-79
+		W_PRECALC_32_79
+	.endif
+.endm
+
+.macro W_PRECALC_RESET
+	.set W,          W0
+	.set W_minus_04, W4
+	.set W_minus_08, W8
+	.set W_minus_12, W12
+	.set W_minus_16, W16
+	.set W_minus_20, W20
+	.set W_minus_24, W24
+	.set W_minus_28, W28
+	.set W_minus_32, W
+.endm
+
+.macro W_PRECALC_ROTATE
+	.set W_minus_32, W_minus_28
+	.set W_minus_28, W_minus_24
+	.set W_minus_24, W_minus_20
+	.set W_minus_20, W_minus_16
+	.set W_minus_16, W_minus_12
+	.set W_minus_12, W_minus_08
+	.set W_minus_08, W_minus_04
+	.set W_minus_04, W
+	.set W, W_minus_32
+.endm
+
+.macro W_PRECALC_SSSE3
+
+.macro W_PRECALC_00_15
+	W_PRECALC_00_15_SSSE3
+.endm
+.macro W_PRECALC_16_31
+	W_PRECALC_16_31_SSSE3
+.endm
+.macro W_PRECALC_32_79
+	W_PRECALC_32_79_SSSE3
+.endm
+
+/* message scheduling pre-compute for rounds 0-15 */
+.macro W_PRECALC_00_15_SSSE3
+	.if ((i & 3) == 0)
+		movdqu	(i*4)(BUFFER_PTR), W_TMP1
+	.elseif ((i & 3) == 1)
+		pshufb	XMM_SHUFB_BSWAP, W_TMP1
+		movdqa	W_TMP1, W
+	.elseif ((i & 3) == 2)
+		paddd	(K_BASE), W_TMP1
+	.elseif ((i & 3) == 3)
+		movdqa	W_TMP1, WK(i&~3)
+		W_PRECALC_ROTATE
+	.endif
+.endm
+
+/* message scheduling pre-compute for rounds 16-31
+ *
+ * - calculating last 32 w[i] values in 8 XMM registers
+ * - pre-calculate K+w[i] values and store to mem, for later load by ALU add
+ *   instruction
+ *
+ * some "heavy-lifting" vectorization for rounds 16-31 due to w[i]->w[i-3]
+ * dependency, but improves for 32-79
+ */
+.macro W_PRECALC_16_31_SSSE3
+	# blended scheduling of vector and scalar instruction streams, one 4-wide
+	# vector iteration / 4 scalar rounds
+	.if ((i & 3) == 0)
+		movdqa	W_minus_12, W
+		palignr	$8, W_minus_16, W	# w[i-14]
+		movdqa	W_minus_04, W_TMP1
+		psrldq	$4, W_TMP1		# w[i-3]
+		pxor	W_minus_08, W
+	.elseif ((i & 3) == 1)
+		pxor	W_minus_16, W_TMP1
+		pxor	W_TMP1, W
+		movdqa	W, W_TMP2
+		movdqa	W, W_TMP1
+		pslldq	$12, W_TMP2
+	.elseif ((i & 3) == 2)
+		psrld	$31, W
+		pslld	$1, W_TMP1
+		por	W, W_TMP1
+		movdqa	W_TMP2, W
+		psrld	$30, W_TMP2
+		pslld	$2, W
+	.elseif ((i & 3) == 3)
+		pxor	W, W_TMP1
+		pxor	W_TMP2, W_TMP1
+		movdqa	W_TMP1, W
+		paddd	K_XMM(K_BASE), W_TMP1
+		movdqa	W_TMP1, WK(i&~3)
+		W_PRECALC_ROTATE
+	.endif
+.endm
+
+/* message scheduling pre-compute for rounds 32-79
+ *
+ * in SHA-1 specification: w[i] = (w[i-3] ^ w[i-8]  ^ w[i-14] ^ w[i-16]) rol 1
+ * instead we do equal:    w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2
+ * allows more efficient vectorization since
w[i]=>w[i-3] dependency is broken + */ +.macro W_PRECALC_32_79_SSSE3 + .if ((i & 3) == 0) + movdqa W_minus_04, W_TMP1 + pxor W_minus_28, W # W is W_minus_32 before xor + palignr $8, W_minus_08, W_TMP1 + .elseif ((i & 3) == 1) + pxor W_minus_16, W + pxor W_TMP1, W + movdqa W, W_TMP1 + .elseif ((i & 3) == 2) + psrld $30, W + pslld $2, W_TMP1 + por W, W_TMP1 + .elseif ((i & 3) == 3) + movdqa W_TMP1, W + paddd K_XMM(K_BASE), W_TMP1 + movdqa W_TMP1, WK(i&~3) + W_PRECALC_ROTATE + .endif +.endm + +.endm // W_PRECALC_SSSE3 + + +#define K1 0x5a827999 +#define K2 0x6ed9eba1 +#define K3 0x8f1bbcdc +#define K4 0xca62c1d6 + +.section .rodata +.align 16 + +K_XMM_AR: + .long K1, K1, K1, K1 + .long K2, K2, K2, K2 + .long K3, K3, K3, K3 + .long K4, K4, K4, K4 + +BSWAP_SHUFB_CTL: + .long 0x00010203 + .long 0x04050607 + .long 0x08090a0b + .long 0x0c0d0e0f + + +.section .text + +W_PRECALC_SSSE3 +.macro xmm_mov a, b + movdqu \a,\b +.endm + +/* SSSE3 optimized implementation: + * extern "C" void sha1_transform_ssse3(u32 *digest, const char *data, u32 *ws, + * unsigned int rounds); + */ +SHA1_VECTOR_ASM sha1_transform_ssse3 + +#ifdef SHA1_ENABLE_AVX_SUPPORT + +.macro W_PRECALC_AVX + +.purgem W_PRECALC_00_15 +.macro W_PRECALC_00_15 + W_PRECALC_00_15_AVX +.endm +.purgem W_PRECALC_16_31 +.macro W_PRECALC_16_31 + W_PRECALC_16_31_AVX +.endm +.purgem W_PRECALC_32_79 +.macro W_PRECALC_32_79 + W_PRECALC_32_79_AVX +.endm + +.macro W_PRECALC_00_15_AVX + .if ((i & 3) == 0) + vmovdqu (i*4)(BUFFER_PTR), W_TMP1 + .elseif ((i & 3) == 1) + vpshufb XMM_SHUFB_BSWAP, W_TMP1, W + .elseif ((i & 3) == 2) + vpaddd (K_BASE), W, W_TMP1 + .elseif ((i & 3) == 3) + vmovdqa W_TMP1, WK(i&~3) + W_PRECALC_ROTATE + .endif +.endm + +.macro W_PRECALC_16_31_AVX + .if ((i & 3) == 0) + vpalignr $8, W_minus_16, W_minus_12, W # w[i-14] + vpsrldq $4, W_minus_04, W_TMP1 # w[i-3] + vpxor W_minus_08, W, W + vpxor W_minus_16, W_TMP1, W_TMP1 + .elseif ((i & 3) == 1) + vpxor W_TMP1, W, W + vpslldq $12, W, W_TMP2 + vpslld $1, W, W_TMP1 + .elseif ((i & 3) == 2) + vpsrld $31, W, W + vpor W, W_TMP1, W_TMP1 + vpslld $2, W_TMP2, W + vpsrld $30, W_TMP2, W_TMP2 + .elseif ((i & 3) == 3) + vpxor W, W_TMP1, W_TMP1 + vpxor W_TMP2, W_TMP1, W + vpaddd K_XMM(K_BASE), W, W_TMP1 + vmovdqu W_TMP1, WK(i&~3) + W_PRECALC_ROTATE + .endif +.endm + +.macro W_PRECALC_32_79_AVX + .if ((i & 3) == 0) + vpalignr $8, W_minus_08, W_minus_04, W_TMP1 + vpxor W_minus_28, W, W # W is W_minus_32 before xor + .elseif ((i & 3) == 1) + vpxor W_minus_16, W_TMP1, W_TMP1 + vpxor W_TMP1, W, W + .elseif ((i & 3) == 2) + vpslld $2, W, W_TMP1 + vpsrld $30, W, W + vpor W, W_TMP1, W + .elseif ((i & 3) == 3) + vpaddd K_XMM(K_BASE), W, W_TMP1 + vmovdqu W_TMP1, WK(i&~3) + W_PRECALC_ROTATE + .endif +.endm + +.endm // W_PRECALC_AVX + +W_PRECALC_AVX +.purgem xmm_mov +.macro xmm_mov a, b + vmovdqu \a,\b +.endm + + +/* AVX optimized implementation: + * extern "C" void sha1_transform_avx(u32 *digest, const char *data, u32 *ws, + * unsigned int rounds); + */ +SHA1_VECTOR_ASM sha1_transform_avx + +#endif diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c new file mode 100644 index 00000000..f916499d --- /dev/null +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -0,0 +1,240 @@ +/* + * Cryptographic API. + * + * Glue code for the SHA1 Secure Hash Algorithm assembler implementation using + * Supplemental SSE3 instructions. + * + * This file is based on sha1_generic.c + * + * Copyright (c) Alan Smithee. 
+ * Copyright (c) Andrew McDonald
+ * Copyright (c) Jean-Francois Dive
+ * Copyright (c) Mathias Krause
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
+#include <asm/xsave.h>
+
+
+asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
+				     unsigned int rounds);
+#ifdef SHA1_ENABLE_AVX_SUPPORT
+asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
+				   unsigned int rounds);
+#endif
+
+static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
+
+
+static int sha1_ssse3_init(struct shash_desc *desc)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	*sctx = (struct sha1_state){
+		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+	};
+
+	return 0;
+}
+
+static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
+			       unsigned int len, unsigned int partial)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	unsigned int done = 0;
+
+	sctx->count += len;
+
+	if (partial) {
+		done = SHA1_BLOCK_SIZE - partial;
+		memcpy(sctx->buffer + partial, data, done);
+		sha1_transform_asm(sctx->state, sctx->buffer, 1);
+	}
+
+	if (len - done >= SHA1_BLOCK_SIZE) {
+		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
+
+		sha1_transform_asm(sctx->state, data + done, rounds);
+		done += rounds * SHA1_BLOCK_SIZE;
+	}
+
+	memcpy(sctx->buffer, data + done, len - done);
+
+	return 0;
+}
+
+static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
+			     unsigned int len)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+	int res;
+
+	/* Handle the fast case right here */
+	if (partial + len < SHA1_BLOCK_SIZE) {
+		sctx->count += len;
+		memcpy(sctx->buffer + partial, data, len);
+
+		return 0;
+	}
+
+	if (!irq_fpu_usable()) {
+		res = crypto_sha1_update(desc, data, len);
+	} else {
+		kernel_fpu_begin();
+		res = __sha1_ssse3_update(desc, data, len, partial);
+		kernel_fpu_end();
+	}
+
+	return res;
+}
+
+
+/* Add padding and return the message digest. */
+static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	unsigned int i, index, padlen;
+	__be32 *dst = (__be32 *)out;
+	__be64 bits;
+	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+
+	bits = cpu_to_be64(sctx->count << 3);
+
+	/* Pad out to 56 mod 64 and append length */
+	index = sctx->count % SHA1_BLOCK_SIZE;
+	padlen = (index < 56) ?
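+		/* pad up to byte 56 of the current block if the 8-byte
+		 * bit-length field still fits, otherwise run the padding
+		 * into the following 64-byte block */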
(56 - index) : ((SHA1_BLOCK_SIZE+56) - index); + if (!irq_fpu_usable()) { + crypto_sha1_update(desc, padding, padlen); + crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits)); + } else { + kernel_fpu_begin(); + /* We need to fill a whole block for __sha1_ssse3_update() */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buffer + index, padding, padlen); + } else { + __sha1_ssse3_update(desc, padding, padlen, index); + } + __sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56); + kernel_fpu_end(); + } + + /* Store state in digest */ + for (i = 0; i < 5; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha1_ssse3_export(struct shash_desc *desc, void *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int sha1_ssse3_import(struct shash_desc *desc, const void *in) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static struct shash_alg alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_ssse3_init, + .update = sha1_ssse3_update, + .final = sha1_ssse3_final, + .export = sha1_ssse3_export, + .import = sha1_ssse3_import, + .descsize = sizeof(struct sha1_state), + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name= "sha1-ssse3", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +#ifdef SHA1_ENABLE_AVX_SUPPORT +static bool __init avx_usable(void) +{ + u64 xcr0; + + if (!cpu_has_avx || !cpu_has_osxsave) + return false; + + xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { + pr_info("AVX detected but unusable.\n"); + + return false; + } + + return true; +} +#endif + +static int __init sha1_ssse3_mod_init(void) +{ + /* test for SSSE3 first */ + if (cpu_has_ssse3) + sha1_transform_asm = sha1_transform_ssse3; + +#ifdef SHA1_ENABLE_AVX_SUPPORT + /* allow AVX to override SSSE3, it's a little faster */ + if (avx_usable()) + sha1_transform_asm = sha1_transform_avx; +#endif + + if (sha1_transform_asm) { + pr_info("Using %s optimized SHA-1 implementation\n", + sha1_transform_asm == sha1_transform_ssse3 ? "SSSE3" + : "AVX"); + return crypto_register_shash(&alg); + } + pr_info("Neither AVX nor SSSE3 is available/usable.\n"); + + return -ENODEV; +} + +static void __exit sha1_ssse3_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(sha1_ssse3_mod_init); +module_exit(sha1_ssse3_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated"); + +MODULE_ALIAS("sha1"); diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S new file mode 100644 index 00000000..658af4bb --- /dev/null +++ b/arch/x86/crypto/twofish-i586-asm_32.S @@ -0,0 +1,335 @@ +/*************************************************************************** +* Copyright (C) 2006 by Joachim Fritschi, * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. 
* +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * +***************************************************************************/ + +.file "twofish-i586-asm.S" +.text + +#include + +/* return address at 0 */ + +#define in_blk 12 /* input byte array address parameter*/ +#define out_blk 8 /* output byte array address parameter*/ +#define ctx 4 /* Twofish context structure */ + +#define a_offset 0 +#define b_offset 4 +#define c_offset 8 +#define d_offset 12 + +/* Structure of the crypto context struct*/ + +#define s0 0 /* S0 Array 256 Words each */ +#define s1 1024 /* S1 Array */ +#define s2 2048 /* S2 Array */ +#define s3 3072 /* S3 Array */ +#define w 4096 /* 8 whitening keys (word) */ +#define k 4128 /* key 1-32 ( word ) */ + +/* define a few register aliases to allow macro substitution */ + +#define R0D %eax +#define R0B %al +#define R0H %ah + +#define R1D %ebx +#define R1B %bl +#define R1H %bh + +#define R2D %ecx +#define R2B %cl +#define R2H %ch + +#define R3D %edx +#define R3B %dl +#define R3H %dh + + +/* performs input whitening */ +#define input_whitening(src,context,offset)\ + xor w+offset(context), src; + +/* performs input whitening */ +#define output_whitening(src,context,offset)\ + xor w+16+offset(context), src; + +/* + * a input register containing a (rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + */ +#define encrypt_round(a,b,c,d,round)\ + push d ## D;\ + movzx b ## B, %edi;\ + mov s1(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + mov s2(%ebp,%edi,4),%esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%ebp,%edi,4),d ## D;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),%esi;\ + movzx b ## B, %edi;\ + xor s3(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + xor (%ebp,%edi,4), %esi;\ + movzx b ## H, %edi;\ + ror $15, b ## D;\ + xor (%ebp,%edi,4), d ## D;\ + movzx a ## H, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + pop %edi;\ + add d ## D, %esi;\ + add %esi, d ## D;\ + add k+round(%ebp), %esi;\ + xor %esi, c ## D;\ + rol $15, c ## D;\ + add k+4+round(%ebp),d ## D;\ + xor %edi, d ## D; + +/* + * a input register containing a (rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + * last round has different rotations for the output preparation + */ +#define encrypt_last_round(a,b,c,d,round)\ + push d ## D;\ + movzx b ## B, %edi;\ + mov s1(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + mov s2(%ebp,%edi,4),%esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%ebp,%edi,4),d ## D;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),%esi;\ + movzx b ## B, %edi;\ + xor s3(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + xor (%ebp,%edi,4), %esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), d ## D;\ + movzx a ## H, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + pop %edi;\ + add d ## D, %esi;\ + add %esi, d ## D;\ + add 
k+round(%ebp), %esi;\ + xor %esi, c ## D;\ + ror $1, c ## D;\ + add k+4+round(%ebp),d ## D;\ + xor %edi, d ## D; + +/* + * a input register containing a + * b input register containing b (rotated 16) + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + */ +#define decrypt_round(a,b,c,d,round)\ + push c ## D;\ + movzx a ## B, %edi;\ + mov (%ebp,%edi,4), c ## D;\ + movzx b ## B, %edi;\ + mov s3(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s1(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), %esi;\ + movzx a ## B, %edi;\ + xor s2(%ebp,%edi,4),c ## D;\ + movzx b ## B, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $15, a ## D;\ + xor s3(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + xor s2(%ebp,%edi,4),%esi;\ + pop %edi;\ + add %esi, c ## D;\ + add c ## D, %esi;\ + add k+round(%ebp), c ## D;\ + xor %edi, c ## D;\ + add k+4+round(%ebp),%esi;\ + xor %esi, d ## D;\ + rol $15, d ## D; + +/* + * a input register containing a + * b input register containing b (rotated 16) + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + * last round has different rotations for the output preparation + */ +#define decrypt_last_round(a,b,c,d,round)\ + push c ## D;\ + movzx a ## B, %edi;\ + mov (%ebp,%edi,4), c ## D;\ + movzx b ## B, %edi;\ + mov s3(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s1(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), %esi;\ + movzx a ## B, %edi;\ + xor s2(%ebp,%edi,4),c ## D;\ + movzx b ## B, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + xor s2(%ebp,%edi,4),%esi;\ + pop %edi;\ + add %esi, c ## D;\ + add c ## D, %esi;\ + add k+round(%ebp), c ## D;\ + xor %edi, c ## D;\ + add k+4+round(%ebp),%esi;\ + xor %esi, d ## D;\ + ror $1, d ## D; + +.align 4 +.global twofish_enc_blk +.global twofish_dec_blk + +twofish_enc_blk: + push %ebp /* save registers according to calling convention*/ + push %ebx + push %esi + push %edi + + mov ctx + 16(%esp), %ebp /* abuse the base pointer: set new base + * pointer to the ctx address */ + mov in_blk+16(%esp),%edi /* input address in edi */ + + mov (%edi), %eax + mov b_offset(%edi), %ebx + mov c_offset(%edi), %ecx + mov d_offset(%edi), %edx + input_whitening(%eax,%ebp,a_offset) + ror $16, %eax + input_whitening(%ebx,%ebp,b_offset) + input_whitening(%ecx,%ebp,c_offset) + input_whitening(%edx,%ebp,d_offset) + rol $1, %edx + + encrypt_round(R0,R1,R2,R3,0); + encrypt_round(R2,R3,R0,R1,8); + encrypt_round(R0,R1,R2,R3,2*8); + encrypt_round(R2,R3,R0,R1,3*8); + encrypt_round(R0,R1,R2,R3,4*8); + encrypt_round(R2,R3,R0,R1,5*8); + encrypt_round(R0,R1,R2,R3,6*8); + encrypt_round(R2,R3,R0,R1,7*8); + encrypt_round(R0,R1,R2,R3,8*8); + encrypt_round(R2,R3,R0,R1,9*8); + encrypt_round(R0,R1,R2,R3,10*8); + encrypt_round(R2,R3,R0,R1,11*8); + encrypt_round(R0,R1,R2,R3,12*8); + encrypt_round(R2,R3,R0,R1,13*8); + encrypt_round(R0,R1,R2,R3,14*8); + encrypt_last_round(R2,R3,R0,R1,15*8); + + output_whitening(%eax,%ebp,c_offset) + output_whitening(%ebx,%ebp,d_offset) + output_whitening(%ecx,%ebp,a_offset) + output_whitening(%edx,%ebp,b_offset) + mov out_blk+16(%esp),%edi; + mov %eax, c_offset(%edi) + mov %ebx, d_offset(%edi) + mov %ecx, (%edi) + mov %edx, 
b_offset(%edi) + + pop %edi + pop %esi + pop %ebx + pop %ebp + mov $1, %eax + ret + +twofish_dec_blk: + push %ebp /* save registers according to calling convention*/ + push %ebx + push %esi + push %edi + + + mov ctx + 16(%esp), %ebp /* abuse the base pointer: set new base + * pointer to the ctx address */ + mov in_blk+16(%esp),%edi /* input address in edi */ + + mov (%edi), %eax + mov b_offset(%edi), %ebx + mov c_offset(%edi), %ecx + mov d_offset(%edi), %edx + output_whitening(%eax,%ebp,a_offset) + output_whitening(%ebx,%ebp,b_offset) + ror $16, %ebx + output_whitening(%ecx,%ebp,c_offset) + output_whitening(%edx,%ebp,d_offset) + rol $1, %ecx + + decrypt_round(R0,R1,R2,R3,15*8); + decrypt_round(R2,R3,R0,R1,14*8); + decrypt_round(R0,R1,R2,R3,13*8); + decrypt_round(R2,R3,R0,R1,12*8); + decrypt_round(R0,R1,R2,R3,11*8); + decrypt_round(R2,R3,R0,R1,10*8); + decrypt_round(R0,R1,R2,R3,9*8); + decrypt_round(R2,R3,R0,R1,8*8); + decrypt_round(R0,R1,R2,R3,7*8); + decrypt_round(R2,R3,R0,R1,6*8); + decrypt_round(R0,R1,R2,R3,5*8); + decrypt_round(R2,R3,R0,R1,4*8); + decrypt_round(R0,R1,R2,R3,3*8); + decrypt_round(R2,R3,R0,R1,2*8); + decrypt_round(R0,R1,R2,R3,1*8); + decrypt_last_round(R2,R3,R0,R1,0); + + input_whitening(%eax,%ebp,c_offset) + input_whitening(%ebx,%ebp,d_offset) + input_whitening(%ecx,%ebp,a_offset) + input_whitening(%edx,%ebp,b_offset) + mov out_blk+16(%esp),%edi; + mov %eax, c_offset(%edi) + mov %ebx, d_offset(%edi) + mov %ecx, (%edi) + mov %edx, b_offset(%edi) + + pop %edi + pop %esi + pop %ebx + pop %ebp + mov $1, %eax + ret diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S new file mode 100644 index 00000000..5b012a2c --- /dev/null +++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S @@ -0,0 +1,316 @@ +/* + * Twofish Cipher 3-way parallel algorithm (x86_64) + * + * Copyright (C) 2011 Jussi Kivilinna + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "twofish-x86_64-asm-3way.S" +.text + +/* structure of crypto context */ +#define s0 0 +#define s1 1024 +#define s2 2048 +#define s3 3072 +#define w 4096 +#define k 4128 + +/********************************************************************** + 3-way twofish + **********************************************************************/ +#define CTX %rdi +#define RIO %rdx + +#define RAB0 %rax +#define RAB1 %rbx +#define RAB2 %rcx + +#define RAB0d %eax +#define RAB1d %ebx +#define RAB2d %ecx + +#define RAB0bh %ah +#define RAB1bh %bh +#define RAB2bh %ch + +#define RAB0bl %al +#define RAB1bl %bl +#define RAB2bl %cl + +#define RCD0 %r8 +#define RCD1 %r9 +#define RCD2 %r10 + +#define RCD0d %r8d +#define RCD1d %r9d +#define RCD2d %r10d + +#define RX0 %rbp +#define RX1 %r11 +#define RX2 %r12 + +#define RX0d %ebp +#define RX1d %r11d +#define RX2d %r12d + +#define RY0 %r13 +#define RY1 %r14 +#define RY2 %r15 + +#define RY0d %r13d +#define RY1d %r14d +#define RY2d %r15d + +#define RT0 %rdx +#define RT1 %rsi + +#define RT0d %edx +#define RT1d %esi + +#define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \ + movzbl ab ## bl, tmp2 ## d; \ + movzbl ab ## bh, tmp1 ## d; \ + rorq $(rot), ab; \ + op1##l T0(CTX, tmp2, 4), dst ## d; \ + op2##l T1(CTX, tmp1, 4), dst ## d; + +/* + * Combined G1 & G2 function. Reordered with help of rotates to have moves + * at begining. + */ +#define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \ + /* G1,1 && G2,1 */ \ + do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \ + do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \ + \ + do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \ + do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \ + \ + do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \ + do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \ + \ + /* G1,2 && G2,2 */ \ + do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \ + do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \ + xchgq cd ## 0, ab ## 0; \ + \ + do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \ + do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \ + xchgq cd ## 1, ab ## 1; \ + \ + do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \ + do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \ + xchgq cd ## 2, ab ## 2; + +#define enc_round_end(ab, x, y, n) \ + addl y ## d, x ## d; \ + addl x ## d, y ## d; \ + addl k+4*(2*(n))(CTX), x ## d; \ + xorl ab ## d, x ## d; \ + addl k+4*(2*(n)+1)(CTX), y ## d; \ + shrq $32, ab; \ + roll $1, ab ## d; \ + xorl y ## d, ab ## d; \ + shlq $32, ab; \ + rorl $1, x ## d; \ + orq x, ab; + +#define dec_round_end(ba, x, y, n) \ + addl y ## d, x ## d; \ + addl x ## d, y ## d; \ + addl k+4*(2*(n))(CTX), x ## d; \ + addl k+4*(2*(n)+1)(CTX), y ## d; \ + xorl ba ## d, y ## d; \ + shrq $32, ba; \ + roll $1, ba ## d; \ + xorl x ## d, ba ## d; \ + shlq $32, ba; \ + rorl $1, y ## d; \ + orq y, ba; + +#define encrypt_round3(ab, cd, n) \ + g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \ + \ + enc_round_end(ab ## 0, RX0, RY0, n); \ + enc_round_end(ab ## 1, RX1, RY1, n); \ + enc_round_end(ab ## 2, RX2, RY2, n); + +#define decrypt_round3(ba, dc, n) \ + 
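+	/* mirror of encrypt_round3(): s-box table order rotated and RX/RY swapped for the inverse g-functions */ \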
g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \ + \ + dec_round_end(ba ## 0, RX0, RY0, n); \ + dec_round_end(ba ## 1, RX1, RY1, n); \ + dec_round_end(ba ## 2, RX2, RY2, n); + +#define encrypt_cycle3(ab, cd, n) \ + encrypt_round3(ab, cd, n*2); \ + encrypt_round3(ab, cd, (n*2)+1); + +#define decrypt_cycle3(ba, dc, n) \ + decrypt_round3(ba, dc, (n*2)+1); \ + decrypt_round3(ba, dc, (n*2)); + +#define inpack3(in, n, xy, m) \ + movq 4*(n)(in), xy ## 0; \ + xorq w+4*m(CTX), xy ## 0; \ + \ + movq 4*(4+(n))(in), xy ## 1; \ + xorq w+4*m(CTX), xy ## 1; \ + \ + movq 4*(8+(n))(in), xy ## 2; \ + xorq w+4*m(CTX), xy ## 2; + +#define outunpack3(op, out, n, xy, m) \ + xorq w+4*m(CTX), xy ## 0; \ + op ## q xy ## 0, 4*(n)(out); \ + \ + xorq w+4*m(CTX), xy ## 1; \ + op ## q xy ## 1, 4*(4+(n))(out); \ + \ + xorq w+4*m(CTX), xy ## 2; \ + op ## q xy ## 2, 4*(8+(n))(out); + +#define inpack_enc3() \ + inpack3(RIO, 0, RAB, 0); \ + inpack3(RIO, 2, RCD, 2); + +#define outunpack_enc3(op) \ + outunpack3(op, RIO, 2, RAB, 6); \ + outunpack3(op, RIO, 0, RCD, 4); + +#define inpack_dec3() \ + inpack3(RIO, 0, RAB, 4); \ + rorq $32, RAB0; \ + rorq $32, RAB1; \ + rorq $32, RAB2; \ + inpack3(RIO, 2, RCD, 6); \ + rorq $32, RCD0; \ + rorq $32, RCD1; \ + rorq $32, RCD2; + +#define outunpack_dec3() \ + rorq $32, RCD0; \ + rorq $32, RCD1; \ + rorq $32, RCD2; \ + outunpack3(mov, RIO, 0, RCD, 0); \ + rorq $32, RAB0; \ + rorq $32, RAB1; \ + rorq $32, RAB2; \ + outunpack3(mov, RIO, 2, RAB, 2); + +.align 8 +.global __twofish_enc_blk_3way +.type __twofish_enc_blk_3way,@function; + +__twofish_enc_blk_3way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src, RIO + * %rcx: bool, if true: xor output + */ + pushq %r15; + pushq %r14; + pushq %r13; + pushq %r12; + pushq %rbp; + pushq %rbx; + + pushq %rcx; /* bool xor */ + pushq %rsi; /* dst */ + + inpack_enc3(); + + encrypt_cycle3(RAB, RCD, 0); + encrypt_cycle3(RAB, RCD, 1); + encrypt_cycle3(RAB, RCD, 2); + encrypt_cycle3(RAB, RCD, 3); + encrypt_cycle3(RAB, RCD, 4); + encrypt_cycle3(RAB, RCD, 5); + encrypt_cycle3(RAB, RCD, 6); + encrypt_cycle3(RAB, RCD, 7); + + popq RIO; /* dst */ + popq %rbp; /* bool xor */ + + testb %bpl, %bpl; + jnz __enc_xor3; + + outunpack_enc3(mov); + + popq %rbx; + popq %rbp; + popq %r12; + popq %r13; + popq %r14; + popq %r15; + ret; + +__enc_xor3: + outunpack_enc3(xor); + + popq %rbx; + popq %rbp; + popq %r12; + popq %r13; + popq %r14; + popq %r15; + ret; + +.global twofish_dec_blk_3way +.type twofish_dec_blk_3way,@function; + +twofish_dec_blk_3way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src, RIO + */ + pushq %r15; + pushq %r14; + pushq %r13; + pushq %r12; + pushq %rbp; + pushq %rbx; + + pushq %rsi; /* dst */ + + inpack_dec3(); + + decrypt_cycle3(RAB, RCD, 7); + decrypt_cycle3(RAB, RCD, 6); + decrypt_cycle3(RAB, RCD, 5); + decrypt_cycle3(RAB, RCD, 4); + decrypt_cycle3(RAB, RCD, 3); + decrypt_cycle3(RAB, RCD, 2); + decrypt_cycle3(RAB, RCD, 1); + decrypt_cycle3(RAB, RCD, 0); + + popq RIO; /* dst */ + + outunpack_dec3(); + + popq %rbx; + popq %rbp; + popq %r12; + popq %r13; + popq %r14; + popq %r15; + ret; + diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S new file mode 100644 index 00000000..7bcf3fcc --- /dev/null +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S @@ -0,0 +1,322 @@ +/*************************************************************************** +* Copyright (C) 2006 by Joachim Fritschi, * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the 
terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * +***************************************************************************/ + +.file "twofish-x86_64-asm.S" +.text + +#include + +#define a_offset 0 +#define b_offset 4 +#define c_offset 8 +#define d_offset 12 + +/* Structure of the crypto context struct*/ + +#define s0 0 /* S0 Array 256 Words each */ +#define s1 1024 /* S1 Array */ +#define s2 2048 /* S2 Array */ +#define s3 3072 /* S3 Array */ +#define w 4096 /* 8 whitening keys (word) */ +#define k 4128 /* key 1-32 ( word ) */ + +/* define a few register aliases to allow macro substitution */ + +#define R0 %rax +#define R0D %eax +#define R0B %al +#define R0H %ah + +#define R1 %rbx +#define R1D %ebx +#define R1B %bl +#define R1H %bh + +#define R2 %rcx +#define R2D %ecx +#define R2B %cl +#define R2H %ch + +#define R3 %rdx +#define R3D %edx +#define R3B %dl +#define R3H %dh + + +/* performs input whitening */ +#define input_whitening(src,context,offset)\ + xor w+offset(context), src; + +/* performs input whitening */ +#define output_whitening(src,context,offset)\ + xor w+16+offset(context), src; + + +/* + * a input register containing a (rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + */ +#define encrypt_round(a,b,c,d,round)\ + movzx b ## B, %edi;\ + mov s1(%r11,%rdi,4),%r8d;\ + movzx a ## B, %edi;\ + mov s2(%r11,%rdi,4),%r9d;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%r11,%rdi,4),%r8d;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%r11,%rdi,4),%r9d;\ + movzx b ## B, %edi;\ + xor s3(%r11,%rdi,4),%r8d;\ + movzx a ## B, %edi;\ + xor (%r11,%rdi,4), %r9d;\ + movzx b ## H, %edi;\ + ror $15, b ## D;\ + xor (%r11,%rdi,4), %r8d;\ + movzx a ## H, %edi;\ + xor s1(%r11,%rdi,4),%r9d;\ + add %r8d, %r9d;\ + add %r9d, %r8d;\ + add k+round(%r11), %r9d;\ + xor %r9d, c ## D;\ + rol $15, c ## D;\ + add k+4+round(%r11),%r8d;\ + xor %r8d, d ## D; + +/* + * a input register containing a(rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + * during the round a and b are prepared for the output whitening + */ +#define encrypt_last_round(a,b,c,d,round)\ + mov b ## D, %r10d;\ + shl $32, %r10;\ + movzx b ## B, %edi;\ + mov s1(%r11,%rdi,4),%r8d;\ + movzx a ## B, %edi;\ + mov s2(%r11,%rdi,4),%r9d;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%r11,%rdi,4),%r8d;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%r11,%rdi,4),%r9d;\ + movzx b ## B, %edi;\ + xor s3(%r11,%rdi,4),%r8d;\ + movzx a ## B, %edi;\ + xor (%r11,%rdi,4), %r9d;\ + xor a, %r10;\ + movzx b ## H, %edi;\ + xor (%r11,%rdi,4), %r8d;\ + movzx a ## H, %edi;\ + xor s1(%r11,%rdi,4),%r9d;\ + add %r8d, %r9d;\ + add %r9d, %r8d;\ + add k+round(%r11), 
%r9d;\ + xor %r9d, c ## D;\ + ror $1, c ## D;\ + add k+4+round(%r11),%r8d;\ + xor %r8d, d ## D + +/* + * a input register containing a + * b input register containing b (rotated 16) + * c input register containing c (already rol $1) + * d input register containing d + * operations on a and b are interleaved to increase performance + */ +#define decrypt_round(a,b,c,d,round)\ + movzx a ## B, %edi;\ + mov (%r11,%rdi,4), %r9d;\ + movzx b ## B, %edi;\ + mov s3(%r11,%rdi,4),%r8d;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s1(%r11,%rdi,4),%r9d;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%r11,%rdi,4), %r8d;\ + movzx a ## B, %edi;\ + xor s2(%r11,%rdi,4),%r9d;\ + movzx b ## B, %edi;\ + xor s1(%r11,%rdi,4),%r8d;\ + movzx a ## H, %edi;\ + ror $15, a ## D;\ + xor s3(%r11,%rdi,4),%r9d;\ + movzx b ## H, %edi;\ + xor s2(%r11,%rdi,4),%r8d;\ + add %r8d, %r9d;\ + add %r9d, %r8d;\ + add k+round(%r11), %r9d;\ + xor %r9d, c ## D;\ + add k+4+round(%r11),%r8d;\ + xor %r8d, d ## D;\ + rol $15, d ## D; + +/* + * a input register containing a + * b input register containing b + * c input register containing c (already rol $1) + * d input register containing d + * operations on a and b are interleaved to increase performance + * during the round a and b are prepared for the output whitening + */ +#define decrypt_last_round(a,b,c,d,round)\ + movzx a ## B, %edi;\ + mov (%r11,%rdi,4), %r9d;\ + movzx b ## B, %edi;\ + mov s3(%r11,%rdi,4),%r8d;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%r11,%rdi,4), %r8d;\ + movzx a ## H, %edi;\ + mov b ## D, %r10d;\ + shl $32, %r10;\ + xor a, %r10;\ + ror $16, a ## D;\ + xor s1(%r11,%rdi,4),%r9d;\ + movzx b ## B, %edi;\ + xor s1(%r11,%rdi,4),%r8d;\ + movzx a ## B, %edi;\ + xor s2(%r11,%rdi,4),%r9d;\ + movzx b ## H, %edi;\ + xor s2(%r11,%rdi,4),%r8d;\ + movzx a ## H, %edi;\ + xor s3(%r11,%rdi,4),%r9d;\ + add %r8d, %r9d;\ + add %r9d, %r8d;\ + add k+round(%r11), %r9d;\ + xor %r9d, c ## D;\ + add k+4+round(%r11),%r8d;\ + xor %r8d, d ## D;\ + ror $1, d ## D; + +.align 8 +.global twofish_enc_blk +.global twofish_dec_blk + +twofish_enc_blk: + pushq R1 + + /* %rdi contains the ctx address */ + /* %rsi contains the output address */ + /* %rdx contains the input address */ + /* ctx address is moved to free one non-rex register + as target for the 8bit high operations */ + mov %rdi, %r11 + + movq (R3), R1 + movq 8(R3), R3 + input_whitening(R1,%r11,a_offset) + input_whitening(R3,%r11,c_offset) + mov R1D, R0D + rol $16, R0D + shr $32, R1 + mov R3D, R2D + shr $32, R3 + rol $1, R3D + + encrypt_round(R0,R1,R2,R3,0); + encrypt_round(R2,R3,R0,R1,8); + encrypt_round(R0,R1,R2,R3,2*8); + encrypt_round(R2,R3,R0,R1,3*8); + encrypt_round(R0,R1,R2,R3,4*8); + encrypt_round(R2,R3,R0,R1,5*8); + encrypt_round(R0,R1,R2,R3,6*8); + encrypt_round(R2,R3,R0,R1,7*8); + encrypt_round(R0,R1,R2,R3,8*8); + encrypt_round(R2,R3,R0,R1,9*8); + encrypt_round(R0,R1,R2,R3,10*8); + encrypt_round(R2,R3,R0,R1,11*8); + encrypt_round(R0,R1,R2,R3,12*8); + encrypt_round(R2,R3,R0,R1,13*8); + encrypt_round(R0,R1,R2,R3,14*8); + encrypt_last_round(R2,R3,R0,R1,15*8); + + + output_whitening(%r10,%r11,a_offset) + movq %r10, (%rsi) + + shl $32, R1 + xor R0, R1 + + output_whitening(R1,%r11,c_offset) + movq R1, 8(%rsi) + + popq R1 + movq $1,%rax + ret + +twofish_dec_blk: + pushq R1 + + /* %rdi contains the ctx address */ + /* %rsi contains the output address */ + /* %rdx contains the input address */ + /* ctx address is moved to free one non-rex register + as target for the 8bit high operations */ + mov %rdi, %r11 + + movq 
(R3), R1 + movq 8(R3), R3 + output_whitening(R1,%r11,a_offset) + output_whitening(R3,%r11,c_offset) + mov R1D, R0D + shr $32, R1 + rol $16, R1D + mov R3D, R2D + shr $32, R3 + rol $1, R2D + + decrypt_round(R0,R1,R2,R3,15*8); + decrypt_round(R2,R3,R0,R1,14*8); + decrypt_round(R0,R1,R2,R3,13*8); + decrypt_round(R2,R3,R0,R1,12*8); + decrypt_round(R0,R1,R2,R3,11*8); + decrypt_round(R2,R3,R0,R1,10*8); + decrypt_round(R0,R1,R2,R3,9*8); + decrypt_round(R2,R3,R0,R1,8*8); + decrypt_round(R0,R1,R2,R3,7*8); + decrypt_round(R2,R3,R0,R1,6*8); + decrypt_round(R0,R1,R2,R3,5*8); + decrypt_round(R2,R3,R0,R1,4*8); + decrypt_round(R0,R1,R2,R3,3*8); + decrypt_round(R2,R3,R0,R1,2*8); + decrypt_round(R0,R1,R2,R3,1*8); + decrypt_last_round(R2,R3,R0,R1,0); + + input_whitening(%r10,%r11,a_offset) + movq %r10, (%rsi) + + shl $32, R1 + xor R0, R1 + + input_whitening(R1,%r11,c_offset) + movq R1, 8(%rsi) + + popq R1 + movq $1,%rax + ret diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c new file mode 100644 index 00000000..359ae084 --- /dev/null +++ b/arch/x86/crypto/twofish_glue.c @@ -0,0 +1,101 @@ +/* + * Glue Code for assembler optimized version of TWOFISH + * + * Originally Twofish for GPG + * By Matthew Skala , July 26, 1998 + * 256-bit key length added March 20, 1999 + * Some modifications to reduce the text size by Werner Koch, April, 1998 + * Ported to the kerneli patch by Marc Mutz + * Ported to CryptoAPI by Colin Slater + * + * The original author has disclaimed all copyright interest in this + * code and thus put it in the public domain. The subsequent authors + * have put this under the GNU General Public License. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + * This code is a "clean room" implementation, written from the paper + * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, + * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available + * through http://www.counterpane.com/twofish.html + * + * For background information on multiplication in finite fields, used for + * the matrix operations in the key schedule, see the book _Contemporary + * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the + * Third Edition. 
+ */ + +#include +#include +#include +#include +#include + +asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); +EXPORT_SYMBOL_GPL(twofish_enc_blk); +asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); +EXPORT_SYMBOL_GPL(twofish_dec_blk); + +static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_enc_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_dec_blk(crypto_tfm_ctx(tfm), dst, src); +} + +static struct crypto_alg alg = { + .cra_name = "twofish", + .cra_driver_name = "twofish-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = TF_MIN_KEY_SIZE, + .cia_max_keysize = TF_MAX_KEY_SIZE, + .cia_setkey = twofish_setkey, + .cia_encrypt = twofish_encrypt, + .cia_decrypt = twofish_decrypt + } + } +}; + +static int __init init(void) +{ + return crypto_register_alg(&alg); +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION ("Twofish Cipher Algorithm, asm optimized"); +MODULE_ALIAS("twofish"); +MODULE_ALIAS("twofish-asm"); diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c new file mode 100644 index 00000000..922ab24c --- /dev/null +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -0,0 +1,695 @@ +/* + * Glue Code for 3-way parallel assembler optimized version of Twofish + * + * Copyright (c) 2011 Jussi Kivilinna + * + * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: + * Copyright (c) 2006 Herbert Xu + * CTR part based on code (crypto/ctr.c) by: + * (C) Copyright IBM Corp. 2007 - Joy Latten + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* regular block cipher functions from twofish_x86_64 module */ +asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); +asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); + +/* 3-way parallel cipher functions */ +asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src) +{ + __twofish_enc_blk_3way(ctx, dst, src, false); +} + +static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, + const u8 *src) +{ + __twofish_enc_blk_3way(ctx, dst, src, true); +} + +static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, + void (*fn)(struct twofish_ctx *, u8 *, const u8 *), + void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *)) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + fn_3way(ctx, wdst, wsrc); + + wsrc += bsize * 3; + wdst += bsize * 3; + nbytes -= bsize * 3; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + fn(ctx, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, twofish_enc_blk, twofish_enc_blk_3way); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way); +} + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 *iv = (u128 *)walk->iv; + + do { + u128_xor(dst, src, iv); + twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err 
= blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ivs[3 - 1]; + u128 last_iv; + + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + nbytes -= bsize * (3 - 1); + src -= 3 - 1; + dst -= 3 - 1; + + ivs[0] = src[0]; + ivs[1] = src[1]; + + twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); + + u128_xor(dst + 1, dst + 1, ivs + 0); + u128_xor(dst + 2, dst + 2, ivs + 1); + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + for (;;) { + twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + u128_xor(dst, dst, src - 1); + src -= 1; + dst -= 1; + } + +done: + u128_xor(dst, dst, (u128 *)walk->iv); + *(u128 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static inline void u128_to_be128(be128 *dst, const u128 *src) +{ + dst->a = cpu_to_be64(src->a); + dst->b = cpu_to_be64(src->b); +} + +static inline void be128_to_u128(u128 *dst, const be128 *src) +{ + dst->a = be64_to_cpu(src->a); + dst->b = be64_to_cpu(src->b); +} + +static inline void u128_inc(u128 *i) +{ + i->b++; + if (!i->b) + i->a++; +} + +static void ctr_crypt_final(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + u8 *ctrblk = walk->iv; + u8 keystream[TF_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + twofish_enc_blk(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + + crypto_inc(ctrblk, TF_BLOCK_SIZE); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + unsigned int bsize = TF_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u128 *src = (u128 *)walk->src.virt.addr; + u128 *dst = (u128 *)walk->dst.virt.addr; + u128 ctrblk; + be128 ctrblocks[3]; + + be128_to_u128(&ctrblk, (be128 *)walk->iv); + + /* Process three block batch */ + if (nbytes >= bsize * 3) { + do { + if (dst != src) { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + } + + /* create ctrblks for parallel encrypt */ + u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + u128_to_be128(&ctrblocks[1], &ctrblk); + u128_inc(&ctrblk); + u128_to_be128(&ctrblocks[2], &ctrblk); + u128_inc(&ctrblk); + + twofish_enc_blk_xor_3way(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += 3; + dst += 3; + nbytes -= bsize * 3; + } while (nbytes >= bsize * 3); + + if (nbytes < bsize) + goto done; + } + + /* 
Handle leftovers */ + do { + if (dst != src) + *dst = *src; + + u128_to_be128(&ctrblocks[0], &ctrblk); + u128_inc(&ctrblk); + + twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); + u128_xor(dst, dst, (u128 *)ctrblocks); + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + u128_to_be128((be128 *)walk->iv, &ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE); + + while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) { + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + if (walk.nbytes) { + ctr_crypt_final(desc, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = TF_BLOCK_SIZE; + struct twofish_ctx *ctx = priv; + int i; + + if (nbytes == 3 * bsize) { + twofish_enc_blk_3way(ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + twofish_enc_blk(ctx, srcdst, srcdst); +} + +static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = TF_BLOCK_SIZE; + struct twofish_ctx *ctx = priv; + int i; + + if (nbytes == 3 * bsize) { + twofish_dec_blk_3way(ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + twofish_dec_blk(ctx, srcdst, srcdst); +} + +struct twofish_lrw_ctx { + struct lrw_table_ctx lrw_table; + struct twofish_ctx twofish_ctx; +}; + +static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE, + &tfm->crt_flags); + if (err) + return err; + + return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); +} + +static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[3]; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &ctx->twofish_ctx, + .crypt_fn = encrypt_callback, + }; + + return lrw_crypt(desc, dst, src, nbytes, &req); +} + +static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[3]; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &ctx->twofish_ctx, + .crypt_fn = decrypt_callback, + }; + + return lrw_crypt(desc, dst, src, nbytes, &req); +} + +static void lrw_exit_tfm(struct crypto_tfm *tfm) +{ + struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + + lrw_free_table(&ctx->lrw_table); +} + +struct twofish_xts_ctx { + struct twofish_ctx tweak_ctx; + struct twofish_ctx crypt_ctx; +}; + +static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int err; + + /* key consists of keys of equal size concatenated, therefore + * the length must be even + */ + if (keylen % 2) { + *flags 
|= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + /* first half of xts-key is for crypt */ + err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, + flags); +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[3]; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), + .crypt_ctx = &ctx->crypt_ctx, + .crypt_fn = encrypt_callback, + }; + + return xts_crypt(desc, dst, src, nbytes, &req); +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[3]; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), + .crypt_ctx = &ctx->crypt_ctx, + .crypt_fn = decrypt_callback, + }; + + return xts_crypt(desc, dst, src, nbytes, &req); +} + +static struct crypto_alg tf_algs[5] = { { + .cra_name = "ecb(twofish)", + .cra_driver_name = "ecb-twofish-3way", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(tf_algs[0].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = twofish_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "cbc(twofish)", + .cra_driver_name = "cbc-twofish-3way", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(tf_algs[1].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "ctr(twofish)", + .cra_driver_name = "ctr-twofish-3way", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(tf_algs[2].cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}, { + .cra_name = "lrw(twofish)", + .cra_driver_name = "lrw-twofish-3way", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_lrw_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list), + .cra_exit = lrw_exit_tfm, + .cra_u = { + .blkcipher = { + .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, + .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE, + 
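+			/*
+			 * Key-size annotation: an lrw() key is the cipher key
+			 * plus one extra block of tweak material; as shown in
+			 * lrw_twofish_setkey() above, the last TF_BLOCK_SIZE
+			 * bytes go to lrw_init_table() and the rest to
+			 * __twofish_setkey().
+			 */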
		.ivsize = TF_BLOCK_SIZE,
+			.setkey = lrw_twofish_setkey,
+			.encrypt = lrw_encrypt,
+			.decrypt = lrw_decrypt,
+		},
+	},
+}, {
+	.cra_name = "xts(twofish)",
+	.cra_driver_name = "xts-twofish-3way",
+	.cra_priority = 300,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize = TF_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct twofish_xts_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_list = LIST_HEAD_INIT(tf_algs[4].cra_list),
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize = TF_MIN_KEY_SIZE * 2,
+			.max_keysize = TF_MAX_KEY_SIZE * 2,
+			.ivsize = TF_BLOCK_SIZE,
+			.setkey = xts_twofish_setkey,
+			.encrypt = xts_encrypt,
+			.decrypt = xts_decrypt,
+		},
+	},
+} };
+
+static bool is_blacklisted_cpu(void)
+{
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return false;
+
+	if (boot_cpu_data.x86 == 0x06 &&
+		(boot_cpu_data.x86_model == 0x1c ||
+		 boot_cpu_data.x86_model == 0x26 ||
+		 boot_cpu_data.x86_model == 0x36)) {
+		/*
+		 * On Atom, twofish-3way is slower than the original assembler
+		 * implementation. Twofish-3way trades off some performance by
+		 * storing blocks in 64-bit registers so that three blocks can
+		 * be processed in parallel. On out-of-order CPUs that
+		 * parallelism gains back more performance than was traded
+		 * off. However, Atom does not benefit from this parallelism
+		 * and should be blacklisted.
+		 */
+		return true;
+	}
+
+	if (boot_cpu_data.x86 == 0x0f) {
+		/*
+		 * On Pentium 4, twofish-3way is slower than the original
+		 * assembler implementation because of the excessive use of
+		 * 64-bit rotates and left-shifts (which are really slow on
+		 * P4) needed to store and handle a 128-bit block in two
+		 * 64-bit registers.
+		 */
+		return true;
+	}
+
+	return false;
+}
+
+static int force;
+module_param(force, int, 0);
+MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
+
+static int __init init(void)
+{
+	if (!force && is_blacklisted_cpu()) {
+		printk(KERN_INFO
+			"twofish-x86_64-3way: performance on this CPU "
+			"would be suboptimal: disabling "
+			"twofish-x86_64-3way.\n");
+		return -ENODEV;
+	}
+
+	return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
+}
+
+static void __exit fini(void)
+{
+	crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");
+MODULE_ALIAS("twofish");
+MODULE_ALIAS("twofish-asm");
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
new file mode 100644
index 00000000..455646e0
--- /dev/null
+++ b/arch/x86/ia32/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for the ia32 kernel emulation subsystem.
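+# (Illustrative aside: an obj-$(CONFIG_FOO) entry expands to obj-y or
+# obj-m according to how CONFIG_FOO is set, and to the never-used
+# variable "obj-" when it is unset. The sysv-$(CONFIG_SYSVIPC) line
+# below relies on the same trick, so ipc32.o is pulled in via $(sysv-y)
+# only when CONFIG_SYSVIPC=y.)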
+# + +obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o +obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o + +sysv-$(CONFIG_SYSVIPC) := ipc32.o +obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) + +obj-$(CONFIG_IA32_AOUT) += ia32_aout.o + +audit-class-$(CONFIG_AUDIT) := audit.o +obj-$(CONFIG_IA32_EMULATION) += $(audit-class-y) diff --git a/arch/x86/ia32/audit.c b/arch/x86/ia32/audit.c new file mode 100644 index 00000000..5d7b381d --- /dev/null +++ b/arch/x86/ia32/audit.c @@ -0,0 +1,42 @@ +#include + +unsigned ia32_dir_class[] = { +#include +~0U +}; + +unsigned ia32_chattr_class[] = { +#include +~0U +}; + +unsigned ia32_write_class[] = { +#include +~0U +}; + +unsigned ia32_read_class[] = { +#include +~0U +}; + +unsigned ia32_signal_class[] = { +#include +~0U +}; + +int ia32_classify_syscall(unsigned syscall) +{ + switch (syscall) { + case __NR_open: + return 2; + case __NR_openat: + return 3; + case __NR_socketcall: + return 4; + case __NR_execve: + return 5; + default: + return 1; + } +} diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c new file mode 100644 index 00000000..07b3a68d --- /dev/null +++ b/arch/x86/ia32/ia32_aout.c @@ -0,0 +1,514 @@ +/* + * a.out loader for x86-64 + * + * Copyright (C) 1991, 1992, 1996 Linus Torvalds + * Hacked together by Andi Kleen + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#undef WARN_OLD +#undef CORE_DUMP /* definitely broken */ + +static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs); +static int load_aout_library(struct file *); + +#ifdef CORE_DUMP +static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, + unsigned long limit); + +/* + * fill in the user structure for a core dump.. + */ +static void dump_thread32(struct pt_regs *regs, struct user32 *dump) +{ + u32 fs, gs; + +/* changed the size calculations - should hopefully work better. 
lbt */ + dump->magic = CMAGIC; + dump->start_code = 0; + dump->start_stack = regs->sp & ~(PAGE_SIZE - 1); + dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; + dump->u_dsize = ((unsigned long) + (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; + dump->u_dsize -= dump->u_tsize; + dump->u_ssize = 0; + dump->u_debugreg[0] = current->thread.debugreg0; + dump->u_debugreg[1] = current->thread.debugreg1; + dump->u_debugreg[2] = current->thread.debugreg2; + dump->u_debugreg[3] = current->thread.debugreg3; + dump->u_debugreg[4] = 0; + dump->u_debugreg[5] = 0; + dump->u_debugreg[6] = current->thread.debugreg6; + dump->u_debugreg[7] = current->thread.debugreg7; + + if (dump->start_stack < 0xc0000000) { + unsigned long tmp; + + tmp = (unsigned long) (0xc0000000 - dump->start_stack); + dump->u_ssize = tmp >> PAGE_SHIFT; + } + + dump->regs.bx = regs->bx; + dump->regs.cx = regs->cx; + dump->regs.dx = regs->dx; + dump->regs.si = regs->si; + dump->regs.di = regs->di; + dump->regs.bp = regs->bp; + dump->regs.ax = regs->ax; + dump->regs.ds = current->thread.ds; + dump->regs.es = current->thread.es; + savesegment(fs, fs); + dump->regs.fs = fs; + savesegment(gs, gs); + dump->regs.gs = gs; + dump->regs.orig_ax = regs->orig_ax; + dump->regs.ip = regs->ip; + dump->regs.cs = regs->cs; + dump->regs.flags = regs->flags; + dump->regs.sp = regs->sp; + dump->regs.ss = regs->ss; + +#if 1 /* FIXME */ + dump->u_fpvalid = 0; +#else + dump->u_fpvalid = dump_fpu(regs, &dump->i387); +#endif +} + +#endif + +static struct linux_binfmt aout_format = { + .module = THIS_MODULE, + .load_binary = load_aout_binary, + .load_shlib = load_aout_library, +#ifdef CORE_DUMP + .core_dump = aout_core_dump, +#endif + .min_coredump = PAGE_SIZE +}; + +static void set_brk(unsigned long start, unsigned long end) +{ + start = PAGE_ALIGN(start); + end = PAGE_ALIGN(end); + if (end <= start) + return; + vm_brk(start, end - start); +} + +#ifdef CORE_DUMP +/* + * These are the only things you should do on a core-file: use only these + * macros to write out all the necessary info. + */ + +#include + +#define DUMP_WRITE(addr, nr) \ + if (!dump_write(file, (void *)(addr), (nr))) \ + goto end_coredump; + +#define DUMP_SEEK(offset) \ + if (!dump_seek(file, offset)) \ + goto end_coredump; + +#define START_DATA() (u.u_tsize << PAGE_SHIFT) +#define START_STACK(u) (u.start_stack) + +/* + * Routine writes a core dump image in the current directory. + * Currently only a stub-function. + * + * Note that setuid/setgid files won't make a core-dump if the uid/gid + * changed due to the set[u|g]id. It's enforced by the "current->mm->dumpable" + * field, which also makes sure the core-dumps won't be recursive if the + * dumping of the process results in another error.. + */ + +static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, + unsigned long limit) +{ + mm_segment_t fs; + int has_dumped = 0; + unsigned long dump_start, dump_size; + struct user32 dump; + + fs = get_fs(); + set_fs(KERNEL_DS); + has_dumped = 1; + current->flags |= PF_DUMPCORE; + strncpy(dump.u_comm, current->comm, sizeof(current->comm)); + dump.u_ar0 = offsetof(struct user32, regs); + dump.signal = signr; + dump_thread32(regs, &dump); + + /* + * If the size of the dump file exceeds the rlimit, then see + * what would happen if we wrote the stack, but not the data + * area. + */ + if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > limit) + dump.u_dsize = 0; + + /* Make sure we have enough room to write the stack and data areas. 
*/ + if ((dump.u_ssize + 1) * PAGE_SIZE > limit) + dump.u_ssize = 0; + + /* make sure we actually have a data and stack area to dump */ + set_fs(USER_DS); + if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_DATA(dump), + dump.u_dsize << PAGE_SHIFT)) + dump.u_dsize = 0; + if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_STACK(dump), + dump.u_ssize << PAGE_SHIFT)) + dump.u_ssize = 0; + + set_fs(KERNEL_DS); + /* struct user */ + DUMP_WRITE(&dump, sizeof(dump)); + /* Now dump all of the user data. Include malloced stuff as well */ + DUMP_SEEK(PAGE_SIZE); + /* now we start writing out the user space info */ + set_fs(USER_DS); + /* Dump the data area */ + if (dump.u_dsize != 0) { + dump_start = START_DATA(dump); + dump_size = dump.u_dsize << PAGE_SHIFT; + DUMP_WRITE(dump_start, dump_size); + } + /* Now prepare to dump the stack area */ + if (dump.u_ssize != 0) { + dump_start = START_STACK(dump); + dump_size = dump.u_ssize << PAGE_SHIFT; + DUMP_WRITE(dump_start, dump_size); + } +end_coredump: + set_fs(fs); + return has_dumped; +} +#endif + +/* + * create_aout_tables() parses the env- and arg-strings in new user + * memory and creates the pointer tables from them, and puts their + * addresses on the "stack", returning the new stack pointer value. + */ +static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm) +{ + u32 __user *argv, *envp, *sp; + int argc = bprm->argc, envc = bprm->envc; + + sp = (u32 __user *) ((-(unsigned long)sizeof(u32)) & (unsigned long) p); + sp -= envc+1; + envp = sp; + sp -= argc+1; + argv = sp; + put_user((unsigned long) envp, --sp); + put_user((unsigned long) argv, --sp); + put_user(argc, --sp); + current->mm->arg_start = (unsigned long) p; + while (argc-- > 0) { + char c; + + put_user((u32)(unsigned long)p, argv++); + do { + get_user(c, p++); + } while (c); + } + put_user(0, argv); + current->mm->arg_end = current->mm->env_start = (unsigned long) p; + while (envc-- > 0) { + char c; + + put_user((u32)(unsigned long)p, envp++); + do { + get_user(c, p++); + } while (c); + } + put_user(0, envp); + current->mm->env_end = (unsigned long) p; + return sp; +} + +/* + * These are the functions used to load a.out style executables and shared + * libraries. There is no binary dependent code anywhere else. + */ +static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) +{ + unsigned long error, fd_offset, rlim; + struct exec ex; + int retval; + + ex = *((struct exec *) bprm->buf); /* exec-header */ + if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && + N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || + N_TRSIZE(ex) || N_DRSIZE(ex) || + i_size_read(bprm->file->f_path.dentry->d_inode) < + ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { + return -ENOEXEC; + } + + fd_offset = N_TXTOFF(ex); + + /* Check initial limits. This avoids letting people circumvent + * size limits imposed on them by creating programs with large + * arrays in the data or bss. 
+ */ + rlim = rlimit(RLIMIT_DATA); + if (rlim >= RLIM_INFINITY) + rlim = ~0; + if (ex.a_data + ex.a_bss > rlim) + return -ENOMEM; + + /* Flush all traces of the currently running executable */ + retval = flush_old_exec(bprm); + if (retval) + return retval; + + /* OK, This is the point of no return */ + set_personality(PER_LINUX); + set_personality_ia32(false); + + setup_new_exec(bprm); + + regs->cs = __USER32_CS; + regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 = + regs->r13 = regs->r14 = regs->r15 = 0; + + current->mm->end_code = ex.a_text + + (current->mm->start_code = N_TXTADDR(ex)); + current->mm->end_data = ex.a_data + + (current->mm->start_data = N_DATADDR(ex)); + current->mm->brk = ex.a_bss + + (current->mm->start_brk = N_BSSADDR(ex)); + current->mm->free_area_cache = TASK_UNMAPPED_BASE; + current->mm->cached_hole_size = 0; + + retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); + if (retval < 0) { + /* Someone check-me: is this error path enough? */ + send_sig(SIGKILL, current, 0); + return retval; + } + + install_exec_creds(bprm); + + if (N_MAGIC(ex) == OMAGIC) { + unsigned long text_addr, map_size; + loff_t pos; + + text_addr = N_TXTADDR(ex); + + pos = 32; + map_size = ex.a_text+ex.a_data; + + error = vm_brk(text_addr & PAGE_MASK, map_size); + + if (error != (text_addr & PAGE_MASK)) { + send_sig(SIGKILL, current, 0); + return error; + } + + error = bprm->file->f_op->read(bprm->file, + (char __user *)text_addr, + ex.a_text+ex.a_data, &pos); + if ((signed long)error < 0) { + send_sig(SIGKILL, current, 0); + return error; + } + + flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); + } else { +#ifdef WARN_OLD + static unsigned long error_time, error_time2; + if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && + (N_MAGIC(ex) != NMAGIC) && + time_after(jiffies, error_time2 + 5*HZ)) { + printk(KERN_NOTICE "executable not page aligned\n"); + error_time2 = jiffies; + } + + if ((fd_offset & ~PAGE_MASK) != 0 && + time_after(jiffies, error_time + 5*HZ)) { + printk(KERN_WARNING + "fd_offset is not page aligned. 
Please convert " + "program: %s\n", + bprm->file->f_path.dentry->d_name.name); + error_time = jiffies; + } +#endif + + if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) { + loff_t pos = fd_offset; + + vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); + bprm->file->f_op->read(bprm->file, + (char __user *)N_TXTADDR(ex), + ex.a_text+ex.a_data, &pos); + flush_icache_range((unsigned long) N_TXTADDR(ex), + (unsigned long) N_TXTADDR(ex) + + ex.a_text+ex.a_data); + goto beyond_if; + } + + error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, + PROT_READ | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | + MAP_EXECUTABLE | MAP_32BIT, + fd_offset); + + if (error != N_TXTADDR(ex)) { + send_sig(SIGKILL, current, 0); + return error; + } + + error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | + MAP_EXECUTABLE | MAP_32BIT, + fd_offset + ex.a_text); + if (error != N_DATADDR(ex)) { + send_sig(SIGKILL, current, 0); + return error; + } + } +beyond_if: + set_binfmt(&aout_format); + + set_brk(current->mm->start_brk, current->mm->brk); + + current->mm->start_stack = + (unsigned long)create_aout_tables((char __user *)bprm->p, bprm); + /* start thread */ + loadsegment(fs, 0); + loadsegment(ds, __USER32_DS); + loadsegment(es, __USER32_DS); + load_gs_index(0); + (regs)->ip = ex.a_entry; + (regs)->sp = current->mm->start_stack; + (regs)->flags = 0x200; + (regs)->cs = __USER32_CS; + (regs)->ss = __USER32_DS; + regs->r8 = regs->r9 = regs->r10 = regs->r11 = + regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; + set_fs(USER_DS); + return 0; +} + +static int load_aout_library(struct file *file) +{ + struct inode *inode; + unsigned long bss, start_addr, len, error; + int retval; + struct exec ex; + + inode = file->f_path.dentry->d_inode; + + retval = -ENOEXEC; + error = kernel_read(file, 0, (char *) &ex, sizeof(ex)); + if (error != sizeof(ex)) + goto out; + + /* We come in here for the regular a.out style of shared libraries */ + if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) || + N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || + i_size_read(inode) < + ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { + goto out; + } + + if (N_FLAGS(ex)) + goto out; + + /* For QMAGIC, the starting address is 0x20 into the page. We mask + this off to get the starting address for the page */ + + start_addr = ex.a_entry & 0xfffff000; + + if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { + loff_t pos = N_TXTOFF(ex); + +#ifdef WARN_OLD + static unsigned long error_time; + if (time_after(jiffies, error_time + 5*HZ)) { + printk(KERN_WARNING + "N_TXTOFF is not page aligned. Please convert " + "library: %s\n", + file->f_path.dentry->d_name.name); + error_time = jiffies; + } +#endif + vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); + + file->f_op->read(file, (char __user *)start_addr, + ex.a_text + ex.a_data, &pos); + flush_icache_range((unsigned long) start_addr, + (unsigned long) start_addr + ex.a_text + + ex.a_data); + + retval = 0; + goto out; + } + /* Now use mmap to map the library into memory. 
*/ + error = vm_mmap(file, start_addr, ex.a_text + ex.a_data, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT, + N_TXTOFF(ex)); + retval = error; + if (error != start_addr) + goto out; + + len = PAGE_ALIGN(ex.a_text + ex.a_data); + bss = ex.a_text + ex.a_data + ex.a_bss; + if (bss > len) { + error = vm_brk(start_addr + len, bss - len); + retval = error; + if (error != start_addr + len) + goto out; + } + retval = 0; +out: + return retval; +} + +static int __init init_aout_binfmt(void) +{ + register_binfmt(&aout_format); + return 0; +} + +static void __exit exit_aout_binfmt(void) +{ + unregister_binfmt(&aout_format); +} + +module_init(init_aout_binfmt); +module_exit(exit_aout_binfmt); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c new file mode 100644 index 00000000..4f5bfacf --- /dev/null +++ b/arch/x86/ia32/ia32_signal.c @@ -0,0 +1,562 @@ +/* + * linux/arch/x86_64/ia32/ia32_signal.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes + * 2000-12-* x86-64 compatibility mode signal handling by Andi Kleen + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define FIX_EFLAGS __FIX_EFLAGS + +int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) +{ + int err = 0; + bool ia32 = test_thread_flag(TIF_IA32); + + if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) + return -EFAULT; + + put_user_try { + /* If you change siginfo_t structure, please make sure that + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. */ + put_user_ex(from->si_signo, &to->si_signo); + put_user_ex(from->si_errno, &to->si_errno); + put_user_ex((short)from->si_code, &to->si_code); + + if (from->si_code < 0) { + put_user_ex(from->si_pid, &to->si_pid); + put_user_ex(from->si_uid, &to->si_uid); + put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr); + } else { + /* + * First 32bits of unions are always present: + * si_pid === si_band === si_tid === si_addr(LS half) + */ + put_user_ex(from->_sifields._pad[0], + &to->_sifields._pad[0]); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + break; + case __SI_CHLD >> 16: + if (ia32) { + put_user_ex(from->si_utime, &to->si_utime); + put_user_ex(from->si_stime, &to->si_stime); + } else { + put_user_ex(from->si_utime, &to->_sifields._sigchld_x32._utime); + put_user_ex(from->si_stime, &to->_sifields._sigchld_x32._stime); + } + put_user_ex(from->si_status, &to->si_status); + /* FALL THROUGH */ + default: + case __SI_KILL >> 16: + put_user_ex(from->si_uid, &to->si_uid); + break; + case __SI_POLL >> 16: + put_user_ex(from->si_fd, &to->si_fd); + break; + case __SI_TIMER >> 16: + put_user_ex(from->si_overrun, &to->si_overrun); + put_user_ex(ptr_to_compat(from->si_ptr), + &to->si_ptr); + break; + /* This is not generated by the kernel as of now. 
*/ + case __SI_RT >> 16: + case __SI_MESGQ >> 16: + put_user_ex(from->si_uid, &to->si_uid); + put_user_ex(from->si_int, &to->si_int); + break; + } + } + } put_user_catch(err); + + return err; +} + +int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) +{ + int err = 0; + u32 ptr32; + + if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) + return -EFAULT; + + get_user_try { + get_user_ex(to->si_signo, &from->si_signo); + get_user_ex(to->si_errno, &from->si_errno); + get_user_ex(to->si_code, &from->si_code); + + get_user_ex(to->si_pid, &from->si_pid); + get_user_ex(to->si_uid, &from->si_uid); + get_user_ex(ptr32, &from->si_ptr); + to->si_ptr = compat_ptr(ptr32); + } get_user_catch(err); + + return err; +} + +asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask) +{ + sigset_t blocked; + + current->saved_sigmask = current->blocked; + + mask &= _BLOCKABLE; + siginitset(&blocked, mask); + set_current_blocked(&blocked); + + current->state = TASK_INTERRUPTIBLE; + schedule(); + + set_restore_sigmask(); + return -ERESTARTNOHAND; +} + +asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, + stack_ia32_t __user *uoss_ptr, + struct pt_regs *regs) +{ + stack_t uss, uoss; + int ret, err = 0; + mm_segment_t seg; + + if (uss_ptr) { + u32 ptr; + + memset(&uss, 0, sizeof(stack_t)); + if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t))) + return -EFAULT; + + get_user_try { + get_user_ex(ptr, &uss_ptr->ss_sp); + get_user_ex(uss.ss_flags, &uss_ptr->ss_flags); + get_user_ex(uss.ss_size, &uss_ptr->ss_size); + } get_user_catch(err); + + if (err) + return -EFAULT; + uss.ss_sp = compat_ptr(ptr); + } + seg = get_fs(); + set_fs(KERNEL_DS); + ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp); + set_fs(seg); + if (ret >= 0 && uoss_ptr) { + if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t))) + return -EFAULT; + + put_user_try { + put_user_ex(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp); + put_user_ex(uoss.ss_flags, &uoss_ptr->ss_flags); + put_user_ex(uoss.ss_size, &uoss_ptr->ss_size); + } put_user_catch(err); + + if (err) + ret = -EFAULT; + } + return ret; +} + +/* + * Do a signal return; undo the signal stack. + */ +#define loadsegment_gs(v) load_gs_index(v) +#define loadsegment_fs(v) loadsegment(fs, v) +#define loadsegment_ds(v) loadsegment(ds, v) +#define loadsegment_es(v) loadsegment(es, v) + +#define get_user_seg(seg) ({ unsigned int v; savesegment(seg, v); v; }) +#define set_user_seg(seg, v) loadsegment_##seg(v) + +#define COPY(x) { \ + get_user_ex(regs->x, &sc->x); \ +} + +#define GET_SEG(seg) ({ \ + unsigned short tmp; \ + get_user_ex(tmp, &sc->seg); \ + tmp; \ +}) + +#define COPY_SEG_CPL3(seg) do { \ + regs->seg = GET_SEG(seg) | 3; \ +} while (0) + +#define RELOAD_SEG(seg) { \ + unsigned int pre = GET_SEG(seg); \ + unsigned int cur = get_user_seg(seg); \ + pre |= 3; \ + if (pre != cur) \ + set_user_seg(seg, pre); \ +} + +static int ia32_restore_sigcontext(struct pt_regs *regs, + struct sigcontext_ia32 __user *sc, + unsigned int *pax) +{ + unsigned int tmpflags, err = 0; + void __user *buf; + u32 tmp; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + get_user_try { + /* + * Reload fs and gs if they have changed in the signal + * handler. This does not handle long fs/gs base changes in + * the handler, but does not clobber them at least in the + * normal case. 
+ */ + RELOAD_SEG(gs); + RELOAD_SEG(fs); + RELOAD_SEG(ds); + RELOAD_SEG(es); + + COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); + COPY(dx); COPY(cx); COPY(ip); + /* Don't touch extended registers */ + + COPY_SEG_CPL3(cs); + COPY_SEG_CPL3(ss); + + get_user_ex(tmpflags, &sc->flags); + regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); + /* disable syscall checks */ + regs->orig_ax = -1; + + get_user_ex(tmp, &sc->fpstate); + buf = compat_ptr(tmp); + err |= restore_i387_xstate_ia32(buf); + + get_user_ex(*pax, &sc->ax); + } get_user_catch(err); + + return err; +} + +asmlinkage long sys32_sigreturn(struct pt_regs *regs) +{ + struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); + sigset_t set; + unsigned int ax; + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__get_user(set.sig[0], &frame->sc.oldmask) + || (_COMPAT_NSIG_WORDS > 1 + && __copy_from_user((((char *) &set.sig) + 4), + &frame->extramask, + sizeof(frame->extramask)))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + + if (ia32_restore_sigcontext(regs, &frame->sc, &ax)) + goto badframe; + return ax; + +badframe: + signal_fault(regs, frame, "32bit sigreturn"); + return 0; +} + +asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) +{ + struct rt_sigframe_ia32 __user *frame; + sigset_t set; + unsigned int ax; + struct pt_regs tregs; + + frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4); + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) + goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + sigdelsetmask(&set, ~_BLOCKABLE); + set_current_blocked(&set); + + if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) + goto badframe; + + tregs = *regs; + if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT) + goto badframe; + + return ax; + +badframe: + signal_fault(regs, frame, "32bit rt sigreturn"); + return 0; +} + +/* + * Set up a signal frame. + */ + +static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, + void __user *fpstate, + struct pt_regs *regs, unsigned int mask) +{ + int err = 0; + + put_user_try { + put_user_ex(get_user_seg(gs), (unsigned int __user *)&sc->gs); + put_user_ex(get_user_seg(fs), (unsigned int __user *)&sc->fs); + put_user_ex(get_user_seg(ds), (unsigned int __user *)&sc->ds); + put_user_ex(get_user_seg(es), (unsigned int __user *)&sc->es); + + put_user_ex(regs->di, &sc->di); + put_user_ex(regs->si, &sc->si); + put_user_ex(regs->bp, &sc->bp); + put_user_ex(regs->sp, &sc->sp); + put_user_ex(regs->bx, &sc->bx); + put_user_ex(regs->dx, &sc->dx); + put_user_ex(regs->cx, &sc->cx); + put_user_ex(regs->ax, &sc->ax); + put_user_ex(current->thread.trap_nr, &sc->trapno); + put_user_ex(current->thread.error_code, &sc->err); + put_user_ex(regs->ip, &sc->ip); + put_user_ex(regs->cs, (unsigned int __user *)&sc->cs); + put_user_ex(regs->flags, &sc->flags); + put_user_ex(regs->sp, &sc->sp_at_signal); + put_user_ex(regs->ss, (unsigned int __user *)&sc->ss); + + put_user_ex(ptr_to_compat(fpstate), &sc->fpstate); + + /* non-iBCS2 extensions.. */ + put_user_ex(mask, &sc->oldmask); + put_user_ex(current->thread.cr2, &sc->cr2); + } put_user_catch(err); + + return err; +} + +/* + * Determine which stack to use.. 
+ */ +static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, + size_t frame_size, + void **fpstate) +{ + unsigned long sp; + + /* Default to using normal stack */ + sp = regs->sp; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (sas_ss_flags(sp) == 0) + sp = current->sas_ss_sp + current->sas_ss_size; + } + + /* This is the legacy signal stack switching. */ + else if ((regs->ss & 0xffff) != __USER32_DS && + !(ka->sa.sa_flags & SA_RESTORER) && + ka->sa.sa_restorer) + sp = (unsigned long) ka->sa.sa_restorer; + + if (used_math()) { + sp = sp - sig_xstate_ia32_size; + *fpstate = (struct _fpstate_ia32 *) sp; + if (save_i387_xstate_ia32(*fpstate) < 0) + return (void __user *) -1L; + } + + sp -= frame_size; + /* Align the stack pointer according to the i386 ABI, + * i.e. so that on function entry ((sp + 4) & 15) == 0. */ + sp = ((sp + 4) & -16ul) - 4; + return (void __user *) sp; +} + +int ia32_setup_frame(int sig, struct k_sigaction *ka, + compat_sigset_t *set, struct pt_regs *regs) +{ + struct sigframe_ia32 __user *frame; + void __user *restorer; + int err = 0; + void __user *fpstate = NULL; + + /* copy_to_user optimizes that into a single 8 byte store */ + static const struct { + u16 poplmovl; + u32 val; + u16 int80; + } __attribute__((packed)) code = { + 0xb858, /* popl %eax ; movl $...,%eax */ + __NR_ia32_sigreturn, + 0x80cd, /* int $0x80 */ + }; + + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return -EFAULT; + + if (__put_user(sig, &frame->sig)) + return -EFAULT; + + if (ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0])) + return -EFAULT; + + if (_COMPAT_NSIG_WORDS > 1) { + if (__copy_to_user(frame->extramask, &set->sig[1], + sizeof(frame->extramask))) + return -EFAULT; + } + + if (ka->sa.sa_flags & SA_RESTORER) { + restorer = ka->sa.sa_restorer; + } else { + /* Return stub is in 32bit vsyscall page */ + if (current->mm->context.vdso) + restorer = VDSO32_SYMBOL(current->mm->context.vdso, + sigreturn); + else + restorer = &frame->retcode; + } + + put_user_try { + put_user_ex(ptr_to_compat(restorer), &frame->pretcode); + + /* + * These are actually not used anymore, but left because some + * gdb versions depend on them as a marker. 
+ */ + put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + } put_user_catch(err); + + if (err) + return -EFAULT; + + /* Set up registers for signal handler */ + regs->sp = (unsigned long) frame; + regs->ip = (unsigned long) ka->sa.sa_handler; + + /* Make -mregparm=3 work */ + regs->ax = sig; + regs->dx = 0; + regs->cx = 0; + + loadsegment(ds, __USER32_DS); + loadsegment(es, __USER32_DS); + + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; + + return 0; +} + +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + compat_sigset_t *set, struct pt_regs *regs) +{ + struct rt_sigframe_ia32 __user *frame; + void __user *restorer; + int err = 0; + void __user *fpstate = NULL; + + /* __copy_to_user optimizes that into a single 8 byte store */ + static const struct { + u8 movl; + u32 val; + u16 int80; + u8 pad; + } __attribute__((packed)) code = { + 0xb8, + __NR_ia32_rt_sigreturn, + 0x80cd, + 0, + }; + + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return -EFAULT; + + put_user_try { + put_user_ex(sig, &frame->sig); + put_user_ex(ptr_to_compat(&frame->info), &frame->pinfo); + put_user_ex(ptr_to_compat(&frame->uc), &frame->puc); + err |= copy_siginfo_to_user32(&frame->info, info); + + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + else + restorer = VDSO32_SYMBOL(current->mm->context.vdso, + rt_sigreturn); + put_user_ex(ptr_to_compat(restorer), &frame->pretcode); + + /* + * Not actually used anymore, but left because some gdb + * versions need it. + */ + put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + } put_user_catch(err); + + if (err) + return -EFAULT; + + /* Set up registers for signal handler */ + regs->sp = (unsigned long) frame; + regs->ip = (unsigned long) ka->sa.sa_handler; + + /* Make -mregparm=3 work */ + regs->ax = sig; + regs->dx = (unsigned long) &frame->info; + regs->cx = (unsigned long) &frame->uc; + + loadsegment(ds, __USER32_DS); + loadsegment(es, __USER32_DS); + + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; + + return 0; +} diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S new file mode 100644 index 00000000..e3e73400 --- /dev/null +++ b/arch/x86/ia32/ia32entry.S @@ -0,0 +1,493 @@ +/* + * Compatibility mode system call entry point for x86-64. + * + * Copyright 2000-2002 Andi Kleen, SuSE Labs. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Avoid __ASSEMBLER__'ifying just for this. 
*/ +#include +#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) +#define __AUDIT_ARCH_LE 0x40000000 + +#ifndef CONFIG_AUDITSYSCALL +#define sysexit_audit ia32_ret_from_sys_call +#define sysretl_audit ia32_ret_from_sys_call +#endif + + .section .entry.text, "ax" + + .macro IA32_ARG_FIXUP noebp=0 + movl %edi,%r8d + .if \noebp + .else + movl %ebp,%r9d + .endif + xchg %ecx,%esi + movl %ebx,%edi + movl %edx,%edx /* zero extension */ + .endm + + /* clobbers %eax */ + .macro CLEAR_RREGS offset=0, _r9=rax + xorl %eax,%eax + movq %rax,\offset+R11(%rsp) + movq %rax,\offset+R10(%rsp) + movq %\_r9,\offset+R9(%rsp) + movq %rax,\offset+R8(%rsp) + .endm + + /* + * Reload arg registers from stack in case ptrace changed them. + * We don't reload %eax because syscall_trace_enter() returned + * the %rax value we should see. Instead, we just truncate that + * value to 32 bits again as we did on entry from user mode. + * If it's a new value set by user_regset during entry tracing, + * this matches the normal truncation of the user-mode value. + * If it's -1 to make us punt the syscall, then (u32)-1 is still + * an appropriately invalid value. + */ + .macro LOAD_ARGS32 offset, _r9=0 + .if \_r9 + movl \offset+16(%rsp),%r9d + .endif + movl \offset+40(%rsp),%ecx + movl \offset+48(%rsp),%edx + movl \offset+56(%rsp),%esi + movl \offset+64(%rsp),%edi + movl %eax,%eax /* zero extension */ + .endm + + .macro CFI_STARTPROC32 simple + CFI_STARTPROC \simple + CFI_UNDEFINED r8 + CFI_UNDEFINED r9 + CFI_UNDEFINED r10 + CFI_UNDEFINED r11 + CFI_UNDEFINED r12 + CFI_UNDEFINED r13 + CFI_UNDEFINED r14 + CFI_UNDEFINED r15 + .endm + +#ifdef CONFIG_PARAVIRT +ENTRY(native_usergs_sysret32) + swapgs + sysretl +ENDPROC(native_usergs_sysret32) + +ENTRY(native_irq_enable_sysexit) + swapgs + sti + sysexit +ENDPROC(native_irq_enable_sysexit) +#endif + +/* + * 32bit SYSENTER instruction entry. + * + * Arguments: + * %eax System call number. + * %ebx Arg1 + * %ecx Arg2 + * %edx Arg3 + * %esi Arg4 + * %edi Arg5 + * %ebp user stack + * 0(%ebp) Arg6 + * + * Interrupts off. + * + * This is purely a fast path. For anything complicated we use the int 0x80 + * path below. Set up a complete hardware stack frame to share code + * with the int 0x80 path. 
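+ *
+ * As an illustrative sketch (the actual stub lives in the 32-bit vDSO,
+ * not in this file), userspace typically reaches this entry through
+ * __kernel_vsyscall, roughly:
+ *
+ *	push %ecx
+ *	push %edx
+ *	push %ebp
+ *	movl %esp, %ebp
+ *	sysenter
+ *
+ * which is how %ebp comes to carry the user stack pointer that Arg6
+ * is fetched from at 0(%ebp).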
+ */ +ENTRY(ia32_sysenter_target) + CFI_STARTPROC32 simple + CFI_SIGNAL_FRAME + CFI_DEF_CFA rsp,0 + CFI_REGISTER rsp,rbp + SWAPGS_UNSAFE_STACK + movq PER_CPU_VAR(kernel_stack), %rsp + addq $(KERNEL_STACK_OFFSET),%rsp + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs, here we enable it straight after entry: + */ + ENABLE_INTERRUPTS(CLBR_NONE) + movl %ebp,%ebp /* zero extension */ + pushq_cfi $__USER32_DS + /*CFI_REL_OFFSET ss,0*/ + pushq_cfi %rbp + CFI_REL_OFFSET rsp,0 + pushfq_cfi + /*CFI_REL_OFFSET rflags,0*/ + movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d + CFI_REGISTER rip,r10 + pushq_cfi $__USER32_CS + /*CFI_REL_OFFSET cs,0*/ + movl %eax, %eax + pushq_cfi %r10 + CFI_REL_OFFSET rip,0 + pushq_cfi %rax + cld + SAVE_ARGS 0,1,0 + /* no need to do an access_ok check here because rbp has been + 32bit zero extended */ +1: movl (%rbp),%ebp + .section __ex_table,"a" + .quad 1b,ia32_badarg + .previous + orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + CFI_REMEMBER_STATE + jnz sysenter_tracesys + cmpq $(IA32_NR_syscalls-1),%rax + ja ia32_badsys +sysenter_do_call: + IA32_ARG_FIXUP +sysenter_dispatch: + call *ia32_sys_call_table(,%rax,8) + movq %rax,RAX-ARGOFFSET(%rsp) + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF + testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + jnz sysexit_audit +sysexit_from_sys_call: + andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) + /* clear IF, that popfq doesn't enable interrupts early */ + andl $~0x200,EFLAGS-R11(%rsp) + movl RIP-R11(%rsp),%edx /* User %eip */ + CFI_REGISTER rip,rdx + RESTORE_ARGS 0,24,0,0,0,0 + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + popfq_cfi + /*CFI_RESTORE rflags*/ + popq_cfi %rcx /* User %esp */ + CFI_REGISTER rsp,rcx + TRACE_IRQS_ON + ENABLE_INTERRUPTS_SYSEXIT32 + +#ifdef CONFIG_AUDITSYSCALL + .macro auditsys_entry_common + movl %esi,%r9d /* 6th arg: 4th syscall arg */ + movl %edx,%r8d /* 5th arg: 3rd syscall arg */ + /* (already in %ecx) 4th arg: 2nd syscall arg */ + movl %ebx,%edx /* 3rd arg: 1st syscall arg */ + movl %eax,%esi /* 2nd arg: syscall number */ + movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ + call __audit_syscall_entry + movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ + cmpq $(IA32_NR_syscalls-1),%rax + ja ia32_badsys + movl %ebx,%edi /* reload 1st syscall arg */ + movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ + movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */ + movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */ + movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ + .endm + + .macro auditsys_exit exit + testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + jnz ia32_ret_from_sys_call + TRACE_IRQS_ON + sti + movl %eax,%esi /* second arg, syscall return value */ + cmpl $-MAX_ERRNO,%eax /* is it an error ? 
*/ + jbe 1f + movslq %eax, %rsi /* if error sign extend to 64 bits */ +1: setbe %al /* 1 if error, 0 if not */ + movzbl %al,%edi /* zero-extend that into %edi */ + call __audit_syscall_exit + movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */ + movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi + cli + TRACE_IRQS_OFF + testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + jz \exit + CLEAR_RREGS -ARGOFFSET + jmp int_with_check + .endm + +sysenter_auditsys: + CFI_RESTORE_STATE + auditsys_entry_common + movl %ebp,%r9d /* reload 6th syscall arg */ + jmp sysenter_dispatch + +sysexit_audit: + auditsys_exit sysexit_from_sys_call +#endif + +sysenter_tracesys: +#ifdef CONFIG_AUDITSYSCALL + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + jz sysenter_auditsys +#endif + SAVE_REST + CLEAR_RREGS + movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ + movq %rsp,%rdi /* &pt_regs -> arg1 */ + call syscall_trace_enter + LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST + cmpq $(IA32_NR_syscalls-1),%rax + ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ + jmp sysenter_do_call + CFI_ENDPROC +ENDPROC(ia32_sysenter_target) + +/* + * 32bit SYSCALL instruction entry. + * + * Arguments: + * %eax System call number. + * %ebx Arg1 + * %ecx return EIP + * %edx Arg3 + * %esi Arg4 + * %edi Arg5 + * %ebp Arg2 [note: not saved in the stack frame, should not be touched] + * %esp user stack + * 0(%esp) Arg6 + * + * Interrupts off. + * + * This is purely a fast path. For anything complicated we use the int 0x80 + * path below. Set up a complete hardware stack frame to share code + * with the int 0x80 path. + */ +ENTRY(ia32_cstar_target) + CFI_STARTPROC32 simple + CFI_SIGNAL_FRAME + CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET + CFI_REGISTER rip,rcx + /*CFI_REGISTER rflags,r11*/ + SWAPGS_UNSAFE_STACK + movl %esp,%r8d + CFI_REGISTER rsp,r8 + movq PER_CPU_VAR(kernel_stack),%rsp + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs and here we enable it straight after entry: + */ + ENABLE_INTERRUPTS(CLBR_NONE) + SAVE_ARGS 8,0,0 + movl %eax,%eax /* zero extension */ + movq %rax,ORIG_RAX-ARGOFFSET(%rsp) + movq %rcx,RIP-ARGOFFSET(%rsp) + CFI_REL_OFFSET rip,RIP-ARGOFFSET + movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */ + movl %ebp,%ecx + movq $__USER32_CS,CS-ARGOFFSET(%rsp) + movq $__USER32_DS,SS-ARGOFFSET(%rsp) + movq %r11,EFLAGS-ARGOFFSET(%rsp) + /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ + movq %r8,RSP-ARGOFFSET(%rsp) + CFI_REL_OFFSET rsp,RSP-ARGOFFSET + /* no need to do an access_ok check here because r8 has been + 32bit zero extended */ + /* hardware stack frame is complete now */ +1: movl (%r8),%r9d + .section __ex_table,"a" + .quad 1b,ia32_badarg + .previous + orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + CFI_REMEMBER_STATE + jnz cstar_tracesys + cmpq $IA32_NR_syscalls-1,%rax + ja ia32_badsys +cstar_do_call: + IA32_ARG_FIXUP 1 +cstar_dispatch: + call *ia32_sys_call_table(,%rax,8) + movq %rax,RAX-ARGOFFSET(%rsp) + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF + testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + jnz sysretl_audit +sysretl_from_sys_call: + andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) + RESTORE_ARGS 0,-ARG_SKIP,0,0,0 + movl RIP-ARGOFFSET(%rsp),%ecx + CFI_REGISTER rip,rcx + movl EFLAGS-ARGOFFSET(%rsp),%r11d + 
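+	/*
+	 * (Annotation: %r11d now holds the user eflags image that
+	 * USERGS_SYSRET32 restores; the xorq instructions below zero
+	 * r8-r10 so stale kernel values in the 64-bit-only registers
+	 * are not left behind on the return to compat userspace.)
+	 */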
/*CFI_REGISTER rflags,r11*/ + xorq %r10,%r10 + xorq %r9,%r9 + xorq %r8,%r8 + TRACE_IRQS_ON + movl RSP-ARGOFFSET(%rsp),%esp + CFI_RESTORE rsp + USERGS_SYSRET32 + +#ifdef CONFIG_AUDITSYSCALL +cstar_auditsys: + CFI_RESTORE_STATE + movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */ + auditsys_entry_common + movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */ + jmp cstar_dispatch + +sysretl_audit: + auditsys_exit sysretl_from_sys_call +#endif + +cstar_tracesys: +#ifdef CONFIG_AUDITSYSCALL + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + jz cstar_auditsys +#endif + xchgl %r9d,%ebp + SAVE_REST + CLEAR_RREGS 0, r9 + movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ + movq %rsp,%rdi /* &pt_regs -> arg1 */ + call syscall_trace_enter + LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ + RESTORE_REST + xchgl %ebp,%r9d + cmpq $(IA32_NR_syscalls-1),%rax + ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ + jmp cstar_do_call +END(ia32_cstar_target) + +ia32_badarg: + movq $-EFAULT,%rax + jmp ia32_sysret + CFI_ENDPROC + +/* + * Emulated IA32 system calls via int 0x80. + * + * Arguments: + * %eax System call number. + * %ebx Arg1 + * %ecx Arg2 + * %edx Arg3 + * %esi Arg4 + * %edi Arg5 + * %ebp Arg6 [note: not saved in the stack frame, should not be touched] + * + * Notes: + * Uses the same stack frame as the x86-64 version. + * All registers except %eax must be saved (but ptrace may violate that) + * Arguments are zero extended. For system calls that want sign extension and + * take long arguments a wrapper is needed. Most calls can just be called + * directly. + * Assumes it is only called from user space and entered with interrupts off. + */ + +ENTRY(ia32_syscall) + CFI_STARTPROC32 simple + CFI_SIGNAL_FRAME + CFI_DEF_CFA rsp,SS+8-RIP + /*CFI_REL_OFFSET ss,SS-RIP*/ + CFI_REL_OFFSET rsp,RSP-RIP + /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ + /*CFI_REL_OFFSET cs,CS-RIP*/ + CFI_REL_OFFSET rip,RIP-RIP + PARAVIRT_ADJUST_EXCEPTION_FRAME + SWAPGS + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs and here we enable it straight after entry: + */ + ENABLE_INTERRUPTS(CLBR_NONE) + movl %eax,%eax + pushq_cfi %rax + cld + /* note the registers are not zero extended to the sf. + this could be a problem. 
*/ + SAVE_ARGS 0,1,0 + orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + jnz ia32_tracesys + cmpq $(IA32_NR_syscalls-1),%rax + ja ia32_badsys +ia32_do_call: + IA32_ARG_FIXUP + call *ia32_sys_call_table(,%rax,8) # xxx: rip relative +ia32_sysret: + movq %rax,RAX-ARGOFFSET(%rsp) +ia32_ret_from_sys_call: + CLEAR_RREGS -ARGOFFSET + jmp int_ret_from_sys_call + +ia32_tracesys: + SAVE_REST + CLEAR_RREGS + movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ + movq %rsp,%rdi /* &pt_regs -> arg1 */ + call syscall_trace_enter + LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST + cmpq $(IA32_NR_syscalls-1),%rax + ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ + jmp ia32_do_call +END(ia32_syscall) + +ia32_badsys: + movq $0,ORIG_RAX-ARGOFFSET(%rsp) + movq $-ENOSYS,%rax + jmp ia32_sysret + + CFI_ENDPROC + + .macro PTREGSCALL label, func, arg + ALIGN +GLOBAL(\label) + leaq \func(%rip),%rax + leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ + jmp ia32_ptregs_common + .endm + + CFI_STARTPROC32 + + PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi + PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi + PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx + PTREGSCALL stub32_execve, sys32_execve, %rcx + PTREGSCALL stub32_fork, sys_fork, %rdi + PTREGSCALL stub32_clone, sys32_clone, %rdx + PTREGSCALL stub32_vfork, sys_vfork, %rdi + PTREGSCALL stub32_iopl, sys_iopl, %rsi + + ALIGN +ia32_ptregs_common: + popq %r11 + CFI_ENDPROC + CFI_STARTPROC32 simple + CFI_SIGNAL_FRAME + CFI_DEF_CFA rsp,SS+8-ARGOFFSET + CFI_REL_OFFSET rax,RAX-ARGOFFSET + CFI_REL_OFFSET rcx,RCX-ARGOFFSET + CFI_REL_OFFSET rdx,RDX-ARGOFFSET + CFI_REL_OFFSET rsi,RSI-ARGOFFSET + CFI_REL_OFFSET rdi,RDI-ARGOFFSET + CFI_REL_OFFSET rip,RIP-ARGOFFSET +/* CFI_REL_OFFSET cs,CS-ARGOFFSET*/ +/* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ + CFI_REL_OFFSET rsp,RSP-ARGOFFSET +/* CFI_REL_OFFSET ss,SS-ARGOFFSET*/ + SAVE_REST + call *%rax + RESTORE_REST + jmp ia32_sysret /* misbalances the return cache */ + CFI_ENDPROC +END(ia32_ptregs_common) diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c new file mode 100644 index 00000000..29cdcd02 --- /dev/null +++ b/arch/x86/ia32/ipc32.c @@ -0,0 +1,54 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +asmlinkage long sys32_ipc(u32 call, int first, int second, int third, + compat_uptr_t ptr, u32 fifth) +{ + int version; + + version = call >> 16; /* hack for backward compatibility */ + call &= 0xffff; + + switch (call) { + case SEMOP: + /* struct sembuf is the same on 32 and 64bit :)) */ + return sys_semtimedop(first, compat_ptr(ptr), second, NULL); + case SEMTIMEDOP: + return compat_sys_semtimedop(first, compat_ptr(ptr), second, + compat_ptr(fifth)); + case SEMGET: + return sys_semget(first, second, third); + case SEMCTL: + return compat_sys_semctl(first, second, third, compat_ptr(ptr)); + + case MSGSND: + return compat_sys_msgsnd(first, second, third, compat_ptr(ptr)); + case MSGRCV: + return compat_sys_msgrcv(first, second, fifth, third, + version, compat_ptr(ptr)); + case MSGGET: + return sys_msgget((key_t) first, second); + case MSGCTL: + return compat_sys_msgctl(first, second, compat_ptr(ptr)); + + case SHMAT: + return compat_sys_shmat(first, second, third, version, + compat_ptr(ptr)); + case SHMDT: + return sys_shmdt(compat_ptr(ptr)); + case SHMGET: + return sys_shmget(first, 
(unsigned)second, third); + case SHMCTL: + return compat_sys_shmctl(first, second, compat_ptr(ptr)); + } + return -ENOSYS; +} diff --git a/arch/x86/ia32/nosyscall.c b/arch/x86/ia32/nosyscall.c new file mode 100644 index 00000000..51ecd5b4 --- /dev/null +++ b/arch/x86/ia32/nosyscall.c @@ -0,0 +1,7 @@ +#include +#include + +long compat_ni_syscall(void) +{ + return -ENOSYS; +} diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c new file mode 100644 index 00000000..aec2202a --- /dev/null +++ b/arch/x86/ia32/sys_ia32.c @@ -0,0 +1,516 @@ +/* + * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on + * sys_sparc32 + * + * Copyright (C) 2000 VA Linux Co + * Copyright (C) 2000 Don Dugger + * Copyright (C) 1999 Arun Sharma + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 2000 Hewlett-Packard Co. + * Copyright (C) 2000 David Mosberger-Tang + * Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port) + * + * These routines maintain argument size conversion between 32bit and 64bit + * environment. In 2.5 most of this should be moved to a generic directory. + * + * This file assumes that there is a hole at the end of user address space. + * + * Some of the functions are LE specific currently. These are + * hopefully all marked. This should be fixed. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define AA(__x) ((unsigned long)(__x)) + + +asmlinkage long sys32_truncate64(const char __user *filename, + unsigned long offset_low, + unsigned long offset_high) +{ + return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low); +} + +asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low, + unsigned long offset_high) +{ + return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low); +} + +/* + * Another set for IA32/LFS -- x86_64 struct stat is different due to + * support for 64bit inode numbers. 
+ */ +static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) +{ + typeof(ubuf->st_uid) uid = 0; + typeof(ubuf->st_gid) gid = 0; + SET_UID(uid, stat->uid); + SET_GID(gid, stat->gid); + if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) || + __put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) || + __put_user(stat->ino, &ubuf->__st_ino) || + __put_user(stat->ino, &ubuf->st_ino) || + __put_user(stat->mode, &ubuf->st_mode) || + __put_user(stat->nlink, &ubuf->st_nlink) || + __put_user(uid, &ubuf->st_uid) || + __put_user(gid, &ubuf->st_gid) || + __put_user(huge_encode_dev(stat->rdev), &ubuf->st_rdev) || + __put_user(stat->size, &ubuf->st_size) || + __put_user(stat->atime.tv_sec, &ubuf->st_atime) || + __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec) || + __put_user(stat->mtime.tv_sec, &ubuf->st_mtime) || + __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) || + __put_user(stat->ctime.tv_sec, &ubuf->st_ctime) || + __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) || + __put_user(stat->blksize, &ubuf->st_blksize) || + __put_user(stat->blocks, &ubuf->st_blocks)) + return -EFAULT; + return 0; +} + +asmlinkage long sys32_stat64(const char __user *filename, + struct stat64 __user *statbuf) +{ + struct kstat stat; + int ret = vfs_stat(filename, &stat); + + if (!ret) + ret = cp_stat64(statbuf, &stat); + return ret; +} + +asmlinkage long sys32_lstat64(const char __user *filename, + struct stat64 __user *statbuf) +{ + struct kstat stat; + int ret = vfs_lstat(filename, &stat); + if (!ret) + ret = cp_stat64(statbuf, &stat); + return ret; +} + +asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) +{ + struct kstat stat; + int ret = vfs_fstat(fd, &stat); + if (!ret) + ret = cp_stat64(statbuf, &stat); + return ret; +} + +asmlinkage long sys32_fstatat(unsigned int dfd, const char __user *filename, + struct stat64 __user *statbuf, int flag) +{ + struct kstat stat; + int error; + + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_stat64(statbuf, &stat); +} + +/* + * Linux/i386 didn't use to be able to handle more than + * 4 system call parameters, so these system calls used a memory + * block for parameter passing.. + */ + +struct mmap_arg_struct32 { + unsigned int addr; + unsigned int len; + unsigned int prot; + unsigned int flags; + unsigned int fd; + unsigned int offset; +}; + +asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg) +{ + struct mmap_arg_struct32 a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + + if (a.offset & ~PAGE_MASK) + return -EINVAL; + + return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset>>PAGE_SHIFT); +} + +asmlinkage long sys32_mprotect(unsigned long start, size_t len, + unsigned long prot) +{ + return sys_mprotect(start, len, prot); +} + +asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act, + struct sigaction32 __user *oact, + unsigned int sigsetsize) +{ + struct k_sigaction new_ka, old_ka; + int ret; + compat_sigset_t set32; + + /* XXX: Don't preclude handling different sized sigset_t's. 
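+	 * (Layout sketch for the conversions below: each 64-bit native
+	 * word is assembled from two consecutive 32-bit compat words,
+	 * i.e. sig[k] = set32.sig[2k] | ((long)set32.sig[2k+1] << 32).)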
 */
+	if (sigsetsize != sizeof(compat_sigset_t))
+		return -EINVAL;
+
+	if (act) {
+		compat_uptr_t handler, restorer;
+
+		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+		    __get_user(handler, &act->sa_handler) ||
+		    __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
+		    __get_user(restorer, &act->sa_restorer) ||
+		    __copy_from_user(&set32, &act->sa_mask,
+				     sizeof(compat_sigset_t)))
+			return -EFAULT;
+		new_ka.sa.sa_handler = compat_ptr(handler);
+		new_ka.sa.sa_restorer = compat_ptr(restorer);
+
+		/*
+		 * FIXME: here we rely on _COMPAT_NSIG_WORDS being >=
+		 * _NSIG_WORDS << 1
+		 */
+		switch (_NSIG_WORDS) {
+		case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6]
+				| (((long)set32.sig[7]) << 32);
+		case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4]
+				| (((long)set32.sig[5]) << 32);
+		case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2]
+				| (((long)set32.sig[3]) << 32);
+		case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0]
+				| (((long)set32.sig[1]) << 32);
+		}
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		/*
+		 * FIXME: here we rely on _COMPAT_NSIG_WORDS being >=
+		 * _NSIG_WORDS << 1
+		 */
+		switch (_NSIG_WORDS) {
+		case 4:
+			set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32);
+			set32.sig[6] = old_ka.sa.sa_mask.sig[3];
+		case 3:
+			set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32);
+			set32.sig[4] = old_ka.sa.sa_mask.sig[2];
+		case 2:
+			set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32);
+			set32.sig[2] = old_ka.sa.sa_mask.sig[1];
+		case 1:
+			set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32);
+			set32.sig[0] = old_ka.sa.sa_mask.sig[0];
+		}
+		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+		    __put_user(ptr_to_compat(old_ka.sa.sa_handler),
+			       &oact->sa_handler) ||
+		    __put_user(ptr_to_compat(old_ka.sa.sa_restorer),
+			       &oact->sa_restorer) ||
+		    __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
+		    __copy_to_user(&oact->sa_mask, &set32,
+				   sizeof(compat_sigset_t)))
+			return -EFAULT;
+	}
+
+	return ret;
+}
+
+asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
+				struct old_sigaction32 __user *oact)
+{
+	struct k_sigaction new_ka, old_ka;
+	int ret;
+
+	if (act) {
+		compat_old_sigset_t mask;
+		compat_uptr_t handler, restorer;
+
+		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+		    __get_user(handler, &act->sa_handler) ||
+		    __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
+		    __get_user(restorer, &act->sa_restorer) ||
+		    __get_user(mask, &act->sa_mask))
+			return -EFAULT;
+
+		new_ka.sa.sa_handler = compat_ptr(handler);
+		new_ka.sa.sa_restorer = compat_ptr(restorer);
+
+		siginitset(&new_ka.sa.sa_mask, mask);
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+		    __put_user(ptr_to_compat(old_ka.sa.sa_handler),
+			       &oact->sa_handler) ||
+		    __put_user(ptr_to_compat(old_ka.sa.sa_restorer),
+			       &oact->sa_restorer) ||
+		    __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
+		    __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
+			return -EFAULT;
+	}
+
+	return ret;
+}
+
+asmlinkage long sys32_alarm(unsigned int seconds)
+{
+	return alarm_setitimer(seconds);
+}
+
+asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
+			      int options)
+{
+	return compat_sys_wait4(pid, stat_addr, options, NULL);
+}
+
+/* 32-bit timeval and related flotsam.
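+ *
+ * (The helpers that follow use a common compat pattern: temporarily
+ * widen the usercopy checks with set_fs(KERNEL_DS), call the native
+ * syscall on a kernel-stack copy of the argument, restore the old
+ * segment limit with set_fs(), and translate the result into the
+ * 32-bit layout userspace expects.)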
*/ + +asmlinkage long sys32_sysfs(int option, u32 arg1, u32 arg2) +{ + return sys_sysfs(option, arg1, arg2); +} + +asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, + struct compat_timespec __user *interval) +{ + struct timespec t; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t); + set_fs(old_fs); + if (put_compat_timespec(&t, interval)) + return -EFAULT; + return ret; +} + +asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, + compat_size_t sigsetsize) +{ + sigset_t s; + compat_sigset_t s32; + int ret; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize); + set_fs(old_fs); + if (!ret) { + switch (_NSIG_WORDS) { + case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; + case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; + case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; + case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; + } + if (copy_to_user(set, &s32, sizeof(compat_sigset_t))) + return -EFAULT; + } + return ret; +} + +asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, + compat_siginfo_t __user *uinfo) +{ + siginfo_t info; + int ret; + mm_segment_t old_fs = get_fs(); + + if (copy_siginfo_from_user32(&info, uinfo)) + return -EFAULT; + set_fs(KERNEL_DS); + ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info); + set_fs(old_fs); + return ret; +} + +/* warning: next two assume little endian */ +asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, + u32 poslo, u32 poshi) +{ + return sys_pread64(fd, ubuf, count, + ((loff_t)AA(poshi) << 32) | AA(poslo)); +} + +asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf, + u32 count, u32 poslo, u32 poshi) +{ + return sys_pwrite64(fd, ubuf, count, + ((loff_t)AA(poshi) << 32) | AA(poslo)); +} + + +asmlinkage long sys32_personality(unsigned long personality) +{ + int ret; + + if (personality(current->personality) == PER_LINUX32 && + personality == PER_LINUX) + personality = PER_LINUX32; + ret = sys_personality(personality); + if (ret == PER_LINUX32) + ret = PER_LINUX; + return ret; +} + +asmlinkage long sys32_sendfile(int out_fd, int in_fd, + compat_off_t __user *offset, s32 count) +{ + mm_segment_t old_fs = get_fs(); + int ret; + off_t of; + + if (offset && get_user(of, offset)) + return -EFAULT; + + set_fs(KERNEL_DS); + ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *)&of : NULL, + count); + set_fs(old_fs); + + if (offset && put_user(of, offset)) + return -EFAULT; + return ret; +} + +asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv, + compat_uptr_t __user *envp, struct pt_regs *regs) +{ + long error; + char *filename; + + filename = getname(name); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + return error; + error = compat_do_execve(filename, argv, envp, regs); + putname(filename); + return error; +} + +asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp, + struct pt_regs *regs) +{ + void __user *parent_tid = (void __user *)regs->dx; + void __user *child_tid = (void __user *)regs->di; + + if (!newsp) + newsp = regs->sp; + return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); +} + +/* + * Some system calls that need sign extended arguments. This could be + * done by a generic wrapper. 
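+ *
+ * Hypothetically, such a generic wrapper could be a macro along these
+ * lines (a sketch only, not what this patch does):
+ *
+ *	#define SIGN_EXTEND_SYSCALL1(name, t1)			\
+ *	long sys32_##name(t1 a1)				\
+ *	{							\
+ *		return sys_##name((long)a1);			\
+ *	}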
+ */ +long sys32_lseek(unsigned int fd, int offset, unsigned int whence) +{ + return sys_lseek(fd, offset, whence); +} + +long sys32_kill(int pid, int sig) +{ + return sys_kill(pid, sig); +} + +long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, + __u32 len_low, __u32 len_high, int advice) +{ + return sys_fadvise64_64(fd, + (((u64)offset_high)<<32) | offset_low, + (((u64)len_high)<<32) | len_low, + advice); +} + +long sys32_vm86_warning(void) +{ + struct task_struct *me = current; + static char lastcomm[sizeof(me->comm)]; + + if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) { + compat_printk(KERN_INFO + "%s: vm86 mode not supported on 64 bit kernel\n", + me->comm); + strncpy(lastcomm, me->comm, sizeof(lastcomm)); + } + return -ENOSYS; +} + +long sys32_lookup_dcookie(u32 addr_low, u32 addr_high, + char __user *buf, size_t len) +{ + return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len); +} + +asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, + size_t count) +{ + return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count); +} + +asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi, + unsigned n_low, unsigned n_hi, int flags) +{ + return sys_sync_file_range(fd, + ((u64)off_hi << 32) | off_low, + ((u64)n_hi << 32) | n_low, flags); +} + +asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, + size_t len, int advice) +{ + return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo, + len, advice); +} + +asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo, + unsigned offset_hi, unsigned len_lo, + unsigned len_hi) +{ + return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo, + ((u64)len_hi << 32) | len_lo); +} + +asmlinkage long sys32_fanotify_mark(int fanotify_fd, unsigned int flags, + u32 mask_lo, u32 mask_hi, + int fd, const char __user *pathname) +{ + return sys_fanotify_mark(fanotify_fd, flags, + ((u64)mask_hi << 32) | mask_lo, + fd, pathname); +} diff --git a/arch/x86/ia32/syscall_ia32.c b/arch/x86/ia32/syscall_ia32.c new file mode 100644 index 00000000..4754ba0f --- /dev/null +++ b/arch/x86/ia32/syscall_ia32.c @@ -0,0 +1,25 @@ +/* System call table for ia32 emulation. */ + +#include +#include +#include +#include + +#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ; +#include +#undef __SYSCALL_I386 + +#define __SYSCALL_I386(nr, sym, compat) [nr] = compat, + +typedef void (*sys_call_ptr_t)(void); + +extern void compat_ni_syscall(void); + +const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = { + /* + * Smells like a compiler bug -- it doesn't work + * when the & below is removed. + */ + [0 ... 
__NR_ia32_syscall_max] = &compat_ni_syscall, +#include +}; diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild new file mode 100644 index 00000000..f9c0d3ba --- /dev/null +++ b/arch/x86/include/asm/Kbuild @@ -0,0 +1,28 @@ +include include/asm-generic/Kbuild.asm + +header-y += boot.h +header-y += bootparam.h +header-y += debugreg.h +header-y += e820.h +header-y += hw_breakpoint.h +header-y += hyperv.h +header-y += ist.h +header-y += ldt.h +header-y += mce.h +header-y += msr-index.h +header-y += msr.h +header-y += mtrr.h +header-y += posix_types_32.h +header-y += posix_types_64.h +header-y += posix_types_x32.h +header-y += prctl.h +header-y += processor-flags.h +header-y += ptrace-abi.h +header-y += sigcontext32.h +header-y += ucontext.h +header-y += vm86.h +header-y += vsyscall.h + +genhdr-y += unistd_32.h +genhdr-y += unistd_64.h +genhdr-y += unistd_x32.h diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h new file mode 100644 index 00000000..7a15588e --- /dev/null +++ b/arch/x86/include/asm/a.out-core.h @@ -0,0 +1,65 @@ +/* a.out coredump register dumper + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _ASM_X86_A_OUT_CORE_H +#define _ASM_X86_A_OUT_CORE_H + +#ifdef __KERNEL__ +#ifdef CONFIG_X86_32 + +#include +#include +#include + +/* + * fill in the user structure for an a.out core dump + */ +static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) +{ +/* changed the size calculations - should hopefully work better. 
lbt */ + dump->magic = CMAGIC; + dump->start_code = 0; + dump->start_stack = regs->sp & ~(PAGE_SIZE - 1); + dump->u_tsize = ((unsigned long)current->mm->end_code) >> PAGE_SHIFT; + dump->u_dsize = ((unsigned long)(current->mm->brk + (PAGE_SIZE - 1))) + >> PAGE_SHIFT; + dump->u_dsize -= dump->u_tsize; + dump->u_ssize = 0; + aout_dump_debugregs(dump); + + if (dump->start_stack < TASK_SIZE) + dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) + >> PAGE_SHIFT; + + dump->regs.bx = regs->bx; + dump->regs.cx = regs->cx; + dump->regs.dx = regs->dx; + dump->regs.si = regs->si; + dump->regs.di = regs->di; + dump->regs.bp = regs->bp; + dump->regs.ax = regs->ax; + dump->regs.ds = (u16)regs->ds; + dump->regs.es = (u16)regs->es; + dump->regs.fs = (u16)regs->fs; + dump->regs.gs = get_user_gs(regs); + dump->regs.orig_ax = regs->orig_ax; + dump->regs.ip = regs->ip; + dump->regs.cs = (u16)regs->cs; + dump->regs.flags = regs->flags; + dump->regs.sp = regs->sp; + dump->regs.ss = (u16)regs->ss; + + dump->u_fpvalid = dump_fpu(regs, &dump->i387); +} + +#endif /* CONFIG_X86_32 */ +#endif /* __KERNEL__ */ +#endif /* _ASM_X86_A_OUT_CORE_H */ diff --git a/arch/x86/include/asm/a.out.h b/arch/x86/include/asm/a.out.h new file mode 100644 index 00000000..4684f97a --- /dev/null +++ b/arch/x86/include/asm/a.out.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_A_OUT_H +#define _ASM_X86_A_OUT_H + +struct exec +{ + unsigned int a_info; /* Use macros N_MAGIC, etc for access */ + unsigned a_text; /* length of text, in bytes */ + unsigned a_data; /* length of data, in bytes */ + unsigned a_bss; /* length of uninitialized data area for file, in bytes */ + unsigned a_syms; /* length of symbol table data in file, in bytes */ + unsigned a_entry; /* start address */ + unsigned a_trsize; /* length of relocation info for text, in bytes */ + unsigned a_drsize; /* length of relocation info for data, in bytes */ +}; + +#define N_TRSIZE(a) ((a).a_trsize) +#define N_DRSIZE(a) ((a).a_drsize) +#define N_SYMSIZE(a) ((a).a_syms) + +#endif /* _ASM_X86_A_OUT_H */ diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h new file mode 100644 index 00000000..610001d3 --- /dev/null +++ b/arch/x86/include/asm/acpi.h @@ -0,0 +1,193 @@ +#ifndef _ASM_X86_ACPI_H +#define _ASM_X86_ACPI_H + +/* + * Copyright (C) 2001 Paul Diefenbaugh + * Copyright (C) 2001 Patrick Mochel + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include + +#include +#include +#include +#include +#include + +#define COMPILER_DEPENDENT_INT64 long long +#define COMPILER_DEPENDENT_UINT64 unsigned long long + +/* + * Calling conventions: + * + * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads) + * ACPI_EXTERNAL_XFACE - External ACPI interfaces + * ACPI_INTERNAL_XFACE - Internal ACPI interfaces + * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces + */ +#define ACPI_SYSTEM_XFACE +#define ACPI_EXTERNAL_XFACE +#define ACPI_INTERNAL_XFACE +#define ACPI_INTERNAL_VAR_XFACE + +/* Asm macros */ + +#define ACPI_ASM_MACROS +#define BREAKPOINT3 +#define ACPI_DISABLE_IRQS() local_irq_disable() +#define ACPI_ENABLE_IRQS() local_irq_enable() +#define ACPI_FLUSH_CPU_CACHE() wbinvd() + +int __acpi_acquire_global_lock(unsigned int *lock); +int __acpi_release_global_lock(unsigned int *lock); + +#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \ + ((Acq) = __acpi_acquire_global_lock(&facs->global_lock)) + +#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \ + ((Acq) = __acpi_release_global_lock(&facs->global_lock)) + +/* + * Math helper asm macros + */ +#define ACPI_DIV_64_BY_32(n_hi, n_lo, d32, q32, r32) \ + asm("divl %2;" \ + : "=a"(q32), "=d"(r32) \ + : "r"(d32), \ + "0"(n_lo), "1"(n_hi)) + + +#define ACPI_SHIFT_RIGHT_64(n_hi, n_lo) \ + asm("shrl $1,%2 ;" \ + "rcrl $1,%3;" \ + : "=r"(n_hi), "=r"(n_lo) \ + : "0"(n_hi), "1"(n_lo)) + +#ifdef CONFIG_ACPI +extern int acpi_lapic; +extern int acpi_ioapic; +extern int acpi_noirq; +extern int acpi_strict; +extern int acpi_disabled; +extern int acpi_pci_disabled; +extern int acpi_skip_timer_override; +extern int acpi_use_timer_override; +extern int acpi_fix_pin2_polarity; + +extern u8 acpi_sci_flags; +extern int acpi_sci_override_gsi; +void acpi_pic_sci_set_trigger(unsigned int, u16); + +extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi, + int trigger, int polarity); + +static inline void disable_acpi(void) +{ + acpi_disabled = 1; + acpi_pci_disabled = 1; + acpi_noirq = 1; +} + +extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); + +static inline void acpi_noirq_set(void) { acpi_noirq = 1; } +static inline void acpi_disable_pci(void) +{ + acpi_pci_disabled = 1; + acpi_noirq_set(); +} + +/* Low-level suspend routine. */ +extern int acpi_suspend_lowlevel(void); + +extern const unsigned char acpi_wakeup_code[]; +#define acpi_wakeup_address (__pa(TRAMPOLINE_SYM(acpi_wakeup_code))) + +/* early initialization routine */ +extern void acpi_reserve_wakeup_memory(void); + +/* + * Check if the CPU can handle C2 and deeper + */ +static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) +{ + /* + * Early models (<=5) of AMD Opterons are not supposed to go into + * C2 state. 
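+ * (The check below encodes this as family 0x0F, vendor AMD,
+ * model <= 0x05, stepping < 0x0A.)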
+ * + * Steppings 0x0A and later are good + */ + if (boot_cpu_data.x86 == 0x0F && + boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86_model <= 0x05 && + boot_cpu_data.x86_mask < 0x0A) + return 1; + else if (amd_e400_c1e_detected) + return 1; + else + return max_cstate; +} + +static inline bool arch_has_acpi_pdc(void) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + return (c->x86_vendor == X86_VENDOR_INTEL || + c->x86_vendor == X86_VENDOR_CENTAUR); +} + +static inline void arch_acpi_set_pdc_bits(u32 *buf) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + + buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; + + if (cpu_has(c, X86_FEATURE_EST)) + buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; + + if (cpu_has(c, X86_FEATURE_ACPI)) + buf[2] |= ACPI_PDC_T_FFH; + + /* + * If mwait/monitor is unsupported, C2/C3_FFH will be disabled + */ + if (!cpu_has(c, X86_FEATURE_MWAIT)) + buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); +} + +#else /* !CONFIG_ACPI */ + +#define acpi_lapic 0 +#define acpi_ioapic 0 +static inline void acpi_noirq_set(void) { } +static inline void acpi_disable_pci(void) { } +static inline void disable_acpi(void) { } + +#endif /* !CONFIG_ACPI */ + +#define ARCH_HAS_POWER_INIT 1 + +#ifdef CONFIG_ACPI_NUMA +extern int acpi_numa; +extern int x86_acpi_numa_init(void); +#endif /* CONFIG_ACPI_NUMA */ + +#define acpi_unlazy_tlb(x) leave_mm(x) + +#endif /* _ASM_X86_ACPI_H */ diff --git a/arch/x86/include/asm/aes.h b/arch/x86/include/asm/aes.h new file mode 100644 index 00000000..80545a1c --- /dev/null +++ b/arch/x86/include/asm/aes.h @@ -0,0 +1,11 @@ +#ifndef ASM_X86_AES_H +#define ASM_X86_AES_H + +#include +#include + +void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, + const u8 *src); +void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, + const u8 *src); +#endif diff --git a/arch/x86/include/asm/agp.h b/arch/x86/include/asm/agp.h new file mode 100644 index 00000000..eec2a70d --- /dev/null +++ b/arch/x86/include/asm/agp.h @@ -0,0 +1,31 @@ +#ifndef _ASM_X86_AGP_H +#define _ASM_X86_AGP_H + +#include +#include + +/* + * Functions to keep the agpgart mappings coherent with the MMU. The + * GART gives the CPU a physical alias of pages in memory. The alias + * region is mapped uncacheable. Make sure there are no conflicting + * mappings with different cachability attributes for the same + * page. This avoids data corruption on some CPUs. + */ + +#define map_page_into_agp(page) set_pages_uc(page, 1) +#define unmap_page_from_agp(page) set_pages_wb(page, 1) + +/* + * Could use CLFLUSH here if the cpu supports it. But then it would + * need to be called for each cacheline of the whole page so it may + * not be worth it. Would need a page for it. + */ +#define flush_agp_cache() wbinvd() + +/* GATT allocation. Returns/accepts GATT kernel virtual address. */ +#define alloc_gatt_pages(order) \ + ((char *)__get_free_pages(GFP_KERNEL, (order))) +#define free_gatt_pages(table, order) \ + free_pages((unsigned long)(table), (order)) + +#endif /* _ASM_X86_AGP_H */ diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h new file mode 100644 index 00000000..952bd010 --- /dev/null +++ b/arch/x86/include/asm/alternative-asm.h @@ -0,0 +1,26 @@ +#ifdef __ASSEMBLY__ + +#include + +#ifdef CONFIG_SMP + .macro LOCK_PREFIX +672: lock + .section .smp_locks,"a" + .balign 4 + .long 672b - . + .previous + .endm +#else + .macro LOCK_PREFIX + .endm +#endif + +.macro altinstruction_entry orig alt feature orig_len alt_len + .long \orig - . + .long \alt - . 
.word \feature + .byte \orig_len + .byte \alt_len +.endm + +#endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h new file mode 100644 index 00000000..49331bed --- /dev/null +++ b/arch/x86/include/asm/alternative.h @@ -0,0 +1,195 @@ +#ifndef _ASM_X86_ALTERNATIVE_H +#define _ASM_X86_ALTERNATIVE_H + +#include +#include +#include +#include + +/* + * Alternative inline assembly for SMP. + * + * The LOCK_PREFIX macro defined here replaces the LOCK and + * LOCK_PREFIX macros used everywhere in the source tree. + * + * SMP alternatives use the same data structures as the other + * alternatives and the X86_FEATURE_UP flag to indicate the case of a + * UP system running an SMP kernel. The existing apply_alternatives() + * works fine for patching an SMP kernel for UP. + * + * The SMP alternative tables can be kept after boot and contain both + * UP and SMP versions of the instructions to allow switching back to + * SMP at runtime, when hotplugging in a new CPU, which is especially + * useful in virtualized environments. + * + * The very common lock prefix is handled as a special case in a + * separate table which is a pure address list without replacement ptr + * and size information. That keeps the table sizes small. + */ + +#ifdef CONFIG_SMP +#define LOCK_PREFIX_HERE \ + ".section .smp_locks,\"a\"\n" \ + ".balign 4\n" \ + ".long 671f - .\n" /* offset */ \ + ".previous\n" \ + "671:" + +#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " + +#else /* ! CONFIG_SMP */ +#define LOCK_PREFIX_HERE "" +#define LOCK_PREFIX "" +#endif + +struct alt_instr { + s32 instr_offset; /* original instruction */ + s32 repl_offset; /* offset to replacement instruction */ + u16 cpuid; /* cpuid bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new instruction, <= instrlen */ +}; + +extern void alternative_instructions(void); +extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); + +struct module; + +#ifdef CONFIG_SMP +extern void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end); +extern void alternatives_smp_module_del(struct module *mod); +extern void alternatives_smp_switch(int smp); +extern int alternatives_text_reserved(void *start, void *end); +extern bool skip_smp_alternatives; +#else +static inline void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end) {} +static inline void alternatives_smp_module_del(struct module *mod) {} +static inline void alternatives_smp_switch(int smp) {} +static inline int alternatives_text_reserved(void *start, void *end) +{ + return 0; +} +#endif /* CONFIG_SMP */ + +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, newinstr, feature) \ + \ + "661:\n\t" oldinstr "\n662:\n" \ + ".section .altinstructions,\"a\"\n" \ + " .long 661b - .\n" /* label */ \ + " .long 663f - .\n" /* new instruction */ \ + " .word " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .discard,\"aw\",@progbits\n" \ + " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \ + ".previous\n" \ + ".section .altinstr_replacement, \"ax\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" + +/* + * This must be included *after* the definition of ALTERNATIVE due to + * + */ +#include +
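+
+/*
+ * (Illustrative sketch of the ALTERNATIVE() primitive above, modeled on
+ * the classic 32-bit mb() barrier: the "lock; addl" fallback is patched
+ * to mfence at boot on CPUs that have SSE2.
+ *
+ *	static inline void mb_sketch(void)
+ *	{
+ *		asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "mfence",
+ *					 X86_FEATURE_XMM2)
+ *			     : : : "memory");
+ *	}
+ * )
+ */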
+/* + * Alternative instructions for different CPU types or capabilities. + * + * This allows optimized instructions to be used even on generic binary + * kernels. + * + * The length of oldinstr must be greater than or equal to the length of + * newinstr; oldinstr can be padded with nops as needed. + * + * For non-barrier-like inlines please define new variants + * without volatile and memory clobber. + */ +#define alternative(oldinstr, newinstr, feature) \ + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") + +/* + * Alternative inline assembly with input. + * + * Peculiarities: + * No memory clobber here. + * Argument numbers start with 1. + * Best is to use constraints that are fixed size (like (%1) ... "r") + * If you use variable sized constraints like "m" or "g" in the + * replacement make sure to pad to the worst case length. + * Leaving an unused argument 0 to keep API compatibility. + */ +#define alternative_input(oldinstr, newinstr, feature, input...) \ + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + : : "i" (0), ## input) + +/* Like alternative_input, but with a single output argument */ +#define alternative_io(oldinstr, newinstr, feature, output, input...) \ + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + : output : "i" (0), ## input) + +/* Like alternative_io, but for replacing a direct call with another one. */ +#define alternative_call(oldfunc, newfunc, feature, output, input...) \ + asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \ + : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) + +/* + * Use this macro if you need more than one output parameter + * in alternative_io. + */ +#define ASM_OUTPUT2(a...) a + +/* + * Use this macro if you need clobbers but no inputs in + * alternative_{input,io,call}(). + */ +#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr + +struct paravirt_patch_site; +#ifdef CONFIG_PARAVIRT +void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end); +#else +static inline void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end) +{} +#define __parainstructions NULL +#define __parainstructions_end NULL +#endif + +extern void *text_poke_early(void *addr, const void *opcode, size_t len); + +/* + * Clear and restore the kernel write-protection flag on the local CPU. + * Allows the kernel to edit read-only pages. + * Side-effect: any interrupt handler running between save and restore will have + * the ability to write to read-only pages. + * + * Warning: + * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and + * no thread can be preempted in the instructions being modified (no iret to an + * invalid instruction possible) or if the instructions are changed from a + * consistent state to another consistent state atomically. + * More care must be taken when modifying code in the SMP case because of + * Intel's errata. text_poke_smp() takes care of that errata, but it still + * doesn't support modifying NMI/MCE handler code. + * On the local CPU you need to be protected against NMI or MCE handlers seeing an + * inconsistent instruction while you patch.
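+ *
+ * (Usage sketch for the batch interface declared below; ip and the
+ * 5-byte NOP are illustrative:
+ *
+ *	static const unsigned char nop5[] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
+ *	struct text_poke_param p = { .addr = ip, .opcode = nop5, .len = 5 };
+ *	text_poke_smp_batch(&p, 1);
+ * )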
+ */ +struct text_poke_param { + void *addr; + const void *opcode; + size_t len; +}; + +extern void *text_poke(void *addr, const void *opcode, size_t len); +extern void *text_poke_smp(void *addr, const void *opcode, size_t len); +extern void text_poke_smp_batch(struct text_poke_param *params, int n); + +#endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h new file mode 100644 index 00000000..49ad773f --- /dev/null +++ b/arch/x86/include/asm/amd_nb.h @@ -0,0 +1,72 @@ +#ifndef _ASM_X86_AMD_NB_H +#define _ASM_X86_AMD_NB_H + +#include +#include + +struct amd_nb_bus_dev_range { + u8 bus; + u8 dev_base; + u8 dev_limit; +}; + +extern const struct pci_device_id amd_nb_misc_ids[]; +extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; + +extern bool early_is_amd_nb(u32 value); +extern struct resource *amd_get_mmconfig_range(struct resource *res); +extern int amd_cache_northbridges(void); +extern void amd_flush_garts(void); +extern int amd_numa_init(void); +extern int amd_get_subcaches(int); +extern int amd_set_subcaches(int, int); + +struct amd_l3_cache { + unsigned indices; + u8 subcaches[4]; +}; + +struct amd_northbridge { + struct pci_dev *misc; + struct pci_dev *link; + struct amd_l3_cache l3_cache; +}; + +struct amd_northbridge_info { + u16 num; + u64 flags; + struct amd_northbridge *nb; +}; +extern struct amd_northbridge_info amd_northbridges; + +#define AMD_NB_GART BIT(0) +#define AMD_NB_L3_INDEX_DISABLE BIT(1) +#define AMD_NB_L3_PARTITIONING BIT(2) + +#ifdef CONFIG_AMD_NB + +static inline u16 amd_nb_num(void) +{ + return amd_northbridges.num; +} + +static inline bool amd_nb_has_feature(unsigned feature) +{ + return ((amd_northbridges.flags & feature) == feature); +} + +static inline struct amd_northbridge *node_to_amd_nb(int node) +{ + return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; +} + +#else + +#define amd_nb_num(x) 0 +#define amd_nb_has_feature(x) false +#define node_to_amd_nb(x) NULL + +#endif + + +#endif /* _ASM_X86_AMD_NB_H */ diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h new file mode 100644 index 00000000..0acbac29 --- /dev/null +++ b/arch/x86/include/asm/apb_timer.h @@ -0,0 +1,49 @@ +/* + * apb_timer.h: Driver for Langwell APB timer based on Synopsis DesignWare + * + * (C) Copyright 2009 Intel Corporation + * Author: Jacob Pan (jacob.jun.pan@intel.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. 
+ * + * Note: + */ + +#ifndef ASM_X86_APBT_H +#define ASM_X86_APBT_H +#include + +#ifdef CONFIG_APB_TIMER + +/* default memory mapped register base */ +#define LNW_SCU_ADDR 0xFF100000 +#define LNW_EXT_TIMER_OFFSET 0x1B800 +#define APBT_DEFAULT_BASE (LNW_SCU_ADDR+LNW_EXT_TIMER_OFFSET) +#define LNW_EXT_TIMER_PGOFFSET 0x800 + +/* APBT clock speed range from PCLK to fabric base, 25-100MHz */ +#define APBT_MAX_FREQ 50000000 +#define APBT_MIN_FREQ 1000000 +#define APBT_MMAP_SIZE 1024 + +#define APBT_DEV_USED 1 + +extern void apbt_time_init(void); +extern unsigned long apbt_quick_calibrate(void); +extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu); +extern void apbt_setup_secondary_clock(void); + +extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint); +extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr); +extern int sfi_mtimer_num; + +#else /* CONFIG_APB_TIMER */ + +static inline unsigned long apbt_quick_calibrate(void) {return 0; } +static inline void apbt_time_init(void) { } + +#endif +#endif /* ASM_X86_APBT_H */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h new file mode 100644 index 00000000..d8541017 --- /dev/null +++ b/arch/x86/include/asm/apic.h @@ -0,0 +1,636 @@ +#ifndef _ASM_X86_APIC_H +#define _ASM_X86_APIC_H + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#define ARCH_APICTIMER_STOPS_ON_C3 1 + +/* + * Debugging macros + */ +#define APIC_QUIET 0 +#define APIC_VERBOSE 1 +#define APIC_DEBUG 2 + +/* + * Define the default level of output to be very little + * This can be turned up by using apic=verbose for more + * information and apic=debug for _lots_ of information. + * apic_verbosity is defined in apic.c + */ +#define apic_printk(v, s, a...) do { \ + if ((v) <= apic_verbosity) \ + printk(s, ##a); \ + } while (0) + + +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) +extern void generic_apic_probe(void); +#else +static inline void generic_apic_probe(void) +{ +} +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + +extern unsigned int apic_verbosity; +extern int local_apic_timer_c2_ok; + +extern int disable_apic; +extern unsigned int lapic_timer_frequency; + +#ifdef CONFIG_SMP +extern void __inquire_remote_apic(int apicid); +#else /* CONFIG_SMP */ +static inline void __inquire_remote_apic(int apicid) +{ +} +#endif /* CONFIG_SMP */ + +static inline void default_inquire_remote_apic(int apicid) +{ + if (apic_verbosity >= APIC_DEBUG) + __inquire_remote_apic(apicid); +} + +/* + * With 82489DX we can't rely on apic feature bit + * retrieved via cpuid but still have to deal with + * such an apic chip so we assume that SMP configuration + * is found from MP table (64bit case uses ACPI mostly + * which set smp presence flag as well so we are safe + * to use this helper too). + */ +static inline bool apic_from_smp_config(void) +{ + return smp_found_config && !disable_apic; +} + +/* + * Basic functions accessing APICs. 
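+ *
+ * (All of these ultimately go through the ->read/->write ops of the
+ * struct apic driver probed at boot; e.g. ack_APIC_irq() further down
+ * is just apic_write(APIC_EOI, 0).)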
+ */ +#ifdef CONFIG_PARAVIRT +#include +#endif + +#ifdef CONFIG_X86_64 +extern int is_vsmp_box(void); +#else +static inline int is_vsmp_box(void) +{ + return 0; +} +#endif +extern void xapic_wait_icr_idle(void); +extern u32 safe_xapic_wait_icr_idle(void); +extern void xapic_icr_write(u32, u32); +extern int setup_profiling_timer(unsigned int); + +static inline void native_apic_mem_write(u32 reg, u32 v) +{ + volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); + + alternative_io("movl %0, %1", "xchgl %0, %1", X86_FEATURE_11AP, + ASM_OUTPUT2("=r" (v), "=m" (*addr)), + ASM_OUTPUT2("0" (v), "m" (*addr))); +} + +static inline u32 native_apic_mem_read(u32 reg) +{ + return *((volatile u32 *)(APIC_BASE + reg)); +} + +extern void native_apic_wait_icr_idle(void); +extern u32 native_safe_apic_wait_icr_idle(void); +extern void native_apic_icr_write(u32 low, u32 id); +extern u64 native_apic_icr_read(void); + +extern int x2apic_mode; + +#ifdef CONFIG_X86_X2APIC +/* + * Make previous memory operations globally visible before + * sending the IPI through x2apic wrmsr. We need a serializing instruction or + * mfence for this. + */ +static inline void x2apic_wrmsr_fence(void) +{ + asm volatile("mfence" : : : "memory"); +} + +static inline void native_apic_msr_write(u32 reg, u32 v) +{ + if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || + reg == APIC_LVR) + return; + + wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0); +} + +static inline u32 native_apic_msr_read(u32 reg) +{ + u64 msr; + + if (reg == APIC_DFR) + return -1; + + rdmsrl(APIC_BASE_MSR + (reg >> 4), msr); + return (u32)msr; +} + +static inline void native_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return; +} + +static inline u32 native_safe_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return 0; +} + +static inline void native_x2apic_icr_write(u32 low, u32 id) +{ + wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +} + +static inline u64 native_x2apic_icr_read(void) +{ + unsigned long val; + + rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + return val; +} + +extern int x2apic_phys; +extern int x2apic_preenabled; +extern void check_x2apic(void); +extern void enable_x2apic(void); +extern void x2apic_icr_write(u32 low, u32 id); +static inline int x2apic_enabled(void) +{ + u64 msr; + + if (!cpu_has_x2apic) + return 0; + + rdmsrl(MSR_IA32_APICBASE, msr); + if (msr & X2APIC_ENABLE) + return 1; + return 0; +} + +#define x2apic_supported() (cpu_has_x2apic) +static inline void x2apic_force_phys(void) +{ + x2apic_phys = 1; +} +#else +static inline void disable_x2apic(void) +{ +} +static inline void check_x2apic(void) +{ +} +static inline void enable_x2apic(void) +{ +} +static inline int x2apic_enabled(void) +{ + return 0; +} +static inline void x2apic_force_phys(void) +{ +} + +#define nox2apic 0 +#define x2apic_preenabled 0 +#define x2apic_supported() 0 +#endif + +extern void enable_IR_x2apic(void); + +extern int get_physical_broadcast(void); + +extern int lapic_get_maxlvt(void); +extern void clear_local_APIC(void); +extern void connect_bsp_APIC(void); +extern void disconnect_bsp_APIC(int virt_wire_setup); +extern void disable_local_APIC(void); +extern void lapic_shutdown(void); +extern int verify_local_APIC(void); +extern void sync_Arb_IDs(void); +extern void init_bsp_APIC(void); +extern void setup_local_APIC(void); +extern void end_local_APIC_setup(void); +extern void bsp_end_local_APIC_setup(void); +extern void init_apic_mappings(void); +void 
register_lapic_address(unsigned long address); +extern void setup_boot_APIC_clock(void); +extern void setup_secondary_APIC_clock(void); +extern int APIC_init_uniprocessor(void); +extern int apic_force_enable(unsigned long addr); + +/* + * On 32bit this is mach-xxx local + */ +#ifdef CONFIG_X86_64 +extern int apic_is_clustered_box(void); +#else +static inline int apic_is_clustered_box(void) +{ + return 0; +} +#endif + +extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); + +#else /* !CONFIG_X86_LOCAL_APIC */ +static inline void lapic_shutdown(void) { } +#define local_apic_timer_c2_ok 1 +static inline void init_apic_mappings(void) { } +static inline void disable_local_APIC(void) { } +# define setup_boot_APIC_clock x86_init_noop +# define setup_secondary_APIC_clock x86_init_noop +#endif /* !CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_X86_64 +#define SET_APIC_ID(x) (apic->set_apic_id(x)) +#else + +#endif + +/* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 + * + * Generic APIC sub-arch data struct. + * + * Hacked for x86-64 by James Cleverdon from i386 architecture code by + * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and + * James Cleverdon. + */ +struct apic { + char *name; + + int (*probe)(void); + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); + int (*apic_id_valid)(int apicid); + int (*apic_id_registered)(void); + + u32 irq_delivery_mode; + u32 irq_dest_mode; + + const struct cpumask *(*target_cpus)(void); + + int disable_esr; + + int dest_logical; + unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); + unsigned long (*check_apicid_present)(int apicid); + + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + void (*init_apic_ldr)(void); + + void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); + + void (*setup_apic_routing)(void); + int (*multi_timer_check)(int apic, int irq); + int (*cpu_present_to_apicid)(int mps_cpu); + void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); + void (*setup_portio_remap)(void); + int (*check_phys_apicid_present)(int phys_apicid); + void (*enable_apic_mode)(void); + int (*phys_pkg_id)(int cpuid_apic, int index_msb); + + /* + * When one of the next two hooks returns 1 the apic + * is switched to this. 
Essentially they are additional + * probe functions: + */ + int (*mps_oem_check)(struct mpc_table *mpc, char *oem, char *productid); + + unsigned int (*get_apic_id)(unsigned long x); + unsigned long (*set_apic_id)(unsigned int id); + unsigned long apic_id_mask; + + unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); + unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, + const struct cpumask *andmask); + + /* ipi */ + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *mask, + int vector); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); + + /* wakeup_secondary_cpu */ + int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); + + int trampoline_phys_low; + int trampoline_phys_high; + + void (*wait_for_init_deassert)(atomic_t *deassert); + void (*smp_callin_clear_local_apic)(void); + void (*inquire_remote_apic)(int apicid); + + /* apic ops */ + u32 (*read)(u32 reg); + void (*write)(u32 reg, u32 v); + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); + void (*wait_icr_idle)(void); + u32 (*safe_wait_icr_idle)(void); + +#ifdef CONFIG_X86_32 + /* + * Called very early during boot from get_smp_config(). It should + * return the logical apicid. x86_[bios]_cpu_to_apicid is + * initialized before this function is called. + * + * If logical apicid can't be determined that early, the function + * may return BAD_APICID. Logical apicid will be configured after + * init_apic_ldr() while bringing up CPUs. Note that NUMA affinity + * won't be applied properly during early boot in this case. + */ + int (*x86_32_early_logical_apicid)(int cpu); + + /* + * Optional method called from setup_local_APIC() after logical + * apicid is guaranteed to be known to initialize apicid -> node + * mapping if NUMA initialization hasn't done so already. Don't + * add new users. + */ + int (*x86_32_numa_cpu_node)(int cpu); +#endif +}; + +/* + * Pointer to the local APIC driver in use on this system (there's + * always just one such driver in use - the kernel decides via an + * early probing process which one it picks - and then sticks to it): + */ +extern struct apic *apic; + +/* + * APIC drivers are probed based on how they are listed in the .apicdrivers + * section. So the order is important and enforced by the ordering + * of different apic driver files in the Makefile. + * + * For the files having two apic drivers, we use apic_drivers() + * to enforce the order within them.
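+ *
+ * (Usage sketch of the two macros defined below; the driver names are
+ * illustrative:
+ *
+ *	apic_driver(apic_x2apic_cluster);
+ *	apic_drivers(apic_flat, apic_physflat);
+ * )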
+ */ +#define apic_driver(sym) \ + static struct apic *__apicdrivers_##sym __used \ + __aligned(sizeof(struct apic *)) \ + __section(.apicdrivers) = { &sym } + +#define apic_drivers(sym1, sym2) \ + static struct apic *__apicdrivers_##sym1##sym2[2] __used \ + __aligned(sizeof(struct apic *)) \ + __section(.apicdrivers) = { &sym1, &sym2 } + +extern struct apic *__apicdrivers[], *__apicdrivers_end[]; + +/* + * APIC functionality to boot other CPUs - only used on SMP: + */ +#ifdef CONFIG_SMP +extern atomic_t init_deasserted; +extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + +static inline u32 apic_read(u32 reg) +{ + return apic->read(reg); +} + +static inline void apic_write(u32 reg, u32 val) +{ + apic->write(reg, val); +} + +static inline u64 apic_icr_read(void) +{ + return apic->icr_read(); +} + +static inline void apic_icr_write(u32 low, u32 high) +{ + apic->icr_write(low, high); +} + +static inline void apic_wait_icr_idle(void) +{ + apic->wait_icr_idle(); +} + +static inline u32 safe_apic_wait_icr_idle(void) +{ + return apic->safe_wait_icr_idle(); +} + +#else /* CONFIG_X86_LOCAL_APIC */ + +static inline u32 apic_read(u32 reg) { return 0; } +static inline void apic_write(u32 reg, u32 val) { } +static inline u64 apic_icr_read(void) { return 0; } +static inline void apic_icr_write(u32 low, u32 high) { } +static inline void apic_wait_icr_idle(void) { } +static inline u32 safe_apic_wait_icr_idle(void) { return 0; } + +#endif /* CONFIG_X86_LOCAL_APIC */ + +static inline void ack_APIC_irq(void) +{ + /* + * ack_APIC_irq() actually gets compiled as a single instruction + * ... yummie. + */ + + /* Docs say use 0 for future compatibility */ + apic_write(APIC_EOI, 0); +} + +static inline unsigned default_get_apic_id(unsigned long x) +{ + unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + + if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID)) + return (x >> 24) & 0xFF; + else + return (x >> 24) & 0x0F; +} + +/* + * Warm reset vector default position: + */ +#define DEFAULT_TRAMPOLINE_PHYS_LOW 0x467 +#define DEFAULT_TRAMPOLINE_PHYS_HIGH 0x469 + +#ifdef CONFIG_X86_64 +extern int default_acpi_madt_oem_check(char *, char *); + +extern void apic_send_IPI_self(int vector); + +DECLARE_PER_CPU(int, x2apic_extra_bits); + +extern int default_cpu_present_to_apicid(int mps_cpu); +extern int default_check_phys_apicid_present(int phys_apicid); +#endif + +static inline void default_wait_for_init_deassert(atomic_t *deassert) +{ + while (!atomic_read(deassert)) + cpu_relax(); + return; +} + +extern void generic_bigsmp_probe(void); + + +#ifdef CONFIG_X86_LOCAL_APIC + +#include + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +static inline const struct cpumask *default_target_cpus(void) +{ +#ifdef CONFIG_SMP + return cpu_online_mask; +#else + return cpumask_of(0); +#endif +} + +DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); + + +static inline unsigned int read_apic_id(void) +{ + unsigned int reg; + + reg = apic_read(APIC_ID); + + return apic->get_apic_id(reg); +} + +static inline int default_apic_id_valid(int apicid) +{ + return (apicid < 255); +} + +extern void default_setup_apic_routing(void); + +extern struct apic apic_noop; + +#ifdef CONFIG_X86_32 + +static inline int noop_x86_32_early_logical_apicid(int cpu) +{ + return BAD_APICID; +} + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). 
So here it goes... + */ +extern void default_init_apic_ldr(void); + +static inline int default_apic_id_registered(void) +{ + return physid_isset(read_apic_id(), phys_cpu_present_map); +} + +static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +#endif + +static inline unsigned int +default_cpu_mask_to_apicid(const struct cpumask *cpumask) +{ + return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; +} + +static inline unsigned int +default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + unsigned long mask1 = cpumask_bits(cpumask)[0]; + unsigned long mask2 = cpumask_bits(andmask)[0]; + unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; + + return (unsigned int)(mask1 & mask2 & mask3); +} + +static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) +{ + return physid_isset(apicid, *map); +} + +static inline unsigned long default_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) +{ + *retmap = *phys_map; +} + +static inline int __default_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static inline int +__default_check_phys_apicid_present(int phys_apicid) +{ + return physid_isset(phys_apicid, phys_cpu_present_map); +} + +#ifdef CONFIG_X86_32 +static inline int default_cpu_present_to_apicid(int mps_cpu) +{ + return __default_cpu_present_to_apicid(mps_cpu); +} + +static inline int +default_check_phys_apicid_present(int phys_apicid) +{ + return __default_check_phys_apicid_present(phys_apicid); +} +#else +extern int default_cpu_present_to_apicid(int mps_cpu); +extern int default_check_phys_apicid_present(int phys_apicid); +#endif + +#endif /* CONFIG_X86_LOCAL_APIC */ + +#endif /* _ASM_X86_APIC_H */ diff --git a/arch/x86/include/asm/apic_flat_64.h b/arch/x86/include/asm/apic_flat_64.h new file mode 100644 index 00000000..a2d31279 --- /dev/null +++ b/arch/x86/include/asm/apic_flat_64.h @@ -0,0 +1,7 @@ +#ifndef _ASM_X86_APIC_FLAT_64_H +#define _ASM_X86_APIC_FLAT_64_H + +extern void flat_init_apic_ldr(void); + +#endif + diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h new file mode 100644 index 00000000..134bba00 --- /dev/null +++ b/arch/x86/include/asm/apicdef.h @@ -0,0 +1,445 @@ +#ifndef _ASM_X86_APICDEF_H +#define _ASM_X86_APICDEF_H + +/* + * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) + * + * Alan Cox , 1995. 
+ * Ingo Molnar , 1999, 2000 + */ + +#define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000 +#define APIC_DEFAULT_PHYS_BASE 0xfee00000 + +/* + * This is the IO-APIC register space as specified + * by Intel docs: + */ +#define IO_APIC_SLOT_SIZE 1024 + +#define APIC_ID 0x20 + +#define APIC_LVR 0x30 +#define APIC_LVR_MASK 0xFF00FF +#define APIC_LVR_DIRECTED_EOI (1 << 24) +#define GET_APIC_VERSION(x) ((x) & 0xFFu) +#define GET_APIC_MAXLVT(x) (((x) >> 16) & 0xFFu) +#ifdef CONFIG_X86_32 +# define APIC_INTEGRATED(x) ((x) & 0xF0u) +#else +# define APIC_INTEGRATED(x) (1) +#endif +#define APIC_XAPIC(x) ((x) >= 0x14) +#define APIC_EXT_SPACE(x) ((x) & 0x80000000) +#define APIC_TASKPRI 0x80 +#define APIC_TPRI_MASK 0xFFu +#define APIC_ARBPRI 0x90 +#define APIC_ARBPRI_MASK 0xFFu +#define APIC_PROCPRI 0xA0 +#define APIC_EOI 0xB0 +#define APIC_EIO_ACK 0x0 +#define APIC_RRR 0xC0 +#define APIC_LDR 0xD0 +#define APIC_LDR_MASK (0xFFu << 24) +#define GET_APIC_LOGICAL_ID(x) (((x) >> 24) & 0xFFu) +#define SET_APIC_LOGICAL_ID(x) (((x) << 24)) +#define APIC_ALL_CPUS 0xFFu +#define APIC_DFR 0xE0 +#define APIC_DFR_CLUSTER 0x0FFFFFFFul +#define APIC_DFR_FLAT 0xFFFFFFFFul +#define APIC_SPIV 0xF0 +#define APIC_SPIV_DIRECTED_EOI (1 << 12) +#define APIC_SPIV_FOCUS_DISABLED (1 << 9) +#define APIC_SPIV_APIC_ENABLED (1 << 8) +#define APIC_ISR 0x100 +#define APIC_ISR_NR 0x8 /* Number of 32 bit ISR registers. */ +#define APIC_TMR 0x180 +#define APIC_IRR 0x200 +#define APIC_ESR 0x280 +#define APIC_ESR_SEND_CS 0x00001 +#define APIC_ESR_RECV_CS 0x00002 +#define APIC_ESR_SEND_ACC 0x00004 +#define APIC_ESR_RECV_ACC 0x00008 +#define APIC_ESR_SENDILL 0x00020 +#define APIC_ESR_RECVILL 0x00040 +#define APIC_ESR_ILLREGA 0x00080 +#define APIC_LVTCMCI 0x2f0 +#define APIC_ICR 0x300 +#define APIC_DEST_SELF 0x40000 +#define APIC_DEST_ALLINC 0x80000 +#define APIC_DEST_ALLBUT 0xC0000 +#define APIC_ICR_RR_MASK 0x30000 +#define APIC_ICR_RR_INVALID 0x00000 +#define APIC_ICR_RR_INPROG 0x10000 +#define APIC_ICR_RR_VALID 0x20000 +#define APIC_INT_LEVELTRIG 0x08000 +#define APIC_INT_ASSERT 0x04000 +#define APIC_ICR_BUSY 0x01000 +#define APIC_DEST_LOGICAL 0x00800 +#define APIC_DEST_PHYSICAL 0x00000 +#define APIC_DM_FIXED 0x00000 +#define APIC_DM_FIXED_MASK 0x00700 +#define APIC_DM_LOWEST 0x00100 +#define APIC_DM_SMI 0x00200 +#define APIC_DM_REMRD 0x00300 +#define APIC_DM_NMI 0x00400 +#define APIC_DM_INIT 0x00500 +#define APIC_DM_STARTUP 0x00600 +#define APIC_DM_EXTINT 0x00700 +#define APIC_VECTOR_MASK 0x000FF +#define APIC_ICR2 0x310 +#define GET_APIC_DEST_FIELD(x) (((x) >> 24) & 0xFF) +#define SET_APIC_DEST_FIELD(x) ((x) << 24) +#define APIC_LVTT 0x320 +#define APIC_LVTTHMR 0x330 +#define APIC_LVTPC 0x340 +#define APIC_LVT0 0x350 +#define APIC_LVT_TIMER_BASE_MASK (0x3 << 18) +#define GET_APIC_TIMER_BASE(x) (((x) >> 18) & 0x3) +#define SET_APIC_TIMER_BASE(x) (((x) << 18)) +#define APIC_TIMER_BASE_CLKIN 0x0 +#define APIC_TIMER_BASE_TMBASE 0x1 +#define APIC_TIMER_BASE_DIV 0x2 +#define APIC_LVT_TIMER_ONESHOT (0 << 17) +#define APIC_LVT_TIMER_PERIODIC (1 << 17) +#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) +#define APIC_LVT_MASKED (1 << 16) +#define APIC_LVT_LEVEL_TRIGGER (1 << 15) +#define APIC_LVT_REMOTE_IRR (1 << 14) +#define APIC_INPUT_POLARITY (1 << 13) +#define APIC_SEND_PENDING (1 << 12) +#define APIC_MODE_MASK 0x700 +#define GET_APIC_DELIVERY_MODE(x) (((x) >> 8) & 0x7) +#define SET_APIC_DELIVERY_MODE(x, y) (((x) & ~0x700) | ((y) << 8)) +#define APIC_MODE_FIXED 0x0 +#define APIC_MODE_NMI 0x4 +#define APIC_MODE_EXTINT 0x7 +#define APIC_LVT1 0x360 +#define 
APIC_LVTERR 0x370 +#define APIC_TMICT 0x380 +#define APIC_TMCCT 0x390 +#define APIC_TDCR 0x3E0 +#define APIC_SELF_IPI 0x3F0 +#define APIC_TDR_DIV_TMBASE (1 << 2) +#define APIC_TDR_DIV_1 0xB +#define APIC_TDR_DIV_2 0x0 +#define APIC_TDR_DIV_4 0x1 +#define APIC_TDR_DIV_8 0x2 +#define APIC_TDR_DIV_16 0x3 +#define APIC_TDR_DIV_32 0x8 +#define APIC_TDR_DIV_64 0x9 +#define APIC_TDR_DIV_128 0xA +#define APIC_EFEAT 0x400 +#define APIC_ECTRL 0x410 +#define APIC_EILVTn(n) (0x500 + 0x10 * n) +#define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ +#define APIC_EILVT_NR_AMD_10H 4 +#define APIC_EILVT_NR_MAX APIC_EILVT_NR_AMD_10H +#define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF) +#define APIC_EILVT_MSG_FIX 0x0 +#define APIC_EILVT_MSG_SMI 0x2 +#define APIC_EILVT_MSG_NMI 0x4 +#define APIC_EILVT_MSG_EXT 0x7 +#define APIC_EILVT_MASKED (1 << 16) + +#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) +#define APIC_BASE_MSR 0x800 +#define XAPIC_ENABLE (1UL << 11) +#define X2APIC_ENABLE (1UL << 10) + +#ifdef CONFIG_X86_32 +# define MAX_IO_APICS 64 +# define MAX_LOCAL_APIC 256 +#else +# define MAX_IO_APICS 128 +# define MAX_LOCAL_APIC 32768 +#endif + +/* + * All x86-64 systems are xAPIC compatible. + * In the following, "apicid" is a physical APIC ID. + */ +#define XAPIC_DEST_CPUS_SHIFT 4 +#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) +#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) +#define APIC_CLUSTER(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) +#define APIC_CLUSTERID(apicid) (APIC_CLUSTER(apicid) >> XAPIC_DEST_CPUS_SHIFT) +#define APIC_CPUID(apicid) ((apicid) & XAPIC_DEST_CPUS_MASK) +#define NUM_APIC_CLUSTERS ((BAD_APICID + 1) >> XAPIC_DEST_CPUS_SHIFT) + +/* + * the local APIC register structure, memory mapped. Not terribly well + * tested, but we might eventually use this one in the future - the + * problem why we cannot use it right now is the P5 APIC, it has an + * errata which cannot take 8-bit reads and writes, only 32-bit ones ... 
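+ *
+ * (Hence every register in the layout below occupies a full 16-byte
+ * slot and is accessed only as 32-bit words; the u32 __reserved[3]
+ * members supply the padding.)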
+ */ +#define u32 unsigned int + +struct local_apic { + +/*000*/ struct { u32 __reserved[4]; } __reserved_01; + +/*010*/ struct { u32 __reserved[4]; } __reserved_02; + +/*020*/ struct { /* APIC ID Register */ + u32 __reserved_1 : 24, + phys_apic_id : 4, + __reserved_2 : 4; + u32 __reserved[3]; + } id; + +/*030*/ const + struct { /* APIC Version Register */ + u32 version : 8, + __reserved_1 : 8, + max_lvt : 8, + __reserved_2 : 8; + u32 __reserved[3]; + } version; + +/*040*/ struct { u32 __reserved[4]; } __reserved_03; + +/*050*/ struct { u32 __reserved[4]; } __reserved_04; + +/*060*/ struct { u32 __reserved[4]; } __reserved_05; + +/*070*/ struct { u32 __reserved[4]; } __reserved_06; + +/*080*/ struct { /* Task Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } tpr; + +/*090*/ const + struct { /* Arbitration Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } apr; + +/*0A0*/ const + struct { /* Processor Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } ppr; + +/*0B0*/ struct { /* End Of Interrupt Register */ + u32 eoi; + u32 __reserved[3]; + } eoi; + +/*0C0*/ struct { u32 __reserved[4]; } __reserved_07; + +/*0D0*/ struct { /* Logical Destination Register */ + u32 __reserved_1 : 24, + logical_dest : 8; + u32 __reserved_2[3]; + } ldr; + +/*0E0*/ struct { /* Destination Format Register */ + u32 __reserved_1 : 28, + model : 4; + u32 __reserved_2[3]; + } dfr; + +/*0F0*/ struct { /* Spurious Interrupt Vector Register */ + u32 spurious_vector : 8, + apic_enabled : 1, + focus_cpu : 1, + __reserved_2 : 22; + u32 __reserved_3[3]; + } svr; + +/*100*/ struct { /* In Service Register */ +/*170*/ u32 bitfield; + u32 __reserved[3]; + } isr [8]; + +/*180*/ struct { /* Trigger Mode Register */ +/*1F0*/ u32 bitfield; + u32 __reserved[3]; + } tmr [8]; + +/*200*/ struct { /* Interrupt Request Register */ +/*270*/ u32 bitfield; + u32 __reserved[3]; + } irr [8]; + +/*280*/ union { /* Error Status Register */ + struct { + u32 send_cs_error : 1, + receive_cs_error : 1, + send_accept_error : 1, + receive_accept_error : 1, + __reserved_1 : 1, + send_illegal_vector : 1, + receive_illegal_vector : 1, + illegal_register_address : 1, + __reserved_2 : 24; + u32 __reserved_3[3]; + } error_bits; + struct { + u32 errors; + u32 __reserved_3[3]; + } all_errors; + } esr; + +/*290*/ struct { u32 __reserved[4]; } __reserved_08; + +/*2A0*/ struct { u32 __reserved[4]; } __reserved_09; + +/*2B0*/ struct { u32 __reserved[4]; } __reserved_10; + +/*2C0*/ struct { u32 __reserved[4]; } __reserved_11; + +/*2D0*/ struct { u32 __reserved[4]; } __reserved_12; + +/*2E0*/ struct { u32 __reserved[4]; } __reserved_13; + +/*2F0*/ struct { u32 __reserved[4]; } __reserved_14; + +/*300*/ struct { /* Interrupt Command Register 1 */ + u32 vector : 8, + delivery_mode : 3, + destination_mode : 1, + delivery_status : 1, + __reserved_1 : 1, + level : 1, + trigger : 1, + __reserved_2 : 2, + shorthand : 2, + __reserved_3 : 12; + u32 __reserved_4[3]; + } icr1; + +/*310*/ struct { /* Interrupt Command Register 2 */ + union { + u32 __reserved_1 : 24, + phys_dest : 4, + __reserved_2 : 4; + u32 __reserved_3 : 24, + logical_dest : 8; + } dest; + u32 __reserved_4[3]; + } icr2; + +/*320*/ struct { /* LVT - Timer */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + timer_mode : 1, + __reserved_3 : 14; + u32 __reserved_4[3]; + } lvt_timer; + +/*330*/ struct { /* LVT - Thermal Sensor */ + u32 vector : 8, + 
delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_thermal; + +/*340*/ struct { /* LVT - Performance Counter */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_pc; + +/*350*/ struct { /* LVT - LINT0 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint0; + +/*360*/ struct { /* LVT - LINT1 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint1; + +/*370*/ struct { /* LVT - Error */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_error; + +/*380*/ struct { /* Timer Initial Count Register */ + u32 initial_count; + u32 __reserved_2[3]; + } timer_icr; + +/*390*/ const + struct { /* Timer Current Count Register */ + u32 curr_count; + u32 __reserved_2[3]; + } timer_ccr; + +/*3A0*/ struct { u32 __reserved[4]; } __reserved_16; + +/*3B0*/ struct { u32 __reserved[4]; } __reserved_17; + +/*3C0*/ struct { u32 __reserved[4]; } __reserved_18; + +/*3D0*/ struct { u32 __reserved[4]; } __reserved_19; + +/*3E0*/ struct { /* Timer Divide Configuration Register */ + u32 divisor : 4, + __reserved_1 : 28; + u32 __reserved_2[3]; + } timer_dcr; + +/*3F0*/ struct { u32 __reserved[4]; } __reserved_20; + +} __attribute__ ((packed)); + +#undef u32 + +#ifdef CONFIG_X86_32 + #define BAD_APICID 0xFFu +#else + #define BAD_APICID 0xFFFFu +#endif + +enum ioapic_irq_destination_types { + dest_Fixed = 0, + dest_LowestPrio = 1, + dest_SMI = 2, + dest__reserved_1 = 3, + dest_NMI = 4, + dest_INIT = 5, + dest__reserved_2 = 6, + dest_ExtINT = 7 +}; + +#endif /* _ASM_X86_APICDEF_H */ diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h new file mode 100644 index 00000000..20370c6d --- /dev/null +++ b/arch/x86/include/asm/apm.h @@ -0,0 +1,73 @@ +/* + * Machine specific APM BIOS functions for generic. + * Split out from apm.c by Osamu Tomita + */ + +#ifndef _ASM_X86_MACH_DEFAULT_APM_H +#define _ASM_X86_MACH_DEFAULT_APM_H + +#ifdef APM_ZERO_SEGS +# define APM_DO_ZERO_SEGS \ + "pushl %%ds\n\t" \ + "pushl %%es\n\t" \ + "xorl %%edx, %%edx\n\t" \ + "mov %%dx, %%ds\n\t" \ + "mov %%dx, %%es\n\t" \ + "mov %%dx, %%fs\n\t" \ + "mov %%dx, %%gs\n\t" +# define APM_DO_POP_SEGS \ + "popl %%es\n\t" \ + "popl %%ds\n\t" +#else +# define APM_DO_ZERO_SEGS +# define APM_DO_POP_SEGS +#endif + +static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, + u32 *eax, u32 *ebx, u32 *ecx, + u32 *edx, u32 *esi) +{ + /* + * N.B. We do NOT need a cld after the BIOS call + * because we always save and restore the flags. + */ + __asm__ __volatile__(APM_DO_ZERO_SEGS + "pushl %%edi\n\t" + "pushl %%ebp\n\t" + "lcall *%%cs:apm_bios_entry\n\t" + "setc %%al\n\t" + "popl %%ebp\n\t" + "popl %%edi\n\t" + APM_DO_POP_SEGS + : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx), + "=S" (*esi) + : "a" (func), "b" (ebx_in), "c" (ecx_in) + : "memory", "cc"); +} + +static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, + u32 ecx_in, u32 *eax) +{ + int cx, dx, si; + u8 error; + + /* + * N.B. 
We do NOT need a cld after the BIOS call + * because we always save and restore the flags. + */ + __asm__ __volatile__(APM_DO_ZERO_SEGS + "pushl %%edi\n\t" + "pushl %%ebp\n\t" + "lcall *%%cs:apm_bios_entry\n\t" + "setc %%bl\n\t" + "popl %%ebp\n\t" + "popl %%edi\n\t" + APM_DO_POP_SEGS + : "=a" (*eax), "=b" (error), "=c" (cx), "=d" (dx), + "=S" (si) + : "a" (func), "b" (ebx_in), "c" (ecx_in) + : "memory", "cc"); + return error; +} + +#endif /* _ASM_X86_MACH_DEFAULT_APM_H */ diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h new file mode 100644 index 00000000..9686c3d9 --- /dev/null +++ b/arch/x86/include/asm/arch_hweight.h @@ -0,0 +1,61 @@ +#ifndef _ASM_X86_HWEIGHT_H +#define _ASM_X86_HWEIGHT_H + +#ifdef CONFIG_64BIT +/* popcnt %edi, %eax -- redundant REX prefix for alignment */ +#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" +/* popcnt %rdi, %rax */ +#define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" +#define REG_IN "D" +#define REG_OUT "a" +#else +/* popcnt %eax, %eax */ +#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0" +#define REG_IN "a" +#define REG_OUT "a" +#endif + +/* + * __sw_hweightXX are called from within the alternatives below + * and callee-clobbered registers need to be taken care of. See + * ARCH_HWEIGHT_CFLAGS in for the respective + * compiler switches. + */ +static inline unsigned int __arch_hweight32(unsigned int w) +{ + unsigned int res = 0; + + asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) + : "="REG_OUT (res) + : REG_IN (w)); + + return res; +} + +static inline unsigned int __arch_hweight16(unsigned int w) +{ + return __arch_hweight32(w & 0xffff); +} + +static inline unsigned int __arch_hweight8(unsigned int w) +{ + return __arch_hweight32(w & 0xff); +} + +static inline unsigned long __arch_hweight64(__u64 w) +{ + unsigned long res = 0; + +#ifdef CONFIG_X86_32 + return __arch_hweight32((u32)w) + + __arch_hweight32((u32)(w >> 32)); +#else + asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) + : "="REG_OUT (res) + : REG_IN (w)); +#endif /* CONFIG_X86_32 */ + + return res; +} + +#endif diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h new file mode 100644 index 00000000..0d9ec770 --- /dev/null +++ b/arch/x86/include/asm/archrandom.h @@ -0,0 +1,75 @@ +/* + * This file is part of the Linux kernel. + * + * Copyright (c) 2011, Intel Corporation + * Authors: Fenghua Yu , + * H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + */ + +#ifndef ASM_X86_ARCHRANDOM_H +#define ASM_X86_ARCHRANDOM_H + +#include +#include +#include +#include + +#define RDRAND_RETRY_LOOPS 10 + +#define RDRAND_INT ".byte 0x0f,0xc7,0xf0" +#ifdef CONFIG_X86_64 +# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0" +#else +# define RDRAND_LONG RDRAND_INT +#endif + +#ifdef CONFIG_ARCH_RANDOM + +#define GET_RANDOM(name, type, rdrand, nop) \ +static inline int name(type *v) \ +{ \ + int ok; \ + alternative_io("movl $0, %0\n\t" \ + nop, \ + "\n1: " rdrand "\n\t" \ + "jc 2f\n\t" \ + "decl %0\n\t" \ + "jnz 1b\n\t" \ + "2:", \ + X86_FEATURE_RDRAND, \ + ASM_OUTPUT2("=r" (ok), "=a" (*v)), \ + "0" (RDRAND_RETRY_LOOPS)); \ + return ok; \ +} + +#ifdef CONFIG_X86_64 + +GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5); +GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4); + +#else + +GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3); +GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); + +#endif /* CONFIG_X86_64 */ + +#endif /* CONFIG_ARCH_RANDOM */ + +extern void x86_init_rdrand(struct cpuinfo_x86 *c); + +#endif /* ASM_X86_ARCHRANDOM_H */ diff --git a/arch/x86/include/asm/asm-offsets.h b/arch/x86/include/asm/asm-offsets.h new file mode 100644 index 00000000..d370ee36 --- /dev/null +++ b/arch/x86/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h new file mode 100644 index 00000000..9412d655 --- /dev/null +++ b/arch/x86/include/asm/asm.h @@ -0,0 +1,58 @@ +#ifndef _ASM_X86_ASM_H +#define _ASM_X86_ASM_H + +#ifdef __ASSEMBLY__ +# define __ASM_FORM(x) x +# define __ASM_FORM_COMMA(x) x, +# define __ASM_EX_SEC .section __ex_table, "a" +#else +# define __ASM_FORM(x) " " #x " " +# define __ASM_FORM_COMMA(x) " " #x "," +# define __ASM_EX_SEC " .section __ex_table,\"a\"\n" +#endif + +#ifdef CONFIG_X86_32 +# define __ASM_SEL(a,b) __ASM_FORM(a) +#else +# define __ASM_SEL(a,b) __ASM_FORM(b) +#endif + +#define __ASM_SIZE(inst, ...) __ASM_SEL(inst##l##__VA_ARGS__, \ + inst##q##__VA_ARGS__) +#define __ASM_REG(reg) __ASM_SEL(e##reg, r##reg) + +#define _ASM_PTR __ASM_SEL(.long, .quad) +#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8) + +#define _ASM_MOV __ASM_SIZE(mov) +#define _ASM_INC __ASM_SIZE(inc) +#define _ASM_DEC __ASM_SIZE(dec) +#define _ASM_ADD __ASM_SIZE(add) +#define _ASM_SUB __ASM_SIZE(sub) +#define _ASM_XADD __ASM_SIZE(xadd) + +#define _ASM_AX __ASM_REG(ax) +#define _ASM_BX __ASM_REG(bx) +#define _ASM_CX __ASM_REG(cx) +#define _ASM_DX __ASM_REG(dx) +#define _ASM_SP __ASM_REG(sp) +#define _ASM_BP __ASM_REG(bp) +#define _ASM_SI __ASM_REG(si) +#define _ASM_DI __ASM_REG(di) + +/* Exception table entry */ +#ifdef __ASSEMBLY__ +# define _ASM_EXTABLE(from,to) \ + __ASM_EX_SEC ; \ + _ASM_ALIGN ; \ + _ASM_PTR from , to ; \ + .previous +#else +# define _ASM_EXTABLE(from,to) \ + __ASM_EX_SEC \ + _ASM_ALIGN "\n" \ + _ASM_PTR #from "," #to "\n" \ + " .previous\n" +#endif + +#endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h new file mode 100644 index 00000000..58cb6d40 --- /dev/null +++ b/arch/x86/include/asm/atomic.h @@ -0,0 +1,317 @@ +#ifndef _ASM_X86_ATOMIC_H +#define _ASM_X86_ATOMIC_H + +#include +#include +#include +#include +#include + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. 
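+ *
+ * (Minimal usage sketch; release() is a hypothetical destructor:
+ *
+ *	static atomic_t refs = ATOMIC_INIT(1);
+ *
+ *	atomic_inc(&refs);			// grab a reference
+ *	if (atomic_dec_and_test(&refs))		// drop it; true when 0
+ *		release();
+ * )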
+ */ + +#define ATOMIC_INIT(i) { (i) } + +/** + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. + */ +static inline int atomic_read(const atomic_t *v) +{ + return (*(volatile int *)&(v)->counter); +} + +/** + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +static inline void atomic_set(atomic_t *v, int i) +{ + v->counter = i; +} + +/** + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. + */ +static inline void atomic_add(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "addl %1,%0" + : "+m" (v->counter) + : "ir" (i)); +} + +/** + * atomic_sub - subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. + */ +static inline void atomic_sub(int i, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "subl %1,%0" + : "+m" (v->counter) + : "ir" (i)); +} + +/** + * atomic_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline int atomic_sub_and_test(int i, atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" + : "+m" (v->counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. + */ +static inline void atomic_inc(atomic_t *v) +{ + asm volatile(LOCK_PREFIX "incl %0" + : "+m" (v->counter)); +} + +/** + * atomic_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. + */ +static inline void atomic_dec(atomic_t *v) +{ + asm volatile(LOCK_PREFIX "decl %0" + : "+m" (v->counter)); +} + +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline int atomic_dec_and_test(atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "decl %0; sete %1" + : "+m" (v->counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline int atomic_inc_and_test(atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "incl %0; sete %1" + : "+m" (v->counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * atomic_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. 
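+ *
+ * Illustrative sketch (editorial; sem_count and wait_for_resource()
+ * are hypothetical): the sign test supports a semaphore-style fast
+ * path, where a negative result means the caller must block:
+ *
+ *	if (atomic_add_negative(-1, &sem_count))
+ *		wait_for_resource();	/* count dropped below zero */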
+ */ +static inline int atomic_add_negative(int i, atomic_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "addl %2,%0; sets %1" + : "+m" (v->counter), "=qm" (c) + : "ir" (i) : "memory"); + return c; +} + +/** + * atomic_add_return - add integer and return + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static inline int atomic_add_return(int i, atomic_t *v) +{ +#ifdef CONFIG_M386 + int __i; + unsigned long flags; + if (unlikely(boot_cpu_data.x86 <= 3)) + goto no_xadd; +#endif + /* Modern 486+ processor */ + return i + xadd(&v->counter, i); + +#ifdef CONFIG_M386 +no_xadd: /* Legacy 386 processor */ + raw_local_irq_save(flags); + __i = atomic_read(v); + atomic_set(v, i + __i); + raw_local_irq_restore(flags); + return i + __i; +#endif +} + +/** + * atomic_sub_return - subtract integer and return + * @v: pointer of type atomic_t + * @i: integer value to subtract + * + * Atomically subtracts @i from @v and returns @v - @i + */ +static inline int atomic_sub_return(int i, atomic_t *v) +{ + return atomic_add_return(-i, v); +} + +#define atomic_inc_return(v) (atomic_add_return(1, v)) +#define atomic_dec_return(v) (atomic_sub_return(1, v)) + +static inline int atomic_cmpxchg(atomic_t *v, int old, int new) +{ + return cmpxchg(&v->counter, old, new); +} + +static inline int atomic_xchg(atomic_t *v, int new) +{ + return xchg(&v->counter, new); +} + +/** + * __atomic_add_unless - add unless the number is already a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as @v was not already @u. + * Returns the old value of @v. + */ +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int c, old; + c = atomic_read(v); + for (;;) { + if (unlikely(c == (u))) + break; + old = atomic_cmpxchg((v), c, c + (a)); + if (likely(old == c)) + break; + c = old; + } + return c; +} + + +/* + * atomic_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic_t + * + * The function returns the old value of *v minus 1, even if + * the atomic variable, v, was not decremented. 
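+ *
+ * Illustrative sketch (editorial; pool->available is hypothetical):
+ * the sign of the return value tells the caller whether a unit was
+ * actually reserved:
+ *
+ *	if (atomic_dec_if_positive(&pool->available) < 0)
+ *		return -EBUSY;	/* empty: the counter was left at 0 */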
+ */
+static inline int atomic_dec_if_positive(atomic_t *v)
+{
+	int c, old, dec;
+	c = atomic_read(v);
+	for (;;) {
+		dec = c - 1;
+		if (unlikely(dec < 0))
+			break;
+		old = atomic_cmpxchg((v), c, dec);
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return dec;
+}
+
+/**
+ * atomic_inc_short - increment of a short integer
+ * @v: pointer to type short int
+ *
+ * Atomically adds 1 to @v
+ * Returns the new value of @v
+ */
+static inline short int atomic_inc_short(short int *v)
+{
+	asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
+	return *v;
+}
+
+#ifdef CONFIG_X86_64
+/**
+ * atomic_or_long - OR into a long integer
+ * @v1: pointer to type unsigned long
+ * @v2: value to OR into *@v1
+ *
+ * Atomically ORs @v2 into *@v1; there is no return value.
+ */
+static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
+{
+	asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2));
+}
+#endif
+
+/* These are x86-specific, used by some header files */
+#define atomic_clear_mask(mask, addr)				\
+	asm volatile(LOCK_PREFIX "andl %0,%1"			\
+		     : : "r" (~(mask)), "m" (*(addr)) : "memory")
+
+#define atomic_set_mask(mask, addr)				\
+	asm volatile(LOCK_PREFIX "orl %0,%1"			\
+		     : : "r" ((unsigned)(mask)), "m" (*(addr))	\
+		     : "memory")
+
+/* Atomic operations are already serializing on x86 */
+#define smp_mb__before_atomic_dec()	barrier()
+#define smp_mb__after_atomic_dec()	barrier()
+#define smp_mb__before_atomic_inc()	barrier()
+#define smp_mb__after_atomic_inc()	barrier()
+
+#ifdef CONFIG_X86_32
+# include "atomic64_32.h"
+#else
+# include "atomic64_64.h"
+#endif
+
+#endif /* _ASM_X86_ATOMIC_H */
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
new file mode 100644
index 00000000..19811991
--- /dev/null
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -0,0 +1,316 @@
+#ifndef _ASM_X86_ATOMIC64_32_H
+#define _ASM_X86_ATOMIC64_32_H
+
+#include
+#include
+#include
+//#include
+
+/* A 64-bit atomic type */
+
+typedef struct {
+	u64 __aligned(8) counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(val)	{ (val) }
+
+#define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...)
+#ifndef ATOMIC64_EXPORT
+#define ATOMIC64_DECL_ONE __ATOMIC64_DECL
+#else
+#define ATOMIC64_DECL_ONE(sym) __ATOMIC64_DECL(sym); \
+	ATOMIC64_EXPORT(atomic64_##sym)
+#endif
+
+#ifdef CONFIG_X86_CMPXCHG64
+#define __alternative_atomic64(f, g, out, in...) \
+	asm volatile("call %P[func]" \
+		     : out : [func] "i" (atomic64_##g##_cx8), ## in)
+
+#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8)
+#else
+#define __alternative_atomic64(f, g, out, in...) \
+	alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \
+			 X86_FEATURE_CX8, ASM_OUTPUT2(out), ## in)
+
+#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8); \
+	ATOMIC64_DECL_ONE(sym##_386)
+
+ATOMIC64_DECL_ONE(add_386);
+ATOMIC64_DECL_ONE(sub_386);
+ATOMIC64_DECL_ONE(inc_386);
+ATOMIC64_DECL_ONE(dec_386);
+#endif
+
+#define alternative_atomic64(f, out, in...) \
+	__alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in)
+
+ATOMIC64_DECL(read);
+ATOMIC64_DECL(set);
+ATOMIC64_DECL(xchg);
+ATOMIC64_DECL(add_return);
+ATOMIC64_DECL(sub_return);
+ATOMIC64_DECL(inc_return);
+ATOMIC64_DECL(dec_return);
+ATOMIC64_DECL(dec_if_positive);
+ATOMIC64_DECL(inc_not_zero);
+ATOMIC64_DECL(add_unless);
+
+#undef ATOMIC64_DECL
+#undef ATOMIC64_DECL_ONE
+#undef __ATOMIC64_DECL
+#undef ATOMIC64_EXPORT
+
+/**
+ * atomic64_cmpxchg - cmpxchg atomic64 variable
+ * @v: pointer to type atomic64_t
+ * @o: expected value
+ * @n: new value
+ *
+ * Atomically sets @v to @n if it was equal to @o and returns
+ * the old value.
+ */
+static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
+{
+	return cmpxchg64(&v->counter, o, n);
+}
+
+/**
+ * atomic64_xchg - xchg atomic64 variable
+ * @v: pointer to type atomic64_t
+ * @n: value to assign
+ *
+ * Atomically xchgs the value of @v to @n and returns
+ * the old value.
+ */
+static inline long long atomic64_xchg(atomic64_t *v, long long n)
+{
+	long long o;
+	unsigned high = (unsigned)(n >> 32);
+	unsigned low = (unsigned)n;
+	alternative_atomic64(xchg, "=&A" (o),
+			     "S" (v), "b" (low), "c" (high)
+			     : "memory");
+	return o;
+}
+
+/**
+ * atomic64_set - set atomic64 variable
+ * @v: pointer to type atomic64_t
+ * @i: value to assign
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic64_set(atomic64_t *v, long long i)
+{
+	unsigned high = (unsigned)(i >> 32);
+	unsigned low = (unsigned)i;
+	alternative_atomic64(set, /* no output */,
+			     "S" (v), "b" (low), "c" (high)
+			     : "eax", "edx", "memory");
+}
+
+/**
+ * atomic64_read - read atomic64 variable
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically reads the value of @v and returns it.
+ */
+static inline long long atomic64_read(const atomic64_t *v)
+{
+	long long r;
+	alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
+	return r;
+}
+
+/**
+ * atomic64_add_return - add and return
+ * @i: integer value to add
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically adds @i to @v and returns @i + *@v
+ */
+static inline long long atomic64_add_return(long long i, atomic64_t *v)
+{
+	alternative_atomic64(add_return,
+			     ASM_OUTPUT2("+A" (i), "+c" (v)),
+			     ASM_NO_INPUT_CLOBBER("memory"));
+	return i;
+}
+
+/*
+ * Other variants with different arithmetic operators:
+ */
+static inline long long atomic64_sub_return(long long i, atomic64_t *v)
+{
+	alternative_atomic64(sub_return,
+			     ASM_OUTPUT2("+A" (i), "+c" (v)),
+			     ASM_NO_INPUT_CLOBBER("memory"));
+	return i;
+}
+
+static inline long long atomic64_inc_return(atomic64_t *v)
+{
+	long long a;
+	alternative_atomic64(inc_return, "=&A" (a),
+			     "S" (v) : "memory", "ecx");
+	return a;
+}
+
+static inline long long atomic64_dec_return(atomic64_t *v)
+{
+	long long a;
+	alternative_atomic64(dec_return, "=&A" (a),
+			     "S" (v) : "memory", "ecx");
+	return a;
+}
+
+/**
+ * atomic64_add - add integer to atomic64 variable
+ * @i: integer value to add
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically adds @i to @v.
+ */
+static inline long long atomic64_add(long long i, atomic64_t *v)
+{
+	__alternative_atomic64(add, add_return,
+			       ASM_OUTPUT2("+A" (i), "+c" (v)),
+			       ASM_NO_INPUT_CLOBBER("memory"));
+	return i;
+}
+
+/**
+ * atomic64_sub - subtract integer from atomic64 variable
+ * @i: integer value to subtract
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically subtracts @i from @v.
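+ *
+ * Editorial usage sketch (stats_bytes is hypothetical): together with
+ * atomic64_add() above this gives a 64-bit counter that cannot tear or
+ * wrap at 4 GiB on 32-bit x86:
+ *
+ *	atomic64_add(len, &stats_bytes);
+ *	atomic64_sub(len, &stats_bytes);	/* roll back on error */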
+ */
+static inline long long atomic64_sub(long long i, atomic64_t *v)
+{
+	__alternative_atomic64(sub, sub_return,
+			       ASM_OUTPUT2("+A" (i), "+c" (v)),
+			       ASM_NO_INPUT_CLOBBER("memory"));
+	return i;
+}
+
+/**
+ * atomic64_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int atomic64_sub_and_test(long long i, atomic64_t *v)
+{
+	return atomic64_sub_return(i, v) == 0;
+}
+
+/**
+ * atomic64_inc - increment atomic64 variable
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic64_inc(atomic64_t *v)
+{
+	__alternative_atomic64(inc, inc_return, /* no output */,
+			       "S" (v) : "memory", "eax", "ecx", "edx");
+}
+
+/**
+ * atomic64_dec - decrement atomic64 variable
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically decrements @v by 1.
+ */
+static inline void atomic64_dec(atomic64_t *v)
+{
+	__alternative_atomic64(dec, dec_return, /* no output */,
+			       "S" (v) : "memory", "eax", "ecx", "edx");
+}
+
+/**
+ * atomic64_dec_and_test - decrement and test
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic64_dec_and_test(atomic64_t *v)
+{
+	return atomic64_dec_return(v) == 0;
+}
+
+/**
+ * atomic64_inc_and_test - increment and test
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int atomic64_inc_and_test(atomic64_t *v)
+{
+	return atomic64_inc_return(v) == 0;
+}
+
+/**
+ * atomic64_add_negative - add and test if negative
+ * @i: integer value to add
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+static inline int atomic64_add_negative(long long i, atomic64_t *v)
+{
+	return atomic64_add_return(i, v) < 0;
+}
+
+/**
+ * atomic64_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if the add was done, zero otherwise.
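+ *
+ * Illustrative sketch (editorial; obj and its fields are hypothetical):
+ * the canonical use is taking a reference only while the object is
+ * still live:
+ *
+ *	if (!atomic64_add_unless(&obj->refcnt, 1, 0))
+ *		return NULL;	/* refcount already hit zero */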
+ */ +static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) +{ + unsigned low = (unsigned)u; + unsigned high = (unsigned)(u >> 32); + alternative_atomic64(add_unless, + ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)), + "S" (v) : "memory"); + return (int)a; +} + + +static inline int atomic64_inc_not_zero(atomic64_t *v) +{ + int r; + alternative_atomic64(inc_not_zero, "=&a" (r), + "S" (v) : "ecx", "edx", "memory"); + return r; +} + +static inline long long atomic64_dec_if_positive(atomic64_t *v) +{ + long long r; + alternative_atomic64(dec_if_positive, "=&A" (r), + "S" (v) : "ecx", "memory"); + return r; +} + +#undef alternative_atomic64 +#undef __alternative_atomic64 + +#endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h new file mode 100644 index 00000000..0e1cbfc8 --- /dev/null +++ b/arch/x86/include/asm/atomic64_64.h @@ -0,0 +1,243 @@ +#ifndef _ASM_X86_ATOMIC64_64_H +#define _ASM_X86_ATOMIC64_64_H + +#include +#include +#include + +/* The 64-bit atomic type */ + +#define ATOMIC64_INIT(i) { (i) } + +/** + * atomic64_read - read atomic64 variable + * @v: pointer of type atomic64_t + * + * Atomically reads the value of @v. + * Doesn't imply a read memory barrier. + */ +static inline long atomic64_read(const atomic64_t *v) +{ + return (*(volatile long *)&(v)->counter); +} + +/** + * atomic64_set - set atomic64 variable + * @v: pointer to type atomic64_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +static inline void atomic64_set(atomic64_t *v, long i) +{ + v->counter = i; +} + +/** + * atomic64_add - add integer to atomic64 variable + * @i: integer value to add + * @v: pointer to type atomic64_t + * + * Atomically adds @i to @v. + */ +static inline void atomic64_add(long i, atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "addq %1,%0" + : "=m" (v->counter) + : "er" (i), "m" (v->counter)); +} + +/** + * atomic64_sub - subtract the atomic64 variable + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v. + */ +static inline void atomic64_sub(long i, atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "subq %1,%0" + : "=m" (v->counter) + : "er" (i), "m" (v->counter)); +} + +/** + * atomic64_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer to type atomic64_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline int atomic64_sub_and_test(long i, atomic64_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "subq %2,%0; sete %1" + : "=m" (v->counter), "=qm" (c) + : "er" (i), "m" (v->counter) : "memory"); + return c; +} + +/** + * atomic64_inc - increment atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically increments @v by 1. + */ +static inline void atomic64_inc(atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "incq %0" + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic64_dec - decrement atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically decrements @v by 1. + */ +static inline void atomic64_dec(atomic64_t *v) +{ + asm volatile(LOCK_PREFIX "decq %0" + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic64_dec_and_test - decrement and test + * @v: pointer to type atomic64_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. 
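+ *
+ * Editorial sketch (dev->inflight and the completion are hypothetical):
+ * pairs with atomic64_inc() to track outstanding work, waking a waiter
+ * when the last item finishes:
+ *
+ *	atomic64_inc(&dev->inflight);
+ *	...
+ *	if (atomic64_dec_and_test(&dev->inflight))
+ *		complete(&dev->idle);	/* no requests left in flight */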
+ */
+static inline int atomic64_dec_and_test(atomic64_t *v)
+{
+	unsigned char c;
+
+	asm volatile(LOCK_PREFIX "decq %0; sete %1"
+		     : "=m" (v->counter), "=qm" (c)
+		     : "m" (v->counter) : "memory");
+	return c != 0;
+}
+
+/**
+ * atomic64_inc_and_test - increment and test
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int atomic64_inc_and_test(atomic64_t *v)
+{
+	unsigned char c;
+
+	asm volatile(LOCK_PREFIX "incq %0; sete %1"
+		     : "=m" (v->counter), "=qm" (c)
+		     : "m" (v->counter) : "memory");
+	return c != 0;
+}
+
+/**
+ * atomic64_add_negative - add and test if negative
+ * @i: integer value to add
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+static inline int atomic64_add_negative(long i, atomic64_t *v)
+{
+	unsigned char c;
+
+	asm volatile(LOCK_PREFIX "addq %2,%0; sets %1"
+		     : "=m" (v->counter), "=qm" (c)
+		     : "er" (i), "m" (v->counter) : "memory");
+	return c;
+}
+
+/**
+ * atomic64_add_return - add and return
+ * @i: integer value to add
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically adds @i to @v and returns @i + @v
+ */
+static inline long atomic64_add_return(long i, atomic64_t *v)
+{
+	return i + xadd(&v->counter, i);
+}
+
+static inline long atomic64_sub_return(long i, atomic64_t *v)
+{
+	return atomic64_add_return(-i, v);
+}
+
+#define atomic64_inc_return(v)  (atomic64_add_return(1, (v)))
+#define atomic64_dec_return(v)  (atomic64_sub_return(1, (v)))
+
+static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
+{
+	return cmpxchg(&v->counter, old, new);
+}
+
+static inline long atomic64_xchg(atomic64_t *v, long new)
+{
+	return xchg(&v->counter, new);
+}
+
+/**
+ * atomic64_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if the add was carried out, zero otherwise.
+ */
+static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
+{
+	long c, old;
+	c = atomic64_read(v);
+	for (;;) {
+		if (unlikely(c == (u)))
+			break;
+		old = atomic64_cmpxchg((v), c, c + (a));
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return c != (u);
+}
+
+#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
+
+/*
+ * atomic64_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic64_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+static inline long atomic64_dec_if_positive(atomic64_t *v)
+{
+	long c, old, dec;
+	c = atomic64_read(v);
+	for (;;) {
+		dec = c - 1;
+		if (unlikely(dec < 0))
+			break;
+		old = atomic64_cmpxchg((v), c, dec);
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return dec;
+}
+
+#endif /* _ASM_X86_ATOMIC64_64_H */
diff --git a/arch/x86/include/asm/auxvec.h b/arch/x86/include/asm/auxvec.h
new file mode 100644
index 00000000..77203ac3
--- /dev/null
+++ b/arch/x86/include/asm/auxvec.h
@@ -0,0 +1,19 @@
+#ifndef _ASM_X86_AUXVEC_H
+#define _ASM_X86_AUXVEC_H
+/*
+ * Architecture-neutral AT_ values in 0-17, leave some room
+ * for more of them, start the x86-specific ones at 32.
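+ *
+ * Editorial note: user space can query these entries via the glibc
+ * helper getauxval(3), e.g. to locate the vDSO image:
+ *
+ *	#include <sys/auxv.h>
+ *	unsigned long vdso = getauxval(AT_SYSINFO_EHDR);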
+ */
+#ifdef __i386__
+#define AT_SYSINFO		32
+#endif
+#define AT_SYSINFO_EHDR		33
+
+/* entries in ARCH_DLINFO: */
+#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64)
+# define AT_VECTOR_SIZE_ARCH 2
+#else /* else it's non-compat x86-64 */
+# define AT_VECTOR_SIZE_ARCH 1
+#endif
+
+#endif /* _ASM_X86_AUXVEC_H */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
new file mode 100644
index 00000000..c6cd358a
--- /dev/null
+++ b/arch/x86/include/asm/barrier.h
@@ -0,0 +1,116 @@
+#ifndef _ASM_X86_BARRIER_H
+#define _ASM_X86_BARRIER_H
+
+#include
+#include
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+
+#ifdef CONFIG_X86_32
+/*
+ * Some non-Intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
+#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
+#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
+#else
+#define mb() asm volatile("mfence":::"memory")
+#define rmb() asm volatile("lfence":::"memory")
+#define wmb() asm volatile("sfence" ::: "memory")
+#endif
+
+/**
+ * read_barrier_depends - Flush all pending reads that subsequent reads
+ * depend on.
+ *
+ * No data-dependent reads from memory-like regions are ever reordered
+ * over this barrier. All reads preceding this primitive are guaranteed
+ * to access memory (but not necessarily other CPUs' caches) before any
+ * reads following this primitive that depend on the data returned by
+ * any of the preceding reads. This primitive is much lighter weight than
+ * rmb() on most CPUs, and is never heavier weight than is
+ * rmb().
+ *
+ * These ordering constraints are respected by both the local CPU
+ * and the compiler.
+ *
+ * Ordering is not guaranteed by anything other than these primitives,
+ * not even by data dependencies. See the documentation for
+ * memory_barrier() for examples and URLs to more information.
+ *
+ * For example, the following code would force ordering (the initial
+ * value of "a" is zero, "b" is one, and "p" is "&a"):
+ *
+ *	CPU 0				CPU 1
+ *
+ *	b = 2;
+ *	memory_barrier();
+ *	p = &b;				q = p;
+ *					read_barrier_depends();
+ *					d = *q;
+ *
+ * because the read of "*q" depends on the read of "p" and these
+ * two reads are separated by a read_barrier_depends(). However,
+ * the following code, with the same initial values for "a" and "b":
+ *
+ *	CPU 0				CPU 1
+ *
+ *	a = 2;
+ *	memory_barrier();
+ *	b = 3;				y = b;
+ *					read_barrier_depends();
+ *					x = a;
+ *
+ * does not enforce ordering, since there is no data dependency between
+ * the read of "a" and the read of "b". Therefore, on some CPUs, such
+ * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
+ * in cases like this where there are no data dependencies.
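+ *
+ * Editorial addendum: the writer side of such sequences pairs with
+ * smp_wmb(); a flag hand-off with no data dependency needs the full
+ * smp_rmb() on the reader (all names illustrative):
+ *
+ *	CPU 0				CPU 1
+ *
+ *	data = 42;
+ *	smp_wmb();
+ *	ready = 1;			if (ready) {
+ *						smp_rmb();
+ *						BUG_ON(data != 42);
+ *					}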
+ **/ + +#define read_barrier_depends() do { } while (0) + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#ifdef CONFIG_X86_PPRO_FENCE +# define smp_rmb() rmb() +#else +# define smp_rmb() barrier() +#endif +#ifdef CONFIG_X86_OOSTORE +# define smp_wmb() wmb() +#else +# define smp_wmb() barrier() +#endif +#define smp_read_barrier_depends() read_barrier_depends() +#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while (0) +#define set_mb(var, value) do { var = value; barrier(); } while (0) +#endif + +/* + * Stop RDTSC speculation. This is needed when you need to use RDTSC + * (or get_cycles or vread that possibly accesses the TSC) in a defined + * code region. + * + * (Could use an alternative three way for this if there was one.) + */ +static __always_inline void rdtsc_barrier(void) +{ + alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); + alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); +} + +#endif /* _ASM_X86_BARRIER_H */ diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h new file mode 100644 index 00000000..aa6a3170 --- /dev/null +++ b/arch/x86/include/asm/bios_ebda.h @@ -0,0 +1,60 @@ +#ifndef _ASM_X86_BIOS_EBDA_H +#define _ASM_X86_BIOS_EBDA_H + +#include + +/* + * Returns physical address of EBDA. Returns 0 if there is no EBDA. + */ +static inline unsigned int get_bios_ebda(void) +{ + /* + * There is a real-mode segmented pointer pointing to the + * 4K EBDA area at 0x40E. + */ + unsigned int address = *(unsigned short *)phys_to_virt(0x40E); + address <<= 4; + return address; /* 0 means none */ +} + +/* + * Return the sanitized length of the EBDA in bytes, if it exists. + */ +static inline unsigned int get_bios_ebda_length(void) +{ + unsigned int address; + unsigned int length; + + address = get_bios_ebda(); + if (!address) + return 0; + + /* EBDA length is byte 0 of the EBDA (stored in KiB) */ + length = *(unsigned char *)phys_to_virt(address); + length <<= 10; + + /* Trim the length if it extends beyond 640KiB */ + length = min_t(unsigned int, (640 * 1024) - address, length); + return length; +} + +void reserve_ebda_region(void); + +#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION +/* + * This is obviously not a great place for this, but we want to be + * able to scatter it around anywhere in the kernel. + */ +void check_for_bios_corruption(void); +void start_periodic_check_for_corruption(void); +#else +static inline void check_for_bios_corruption(void) +{ +} + +static inline void start_periodic_check_for_corruption(void) +{ +} +#endif + +#endif /* _ASM_X86_BIOS_EBDA_H */ diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h new file mode 100644 index 00000000..b97596e2 --- /dev/null +++ b/arch/x86/include/asm/bitops.h @@ -0,0 +1,514 @@ +#ifndef _ASM_X86_BITOPS_H +#define _ASM_X86_BITOPS_H + +/* + * Copyright 1992, Linus Torvalds. + * + * Note: inlines with more than a single statement should be marked + * __always_inline to avoid problems with older gcc's inlining heuristics. + */ + +#ifndef _LINUX_BITOPS_H +#error only can be included directly +#endif + +#include +#include + +/* + * These have to be done with inline assembly: that way the bit-setting + * is guaranteed to be atomic. All bit operations return 0 if the bit + * was cleared before the operation and != 0 if it was not. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). 
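+ *
+ * Editorial example of that indexing rule with 32-bit longs (the
+ * bitmap variable is illustrative):
+ *
+ *	unsigned long bitmap[2] = { 0, 0 };
+ *	set_bit(0, bitmap);	/* bitmap[0] == 0x00000001 */
+ *	set_bit(32, bitmap);	/* bitmap[1] == 0x00000001 */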
+ */ + +#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) +/* Technically wrong, but this avoids compilation errors on some gcc + versions. */ +#define BITOP_ADDR(x) "=m" (*(volatile long *) (x)) +#else +#define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) +#endif + +#define ADDR BITOP_ADDR(addr) + +/* + * We do the locked ops that don't return the old value as + * a mask operation on a byte. + */ +#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr)) +#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3)) +#define CONST_MASK(nr) (1 << ((nr) & 7)) + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * + * Note: there are no guarantees that this function will not be reordered + * on non x86 architectures, so if you are writing portable code, + * make sure not to rely on its reordering guarantees. + * + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static __always_inline void +set_bit(unsigned int nr, volatile unsigned long *addr) +{ + if (IS_IMMEDIATE(nr)) { + asm volatile(LOCK_PREFIX "orb %1,%0" + : CONST_MASK_ADDR(nr, addr) + : "iq" ((u8)CONST_MASK(nr)) + : "memory"); + } else { + asm volatile(LOCK_PREFIX "bts %1,%0" + : BITOP_ADDR(addr) : "Ir" (nr) : "memory"); + } +} + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __set_bit(int nr, volatile unsigned long *addr) +{ + asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static __always_inline void +clear_bit(int nr, volatile unsigned long *addr) +{ + if (IS_IMMEDIATE(nr)) { + asm volatile(LOCK_PREFIX "andb %1,%0" + : CONST_MASK_ADDR(nr, addr) + : "iq" ((u8)~CONST_MASK(nr))); + } else { + asm volatile(LOCK_PREFIX "btr %1,%0" + : BITOP_ADDR(addr) + : "Ir" (nr)); + } +} + +/* + * clear_bit_unlock - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and implies release semantics before the memory + * operation. It can be used for an unlock. + */ +static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr) +{ + barrier(); + clear_bit(nr, addr); +} + +static inline void __clear_bit(int nr, volatile unsigned long *addr) +{ + asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); +} + +/* + * __clear_bit_unlock - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * __clear_bit() is non-atomic and implies release semantics before the memory + * operation. It can be used for an unlock if no other CPUs can concurrently + * modify other bits in the word. + * + * No memory barrier is required here, because x86 cannot reorder stores past + * older loads. 
Same principle as spin_unlock. + */ +static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr) +{ + barrier(); + __clear_bit(nr, addr); +} + +#define smp_mb__before_clear_bit() barrier() +#define smp_mb__after_clear_bit() barrier() + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to change + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __change_bit(int nr, volatile unsigned long *addr) +{ + asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); +} + +/** + * change_bit - Toggle a bit in memory + * @nr: Bit to change + * @addr: Address to start counting from + * + * change_bit() is atomic and may not be reordered. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static inline void change_bit(int nr, volatile unsigned long *addr) +{ + if (IS_IMMEDIATE(nr)) { + asm volatile(LOCK_PREFIX "xorb %1,%0" + : CONST_MASK_ADDR(nr, addr) + : "iq" ((u8)CONST_MASK(nr))); + } else { + asm volatile(LOCK_PREFIX "btc %1,%0" + : BITOP_ADDR(addr) + : "Ir" (nr)); + } +} + +/** + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_set_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile(LOCK_PREFIX "bts %2,%1\n\t" + "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); + + return oldbit; +} + +/** + * test_and_set_bit_lock - Set a bit and return its old value for lock + * @nr: Bit to set + * @addr: Address to count from + * + * This is the same as test_and_set_bit on x86. + */ +static __always_inline int +test_and_set_bit_lock(int nr, volatile unsigned long *addr) +{ + return test_and_set_bit(nr, addr); +} + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm("bts %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR + : "Ir" (nr)); + return oldbit; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile(LOCK_PREFIX "btr %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); + + return oldbit; +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. 
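+ *
+ * Editorial sketch (state_lock, PENDING_BIT and flags are
+ * hypothetical): the non-atomic form is safe once a lock serializes
+ * all writers of the word:
+ *
+ *	spin_lock(&state_lock);
+ *	was_pending = __test_and_clear_bit(PENDING_BIT, &flags);
+ *	spin_unlock(&state_lock);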
+ */ +static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile("btr %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR + : "Ir" (nr)); + return oldbit; +} + +/* WARNING: non atomic and it can be reordered! */ +static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile("btc %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR + : "Ir" (nr) : "memory"); + + return oldbit; +} + +/** + * test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static inline int test_and_change_bit(int nr, volatile unsigned long *addr) +{ + int oldbit; + + asm volatile(LOCK_PREFIX "btc %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), ADDR : "Ir" (nr) : "memory"); + + return oldbit; +} + +static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) +{ + return ((1UL << (nr % BITS_PER_LONG)) & + (addr[nr / BITS_PER_LONG])) != 0; +} + +static inline int variable_test_bit(int nr, volatile const unsigned long *addr) +{ + int oldbit; + + asm volatile("bt %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit) + : "m" (*(unsigned long *)addr), "Ir" (nr)); + + return oldbit; +} + +#if 0 /* Fool kernel-doc since it doesn't do macros yet */ +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static int test_bit(int nr, const volatile unsigned long *addr); +#endif + +#define test_bit(nr, addr) \ + (__builtin_constant_p((nr)) \ + ? constant_test_bit((nr), (addr)) \ + : variable_test_bit((nr), (addr))) + +/** + * __ffs - find first set bit in word + * @word: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static inline unsigned long __ffs(unsigned long word) +{ + asm("bsf %1,%0" + : "=r" (word) + : "rm" (word)); + return word; +} + +/** + * ffz - find first zero bit in word + * @word: The word to search + * + * Undefined if no zero exists, so code should check against ~0UL first. + */ +static inline unsigned long ffz(unsigned long word) +{ + asm("bsf %1,%0" + : "=r" (word) + : "r" (~word)); + return word; +} + +/* + * __fls: find last set bit in word + * @word: The word to search + * + * Undefined if no set bit exists, so code should check against 0 first. + */ +static inline unsigned long __fls(unsigned long word) +{ + asm("bsr %1,%0" + : "=r" (word) + : "rm" (word)); + return word; +} + +#undef ADDR + +#ifdef __KERNEL__ +/** + * ffs - find first set bit in word + * @x: the word to search + * + * This is defined the same way as the libc and compiler builtin ffs + * routines, therefore differs in spirit from the other bitops. + * + * ffs(value) returns 0 if value is 0 or the position of the first + * set bit if value is nonzero. The first (least significant) bit + * is at position 1. + */ +static inline int ffs(int x) +{ + int r; + +#ifdef CONFIG_X86_64 + /* + * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the + * dest reg is undefined if x==0, but their CPU architect says its + * value is written to set it to the same as before, except that the + * top 32 bits will be cleared. + * + * We cannot do this on 32 bits because at the very least some + * 486 CPUs did not behave this way. 
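+ *
+ * Editorial note: with either variant the documented contract holds:
+ *
+ *	ffs(0)          == 0
+ *	ffs(8)          == 4	/* 8 == 1 << 3, bits count from 1 */
+ *	ffs(0x80000000) == 32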
+ */ + long tmp = -1; + asm("bsfl %1,%0" + : "=r" (r) + : "rm" (x), "0" (tmp)); +#elif defined(CONFIG_X86_CMOV) + asm("bsfl %1,%0\n\t" + "cmovzl %2,%0" + : "=&r" (r) : "rm" (x), "r" (-1)); +#else + asm("bsfl %1,%0\n\t" + "jnz 1f\n\t" + "movl $-1,%0\n" + "1:" : "=r" (r) : "rm" (x)); +#endif + return r + 1; +} + +/** + * fls - find last set bit in word + * @x: the word to search + * + * This is defined in a similar way as the libc and compiler builtin + * ffs, but returns the position of the most significant set bit. + * + * fls(value) returns 0 if value is 0 or the position of the last + * set bit if value is nonzero. The last (most significant) bit is + * at position 32. + */ +static inline int fls(int x) +{ + int r; + +#ifdef CONFIG_X86_64 + /* + * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the + * dest reg is undefined if x==0, but their CPU architect says its + * value is written to set it to the same as before, except that the + * top 32 bits will be cleared. + * + * We cannot do this on 32 bits because at the very least some + * 486 CPUs did not behave this way. + */ + long tmp = -1; + asm("bsrl %1,%0" + : "=r" (r) + : "rm" (x), "0" (tmp)); +#elif defined(CONFIG_X86_CMOV) + asm("bsrl %1,%0\n\t" + "cmovzl %2,%0" + : "=&r" (r) : "rm" (x), "rm" (-1)); +#else + asm("bsrl %1,%0\n\t" + "jnz 1f\n\t" + "movl $-1,%0\n" + "1:" : "=r" (r) : "rm" (x)); +#endif + return r + 1; +} + +/** + * fls64 - find last set bit in a 64-bit word + * @x: the word to search + * + * This is defined in a similar way as the libc and compiler builtin + * ffsll, but returns the position of the most significant set bit. + * + * fls64(value) returns 0 if value is 0 or the position of the last + * set bit if value is nonzero. The last (most significant) bit is + * at position 64. + */ +#ifdef CONFIG_X86_64 +static __always_inline int fls64(__u64 x) +{ + long bitpos = -1; + /* + * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the + * dest reg is undefined if x==0, but their CPU architect says its + * value is written to set it to the same as before. + */ + asm("bsrq %1,%0" + : "+r" (bitpos) + : "rm" (x)); + return bitpos + 1; +} +#else +#include +#endif + +#include + +#include + +#define ARCH_HAS_FAST_MULTIPLIER 1 + +#include + +#include + +#include + +#include + +#endif /* __KERNEL__ */ +#endif /* _ASM_X86_BITOPS_H */ diff --git a/arch/x86/include/asm/bitsperlong.h b/arch/x86/include/asm/bitsperlong.h new file mode 100644 index 00000000..b0ae1c4d --- /dev/null +++ b/arch/x86/include/asm/bitsperlong.h @@ -0,0 +1,13 @@ +#ifndef __ASM_X86_BITSPERLONG_H +#define __ASM_X86_BITSPERLONG_H + +#ifdef __x86_64__ +# define __BITS_PER_LONG 64 +#else +# define __BITS_PER_LONG 32 +#endif + +#include + +#endif /* __ASM_X86_BITSPERLONG_H */ + diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h new file mode 100644 index 00000000..5e1a2eef --- /dev/null +++ b/arch/x86/include/asm/boot.h @@ -0,0 +1,47 @@ +#ifndef _ASM_X86_BOOT_H +#define _ASM_X86_BOOT_H + +/* Internal svga startup constants */ +#define NORMAL_VGA 0xffff /* 80x25 mode */ +#define EXTENDED_VGA 0xfffe /* 80x50 mode */ +#define ASK_VGA 0xfffd /* ask for it at bootup */ + +#ifdef __KERNEL__ + +#include + +/* Physical address where kernel should be loaded. 
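+
+   Editorial worked example: with CONFIG_PHYSICAL_START=0x100000 and
+   CONFIG_PHYSICAL_ALIGN=0x200000, the add-then-mask below rounds the
+   load address up to the next alignment boundary:
+
+	(0x100000 + 0x1fffff) & ~0x1fffff == 0x200000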
*/ +#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ + + (CONFIG_PHYSICAL_ALIGN - 1)) \ + & ~(CONFIG_PHYSICAL_ALIGN - 1)) + +/* Minimum kernel alignment, as a power of two */ +#ifdef CONFIG_X86_64 +#define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT +#else +#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_ORDER) +#endif +#define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) + +#if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ + (CONFIG_PHYSICAL_ALIGN < MIN_KERNEL_ALIGN) +#error "Invalid value for CONFIG_PHYSICAL_ALIGN" +#endif + +#ifdef CONFIG_KERNEL_BZIP2 +#define BOOT_HEAP_SIZE 0x400000 +#else /* !CONFIG_KERNEL_BZIP2 */ + +#define BOOT_HEAP_SIZE 0x8000 + +#endif /* !CONFIG_KERNEL_BZIP2 */ + +#ifdef CONFIG_X86_64 +#define BOOT_STACK_SIZE 0x4000 +#else +#define BOOT_STACK_SIZE 0x1000 +#endif + +#endif /* __KERNEL__ */ + +#endif /* _ASM_X86_BOOT_H */ diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h new file mode 100644 index 00000000..2f90c51c --- /dev/null +++ b/arch/x86/include/asm/bootparam.h @@ -0,0 +1,136 @@ +#ifndef _ASM_X86_BOOTPARAM_H +#define _ASM_X86_BOOTPARAM_H + +#include +#include +#include +#include +#include +#include +#include