diff options
author | Srikant Patnaik | 2015-01-13 15:08:24 +0530 |
---|---|---|
committer | Srikant Patnaik | 2015-01-13 15:08:24 +0530 |
commit | 97327692361306d1e6259021bc425e32832fdb50 (patch) | |
tree | fe9088f3248ec61e24f404f21b9793cb644b7f01 /arch/arm/mm | |
parent | 2d05a8f663478a44e088d122e0d62109bbc801d0 (diff) | |
parent | a3a8b90b61e21be3dde9101c4e86c881e0f06210 (diff) | |
download | FOSSEE-netbook-kernel-source-97327692361306d1e6259021bc425e32832fdb50.tar.gz FOSSEE-netbook-kernel-source-97327692361306d1e6259021bc425e32832fdb50.tar.bz2 FOSSEE-netbook-kernel-source-97327692361306d1e6259021bc425e32832fdb50.zip |
dirty fix to merging
Diffstat (limited to 'arch/arm/mm')
92 files changed, 23175 insertions, 0 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig new file mode 100644 index 00000000..d160cf7b --- /dev/null +++ b/arch/arm/mm/Kconfig @@ -0,0 +1,929 @@ +comment "Processor Type" + +# Select CPU types depending on the architecture selected. This selects +# which CPUs we support in the kernel image, and the compiler instruction +# optimiser behaviour. + +# ARM610 +config CPU_ARM610 + bool "Support ARM610 processor" if ARCH_RPC + select CPU_32v3 + select CPU_CACHE_V3 + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_COPY_V3 if MMU + select CPU_TLB_V3 if MMU + select CPU_PABRT_LEGACY + help + The ARM610 is the successor to the ARM3 processor + and was produced by VLSI Technology Inc. + + Say Y if you want support for the ARM610 processor. + Otherwise, say N. + +# ARM7TDMI +config CPU_ARM7TDMI + bool "Support ARM7TDMI processor" + depends on !MMU + select CPU_32v4T + select CPU_ABRT_LV4T + select CPU_PABRT_LEGACY + select CPU_CACHE_V4 + help + A 32-bit RISC microprocessor based on the ARM7 processor core + which has no memory control unit and cache. + + Say Y if you want support for the ARM7TDMI processor. + Otherwise, say N. + +# ARM710 +config CPU_ARM710 + bool "Support ARM710 processor" if ARCH_RPC + select CPU_32v3 + select CPU_CACHE_V3 + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_COPY_V3 if MMU + select CPU_TLB_V3 if MMU + select CPU_PABRT_LEGACY + help + A 32-bit RISC microprocessor based on the ARM7 processor core + designed by Advanced RISC Machines Ltd. The ARM710 is the + successor to the ARM610 processor. It was released in + July 1994 by VLSI Technology Inc. + + Say Y if you want support for the ARM710 processor. + Otherwise, say N. + +# ARM720T +config CPU_ARM720T + bool "Support ARM720T processor" if ARCH_INTEGRATOR + select CPU_32v4T + select CPU_ABRT_LV4T + select CPU_PABRT_LEGACY + select CPU_CACHE_V4 + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_COPY_V4WT if MMU + select CPU_TLB_V4WT if MMU + help + A 32-bit RISC processor with 8kByte Cache, Write Buffer and + MMU built around an ARM7TDMI core. + + Say Y if you want support for the ARM720T processor. + Otherwise, say N. + +# ARM740T +config CPU_ARM740T + bool "Support ARM740T processor" if ARCH_INTEGRATOR + depends on !MMU + select CPU_32v4T + select CPU_ABRT_LV4T + select CPU_PABRT_LEGACY + select CPU_CACHE_V3 # although the core is v4t + select CPU_CP15_MPU + help + A 32-bit RISC processor with 8KB cache or 4KB variants, + write buffer and MPU(Protection Unit) built around + an ARM7TDMI core. + + Say Y if you want support for the ARM740T processor. + Otherwise, say N. + +# ARM9TDMI +config CPU_ARM9TDMI + bool "Support ARM9TDMI processor" + depends on !MMU + select CPU_32v4T + select CPU_ABRT_NOMMU + select CPU_PABRT_LEGACY + select CPU_CACHE_V4 + help + A 32-bit RISC microprocessor based on the ARM9 processor core + which has no memory control unit and cache. + + Say Y if you want support for the ARM9TDMI processor. + Otherwise, say N. + +# ARM920T +config CPU_ARM920T + bool "Support ARM920T processor" if ARCH_INTEGRATOR + select CPU_32v4T + select CPU_ABRT_EV4T + select CPU_PABRT_LEGACY + select CPU_CACHE_V4WT + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_COPY_V4WB if MMU + select CPU_TLB_V4WBI if MMU + help + The ARM920T is licensed to be produced by numerous vendors, + and is used in the Cirrus EP93xx and the Samsung S3C2410. + + Say Y if you want support for the ARM920T processor. + Otherwise, say N. + +# ARM922T +config CPU_ARM922T + bool "Support ARM922T processor" if ARCH_INTEGRATOR + select CPU_32v4T + select CPU_ABRT_EV4T + select CPU_PABRT_LEGACY + select CPU_CACHE_V4WT + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_COPY_V4WB if MMU + select CPU_TLB_V4WBI if MMU + help + The ARM922T is a version of the ARM920T, but with smaller + instruction and data caches. It is used in Altera's + Excalibur XA device family and Micrel's KS8695 Centaur. + + Say Y if you want support for the ARM922T processor. + Otherwise, say N. + +# ARM925T +config CPU_ARM925T + bool "Support ARM925T processor" if ARCH_OMAP1 + select CPU_32v4T + select CPU_ABRT_EV4T + select CPU_PABRT_LEGACY + select CPU_CACHE_V4WT + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_COPY_V4WB if MMU + select CPU_TLB_V4WBI if MMU + help + The ARM925T is a mix between the ARM920T and ARM926T, but with + different instruction and data caches. It is used in TI's OMAP + device family. + + Say Y if you want support for the ARM925T processor. + Otherwise, say N. + +# ARM926T +config CPU_ARM926T + bool "Support ARM926T processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB + select CPU_32v5 + select CPU_ABRT_EV5TJ + select CPU_PABRT_LEGACY + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_COPY_V4WB if MMU + select CPU_TLB_V4WBI if MMU + help + This is a variant of the ARM920. It has slightly different + instruction sequences for cache and TLB operations. Curiously, + there is no documentation on it at the ARM corporate website. + + Say Y if you want support for the ARM926T processor. + Otherwise, say N. + +# FA526 +config CPU_FA526 + bool + select CPU_32v4 + select CPU_ABRT_EV4 + select CPU_PABRT_LEGACY + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_CACHE_FA + select CPU_COPY_FA if MMU + select CPU_TLB_FA if MMU + help + The FA526 is a version of the ARMv4 compatible processor with + Branch Target Buffer, Unified TLB and cache line size 16. + + Say Y if you want support for the FA526 processor. + Otherwise, say N. + +# ARM940T +config CPU_ARM940T + bool "Support ARM940T processor" if ARCH_INTEGRATOR + depends on !MMU + select CPU_32v4T + select CPU_ABRT_NOMMU + select CPU_PABRT_LEGACY + select CPU_CACHE_VIVT + select CPU_CP15_MPU + help + ARM940T is a member of the ARM9TDMI family of general- + purpose microprocessors with MPU and separate 4KB + instruction and 4KB data cases, each with a 4-word line + length. + + Say Y if you want support for the ARM940T processor. + Otherwise, say N. + +# ARM946E-S +config CPU_ARM946E + bool "Support ARM946E-S processor" if ARCH_INTEGRATOR + depends on !MMU + select CPU_32v5 + select CPU_ABRT_NOMMU + select CPU_PABRT_LEGACY + select CPU_CACHE_VIVT + select CPU_CP15_MPU + help + ARM946E-S is a member of the ARM9E-S family of high- + performance, 32-bit system-on-chip processor solutions. + The TCM and ARMv5TE 32-bit instruction set is supported. + + Say Y if you want support for the ARM946E-S processor. + Otherwise, say N. + +# ARM1020 - needs validating +config CPU_ARM1020 + bool "Support ARM1020T (rev 0) processor" if ARCH_INTEGRATOR + select CPU_32v5 + select CPU_ABRT_EV4T + select CPU_PABRT_LEGACY + select CPU_CACHE_V4WT + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_COPY_V4WB if MMU + select CPU_TLB_V4WBI if MMU + help + The ARM1020 is the 32K cached version of the ARM10 processor, + with an addition of a floating-point unit. + + Say Y if you want support for the ARM1020 processor. + Otherwise, say N. + +# ARM1020E - needs validating +config CPU_ARM1020E + bool "Support ARM1020E processor" if ARCH_INTEGRATOR + select CPU_32v5 + select CPU_ABRT_EV4T + select CPU_PABRT_LEGACY + select CPU_CACHE_V4WT + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_COPY_V4WB if MMU + select CPU_TLB_V4WBI if MMU + depends on n + +# ARM1022E +config CPU_ARM1022 + bool "Support ARM1022E processor" if ARCH_INTEGRATOR + select CPU_32v5 + select CPU_ABRT_EV4T + select CPU_PABRT_LEGACY + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_COPY_V4WB if MMU # can probably do better + select CPU_TLB_V4WBI if MMU + help + The ARM1022E is an implementation of the ARMv5TE architecture + based upon the ARM10 integer core with a 16KiB L1 Harvard cache, + embedded trace macrocell, and a floating-point unit. + + Say Y if you want support for the ARM1022E processor. + Otherwise, say N. + +# ARM1026EJ-S +config CPU_ARM1026 + bool "Support ARM1026EJ-S processor" if ARCH_INTEGRATOR + select CPU_32v5 + select CPU_ABRT_EV5T # But need Jazelle, but EV5TJ ignores bit 10 + select CPU_PABRT_LEGACY + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_COPY_V4WB if MMU # can probably do better + select CPU_TLB_V4WBI if MMU + help + The ARM1026EJ-S is an implementation of the ARMv5TEJ architecture + based upon the ARM10 integer core. + + Say Y if you want support for the ARM1026EJ-S processor. + Otherwise, say N. + +# SA110 +config CPU_SA110 + bool "Support StrongARM(R) SA-110 processor" if ARCH_RPC + select CPU_32v3 if ARCH_RPC + select CPU_32v4 if !ARCH_RPC + select CPU_ABRT_EV4 + select CPU_PABRT_LEGACY + select CPU_CACHE_V4WB + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_COPY_V4WB if MMU + select CPU_TLB_V4WB if MMU + help + The Intel StrongARM(R) SA-110 is a 32-bit microprocessor and + is available at five speeds ranging from 100 MHz to 233 MHz. + More information is available at + <http://developer.intel.com/design/strong/sa110.htm>. + + Say Y if you want support for the SA-110 processor. + Otherwise, say N. + +# SA1100 +config CPU_SA1100 + bool + select CPU_32v4 + select CPU_ABRT_EV4 + select CPU_PABRT_LEGACY + select CPU_CACHE_V4WB + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_TLB_V4WB if MMU + +# XScale +config CPU_XSCALE + bool + select CPU_32v5 + select CPU_ABRT_EV5T + select CPU_PABRT_LEGACY + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_TLB_V4WBI if MMU + +# XScale Core Version 3 +config CPU_XSC3 + bool + select CPU_32v5 + select CPU_ABRT_EV5T + select CPU_PABRT_LEGACY + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_TLB_V4WBI if MMU + select IO_36 + +# Marvell PJ1 (Mohawk) +config CPU_MOHAWK + bool + select CPU_32v5 + select CPU_ABRT_EV5T + select CPU_PABRT_LEGACY + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_TLB_V4WBI if MMU + select CPU_COPY_V4WB if MMU + +# Feroceon +config CPU_FEROCEON + bool + select CPU_32v5 + select CPU_ABRT_EV5T + select CPU_PABRT_LEGACY + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_COPY_FEROCEON if MMU + select CPU_TLB_FEROCEON if MMU + +config CPU_FEROCEON_OLD_ID + bool "Accept early Feroceon cores with an ARM926 ID" + depends on CPU_FEROCEON && !CPU_ARM926T + default y + help + This enables the usage of some old Feroceon cores + for which the CPU ID is equal to the ARM926 ID. + Relevant for Feroceon-1850 and early Feroceon-2850. + +# Marvell PJ4 +config CPU_PJ4 + bool + select CPU_V7 + select ARM_THUMBEE + +# ARMv6 +config CPU_V6 + bool "Support ARM V6 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX + select CPU_32v6 + select CPU_ABRT_EV6 + select CPU_PABRT_V6 + select CPU_CACHE_V6 + select CPU_CACHE_VIPT + select CPU_CP15_MMU + select CPU_HAS_ASID if MMU + select CPU_COPY_V6 if MMU + select CPU_TLB_V6 if MMU + +# ARMv6k +config CPU_V6K + bool "Support ARM V6K processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX + select CPU_32v6 + select CPU_32v6K + select CPU_ABRT_EV6 + select CPU_PABRT_V6 + select CPU_CACHE_V6 + select CPU_CACHE_VIPT + select CPU_CP15_MMU + select CPU_HAS_ASID if MMU + select CPU_COPY_V6 if MMU + select CPU_TLB_V6 if MMU + +# ARMv7 +config CPU_V7 + bool "Support ARM V7 processor" if ARCH_INTEGRATOR || MACH_REALVIEW_EB || MACH_REALVIEW_PBX || ARCH_WMT + default y if ARCH_WMT + select CPU_32v6K + select CPU_32v7 + select CPU_ABRT_EV7 + select CPU_PABRT_V7 + select CPU_CACHE_V7 + select CPU_CACHE_VIPT + select CPU_CP15_MMU + select CPU_HAS_ASID if MMU + select CPU_COPY_V6 if MMU + select CPU_TLB_V7 if MMU + +# Figure out what processor architecture version we should be using. +# This defines the compiler instruction set which depends on the machine type. +config CPU_32v3 + bool + select TLS_REG_EMUL if SMP || !MMU + select NEEDS_SYSCALL_FOR_CMPXCHG if SMP + select CPU_USE_DOMAINS if MMU + +config CPU_32v4 + bool + select TLS_REG_EMUL if SMP || !MMU + select NEEDS_SYSCALL_FOR_CMPXCHG if SMP + select CPU_USE_DOMAINS if MMU + +config CPU_32v4T + bool + select TLS_REG_EMUL if SMP || !MMU + select NEEDS_SYSCALL_FOR_CMPXCHG if SMP + select CPU_USE_DOMAINS if MMU + +config CPU_32v5 + bool + select TLS_REG_EMUL if SMP || !MMU + select NEEDS_SYSCALL_FOR_CMPXCHG if SMP + select CPU_USE_DOMAINS if MMU + +config CPU_32v6 + bool + select TLS_REG_EMUL if !CPU_32v6K && !MMU + select CPU_USE_DOMAINS if CPU_V6 && MMU + +config CPU_32v6K + bool + +config CPU_32v7 + bool + +# The abort model +config CPU_ABRT_NOMMU + bool + +config CPU_ABRT_EV4 + bool + +config CPU_ABRT_EV4T + bool + +config CPU_ABRT_LV4T + bool + +config CPU_ABRT_EV5T + bool + +config CPU_ABRT_EV5TJ + bool + +config CPU_ABRT_EV6 + bool + +config CPU_ABRT_EV7 + bool + +config CPU_PABRT_LEGACY + bool + +config CPU_PABRT_V6 + bool + +config CPU_PABRT_V7 + bool + +# The cache model +config CPU_CACHE_V3 + bool + +config CPU_CACHE_V4 + bool + +config CPU_CACHE_V4WT + bool + +config CPU_CACHE_V4WB + bool + +config CPU_CACHE_V6 + bool + +config CPU_CACHE_V7 + bool + +config CPU_CACHE_VIVT + bool + +config CPU_CACHE_VIPT + bool + +config CPU_CACHE_FA + bool + +if MMU +# The copy-page model +config CPU_COPY_V3 + bool + +config CPU_COPY_V4WT + bool + +config CPU_COPY_V4WB + bool + +config CPU_COPY_FEROCEON + bool + +config CPU_COPY_FA + bool + +config CPU_COPY_V6 + bool + +# This selects the TLB model +config CPU_TLB_V3 + bool + help + ARM Architecture Version 3 TLB. + +config CPU_TLB_V4WT + bool + help + ARM Architecture Version 4 TLB with writethrough cache. + +config CPU_TLB_V4WB + bool + help + ARM Architecture Version 4 TLB with writeback cache. + +config CPU_TLB_V4WBI + bool + help + ARM Architecture Version 4 TLB with writeback cache and invalidate + instruction cache entry. + +config CPU_TLB_FEROCEON + bool + help + Feroceon TLB (v4wbi with non-outer-cachable page table walks). + +config CPU_TLB_FA + bool + help + Faraday ARM FA526 architecture, unified TLB with writeback cache + and invalidate instruction cache entry. Branch target buffer is + also supported. + +config CPU_TLB_V6 + bool + +config CPU_TLB_V7 + bool + +config VERIFY_PERMISSION_FAULT + bool +endif + +config CPU_HAS_ASID + bool + help + This indicates whether the CPU has the ASID register; used to + tag TLB and possibly cache entries. + +config CPU_CP15 + bool + help + Processor has the CP15 register. + +config CPU_CP15_MMU + bool + select CPU_CP15 + help + Processor has the CP15 register, which has MMU related registers. + +config CPU_CP15_MPU + bool + select CPU_CP15 + help + Processor has the CP15 register, which has MPU related registers. + +config CPU_USE_DOMAINS + bool + help + This option enables or disables the use of domain switching + via the set_fs() function. + +# +# CPU supports 36-bit I/O +# +config IO_36 + bool + +comment "Processor Features" + +config ARM_LPAE + bool "Support for the Large Physical Address Extension" + depends on MMU && CPU_32v7 && !CPU_32v6 && !CPU_32v5 && \ + !CPU_32v4 && !CPU_32v3 + help + Say Y if you have an ARMv7 processor supporting the LPAE page + table format and you would like to access memory beyond the + 4GB limit. The resulting kernel image will not run on + processors without the LPA extension. + + If unsure, say N. + +config ARCH_PHYS_ADDR_T_64BIT + def_bool ARM_LPAE + +config ARCH_DMA_ADDR_T_64BIT + bool + +config ARM_THUMB + bool "Support Thumb user binaries" + depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V6K || CPU_V7 || CPU_FEROCEON + default y + help + Say Y if you want to include kernel support for running user space + Thumb binaries. + + The Thumb instruction set is a compressed form of the standard ARM + instruction set resulting in smaller binaries at the expense of + slightly less efficient code. + + If you don't know what this all is, saying Y is a safe choice. + +config ARM_THUMBEE + bool "Enable ThumbEE CPU extension" + depends on CPU_V7 + help + Say Y here if you have a CPU with the ThumbEE extension and code to + make use of it. Say N for code that can run on CPUs without ThumbEE. + +config SWP_EMULATE + bool "Emulate SWP/SWPB instructions" + depends on !CPU_USE_DOMAINS && CPU_V7 + select HAVE_PROC_CPU if PROC_FS + default y if SMP + help + ARMv6 architecture deprecates use of the SWP/SWPB instructions. + ARMv7 multiprocessing extensions introduce the ability to disable + these instructions, triggering an undefined instruction exception + when executed. Say Y here to enable software emulation of these + instructions for userspace (not kernel) using LDREX/STREX. + Also creates /proc/cpu/swp_emulation for statistics. + + In some older versions of glibc [<=2.8] SWP is used during futex + trylock() operations with the assumption that the code will not + be preempted. This invalid assumption may be more likely to fail + with SWP emulation enabled, leading to deadlock of the user + application. + + NOTE: when accessing uncached shared regions, LDREX/STREX rely + on an external transaction monitoring block called a global + monitor to maintain update atomicity. If your system does not + implement a global monitor, this option can cause programs that + perform SWP operations to uncached memory to deadlock. + + If unsure, say Y. + +config CPU_BIG_ENDIAN + bool "Build big-endian kernel" + depends on ARCH_SUPPORTS_BIG_ENDIAN + help + Say Y if you plan on running a kernel in big-endian mode. + Note that your board must be properly built and your board + port must properly enable any big-endian related features + of your chipset/board/processor. + +config CPU_ENDIAN_BE8 + bool + depends on CPU_BIG_ENDIAN + default CPU_V6 || CPU_V6K || CPU_V7 + help + Support for the BE-8 (big-endian) mode on ARMv6 and ARMv7 processors. + +config CPU_ENDIAN_BE32 + bool + depends on CPU_BIG_ENDIAN + default !CPU_ENDIAN_BE8 + help + Support for the BE-32 (big-endian) mode on pre-ARMv6 processors. + +config CPU_HIGH_VECTOR + depends on !MMU && CPU_CP15 && !CPU_ARM740T + bool "Select the High exception vector" + help + Say Y here to select high exception vector(0xFFFF0000~). + The exception vector can vary depending on the platform + design in nommu mode. If your platform needs to select + high exception vector, say Y. + Otherwise or if you are unsure, say N, and the low exception + vector (0x00000000~) will be used. + +config CPU_ICACHE_DISABLE + bool "Disable I-Cache (I-bit)" + depends on CPU_CP15 && !(CPU_ARM610 || CPU_ARM710 || CPU_ARM720T || CPU_ARM740T || CPU_XSCALE || CPU_XSC3) + help + Say Y here to disable the processor instruction cache. Unless + you have a reason not to or are unsure, say N. + +config CPU_DCACHE_DISABLE + bool "Disable D-Cache (C-bit)" + depends on CPU_CP15 + help + Say Y here to disable the processor data cache. Unless + you have a reason not to or are unsure, say N. + +config CPU_DCACHE_SIZE + hex + depends on CPU_ARM740T || CPU_ARM946E + default 0x00001000 if CPU_ARM740T + default 0x00002000 # default size for ARM946E-S + help + Some cores are synthesizable to have various sized cache. For + ARM946E-S case, it can vary from 0KB to 1MB. + To support such cache operations, it is efficient to know the size + before compile time. + If your SoC is configured to have a different size, define the value + here with proper conditions. + +config CPU_DCACHE_WRITETHROUGH + bool "Force write through D-cache" + depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_FA526) && !CPU_DCACHE_DISABLE + default y if CPU_ARM925T + help + Say Y here to use the data cache in writethrough mode. Unless you + specifically require this or are unsure, say N. + +config CPU_CACHE_ROUND_ROBIN + bool "Round robin I and D cache replacement algorithm" + depends on (CPU_ARM926T || CPU_ARM946E || CPU_ARM1020) && (!CPU_ICACHE_DISABLE || !CPU_DCACHE_DISABLE) + help + Say Y here to use the predictable round-robin cache replacement + policy. Unless you specifically require this or are unsure, say N. + +config CPU_BPREDICT_DISABLE + bool "Disable branch prediction" + depends on CPU_ARM1020 || CPU_V6 || CPU_V6K || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526 + help + Say Y here to disable branch prediction. If unsure, say N. + +config TLS_REG_EMUL + bool + help + An SMP system using a pre-ARMv6 processor (there are apparently + a few prototypes like that in existence) and therefore access to + that required register must be emulated. + +config NEEDS_SYSCALL_FOR_CMPXCHG + bool + help + SMP on a pre-ARMv6 processor? Well OK then. + Forget about fast user space cmpxchg support. + It is just not possible. + +config DMA_CACHE_RWFO + bool "Enable read/write for ownership DMA cache maintenance" + depends on CPU_V6K && SMP + default y + help + The Snoop Control Unit on ARM11MPCore does not detect the + cache maintenance operations and the dma_{map,unmap}_area() + functions may leave stale cache entries on other CPUs. By + enabling this option, Read or Write For Ownership in the ARMv6 + DMA cache maintenance functions is performed. These LDR/STR + instructions change the cache line state to shared or modified + so that the cache operation has the desired effect. + + Note that the workaround is only valid on processors that do + not perform speculative loads into the D-cache. For such + processors, if cache maintenance operations are not broadcast + in hardware, other workarounds are needed (e.g. cache + maintenance broadcasting in software via FIQ). + +config OUTER_CACHE + bool + +config OUTER_CACHE_SYNC + bool + help + The outer cache has a outer_cache_fns.sync function pointer + that can be used to drain the write buffer of the outer cache. + +config CACHE_FEROCEON_L2 + bool "Enable the Feroceon L2 cache controller" + depends on ARCH_KIRKWOOD || ARCH_MV78XX0 + default y + select OUTER_CACHE + help + This option enables the Feroceon L2 cache controller. + +config CACHE_FEROCEON_L2_WRITETHROUGH + bool "Force Feroceon L2 cache write through" + depends on CACHE_FEROCEON_L2 + help + Say Y here to use the Feroceon L2 cache in writethrough mode. + Unless you specifically require this, say N for writeback mode. + +config MIGHT_HAVE_CACHE_L2X0 + bool + help + This option should be selected by machines which have a L2x0 + or PL310 cache controller, but where its use is optional. + + The only effect of this option is to make CACHE_L2X0 and + related options available to the user for configuration. + + Boards or SoCs which always require the cache controller + support to be present should select CACHE_L2X0 directly + instead of this option, thus preventing the user from + inadvertently configuring a broken kernel. + +config CACHE_L2X0 + bool "Enable the L2x0 outer cache controller" if MIGHT_HAVE_CACHE_L2X0 + default MIGHT_HAVE_CACHE_L2X0 + select OUTER_CACHE + select OUTER_CACHE_SYNC + help + This option enables the L2x0 PrimeCell. + +config CACHE_PL310 + bool + depends on CACHE_L2X0 + default y if CPU_V7 && !(CPU_V6 || CPU_V6K) + help + This option enables optimisations for the PL310 cache + controller. + +config CACHE_TAUROS2 + bool "Enable the Tauros2 L2 cache controller" + depends on (ARCH_DOVE || ARCH_MMP || CPU_PJ4) + default y + select OUTER_CACHE + help + This option enables the Tauros2 L2 cache controller (as + found on PJ1/PJ4). + +config CACHE_XSC3L2 + bool "Enable the L2 cache on XScale3" + depends on CPU_XSC3 + default y + select OUTER_CACHE + help + This option enables the L2 cache on XScale3. + +config ARM_L1_CACHE_SHIFT_6 + bool + default y if CPU_V7 + help + Setting ARM L1 cache line size to 64 Bytes. + +config ARM_L1_CACHE_SHIFT + int + default 6 if ARM_L1_CACHE_SHIFT_6 + default 5 + +config ARM_DMA_MEM_BUFFERABLE + bool "Use non-cacheable memory for DMA" if (CPU_V6 || CPU_V6K) && !CPU_V7 + depends on !(MACH_REALVIEW_PB1176 || REALVIEW_EB_ARM11MP || \ + MACH_REALVIEW_PB11MP) + default y if CPU_V6 || CPU_V6K || CPU_V7 + help + Historically, the kernel has used strongly ordered mappings to + provide DMA coherent memory. With the advent of ARMv7, mapping + memory with differing types results in unpredictable behaviour, + so on these CPUs, this option is forced on. + + Multiple mappings with differing attributes is also unpredictable + on ARMv6 CPUs, but since they do not have aggressive speculative + prefetch, no harm appears to occur. + + However, drivers may be missing the necessary barriers for ARMv6, + and therefore turning this on may result in unpredictable driver + behaviour. Therefore, we offer this as an option. + + You are recommended say 'Y' here and debug any affected drivers. + +config ARCH_HAS_BARRIERS + bool + help + This option allows the use of custom mandatory barriers + included via the mach/barriers.h file. + +config ARCH_WMT_SMP_CACHEPOLICY_WRITEBACK + bool "Set SMP kernel cache policy as write-back" + depends on ARCH_WMT && CPU_V7 && SMP + default y + help + This option set cache policy as write-back to improve memcpy() + speed. diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile new file mode 100644 index 00000000..37da2cc8 --- /dev/null +++ b/arch/arm/mm/Makefile @@ -0,0 +1,103 @@ +# +# Makefile for the linux arm-specific parts of the memory manager. +# + +obj-y := dma-mapping.o extable.o fault.o init.o \ + iomap.o + +obj-$(CONFIG_MMU) += fault-armv.o flush.o idmap.o ioremap.o \ + mmap.o pgd.o mmu.o vmregion.o +obj-$(CONFIG_DEBUG_RODATA) += rodata.o + +ifneq ($(CONFIG_MMU),y) +obj-y += nommu.o +endif + +obj-$(CONFIG_MODULES) += proc-syms.o + +obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o +obj-$(CONFIG_HIGHMEM) += highmem.o + +obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o +obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o +obj-$(CONFIG_CPU_ABRT_EV4T) += abort-ev4t.o +obj-$(CONFIG_CPU_ABRT_LV4T) += abort-lv4t.o +obj-$(CONFIG_CPU_ABRT_EV5T) += abort-ev5t.o +obj-$(CONFIG_CPU_ABRT_EV5TJ) += abort-ev5tj.o +obj-$(CONFIG_CPU_ABRT_EV6) += abort-ev6.o +obj-$(CONFIG_CPU_ABRT_EV7) += abort-ev7.o + +AFLAGS_abort-ev6.o :=-Wa,-march=armv6k +AFLAGS_abort-ev7.o :=-Wa,-march=armv7-a + +obj-$(CONFIG_CPU_PABRT_LEGACY) += pabort-legacy.o +obj-$(CONFIG_CPU_PABRT_V6) += pabort-v6.o +obj-$(CONFIG_CPU_PABRT_V7) += pabort-v7.o + +obj-$(CONFIG_CPU_CACHE_V3) += cache-v3.o +obj-$(CONFIG_CPU_CACHE_V4) += cache-v4.o +obj-$(CONFIG_CPU_CACHE_V4WT) += cache-v4wt.o +obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o +obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o +obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o +obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o + +AFLAGS_cache-v6.o :=-Wa,-march=armv6 +AFLAGS_cache-v7.o :=-Wa,-march=armv7-a + +obj-$(CONFIG_CPU_COPY_V3) += copypage-v3.o +obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o +obj-$(CONFIG_CPU_COPY_V4WB) += copypage-v4wb.o +obj-$(CONFIG_CPU_COPY_FEROCEON) += copypage-feroceon.o +obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o context.o +obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o +obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o +obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o +obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o + +obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o +obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o +obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o +obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o +obj-$(CONFIG_CPU_TLB_FEROCEON) += tlb-v4wbi.o # reuse v4wbi TLB functions +obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o +obj-$(CONFIG_CPU_TLB_V7) += tlb-v7.o +obj-$(CONFIG_CPU_TLB_FA) += tlb-fa.o + +AFLAGS_tlb-v6.o :=-Wa,-march=armv6 +AFLAGS_tlb-v7.o :=-Wa,-march=armv7-a + +obj-$(CONFIG_CPU_ARM610) += proc-arm6_7.o +obj-$(CONFIG_CPU_ARM710) += proc-arm6_7.o +obj-$(CONFIG_CPU_ARM7TDMI) += proc-arm7tdmi.o +obj-$(CONFIG_CPU_ARM720T) += proc-arm720.o +obj-$(CONFIG_CPU_ARM740T) += proc-arm740.o +obj-$(CONFIG_CPU_ARM9TDMI) += proc-arm9tdmi.o +obj-$(CONFIG_CPU_ARM920T) += proc-arm920.o +obj-$(CONFIG_CPU_ARM922T) += proc-arm922.o +obj-$(CONFIG_CPU_ARM925T) += proc-arm925.o +obj-$(CONFIG_CPU_ARM926T) += proc-arm926.o +obj-$(CONFIG_CPU_ARM940T) += proc-arm940.o +obj-$(CONFIG_CPU_ARM946E) += proc-arm946.o +obj-$(CONFIG_CPU_FA526) += proc-fa526.o +obj-$(CONFIG_CPU_ARM1020) += proc-arm1020.o +obj-$(CONFIG_CPU_ARM1020E) += proc-arm1020e.o +obj-$(CONFIG_CPU_ARM1022) += proc-arm1022.o +obj-$(CONFIG_CPU_ARM1026) += proc-arm1026.o +obj-$(CONFIG_CPU_SA110) += proc-sa110.o +obj-$(CONFIG_CPU_SA1100) += proc-sa1100.o +obj-$(CONFIG_CPU_XSCALE) += proc-xscale.o +obj-$(CONFIG_CPU_XSC3) += proc-xsc3.o +obj-$(CONFIG_CPU_MOHAWK) += proc-mohawk.o +obj-$(CONFIG_CPU_FEROCEON) += proc-feroceon.o +obj-$(CONFIG_CPU_V6) += proc-v6.o +obj-$(CONFIG_CPU_V6K) += proc-v6.o +obj-$(CONFIG_CPU_V7) += proc-v7.o + +AFLAGS_proc-v6.o :=-Wa,-march=armv6 +AFLAGS_proc-v7.o :=-Wa,-march=armv7-a + +obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o +obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o +obj-$(CONFIG_CACHE_XSC3L2) += cache-xsc3l2.o +obj-$(CONFIG_CACHE_TAUROS2) += cache-tauros2.o diff --git a/arch/arm/mm/abort-ev4.S b/arch/arm/mm/abort-ev4.S new file mode 100644 index 00000000..54473cd4 --- /dev/null +++ b/arch/arm/mm/abort-ev4.S @@ -0,0 +1,25 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> +/* + * Function: v4_early_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. + */ + .align 5 +ENTRY(v4_early_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + ldr r3, [r4] @ read aborted ARM instruction + bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR + tst r3, #1 << 20 @ L = 1 -> write? + orreq r1, r1, #1 << 11 @ yes. + b do_DataAbort diff --git a/arch/arm/mm/abort-ev4t.S b/arch/arm/mm/abort-ev4t.S new file mode 100644 index 00000000..9da704e7 --- /dev/null +++ b/arch/arm/mm/abort-ev4t.S @@ -0,0 +1,27 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "abort-macro.S" +/* + * Function: v4t_early_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. + */ + .align 5 +ENTRY(v4t_early_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 + ldreq r3, [r4] @ read aborted ARM instruction + bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR + tst r3, #1 << 20 @ check write + orreq r1, r1, #1 << 11 + b do_DataAbort diff --git a/arch/arm/mm/abort-ev5t.S b/arch/arm/mm/abort-ev5t.S new file mode 100644 index 00000000..a0908d46 --- /dev/null +++ b/arch/arm/mm/abort-ev5t.S @@ -0,0 +1,28 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "abort-macro.S" +/* + * Function: v5t_early_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. + */ + .align 5 +ENTRY(v5t_early_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 + ldreq r3, [r4] @ read aborted ARM instruction + bic r1, r1, #1 << 11 @ clear bits 11 of FSR + do_ldrd_abort tmp=ip, insn=r3 + tst r3, #1 << 20 @ check write + orreq r1, r1, #1 << 11 + b do_DataAbort diff --git a/arch/arm/mm/abort-ev5tj.S b/arch/arm/mm/abort-ev5tj.S new file mode 100644 index 00000000..4006b7a6 --- /dev/null +++ b/arch/arm/mm/abort-ev5tj.S @@ -0,0 +1,30 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "abort-macro.S" +/* + * Function: v5tj_early_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. + */ + .align 5 +ENTRY(v5tj_early_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR + tst r5, #PSR_J_BIT @ Java? + bne do_DataAbort + do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 + ldreq r3, [r4] @ read aborted ARM instruction + do_ldrd_abort tmp=ip, insn=r3 + tst r3, #1 << 20 @ L = 0 -> write + orreq r1, r1, #1 << 11 @ yes. + b do_DataAbort diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S new file mode 100644 index 00000000..80741992 --- /dev/null +++ b/arch/arm/mm/abort-ev6.S @@ -0,0 +1,48 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "abort-macro.S" +/* + * Function: v6_early_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. + */ + .align 5 +ENTRY(v6_early_abort) +#ifdef CONFIG_CPU_V6 + sub r1, sp, #4 @ Get unused stack location + strex r0, r1, [r1] @ Clear the exclusive monitor +#elif defined(CONFIG_CPU_32v6K) + clrex +#endif + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR +/* + * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR. + */ +#ifdef CONFIG_ARM_ERRATA_326103 + ldr ip, =0x4107b36 + mrc p15, 0, r3, c0, c0, 0 @ get processor id + teq ip, r3, lsr #4 @ r0 ARM1136? + bne do_DataAbort + tst r5, #PSR_J_BIT @ Java? + tsteq r5, #PSR_T_BIT @ Thumb? + bne do_DataAbort + bic r1, r1, #1 << 11 @ clear bit 11 of FSR + ldr r3, [r4] @ read aborted ARM instruction +#ifdef CONFIG_CPU_ENDIAN_BE8 + rev r3, r3 +#endif + do_ldrd_abort tmp=ip, insn=r3 + tst r3, #1 << 20 @ L = 0 -> write + orreq r1, r1, #1 << 11 @ yes. +#endif + b do_DataAbort diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S new file mode 100644 index 00000000..70337527 --- /dev/null +++ b/arch/arm/mm/abort-ev7.S @@ -0,0 +1,51 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> +/* + * Function: v7_early_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current aborted instruction. + */ + .align 5 +ENTRY(v7_early_abort) + /* + * The effect of data aborts on on the exclusive access monitor are + * UNPREDICTABLE. Do a CLREX to clear the state + */ + clrex + + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + + /* + * V6 code adjusts the returned DFSR. + * New designs should not need to patch up faults. + */ + +#if defined(CONFIG_VERIFY_PERMISSION_FAULT) + /* + * Detect erroneous permission failures and fix + */ + ldr r3, =0x40d @ On permission fault + and r3, r1, r3 + cmp r3, #0x0d + bne do_DataAbort + + mcr p15, 0, r0, c7, c8, 0 @ Retranslate FAR + isb + mrc p15, 0, ip, c7, c4, 0 @ Read the PAR + and r3, ip, #0x7b @ On translation fault + cmp r3, #0x0b + bne do_DataAbort + bic r1, r1, #0xf @ Fix up FSR FS[5:0] + and ip, ip, #0x7e + orr r1, r1, ip, LSR #1 +#endif + + b do_DataAbort +ENDPROC(v7_early_abort) diff --git a/arch/arm/mm/abort-lv4t.S b/arch/arm/mm/abort-lv4t.S new file mode 100644 index 00000000..f3982580 --- /dev/null +++ b/arch/arm/mm/abort-lv4t.S @@ -0,0 +1,220 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> +/* + * Function: v4t_late_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4-r5, r10-r11, r13 preserved + * + * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. + */ +ENTRY(v4t_late_abort) + tst r5, #PSR_T_BIT @ check for thumb mode +#ifdef CONFIG_CPU_CP15_MMU + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR +#else + mov r0, #0 @ clear r0, r1 (no FSR/FAR) + mov r1, #0 +#endif + bne .data_thumb_abort + ldr r8, [r4] @ read arm instruction + tst r8, #1 << 20 @ L = 1 -> write? + orreq r1, r1, #1 << 11 @ yes. + and r7, r8, #15 << 24 + add pc, pc, r7, lsr #22 @ Now branch to the relevant processing routine + nop + +/* 0 */ b .data_arm_lateldrhpost @ ldrh rd, [rn], #m/rm +/* 1 */ b .data_arm_lateldrhpre @ ldrh rd, [rn, #m/rm] +/* 2 */ b .data_unknown +/* 3 */ b .data_unknown +/* 4 */ b .data_arm_lateldrpostconst @ ldr rd, [rn], #m +/* 5 */ b .data_arm_lateldrpreconst @ ldr rd, [rn, #m] +/* 6 */ b .data_arm_lateldrpostreg @ ldr rd, [rn], rm +/* 7 */ b .data_arm_lateldrprereg @ ldr rd, [rn, rm] +/* 8 */ b .data_arm_ldmstm @ ldm*a rn, <rlist> +/* 9 */ b .data_arm_ldmstm @ ldm*b rn, <rlist> +/* a */ b .data_unknown +/* b */ b .data_unknown +/* c */ b do_DataAbort @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m +/* d */ b do_DataAbort @ ldc rd, [rn, #m] +/* e */ b .data_unknown +/* f */ +.data_unknown: @ Part of jumptable + mov r0, r4 + mov r1, r8 + b baddataabort + +.data_arm_ldmstm: + tst r8, #1 << 21 @ check writeback bit + beq do_DataAbort @ no writeback -> no fixup + mov r7, #0x11 + orr r7, r7, #0x1100 + and r6, r8, r7 + and r9, r8, r7, lsl #1 + add r6, r6, r9, lsr #1 + and r9, r8, r7, lsl #2 + add r6, r6, r9, lsr #2 + and r9, r8, r7, lsl #3 + add r6, r6, r9, lsr #3 + add r6, r6, r6, lsr #8 + add r6, r6, r6, lsr #4 + and r6, r6, #15 @ r6 = no. of registers to transfer. + and r9, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' + tst r8, #1 << 23 @ Check U bit + subne r7, r7, r6, lsl #2 @ Undo increment + addeq r7, r7, r6, lsl #2 @ Undo decrement + str r7, [r2, r9, lsr #14] @ Put register 'Rn' + b do_DataAbort + +.data_arm_lateldrhpre: + tst r8, #1 << 21 @ Check writeback bit + beq do_DataAbort @ No writeback -> no fixup +.data_arm_lateldrhpost: + and r9, r8, #0x00f @ get Rm / low nibble of immediate value + tst r8, #1 << 22 @ if (immediate offset) + andne r6, r8, #0xf00 @ { immediate high nibble + orrne r6, r9, r6, lsr #4 @ combine nibbles } else + ldreq r6, [r2, r9, lsl #2] @ { load Rm value } +.data_arm_apply_r6_and_rn: + and r9, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' + tst r8, #1 << 23 @ Check U bit + subne r7, r7, r6 @ Undo incrmenet + addeq r7, r7, r6 @ Undo decrement + str r7, [r2, r9, lsr #14] @ Put register 'Rn' + b do_DataAbort + +.data_arm_lateldrpreconst: + tst r8, #1 << 21 @ check writeback bit + beq do_DataAbort @ no writeback -> no fixup +.data_arm_lateldrpostconst: + movs r6, r8, lsl #20 @ Get offset + beq do_DataAbort @ zero -> no fixup + and r9, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' + tst r8, #1 << 23 @ Check U bit + subne r7, r7, r6, lsr #20 @ Undo increment + addeq r7, r7, r6, lsr #20 @ Undo decrement + str r7, [r2, r9, lsr #14] @ Put register 'Rn' + b do_DataAbort + +.data_arm_lateldrprereg: + tst r8, #1 << 21 @ check writeback bit + beq do_DataAbort @ no writeback -> no fixup +.data_arm_lateldrpostreg: + and r7, r8, #15 @ Extract 'm' from instruction + ldr r6, [r2, r7, lsl #2] @ Get register 'Rm' + mov r9, r8, lsr #7 @ get shift count + ands r9, r9, #31 + and r7, r8, #0x70 @ get shift type + orreq r7, r7, #8 @ shift count = 0 + add pc, pc, r7 + nop + + mov r6, r6, lsl r9 @ 0: LSL #!0 + b .data_arm_apply_r6_and_rn + b .data_arm_apply_r6_and_rn @ 1: LSL #0 + nop + b .data_unknown @ 2: MUL? + nop + b .data_unknown @ 3: MUL? + nop + mov r6, r6, lsr r9 @ 4: LSR #!0 + b .data_arm_apply_r6_and_rn + mov r6, r6, lsr #32 @ 5: LSR #32 + b .data_arm_apply_r6_and_rn + b .data_unknown @ 6: MUL? + nop + b .data_unknown @ 7: MUL? + nop + mov r6, r6, asr r9 @ 8: ASR #!0 + b .data_arm_apply_r6_and_rn + mov r6, r6, asr #32 @ 9: ASR #32 + b .data_arm_apply_r6_and_rn + b .data_unknown @ A: MUL? + nop + b .data_unknown @ B: MUL? + nop + mov r6, r6, ror r9 @ C: ROR #!0 + b .data_arm_apply_r6_and_rn + mov r6, r6, rrx @ D: RRX + b .data_arm_apply_r6_and_rn + b .data_unknown @ E: MUL? + nop + b .data_unknown @ F: MUL? + +.data_thumb_abort: + ldrh r8, [r4] @ read instruction + tst r8, #1 << 11 @ L = 1 -> write? + orreq r1, r1, #1 << 8 @ yes + and r7, r8, #15 << 12 + add pc, pc, r7, lsr #10 @ lookup in table + nop + +/* 0 */ b .data_unknown +/* 1 */ b .data_unknown +/* 2 */ b .data_unknown +/* 3 */ b .data_unknown +/* 4 */ b .data_unknown +/* 5 */ b .data_thumb_reg +/* 6 */ b do_DataAbort +/* 7 */ b do_DataAbort +/* 8 */ b do_DataAbort +/* 9 */ b do_DataAbort +/* A */ b .data_unknown +/* B */ b .data_thumb_pushpop +/* C */ b .data_thumb_ldmstm +/* D */ b .data_unknown +/* E */ b .data_unknown +/* F */ b .data_unknown + +.data_thumb_reg: + tst r8, #1 << 9 + beq do_DataAbort + tst r8, #1 << 10 @ If 'S' (signed) bit is set + movne r1, #0 @ it must be a load instr + b do_DataAbort + +.data_thumb_pushpop: + tst r8, #1 << 10 + beq .data_unknown + and r6, r8, #0x55 @ hweight8(r8) + R bit + and r9, r8, #0xaa + add r6, r6, r9, lsr #1 + and r9, r6, #0xcc + and r6, r6, #0x33 + add r6, r6, r9, lsr #2 + movs r7, r8, lsr #9 @ C = r8 bit 8 (R bit) + adc r6, r6, r6, lsr #4 @ high + low nibble + R bit + and r6, r6, #15 @ number of regs to transfer + ldr r7, [r2, #13 << 2] + tst r8, #1 << 11 + addeq r7, r7, r6, lsl #2 @ increment SP if PUSH + subne r7, r7, r6, lsl #2 @ decrement SP if POP + str r7, [r2, #13 << 2] + b do_DataAbort + +.data_thumb_ldmstm: + and r6, r8, #0x55 @ hweight8(r8) + and r9, r8, #0xaa + add r6, r6, r9, lsr #1 + and r9, r6, #0xcc + and r6, r6, #0x33 + add r6, r6, r9, lsr #2 + add r6, r6, r6, lsr #4 + and r9, r8, #7 << 8 + ldr r7, [r2, r9, lsr #6] + and r6, r6, #15 @ number of regs to transfer + sub r7, r7, r6, lsl #2 @ always decrement + str r7, [r2, r9, lsr #6] + b do_DataAbort diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S new file mode 100644 index 00000000..2cbf68ef --- /dev/null +++ b/arch/arm/mm/abort-macro.S @@ -0,0 +1,40 @@ +/* + * The ARM LDRD and Thumb LDRSB instructions use bit 20/11 (ARM/Thumb) + * differently than every other instruction, so it is set to 0 (write) + * even though the instructions are read instructions. This means that + * during an abort the instructions will be treated as a write and the + * handler will raise a signal from unwriteable locations if they + * fault. We have to specifically check for these instructions + * from the abort handlers to treat them properly. + * + */ + + .macro do_thumb_abort, fsr, pc, psr, tmp + tst \psr, #PSR_T_BIT + beq not_thumb + ldrh \tmp, [\pc] @ Read aborted Thumb instruction + and \tmp, \tmp, # 0xfe00 @ Mask opcode field + cmp \tmp, # 0x5600 @ Is it ldrsb? + orreq \tmp, \tmp, #1 << 11 @ Set L-bit if yes + tst \tmp, #1 << 11 @ L = 0 -> write + orreq \fsr, \fsr, #1 << 11 @ yes. + b do_DataAbort +not_thumb: + .endm + +/* + * We check for the following instruction encoding for LDRD. + * + * [27:25] == 000 + * [7:4] == 1101 + * [20] == 0 + */ + .macro do_ldrd_abort, tmp, insn + tst \insn, #0x0e100000 @ [27:25,20] == 0 + bne not_ldrd + and \tmp, \insn, #0x000000f0 @ [7:4] == 1101 + cmp \tmp, #0x000000d0 + beq do_DataAbort +not_ldrd: + .endm + diff --git a/arch/arm/mm/abort-nommu.S b/arch/arm/mm/abort-nommu.S new file mode 100644 index 00000000..119cb479 --- /dev/null +++ b/arch/arm/mm/abort-nommu.S @@ -0,0 +1,20 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> +/* + * Function: nommu_early_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4 - r11, r13 preserved + * + * Note: There is no FSR/FAR on !CPU_CP15_MMU cores. + * Just fill zero into the registers. + */ + .align 5 +ENTRY(nommu_early_abort) + mov r0, #0 @ clear r0, r1 (no FSR/FAR) + mov r1, #0 + b do_DataAbort +ENDPROC(nommu_early_abort) diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c new file mode 100644 index 00000000..9107231a --- /dev/null +++ b/arch/arm/mm/alignment.c @@ -0,0 +1,990 @@ +/* + * linux/arch/arm/mm/alignment.c + * + * Copyright (C) 1995 Linus Torvalds + * Modifications for ARM processor (c) 1995-2001 Russell King + * Thumb alignment fault fixups (c) 2004 MontaVista Software, Inc. + * - Adapted from gdb/sim/arm/thumbemu.c -- Thumb instruction emulation. + * Copyright (C) 1996, Cygnus Software Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/moduleparam.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/uaccess.h> + +#include <asm/cp15.h> +#include <asm/system_info.h> +#include <asm/unaligned.h> + +#include "fault.h" + +/* + * 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998 + * /proc/sys/debug/alignment, modified and integrated into + * Linux 2.1 by Russell King + * + * Speed optimisations and better fault handling by Russell King. + * + * *** NOTE *** + * This code is not portable to processors with late data abort handling. + */ +#define CODING_BITS(i) (i & 0x0e000000) + +#define LDST_I_BIT(i) (i & (1 << 26)) /* Immediate constant */ +#define LDST_P_BIT(i) (i & (1 << 24)) /* Preindex */ +#define LDST_U_BIT(i) (i & (1 << 23)) /* Add offset */ +#define LDST_W_BIT(i) (i & (1 << 21)) /* Writeback */ +#define LDST_L_BIT(i) (i & (1 << 20)) /* Load */ + +#define LDST_P_EQ_U(i) ((((i) ^ ((i) >> 1)) & (1 << 23)) == 0) + +#define LDSTHD_I_BIT(i) (i & (1 << 22)) /* double/half-word immed */ +#define LDM_S_BIT(i) (i & (1 << 22)) /* write CPSR from SPSR */ + +#define RN_BITS(i) ((i >> 16) & 15) /* Rn */ +#define RD_BITS(i) ((i >> 12) & 15) /* Rd */ +#define RM_BITS(i) (i & 15) /* Rm */ + +#define REGMASK_BITS(i) (i & 0xffff) +#define OFFSET_BITS(i) (i & 0x0fff) + +#define IS_SHIFT(i) (i & 0x0ff0) +#define SHIFT_BITS(i) ((i >> 7) & 0x1f) +#define SHIFT_TYPE(i) (i & 0x60) +#define SHIFT_LSL 0x00 +#define SHIFT_LSR 0x20 +#define SHIFT_ASR 0x40 +#define SHIFT_RORRRX 0x60 + +#define BAD_INSTR 0xdeadc0de + +/* Thumb-2 32 bit format per ARMv7 DDI0406A A6.3, either f800h,e800h,f800h */ +#define IS_T32(hi16) \ + (((hi16) & 0xe000) == 0xe000 && ((hi16) & 0x1800)) + +static unsigned long ai_user; +static unsigned long ai_sys; +static unsigned long ai_skipped; +static unsigned long ai_half; +static unsigned long ai_word; +static unsigned long ai_dword; +static unsigned long ai_multi; +static int ai_usermode; + +core_param(alignment, ai_usermode, int, 0600); + +#define UM_WARN (1 << 0) +#define UM_FIXUP (1 << 1) +#define UM_SIGNAL (1 << 2) + +/* Return true if and only if the ARMv6 unaligned access model is in use. */ +static bool cpu_is_v6_unaligned(void) +{ + return cpu_architecture() >= CPU_ARCH_ARMv6 && (cr_alignment & CR_U); +} + +static int safe_usermode(int new_usermode, bool warn) +{ + /* + * ARMv6 and later CPUs can perform unaligned accesses for + * most single load and store instructions up to word size. + * LDM, STM, LDRD and STRD still need to be handled. + * + * Ignoring the alignment fault is not an option on these + * CPUs since we spin re-faulting the instruction without + * making any progress. + */ + if (cpu_is_v6_unaligned() && !(new_usermode & (UM_FIXUP | UM_SIGNAL))) { + new_usermode |= UM_FIXUP; + + if (warn) + printk(KERN_WARNING "alignment: ignoring faults is unsafe on this CPU. Defaulting to fixup mode.\n"); + } + + return new_usermode; +} + +#ifdef CONFIG_PROC_FS +static const char *usermode_action[] = { + "ignored", + "warn", + "fixup", + "fixup+warn", + "signal", + "signal+warn" +}; + +static int alignment_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "User:\t\t%lu\n", ai_user); + seq_printf(m, "System:\t\t%lu\n", ai_sys); + seq_printf(m, "Skipped:\t%lu\n", ai_skipped); + seq_printf(m, "Half:\t\t%lu\n", ai_half); + seq_printf(m, "Word:\t\t%lu\n", ai_word); + if (cpu_architecture() >= CPU_ARCH_ARMv5TE) + seq_printf(m, "DWord:\t\t%lu\n", ai_dword); + seq_printf(m, "Multi:\t\t%lu\n", ai_multi); + seq_printf(m, "User faults:\t%i (%s)\n", ai_usermode, + usermode_action[ai_usermode]); + + return 0; +} + +static int alignment_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, alignment_proc_show, NULL); +} + +static ssize_t alignment_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) +{ + char mode; + + if (count > 0) { + if (get_user(mode, buffer)) + return -EFAULT; + if (mode >= '0' && mode <= '5') + ai_usermode = safe_usermode(mode - '0', true); + } + return count; +} + +static const struct file_operations alignment_proc_fops = { + .open = alignment_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = alignment_proc_write, +}; +#endif /* CONFIG_PROC_FS */ + +union offset_union { + unsigned long un; + signed long sn; +}; + +#define TYPE_ERROR 0 +#define TYPE_FAULT 1 +#define TYPE_LDST 2 +#define TYPE_DONE 3 + +#ifdef __ARMEB__ +#define BE 1 +#define FIRST_BYTE_16 "mov %1, %1, ror #8\n" +#define FIRST_BYTE_32 "mov %1, %1, ror #24\n" +#define NEXT_BYTE "ror #24" +#else +#define BE 0 +#define FIRST_BYTE_16 +#define FIRST_BYTE_32 +#define NEXT_BYTE "lsr #8" +#endif + +#define __get8_unaligned_check(ins,val,addr,err) \ + __asm__( \ + ARM( "1: "ins" %1, [%2], #1\n" ) \ + THUMB( "1: "ins" %1, [%2]\n" ) \ + THUMB( " add %2, %2, #1\n" ) \ + "2:\n" \ + " .pushsection .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: mov %0, #1\n" \ + " b 2b\n" \ + " .popsection\n" \ + " .pushsection __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 3b\n" \ + " .popsection\n" \ + : "=r" (err), "=&r" (val), "=r" (addr) \ + : "0" (err), "2" (addr)) + +#define __get16_unaligned_check(ins,val,addr) \ + do { \ + unsigned int err = 0, v, a = addr; \ + __get8_unaligned_check(ins,v,a,err); \ + val = v << ((BE) ? 8 : 0); \ + __get8_unaligned_check(ins,v,a,err); \ + val |= v << ((BE) ? 0 : 8); \ + if (err) \ + goto fault; \ + } while (0) + +#define get16_unaligned_check(val,addr) \ + __get16_unaligned_check("ldrb",val,addr) + +#define get16t_unaligned_check(val,addr) \ + __get16_unaligned_check("ldrbt",val,addr) + +#define __get32_unaligned_check(ins,val,addr) \ + do { \ + unsigned int err = 0, v, a = addr; \ + __get8_unaligned_check(ins,v,a,err); \ + val = v << ((BE) ? 24 : 0); \ + __get8_unaligned_check(ins,v,a,err); \ + val |= v << ((BE) ? 16 : 8); \ + __get8_unaligned_check(ins,v,a,err); \ + val |= v << ((BE) ? 8 : 16); \ + __get8_unaligned_check(ins,v,a,err); \ + val |= v << ((BE) ? 0 : 24); \ + if (err) \ + goto fault; \ + } while (0) + +#define get32_unaligned_check(val,addr) \ + __get32_unaligned_check("ldrb",val,addr) + +#define get32t_unaligned_check(val,addr) \ + __get32_unaligned_check("ldrbt",val,addr) + +#define __put16_unaligned_check(ins,val,addr) \ + do { \ + unsigned int err = 0, v = val, a = addr; \ + __asm__( FIRST_BYTE_16 \ + ARM( "1: "ins" %1, [%2], #1\n" ) \ + THUMB( "1: "ins" %1, [%2]\n" ) \ + THUMB( " add %2, %2, #1\n" ) \ + " mov %1, %1, "NEXT_BYTE"\n" \ + "2: "ins" %1, [%2]\n" \ + "3:\n" \ + " .pushsection .fixup,\"ax\"\n" \ + " .align 2\n" \ + "4: mov %0, #1\n" \ + " b 3b\n" \ + " .popsection\n" \ + " .pushsection __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 4b\n" \ + " .long 2b, 4b\n" \ + " .popsection\n" \ + : "=r" (err), "=&r" (v), "=&r" (a) \ + : "0" (err), "1" (v), "2" (a)); \ + if (err) \ + goto fault; \ + } while (0) + +#define put16_unaligned_check(val,addr) \ + __put16_unaligned_check("strb",val,addr) + +#define put16t_unaligned_check(val,addr) \ + __put16_unaligned_check("strbt",val,addr) + +#define __put32_unaligned_check(ins,val,addr) \ + do { \ + unsigned int err = 0, v = val, a = addr; \ + __asm__( FIRST_BYTE_32 \ + ARM( "1: "ins" %1, [%2], #1\n" ) \ + THUMB( "1: "ins" %1, [%2]\n" ) \ + THUMB( " add %2, %2, #1\n" ) \ + " mov %1, %1, "NEXT_BYTE"\n" \ + ARM( "2: "ins" %1, [%2], #1\n" ) \ + THUMB( "2: "ins" %1, [%2]\n" ) \ + THUMB( " add %2, %2, #1\n" ) \ + " mov %1, %1, "NEXT_BYTE"\n" \ + ARM( "3: "ins" %1, [%2], #1\n" ) \ + THUMB( "3: "ins" %1, [%2]\n" ) \ + THUMB( " add %2, %2, #1\n" ) \ + " mov %1, %1, "NEXT_BYTE"\n" \ + "4: "ins" %1, [%2]\n" \ + "5:\n" \ + " .pushsection .fixup,\"ax\"\n" \ + " .align 2\n" \ + "6: mov %0, #1\n" \ + " b 5b\n" \ + " .popsection\n" \ + " .pushsection __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 6b\n" \ + " .long 2b, 6b\n" \ + " .long 3b, 6b\n" \ + " .long 4b, 6b\n" \ + " .popsection\n" \ + : "=r" (err), "=&r" (v), "=&r" (a) \ + : "0" (err), "1" (v), "2" (a)); \ + if (err) \ + goto fault; \ + } while (0) + +#define put32_unaligned_check(val,addr) \ + __put32_unaligned_check("strb", val, addr) + +#define put32t_unaligned_check(val,addr) \ + __put32_unaligned_check("strbt", val, addr) + +static void +do_alignment_finish_ldst(unsigned long addr, unsigned long instr, struct pt_regs *regs, union offset_union offset) +{ + if (!LDST_U_BIT(instr)) + offset.un = -offset.un; + + if (!LDST_P_BIT(instr)) + addr += offset.un; + + if (!LDST_P_BIT(instr) || LDST_W_BIT(instr)) + regs->uregs[RN_BITS(instr)] = addr; +} + +static int +do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *regs) +{ + unsigned int rd = RD_BITS(instr); + + ai_half += 1; + + if (user_mode(regs)) + goto user; + + if (LDST_L_BIT(instr)) { + unsigned long val; + get16_unaligned_check(val, addr); + + /* signed half-word? */ + if (instr & 0x40) + val = (signed long)((signed short) val); + + regs->uregs[rd] = val; + } else + put16_unaligned_check(regs->uregs[rd], addr); + + return TYPE_LDST; + + user: + if (LDST_L_BIT(instr)) { + unsigned long val; + get16t_unaligned_check(val, addr); + + /* signed half-word? */ + if (instr & 0x40) + val = (signed long)((signed short) val); + + regs->uregs[rd] = val; + } else + put16t_unaligned_check(regs->uregs[rd], addr); + + return TYPE_LDST; + + fault: + return TYPE_FAULT; +} + +static int +do_alignment_ldrdstrd(unsigned long addr, unsigned long instr, + struct pt_regs *regs) +{ + unsigned int rd = RD_BITS(instr); + unsigned int rd2; + int load; + + if ((instr & 0xfe000000) == 0xe8000000) { + /* ARMv7 Thumb-2 32-bit LDRD/STRD */ + rd2 = (instr >> 8) & 0xf; + load = !!(LDST_L_BIT(instr)); + } else if (((rd & 1) == 1) || (rd == 14)) + goto bad; + else { + load = ((instr & 0xf0) == 0xd0); + rd2 = rd + 1; + } + + ai_dword += 1; + + if (user_mode(regs)) + goto user; + + if (load) { + unsigned long val; + get32_unaligned_check(val, addr); + regs->uregs[rd] = val; + get32_unaligned_check(val, addr + 4); + regs->uregs[rd2] = val; + } else { + put32_unaligned_check(regs->uregs[rd], addr); + put32_unaligned_check(regs->uregs[rd2], addr + 4); + } + + return TYPE_LDST; + + user: + if (load) { + unsigned long val; + get32t_unaligned_check(val, addr); + regs->uregs[rd] = val; + get32t_unaligned_check(val, addr + 4); + regs->uregs[rd2] = val; + } else { + put32t_unaligned_check(regs->uregs[rd], addr); + put32t_unaligned_check(regs->uregs[rd2], addr + 4); + } + + return TYPE_LDST; + bad: + return TYPE_ERROR; + fault: + return TYPE_FAULT; +} + +static int +do_alignment_ldrstr(unsigned long addr, unsigned long instr, struct pt_regs *regs) +{ + unsigned int rd = RD_BITS(instr); + + ai_word += 1; + + if ((!LDST_P_BIT(instr) && LDST_W_BIT(instr)) || user_mode(regs)) + goto trans; + + if (LDST_L_BIT(instr)) { + unsigned int val; + get32_unaligned_check(val, addr); + regs->uregs[rd] = val; + } else + put32_unaligned_check(regs->uregs[rd], addr); + return TYPE_LDST; + + trans: + if (LDST_L_BIT(instr)) { + unsigned int val; + get32t_unaligned_check(val, addr); + regs->uregs[rd] = val; + } else + put32t_unaligned_check(regs->uregs[rd], addr); + return TYPE_LDST; + + fault: + return TYPE_FAULT; +} + +/* + * LDM/STM alignment handler. + * + * There are 4 variants of this instruction: + * + * B = rn pointer before instruction, A = rn pointer after instruction + * ------ increasing address -----> + * | | r0 | r1 | ... | rx | | + * PU = 01 B A + * PU = 11 B A + * PU = 00 A B + * PU = 10 A B + */ +static int +do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *regs) +{ + unsigned int rd, rn, correction, nr_regs, regbits; + unsigned long eaddr, newaddr; + + if (LDM_S_BIT(instr)) + goto bad; + + correction = 4; /* processor implementation defined */ + regs->ARM_pc += correction; + + ai_multi += 1; + + /* count the number of registers in the mask to be transferred */ + nr_regs = hweight16(REGMASK_BITS(instr)) * 4; + + rn = RN_BITS(instr); + newaddr = eaddr = regs->uregs[rn]; + + if (!LDST_U_BIT(instr)) + nr_regs = -nr_regs; + newaddr += nr_regs; + if (!LDST_U_BIT(instr)) + eaddr = newaddr; + + if (LDST_P_EQ_U(instr)) /* U = P */ + eaddr += 4; + + /* + * For alignment faults on the ARM922T/ARM920T the MMU makes + * the FSR (and hence addr) equal to the updated base address + * of the multiple access rather than the restored value. + * Switch this message off if we've got a ARM92[02], otherwise + * [ls]dm alignment faults are noisy! + */ +#if !(defined CONFIG_CPU_ARM922T) && !(defined CONFIG_CPU_ARM920T) + /* + * This is a "hint" - we already have eaddr worked out by the + * processor for us. + */ + if (addr != eaddr) { + printk(KERN_ERR "LDMSTM: PC = %08lx, instr = %08lx, " + "addr = %08lx, eaddr = %08lx\n", + instruction_pointer(regs), instr, addr, eaddr); + show_regs(regs); + } +#endif + + if (user_mode(regs)) { + for (regbits = REGMASK_BITS(instr), rd = 0; regbits; + regbits >>= 1, rd += 1) + if (regbits & 1) { + if (LDST_L_BIT(instr)) { + unsigned int val; + get32t_unaligned_check(val, eaddr); + regs->uregs[rd] = val; + } else + put32t_unaligned_check(regs->uregs[rd], eaddr); + eaddr += 4; + } + } else { + for (regbits = REGMASK_BITS(instr), rd = 0; regbits; + regbits >>= 1, rd += 1) + if (regbits & 1) { + if (LDST_L_BIT(instr)) { + unsigned int val; + get32_unaligned_check(val, eaddr); + regs->uregs[rd] = val; + } else + put32_unaligned_check(regs->uregs[rd], eaddr); + eaddr += 4; + } + } + + if (LDST_W_BIT(instr)) + regs->uregs[rn] = newaddr; + if (!LDST_L_BIT(instr) || !(REGMASK_BITS(instr) & (1 << 15))) + regs->ARM_pc -= correction; + return TYPE_DONE; + +fault: + regs->ARM_pc -= correction; + return TYPE_FAULT; + +bad: + printk(KERN_ERR "Alignment trap: not handling ldm with s-bit set\n"); + return TYPE_ERROR; +} + +/* + * Convert Thumb ld/st instruction forms to equivalent ARM instructions so + * we can reuse ARM userland alignment fault fixups for Thumb. + * + * This implementation was initially based on the algorithm found in + * gdb/sim/arm/thumbemu.c. It is basically just a code reduction of same + * to convert only Thumb ld/st instruction forms to equivalent ARM forms. + * + * NOTES: + * 1. Comments below refer to ARM ARM DDI0100E Thumb Instruction sections. + * 2. If for some reason we're passed an non-ld/st Thumb instruction to + * decode, we return 0xdeadc0de. This should never happen under normal + * circumstances but if it does, we've got other problems to deal with + * elsewhere and we obviously can't fix those problems here. + */ + +static unsigned long +thumb2arm(u16 tinstr) +{ + u32 L = (tinstr & (1<<11)) >> 11; + + switch ((tinstr & 0xf800) >> 11) { + /* 6.5.1 Format 1: */ + case 0x6000 >> 11: /* 7.1.52 STR(1) */ + case 0x6800 >> 11: /* 7.1.26 LDR(1) */ + case 0x7000 >> 11: /* 7.1.55 STRB(1) */ + case 0x7800 >> 11: /* 7.1.30 LDRB(1) */ + return 0xe5800000 | + ((tinstr & (1<<12)) << (22-12)) | /* fixup */ + (L<<20) | /* L==1? */ + ((tinstr & (7<<0)) << (12-0)) | /* Rd */ + ((tinstr & (7<<3)) << (16-3)) | /* Rn */ + ((tinstr & (31<<6)) >> /* immed_5 */ + (6 - ((tinstr & (1<<12)) ? 0 : 2))); + case 0x8000 >> 11: /* 7.1.57 STRH(1) */ + case 0x8800 >> 11: /* 7.1.32 LDRH(1) */ + return 0xe1c000b0 | + (L<<20) | /* L==1? */ + ((tinstr & (7<<0)) << (12-0)) | /* Rd */ + ((tinstr & (7<<3)) << (16-3)) | /* Rn */ + ((tinstr & (7<<6)) >> (6-1)) | /* immed_5[2:0] */ + ((tinstr & (3<<9)) >> (9-8)); /* immed_5[4:3] */ + + /* 6.5.1 Format 2: */ + case 0x5000 >> 11: + case 0x5800 >> 11: + { + static const u32 subset[8] = { + 0xe7800000, /* 7.1.53 STR(2) */ + 0xe18000b0, /* 7.1.58 STRH(2) */ + 0xe7c00000, /* 7.1.56 STRB(2) */ + 0xe19000d0, /* 7.1.34 LDRSB */ + 0xe7900000, /* 7.1.27 LDR(2) */ + 0xe19000b0, /* 7.1.33 LDRH(2) */ + 0xe7d00000, /* 7.1.31 LDRB(2) */ + 0xe19000f0 /* 7.1.35 LDRSH */ + }; + return subset[(tinstr & (7<<9)) >> 9] | + ((tinstr & (7<<0)) << (12-0)) | /* Rd */ + ((tinstr & (7<<3)) << (16-3)) | /* Rn */ + ((tinstr & (7<<6)) >> (6-0)); /* Rm */ + } + + /* 6.5.1 Format 3: */ + case 0x4800 >> 11: /* 7.1.28 LDR(3) */ + /* NOTE: This case is not technically possible. We're + * loading 32-bit memory data via PC relative + * addressing mode. So we can and should eliminate + * this case. But I'll leave it here for now. + */ + return 0xe59f0000 | + ((tinstr & (7<<8)) << (12-8)) | /* Rd */ + ((tinstr & 255) << (2-0)); /* immed_8 */ + + /* 6.5.1 Format 4: */ + case 0x9000 >> 11: /* 7.1.54 STR(3) */ + case 0x9800 >> 11: /* 7.1.29 LDR(4) */ + return 0xe58d0000 | + (L<<20) | /* L==1? */ + ((tinstr & (7<<8)) << (12-8)) | /* Rd */ + ((tinstr & 255) << 2); /* immed_8 */ + + /* 6.6.1 Format 1: */ + case 0xc000 >> 11: /* 7.1.51 STMIA */ + case 0xc800 >> 11: /* 7.1.25 LDMIA */ + { + u32 Rn = (tinstr & (7<<8)) >> 8; + u32 W = ((L<<Rn) & (tinstr&255)) ? 0 : 1<<21; + + return 0xe8800000 | W | (L<<20) | (Rn<<16) | + (tinstr&255); + } + + /* 6.6.1 Format 2: */ + case 0xb000 >> 11: /* 7.1.48 PUSH */ + case 0xb800 >> 11: /* 7.1.47 POP */ + if ((tinstr & (3 << 9)) == 0x0400) { + static const u32 subset[4] = { + 0xe92d0000, /* STMDB sp!,{registers} */ + 0xe92d4000, /* STMDB sp!,{registers,lr} */ + 0xe8bd0000, /* LDMIA sp!,{registers} */ + 0xe8bd8000 /* LDMIA sp!,{registers,pc} */ + }; + return subset[(L<<1) | ((tinstr & (1<<8)) >> 8)] | + (tinstr & 255); /* register_list */ + } + /* Else fall through for illegal instruction case */ + + default: + return BAD_INSTR; + } +} + +/* + * Convert Thumb-2 32 bit LDM, STM, LDRD, STRD to equivalent instruction + * handlable by ARM alignment handler, also find the corresponding handler, + * so that we can reuse ARM userland alignment fault fixups for Thumb. + * + * @pinstr: original Thumb-2 instruction; returns new handlable instruction + * @regs: register context. + * @poffset: return offset from faulted addr for later writeback + * + * NOTES: + * 1. Comments below refer to ARMv7 DDI0406A Thumb Instruction sections. + * 2. Register name Rt from ARMv7 is same as Rd from ARMv6 (Rd is Rt) + */ +static void * +do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, + union offset_union *poffset) +{ + unsigned long instr = *pinstr; + u16 tinst1 = (instr >> 16) & 0xffff; + u16 tinst2 = instr & 0xffff; + poffset->un = 0; + + switch (tinst1 & 0xffe0) { + /* A6.3.5 Load/Store multiple */ + case 0xe880: /* STM/STMIA/STMEA,LDM/LDMIA, PUSH/POP T2 */ + case 0xe8a0: /* ...above writeback version */ + case 0xe900: /* STMDB/STMFD, LDMDB/LDMEA */ + case 0xe920: /* ...above writeback version */ + /* no need offset decision since handler calculates it */ + return do_alignment_ldmstm; + + case 0xf840: /* POP/PUSH T3 (single register) */ + if (RN_BITS(instr) == 13 && (tinst2 & 0x09ff) == 0x0904) { + u32 L = !!(LDST_L_BIT(instr)); + const u32 subset[2] = { + 0xe92d0000, /* STMDB sp!,{registers} */ + 0xe8bd0000, /* LDMIA sp!,{registers} */ + }; + *pinstr = subset[L] | (1<<RD_BITS(instr)); + return do_alignment_ldmstm; + } + /* Else fall through for illegal instruction case */ + break; + + /* A6.3.6 Load/store double, STRD/LDRD(immed, lit, reg) */ + case 0xe860: + case 0xe960: + case 0xe8e0: + case 0xe9e0: + poffset->un = (tinst2 & 0xff) << 2; + case 0xe940: + case 0xe9c0: + return do_alignment_ldrdstrd; + + /* + * No need to handle load/store instructions up to word size + * since ARMv6 and later CPUs can perform unaligned accesses. + */ + default: + break; + } + return NULL; +} + +static int +do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + union offset_union offset; + unsigned long instr = 0, instrptr; + int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs); + unsigned int type; + mm_segment_t fs; + unsigned int fault; + u16 tinstr = 0; + int isize = 4; + int thumb2_32b = 0; + + if (interrupts_enabled(regs)) + local_irq_enable(); + + instrptr = instruction_pointer(regs); + + fs = get_fs(); + set_fs(KERNEL_DS); + if (thumb_mode(regs)) { + fault = __get_user(tinstr, (u16 *)(instrptr & ~1)); + if (!fault) { + if (cpu_architecture() >= CPU_ARCH_ARMv7 && + IS_T32(tinstr)) { + /* Thumb-2 32-bit */ + u16 tinst2 = 0; + fault = __get_user(tinst2, (u16 *)(instrptr+2)); + instr = (tinstr << 16) | tinst2; + thumb2_32b = 1; + } else { + isize = 2; + instr = thumb2arm(tinstr); + } + } + } else + fault = __get_user(instr, (u32 *)instrptr); + set_fs(fs); + + if (fault) { + type = TYPE_FAULT; + goto bad_or_fault; + } + + if (user_mode(regs)) + goto user; + + ai_sys += 1; + + fixup: + + regs->ARM_pc += isize; + + switch (CODING_BITS(instr)) { + case 0x00000000: /* 3.13.4 load/store instruction extensions */ + if (LDSTHD_I_BIT(instr)) + offset.un = (instr & 0xf00) >> 4 | (instr & 15); + else + offset.un = regs->uregs[RM_BITS(instr)]; + + if ((instr & 0x000000f0) == 0x000000b0 || /* LDRH, STRH */ + (instr & 0x001000f0) == 0x001000f0) /* LDRSH */ + handler = do_alignment_ldrhstrh; + else if ((instr & 0x001000f0) == 0x000000d0 || /* LDRD */ + (instr & 0x001000f0) == 0x000000f0) /* STRD */ + handler = do_alignment_ldrdstrd; + else if ((instr & 0x01f00ff0) == 0x01000090) /* SWP */ + goto swp; + else + goto bad; + break; + + case 0x04000000: /* ldr or str immediate */ + offset.un = OFFSET_BITS(instr); + handler = do_alignment_ldrstr; + break; + + case 0x06000000: /* ldr or str register */ + offset.un = regs->uregs[RM_BITS(instr)]; + + if (IS_SHIFT(instr)) { + unsigned int shiftval = SHIFT_BITS(instr); + + switch(SHIFT_TYPE(instr)) { + case SHIFT_LSL: + offset.un <<= shiftval; + break; + + case SHIFT_LSR: + offset.un >>= shiftval; + break; + + case SHIFT_ASR: + offset.sn >>= shiftval; + break; + + case SHIFT_RORRRX: + if (shiftval == 0) { + offset.un >>= 1; + if (regs->ARM_cpsr & PSR_C_BIT) + offset.un |= 1 << 31; + } else + offset.un = offset.un >> shiftval | + offset.un << (32 - shiftval); + break; + } + } + handler = do_alignment_ldrstr; + break; + + case 0x08000000: /* ldm or stm, or thumb-2 32bit instruction */ + if (thumb2_32b) + handler = do_alignment_t32_to_handler(&instr, regs, &offset); + else + handler = do_alignment_ldmstm; + break; + + default: + goto bad; + } + + if (!handler) + goto bad; + type = handler(addr, instr, regs); + + if (type == TYPE_ERROR || type == TYPE_FAULT) { + regs->ARM_pc -= isize; + goto bad_or_fault; + } + + if (type == TYPE_LDST) + do_alignment_finish_ldst(addr, instr, regs, offset); + + return 0; + + bad_or_fault: + if (type == TYPE_ERROR) + goto bad; + /* + * We got a fault - fix it up, or die. + */ + do_bad_area(addr, fsr, regs); + return 0; + + swp: + printk(KERN_ERR "Alignment trap: not handling swp instruction\n"); + + bad: + /* + * Oops, we didn't handle the instruction. + */ + printk(KERN_ERR "Alignment trap: not handling instruction " + "%0*lx at [<%08lx>]\n", + isize << 1, + isize == 2 ? tinstr : instr, instrptr); + ai_skipped += 1; + return 1; + + user: + ai_user += 1; + + if (ai_usermode & UM_WARN) + printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx " + "Address=0x%08lx FSR 0x%03x\n", current->comm, + task_pid_nr(current), instrptr, + isize << 1, + isize == 2 ? tinstr : instr, + addr, fsr); + + if (ai_usermode & UM_FIXUP) + goto fixup; + + if (ai_usermode & UM_SIGNAL) { + siginfo_t si; + + si.si_signo = SIGBUS; + si.si_errno = 0; + si.si_code = BUS_ADRALN; + si.si_addr = (void __user *)addr; + + force_sig_info(si.si_signo, &si, current); + } else { + /* + * We're about to disable the alignment trap and return to + * user space. But if an interrupt occurs before actually + * reaching user space, then the IRQ vector entry code will + * notice that we were still in kernel space and therefore + * the alignment trap won't be re-enabled in that case as it + * is presumed to be always on from kernel space. + * Let's prevent that race by disabling interrupts here (they + * are disabled on the way back to user space anyway in + * entry-common.S) and disable the alignment trap only if + * there is no work pending for this thread. + */ + raw_local_irq_disable(); + if (!(current_thread_info()->flags & _TIF_WORK_MASK)) + set_cr(cr_no_alignment); + } + + return 0; +} + +/* + * This needs to be done after sysctl_init, otherwise sys/ will be + * overwritten. Actually, this shouldn't be in sys/ at all since + * it isn't a sysctl, and it doesn't contain sysctl information. + * We now locate it in /proc/cpu/alignment instead. + */ +static int __init alignment_init(void) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *res; + + res = proc_create("cpu/alignment", S_IWUSR | S_IRUGO, NULL, + &alignment_proc_fops); + if (!res) + return -ENOMEM; +#endif + + if (cpu_is_v6_unaligned()) { + cr_alignment &= ~CR_A; + cr_no_alignment &= ~CR_A; + set_cr(cr_alignment); + ai_usermode = safe_usermode(ai_usermode, false); + } + + hook_fault_code(FAULT_CODE_ALIGNMENT, do_alignment, SIGBUS, BUS_ADRALN, + "alignment exception"); + + /* + * ARMv6K and ARMv7 use fault status 3 (0b00011) as Access Flag section + * fault, not as alignment error. + * + * TODO: handle ARMv6K properly. Runtime check for 'K' extension is + * needed. + */ + if (cpu_architecture() <= CPU_ARCH_ARMv6) { + hook_fault_code(3, do_alignment, SIGBUS, BUS_ADRALN, + "alignment exception"); + } + + return 0; +} + +fs_initcall(alignment_init); diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S new file mode 100644 index 00000000..07201637 --- /dev/null +++ b/arch/arm/mm/cache-fa.S @@ -0,0 +1,246 @@ +/* + * linux/arch/arm/mm/cache-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt> + * + * Based on cache-v4wb.S: + * Copyright (C) 1997-2002 Russell king + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Processors: FA520 FA526 FA626 + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/memory.h> +#include <asm/page.h> + +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 16 + +/* + * The total size of the data cache. + */ +#ifdef CONFIG_ARCH_GEMINI +#define CACHE_DSIZE 8192 +#else +#define CACHE_DSIZE 16384 +#endif + +/* FIXME: put optimal value here. Current one is just estimation */ +#define CACHE_DLIMIT (CACHE_DSIZE * 2) + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(fa_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(fa_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular address + * space. + */ +ENTRY(fa_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(fa_flush_kern_cache_all) + mov ip, #0 + mov r2, #VM_EXEC +__flush_whole_cache: + mcr p15, 0, ip, c7, c14, 0 @ clean/invalidate D cache + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive, page aligned) + * - end - end address (exclusive, page aligned) + * - flags - vma_area_struct flags describing address space + */ +ENTRY(fa_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT @ total size >= limit? + bhs __flush_whole_cache @ flush whole D cache + +1: tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I line + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ data write barrier + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_coherent_kern_range) + /* fall through */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure that the data held in the page kaddr is written back + * to the page in question. + * + * - addr - kernel address + * - size - size of region + */ +ENTRY(fa_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +fa_dma_inv_range: + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry + tst r1, #CACHE_DLINESIZE - 1 + bic r1, r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean (write back) the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +fa_dma_clean_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_flush_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(fa_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(fa_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq fa_dma_clean_range + bcs fa_dma_inv_range + b fa_dma_flush_range +ENDPROC(fa_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(fa_dma_unmap_area) + mov pc, lr +ENDPROC(fa_dma_unmap_area) + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions fa diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c new file mode 100644 index 00000000..dd3d5912 --- /dev/null +++ b/arch/arm/mm/cache-feroceon-l2.c @@ -0,0 +1,351 @@ +/* + * arch/arm/mm/cache-feroceon-l2.c - Feroceon L2 cache controller support + * + * Copyright (C) 2008 Marvell Semiconductor + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + * + * References: + * - Unified Layer 2 Cache for Feroceon CPU Cores, + * Document ID MV-S104858-00, Rev. A, October 23 2007. + */ + +#include <linux/init.h> +#include <linux/highmem.h> +#include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <plat/cache-feroceon-l2.h> + +/* + * Low-level cache maintenance operations. + * + * As well as the regular 'clean/invalidate/flush L2 cache line by + * MVA' instructions, the Feroceon L2 cache controller also features + * 'clean/invalidate L2 range by MVA' operations. + * + * Cache range operations are initiated by writing the start and + * end addresses to successive cp15 registers, and process every + * cache line whose first byte address lies in the inclusive range + * [start:end]. + * + * The cache range operations stall the CPU pipeline until completion. + * + * The range operations require two successive cp15 writes, in + * between which we don't want to be preempted. + */ + +static inline unsigned long l2_get_va(unsigned long paddr) +{ +#ifdef CONFIG_HIGHMEM + /* + * Because range ops can't be done on physical addresses, + * we simply install a virtual mapping for it only for the + * TLB lookup to occur, hence no need to flush the untouched + * memory mapping afterwards (note: a cache flush may happen + * in some circumstances depending on the path taken in kunmap_atomic). + */ + void *vaddr = kmap_atomic_pfn(paddr >> PAGE_SHIFT); + return (unsigned long)vaddr + (paddr & ~PAGE_MASK); +#else + return __phys_to_virt(paddr); +#endif +} + +static inline void l2_put_va(unsigned long vaddr) +{ +#ifdef CONFIG_HIGHMEM + kunmap_atomic((void *)vaddr); +#endif +} + +static inline void l2_clean_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr)); +} + +static inline void l2_clean_pa_range(unsigned long start, unsigned long end) +{ + unsigned long va_start, va_end, flags; + + /* + * Make sure 'start' and 'end' reference the same page, as + * L2 is PIPT and range operations only do a TLB lookup on + * the start address. + */ + BUG_ON((start ^ end) >> PAGE_SHIFT); + + va_start = l2_get_va(start); + va_end = va_start + (end - start); + raw_local_irq_save(flags); + __asm__("mcr p15, 1, %0, c15, c9, 4\n\t" + "mcr p15, 1, %1, c15, c9, 5" + : : "r" (va_start), "r" (va_end)); + raw_local_irq_restore(flags); + l2_put_va(va_start); +} + +static inline void l2_clean_inv_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c15, c10, 3" : : "r" (addr)); +} + +static inline void l2_inv_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c15, c11, 3" : : "r" (addr)); +} + +static inline void l2_inv_pa_range(unsigned long start, unsigned long end) +{ + unsigned long va_start, va_end, flags; + + /* + * Make sure 'start' and 'end' reference the same page, as + * L2 is PIPT and range operations only do a TLB lookup on + * the start address. + */ + BUG_ON((start ^ end) >> PAGE_SHIFT); + + va_start = l2_get_va(start); + va_end = va_start + (end - start); + raw_local_irq_save(flags); + __asm__("mcr p15, 1, %0, c15, c11, 4\n\t" + "mcr p15, 1, %1, c15, c11, 5" + : : "r" (va_start), "r" (va_end)); + raw_local_irq_restore(flags); + l2_put_va(va_start); +} + +static inline void l2_inv_all(void) +{ + __asm__("mcr p15, 1, %0, c15, c11, 0" : : "r" (0)); +} + +/* + * Linux primitives. + * + * Note that the end addresses passed to Linux primitives are + * noninclusive, while the hardware cache range operations use + * inclusive start and end addresses. + */ +#define CACHE_LINE_SIZE 32 +#define MAX_RANGE_SIZE 1024 + +static int l2_wt_override; + +static unsigned long calc_range_end(unsigned long start, unsigned long end) +{ + unsigned long range_end; + + BUG_ON(start & (CACHE_LINE_SIZE - 1)); + BUG_ON(end & (CACHE_LINE_SIZE - 1)); + + /* + * Try to process all cache lines between 'start' and 'end'. + */ + range_end = end; + + /* + * Limit the number of cache lines processed at once, + * since cache range operations stall the CPU pipeline + * until completion. + */ + if (range_end > start + MAX_RANGE_SIZE) + range_end = start + MAX_RANGE_SIZE; + + /* + * Cache range operations can't straddle a page boundary. + */ + if (range_end > (start | (PAGE_SIZE - 1)) + 1) + range_end = (start | (PAGE_SIZE - 1)) + 1; + + return range_end; +} + +static void feroceon_l2_inv_range(unsigned long start, unsigned long end) +{ + /* + * Clean and invalidate partial first cache line. + */ + if (start & (CACHE_LINE_SIZE - 1)) { + l2_clean_inv_pa(start & ~(CACHE_LINE_SIZE - 1)); + start = (start | (CACHE_LINE_SIZE - 1)) + 1; + } + + /* + * Clean and invalidate partial last cache line. + */ + if (start < end && end & (CACHE_LINE_SIZE - 1)) { + l2_clean_inv_pa(end & ~(CACHE_LINE_SIZE - 1)); + end &= ~(CACHE_LINE_SIZE - 1); + } + + /* + * Invalidate all full cache lines between 'start' and 'end'. + */ + while (start < end) { + unsigned long range_end = calc_range_end(start, end); + l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE); + start = range_end; + } + + dsb(); +} + +static void feroceon_l2_clean_range(unsigned long start, unsigned long end) +{ + /* + * If L2 is forced to WT, the L2 will always be clean and we + * don't need to do anything here. + */ + if (!l2_wt_override) { + start &= ~(CACHE_LINE_SIZE - 1); + end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1); + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + l2_clean_pa_range(start, range_end - CACHE_LINE_SIZE); + start = range_end; + } + } + + dsb(); +} + +static void feroceon_l2_flush_range(unsigned long start, unsigned long end) +{ + start &= ~(CACHE_LINE_SIZE - 1); + end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1); + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + if (!l2_wt_override) + l2_clean_pa_range(start, range_end - CACHE_LINE_SIZE); + l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE); + start = range_end; + } + + dsb(); +} + + +/* + * Routines to disable and re-enable the D-cache and I-cache at run + * time. These are necessary because the L2 cache can only be enabled + * or disabled while the L1 Dcache and Icache are both disabled. + */ +static int __init flush_and_disable_dcache(void) +{ + u32 cr; + + cr = get_cr(); + if (cr & CR_C) { + unsigned long flags; + + raw_local_irq_save(flags); + flush_cache_all(); + set_cr(cr & ~CR_C); + raw_local_irq_restore(flags); + return 1; + } + return 0; +} + +static void __init enable_dcache(void) +{ + u32 cr; + + cr = get_cr(); + set_cr(cr | CR_C); +} + +static void __init __invalidate_icache(void) +{ + __asm__("mcr p15, 0, %0, c7, c5, 0" : : "r" (0)); +} + +static int __init invalidate_and_disable_icache(void) +{ + u32 cr; + + cr = get_cr(); + if (cr & CR_I) { + set_cr(cr & ~CR_I); + __invalidate_icache(); + return 1; + } + return 0; +} + +static void __init enable_icache(void) +{ + u32 cr; + + cr = get_cr(); + set_cr(cr | CR_I); +} + +static inline u32 read_extra_features(void) +{ + u32 u; + + __asm__("mrc p15, 1, %0, c15, c1, 0" : "=r" (u)); + + return u; +} + +static inline void write_extra_features(u32 u) +{ + __asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u)); +} + +static void __init disable_l2_prefetch(void) +{ + u32 u; + + /* + * Read the CPU Extra Features register and verify that the + * Disable L2 Prefetch bit is set. + */ + u = read_extra_features(); + if (!(u & 0x01000000)) { + printk(KERN_INFO "Feroceon L2: Disabling L2 prefetch.\n"); + write_extra_features(u | 0x01000000); + } +} + +static void __init enable_l2(void) +{ + u32 u; + + u = read_extra_features(); + if (!(u & 0x00400000)) { + int i, d; + + printk(KERN_INFO "Feroceon L2: Enabling L2\n"); + + d = flush_and_disable_dcache(); + i = invalidate_and_disable_icache(); + l2_inv_all(); + write_extra_features(u | 0x00400000); + if (i) + enable_icache(); + if (d) + enable_dcache(); + } +} + +void __init feroceon_l2_init(int __l2_wt_override) +{ + l2_wt_override = __l2_wt_override; + + disable_l2_prefetch(); + + outer_cache.inv_range = feroceon_l2_inv_range; + outer_cache.clean_range = feroceon_l2_clean_range; + outer_cache.flush_range = feroceon_l2_flush_range; + + enable_l2(); + + printk(KERN_INFO "Feroceon L2: Cache support initialised%s.\n", + l2_wt_override ? ", in WT override mode" : ""); +} diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c new file mode 100644 index 00000000..eaa6847e --- /dev/null +++ b/arch/arm/mm/cache-l2x0.c @@ -0,0 +1,631 @@ +/* + * arch/arm/mm/cache-l2x0.c - L210/L220 cache controller support + * + * Copyright (C) 2007 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/err.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/of_address.h> + +#include <asm/cacheflush.h> +#include <asm/hardware/cache-l2x0.h> + +#define CACHE_LINE_SIZE 32 + +static void __iomem *l2x0_base; +static DEFINE_RAW_SPINLOCK(l2x0_lock); +static u32 l2x0_way_mask; /* Bitmask of active ways */ +static u32 l2x0_size; +static u32 l2x0_cache_id; +static unsigned int l2x0_sets; +static unsigned int l2x0_ways; +static unsigned long sync_reg_offset = L2X0_CACHE_SYNC; + +static inline bool is_pl310_rev(int rev) +{ + return (l2x0_cache_id & + (L2X0_CACHE_ID_PART_MASK | L2X0_CACHE_ID_REV_MASK)) == + (L2X0_CACHE_ID_PART_L310 | rev); +} + +struct l2x0_regs l2x0_saved_regs; + +struct l2x0_of_data { + void (*setup)(const struct device_node *, u32 *, u32 *); + void (*save)(void); + void (*resume)(void); +}; + +static inline void cache_wait_way(void __iomem *reg, unsigned long mask) +{ + /* wait for cache operation by line or way to complete */ + while (readl_relaxed(reg) & mask) + cpu_relax(); +} + +#ifdef CONFIG_CACHE_PL310 +static inline void cache_wait(void __iomem *reg, unsigned long mask) +{ + /* cache operations by line are atomic on PL310 */ +} +#else +#define cache_wait cache_wait_way +#endif + +static inline void cache_sync(void) +{ + void __iomem *base = l2x0_base; + + writel_relaxed(0, base + sync_reg_offset); + cache_wait(base + L2X0_CACHE_SYNC, 1); +} + +static inline void l2x0_clean_line(unsigned long addr) +{ + void __iomem *base = l2x0_base; + cache_wait(base + L2X0_CLEAN_LINE_PA, 1); + writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA); +} + +static inline void l2x0_inv_line(unsigned long addr) +{ + void __iomem *base = l2x0_base; + cache_wait(base + L2X0_INV_LINE_PA, 1); + writel_relaxed(addr, base + L2X0_INV_LINE_PA); +} + +#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915) +static inline void debug_writel(unsigned long val) +{ + if (outer_cache.set_debug) + outer_cache.set_debug(val); +} + +static void pl310_set_debug(unsigned long val) +{ + writel_relaxed(val, l2x0_base + L2X0_DEBUG_CTRL); +} +#else +/* Optimised out for non-errata case */ +static inline void debug_writel(unsigned long val) +{ +} + +#define pl310_set_debug NULL +#endif + +#ifdef CONFIG_PL310_ERRATA_588369 +static inline void l2x0_flush_line(unsigned long addr) +{ + void __iomem *base = l2x0_base; + + /* Clean by PA followed by Invalidate by PA */ + cache_wait(base + L2X0_CLEAN_LINE_PA, 1); + writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA); + cache_wait(base + L2X0_INV_LINE_PA, 1); + writel_relaxed(addr, base + L2X0_INV_LINE_PA); +} +#else + +static inline void l2x0_flush_line(unsigned long addr) +{ + void __iomem *base = l2x0_base; + cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); + writel_relaxed(addr, base + L2X0_CLEAN_INV_LINE_PA); +} +#endif + +static void l2x0_cache_sync(void) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + cache_sync(); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +#ifdef CONFIG_PL310_ERRATA_727915 +static void l2x0_for_each_set_way(void __iomem *reg) +{ + int set; + int way; + unsigned long flags; + + for (way = 0; way < l2x0_ways; way++) { + raw_spin_lock_irqsave(&l2x0_lock, flags); + for (set = 0; set < l2x0_sets; set++) + writel_relaxed((way << 28) | (set << 5), reg); + cache_sync(); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + } +} +#endif + +static void __l2x0_flush_all(void) +{ + debug_writel(0x03); + writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_INV_WAY); + cache_wait_way(l2x0_base + L2X0_CLEAN_INV_WAY, l2x0_way_mask); + cache_sync(); + debug_writel(0x00); +} + +static void l2x0_flush_all(void) +{ + unsigned long flags; + +#ifdef CONFIG_PL310_ERRATA_727915 + if (is_pl310_rev(REV_PL310_R2P0)) { + l2x0_for_each_set_way(l2x0_base + L2X0_CLEAN_INV_LINE_IDX); + return; + } +#endif + + /* clean all ways */ + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2x0_flush_all(); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2x0_clean_all(void) +{ + unsigned long flags; + +#ifdef CONFIG_PL310_ERRATA_727915 + if (is_pl310_rev(REV_PL310_R2P0)) { + l2x0_for_each_set_way(l2x0_base + L2X0_CLEAN_LINE_IDX); + return; + } +#endif + + /* clean all ways */ + raw_spin_lock_irqsave(&l2x0_lock, flags); + debug_writel(0x03); + writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_WAY); + cache_wait_way(l2x0_base + L2X0_CLEAN_WAY, l2x0_way_mask); + cache_sync(); + debug_writel(0x00); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2x0_inv_all(void) +{ + unsigned long flags; + + /* invalidate all ways */ + raw_spin_lock_irqsave(&l2x0_lock, flags); + /* Invalidating when L2 is enabled is a nono */ + BUG_ON(readl(l2x0_base + L2X0_CTRL) & 1); + writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); + cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); + cache_sync(); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2x0_inv_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + if (start & (CACHE_LINE_SIZE - 1)) { + start &= ~(CACHE_LINE_SIZE - 1); + debug_writel(0x03); + l2x0_flush_line(start); + debug_writel(0x00); + start += CACHE_LINE_SIZE; + } + + if (end & (CACHE_LINE_SIZE - 1)) { + end &= ~(CACHE_LINE_SIZE - 1); + debug_writel(0x03); + l2x0_flush_line(end); + debug_writel(0x00); + } + + while (start < end) { + unsigned long blk_end = start + min(end - start, 4096UL); + + while (start < blk_end) { + l2x0_inv_line(start); + start += CACHE_LINE_SIZE; + } + + if (blk_end < end) { + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); + } + } + cache_wait(base + L2X0_INV_LINE_PA, 1); + cache_sync(); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2x0_clean_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + + if ((end - start) >= l2x0_size) { + l2x0_clean_all(); + return; + } + + raw_spin_lock_irqsave(&l2x0_lock, flags); + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + unsigned long blk_end = start + min(end - start, 4096UL); + + while (start < blk_end) { + l2x0_clean_line(start); + start += CACHE_LINE_SIZE; + } + + if (blk_end < end) { + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); + } + } + cache_wait(base + L2X0_CLEAN_LINE_PA, 1); + cache_sync(); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2x0_flush_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + + if ((end - start) >= l2x0_size) { + l2x0_flush_all(); + return; + } + + raw_spin_lock_irqsave(&l2x0_lock, flags); + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + unsigned long blk_end = start + min(end - start, 4096UL); + + debug_writel(0x03); + while (start < blk_end) { + l2x0_flush_line(start); + start += CACHE_LINE_SIZE; + } + debug_writel(0x00); + + if (blk_end < end) { + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_lock_irqsave(&l2x0_lock, flags); + } + } + cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); + cache_sync(); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2x0_disable(void) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2x0_flush_all(); + writel_relaxed(0, l2x0_base + L2X0_CTRL); + dsb(); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2x0_unlock(u32 cache_id) +{ + int lockregs; + int i; + + if (cache_id == L2X0_CACHE_ID_PART_L310) + lockregs = 8; + else + /* L210 and unknown types */ + lockregs = 1; + + for (i = 0; i < lockregs; i++) { + writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE + + i * L2X0_LOCKDOWN_STRIDE); + writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_I_BASE + + i * L2X0_LOCKDOWN_STRIDE); + } +} + +void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) +{ + u32 aux; + u32 way_size = 0; + const char *type; + + l2x0_base = base; + + l2x0_cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); + aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + + aux &= aux_mask; + aux |= aux_val; + + /* Determine the number of ways */ + switch (l2x0_cache_id & L2X0_CACHE_ID_PART_MASK) { + case L2X0_CACHE_ID_PART_L310: + if (aux & (1 << 16)) + l2x0_ways = 16; + else + l2x0_ways = 8; + type = "L310"; +#ifdef CONFIG_PL310_ERRATA_753970 + /* Unmapped register. */ + sync_reg_offset = L2X0_DUMMY_REG; +#endif + outer_cache.set_debug = pl310_set_debug; + break; + case L2X0_CACHE_ID_PART_L210: + l2x0_ways = (aux >> 13) & 0xf; + type = "L210"; + break; + default: + /* Assume unknown chips have 8 ways */ + l2x0_ways = 8; + type = "L2x0 series"; + break; + } + + l2x0_way_mask = (1 << l2x0_ways) - 1; + + /* + * L2 cache Size = Way size * Number of ways + */ + way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17; + way_size = SZ_1K << (way_size + 3); + l2x0_size = l2x0_ways * way_size; + l2x0_sets = way_size / CACHE_LINE_SIZE; + + /* + * Check if l2x0 controller is already enabled. + * If you are booting from non-secure mode + * accessing the below registers will fault. + */ + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + /* Make sure that I&D is not locked down when starting */ + l2x0_unlock(l2x0_cache_id); + + /* l2x0 controller is disabled */ + writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL); + + l2x0_saved_regs.aux_ctrl = aux; + + l2x0_inv_all(); + + /* enable L2X0 */ + writel_relaxed(1, l2x0_base + L2X0_CTRL); + } + + outer_cache.inv_range = l2x0_inv_range; + outer_cache.clean_range = l2x0_clean_range; + outer_cache.flush_range = l2x0_flush_range; + outer_cache.sync = l2x0_cache_sync; + outer_cache.flush_all = l2x0_flush_all; + outer_cache.inv_all = l2x0_inv_all; + outer_cache.disable = l2x0_disable; + + printk(KERN_INFO "%s cache controller enabled\n", type); + printk(KERN_INFO "l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d B\n", + l2x0_ways, l2x0_cache_id, aux, l2x0_size); +} + +#ifdef CONFIG_OF +static void __init l2x0_of_setup(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 data[2] = { 0, 0 }; + u32 tag = 0; + u32 dirty = 0; + u32 val = 0, mask = 0; + + of_property_read_u32(np, "arm,tag-latency", &tag); + if (tag) { + mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK; + val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT; + } + + of_property_read_u32_array(np, "arm,data-latency", + data, ARRAY_SIZE(data)); + if (data[0] && data[1]) { + mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK | + L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK; + val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) | + ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT); + } + + of_property_read_u32(np, "arm,dirty-latency", &dirty); + if (dirty) { + mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK; + val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT; + } + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; +} + +static void __init pl310_of_setup(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 data[3] = { 0, 0, 0 }; + u32 tag[3] = { 0, 0, 0 }; + u32 filter[2] = { 0, 0 }; + + of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag)); + if (tag[0] && tag[1] && tag[2]) + writel_relaxed( + ((tag[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) | + ((tag[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) | + ((tag[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT), + l2x0_base + L2X0_TAG_LATENCY_CTRL); + + of_property_read_u32_array(np, "arm,data-latency", + data, ARRAY_SIZE(data)); + if (data[0] && data[1] && data[2]) + writel_relaxed( + ((data[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) | + ((data[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) | + ((data[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT), + l2x0_base + L2X0_DATA_LATENCY_CTRL); + + of_property_read_u32_array(np, "arm,filter-ranges", + filter, ARRAY_SIZE(filter)); + if (filter[1]) { + writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M), + l2x0_base + L2X0_ADDR_FILTER_END); + writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L2X0_ADDR_FILTER_EN, + l2x0_base + L2X0_ADDR_FILTER_START); + } +} + +static void __init pl310_save(void) +{ + u32 l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + l2x0_saved_regs.tag_latency = readl_relaxed(l2x0_base + + L2X0_TAG_LATENCY_CTRL); + l2x0_saved_regs.data_latency = readl_relaxed(l2x0_base + + L2X0_DATA_LATENCY_CTRL); + l2x0_saved_regs.filter_end = readl_relaxed(l2x0_base + + L2X0_ADDR_FILTER_END); + l2x0_saved_regs.filter_start = readl_relaxed(l2x0_base + + L2X0_ADDR_FILTER_START); + + if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) { + /* + * From r2p0, there is Prefetch offset/control register + */ + l2x0_saved_regs.prefetch_ctrl = readl_relaxed(l2x0_base + + L2X0_PREFETCH_CTRL); + /* + * From r3p0, there is Power control register + */ + if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0) + l2x0_saved_regs.pwr_ctrl = readl_relaxed(l2x0_base + + L2X0_POWER_CTRL); + } +} + +static void l2x0_resume(void) +{ + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + /* restore aux ctrl and enable l2 */ + l2x0_unlock(readl_relaxed(l2x0_base + L2X0_CACHE_ID)); + + writel_relaxed(l2x0_saved_regs.aux_ctrl, l2x0_base + + L2X0_AUX_CTRL); + + l2x0_inv_all(); + + writel_relaxed(1, l2x0_base + L2X0_CTRL); + } +} + +static void pl310_resume(void) +{ + u32 l2x0_revision; + + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + /* restore pl310 setup */ + writel_relaxed(l2x0_saved_regs.tag_latency, + l2x0_base + L2X0_TAG_LATENCY_CTRL); + writel_relaxed(l2x0_saved_regs.data_latency, + l2x0_base + L2X0_DATA_LATENCY_CTRL); + writel_relaxed(l2x0_saved_regs.filter_end, + l2x0_base + L2X0_ADDR_FILTER_END); + writel_relaxed(l2x0_saved_regs.filter_start, + l2x0_base + L2X0_ADDR_FILTER_START); + + l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) { + writel_relaxed(l2x0_saved_regs.prefetch_ctrl, + l2x0_base + L2X0_PREFETCH_CTRL); + if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0) + writel_relaxed(l2x0_saved_regs.pwr_ctrl, + l2x0_base + L2X0_POWER_CTRL); + } + } + + l2x0_resume(); +} + +static const struct l2x0_of_data pl310_data = { + pl310_of_setup, + pl310_save, + pl310_resume, +}; + +static const struct l2x0_of_data l2x0_data = { + l2x0_of_setup, + NULL, + l2x0_resume, +}; + +static const struct of_device_id l2x0_ids[] __initconst = { + { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data }, + { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data }, + { .compatible = "arm,l210-cache", .data = (void *)&l2x0_data }, + {} +}; + +int __init l2x0_of_init(u32 aux_val, u32 aux_mask) +{ + struct device_node *np; + struct l2x0_of_data *data; + struct resource res; + + np = of_find_matching_node(NULL, l2x0_ids); + if (!np) + return -ENODEV; + + if (of_address_to_resource(np, 0, &res)) + return -ENODEV; + + l2x0_base = ioremap(res.start, resource_size(&res)); + if (!l2x0_base) + return -ENOMEM; + + l2x0_saved_regs.phy_base = res.start; + + data = of_match_node(l2x0_ids, np)->data; + + /* L2 configuration can only be changed if the cache is disabled */ + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) { + if (data->setup) + data->setup(np, &aux_val, &aux_mask); + } + + if (data->save) + data->save(); + + l2x0_init(l2x0_base, aux_val, aux_mask); + + outer_cache.resume = data->resume; + return 0; +} +#endif diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c new file mode 100644 index 00000000..1fbca05f --- /dev/null +++ b/arch/arm/mm/cache-tauros2.c @@ -0,0 +1,264 @@ +/* + * arch/arm/mm/cache-tauros2.c - Tauros2 L2 cache controller support + * + * Copyright (C) 2008 Marvell Semiconductor + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + * + * References: + * - PJ1 CPU Core Datasheet, + * Document ID MV-S104837-01, Rev 0.7, January 24 2008. + * - PJ4 CPU Core Datasheet, + * Document ID MV-S105190-00, Rev 0.7, March 14 2008. + */ + +#include <linux/init.h> +#include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <asm/hardware/cache-tauros2.h> + + +/* + * When Tauros2 is used on a CPU that supports the v7 hierarchical + * cache operations, the cache handling code in proc-v7.S takes care + * of everything, including handling DMA coherency. + * + * So, we only need to register outer cache operations here if we're + * being used on a pre-v7 CPU, and we only need to build support for + * outer cache operations into the kernel image if the kernel has been + * configured to support a pre-v7 CPU. + */ +#if __LINUX_ARM_ARCH__ < 7 +/* + * Low-level cache maintenance operations. + */ +static inline void tauros2_clean_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c11, 3" : : "r" (addr)); +} + +static inline void tauros2_clean_inv_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c15, 3" : : "r" (addr)); +} + +static inline void tauros2_inv_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c7, 3" : : "r" (addr)); +} + + +/* + * Linux primitives. + * + * Note that the end addresses passed to Linux primitives are + * noninclusive. + */ +#define CACHE_LINE_SIZE 32 + +static void tauros2_inv_range(unsigned long start, unsigned long end) +{ + /* + * Clean and invalidate partial first cache line. + */ + if (start & (CACHE_LINE_SIZE - 1)) { + tauros2_clean_inv_pa(start & ~(CACHE_LINE_SIZE - 1)); + start = (start | (CACHE_LINE_SIZE - 1)) + 1; + } + + /* + * Clean and invalidate partial last cache line. + */ + if (end & (CACHE_LINE_SIZE - 1)) { + tauros2_clean_inv_pa(end & ~(CACHE_LINE_SIZE - 1)); + end &= ~(CACHE_LINE_SIZE - 1); + } + + /* + * Invalidate all full cache lines between 'start' and 'end'. + */ + while (start < end) { + tauros2_inv_pa(start); + start += CACHE_LINE_SIZE; + } + + dsb(); +} + +static void tauros2_clean_range(unsigned long start, unsigned long end) +{ + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + tauros2_clean_pa(start); + start += CACHE_LINE_SIZE; + } + + dsb(); +} + +static void tauros2_flush_range(unsigned long start, unsigned long end) +{ + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + tauros2_clean_inv_pa(start); + start += CACHE_LINE_SIZE; + } + + dsb(); +} +#endif + +static inline u32 __init read_extra_features(void) +{ + u32 u; + + __asm__("mrc p15, 1, %0, c15, c1, 0" : "=r" (u)); + + return u; +} + +static inline void __init write_extra_features(u32 u) +{ + __asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u)); +} + +static void __init disable_l2_prefetch(void) +{ + u32 u; + + /* + * Read the CPU Extra Features register and verify that the + * Disable L2 Prefetch bit is set. + */ + u = read_extra_features(); + if (!(u & 0x01000000)) { + printk(KERN_INFO "Tauros2: Disabling L2 prefetch.\n"); + write_extra_features(u | 0x01000000); + } +} + +static inline int __init cpuid_scheme(void) +{ + extern int processor_id; + + return !!((processor_id & 0x000f0000) == 0x000f0000); +} + +static inline u32 __init read_mmfr3(void) +{ + u32 mmfr3; + + __asm__("mrc p15, 0, %0, c0, c1, 7\n" : "=r" (mmfr3)); + + return mmfr3; +} + +static inline u32 __init read_actlr(void) +{ + u32 actlr; + + __asm__("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); + + return actlr; +} + +static inline void __init write_actlr(u32 actlr) +{ + __asm__("mcr p15, 0, %0, c1, c0, 1\n" : : "r" (actlr)); +} + +void __init tauros2_init(void) +{ + extern int processor_id; + char *mode; + + disable_l2_prefetch(); + +#ifdef CONFIG_CPU_32v5 + if ((processor_id & 0xff0f0000) == 0x56050000) { + u32 feat; + + /* + * v5 CPUs with Tauros2 have the L2 cache enable bit + * located in the CPU Extra Features register. + */ + feat = read_extra_features(); + if (!(feat & 0x00400000)) { + printk(KERN_INFO "Tauros2: Enabling L2 cache.\n"); + write_extra_features(feat | 0x00400000); + } + + mode = "ARMv5"; + outer_cache.inv_range = tauros2_inv_range; + outer_cache.clean_range = tauros2_clean_range; + outer_cache.flush_range = tauros2_flush_range; + } +#endif + +#ifdef CONFIG_CPU_32v6 + /* + * Check whether this CPU lacks support for the v7 hierarchical + * cache ops. (PJ4 is in its v6 personality mode if the MMFR3 + * register indicates no support for the v7 hierarchical cache + * ops.) + */ + if (cpuid_scheme() && (read_mmfr3() & 0xf) == 0) { + /* + * When Tauros2 is used in an ARMv6 system, the L2 + * enable bit is in the ARMv6 ARM-mandated position + * (bit [26] of the System Control Register). + */ + if (!(get_cr() & 0x04000000)) { + printk(KERN_INFO "Tauros2: Enabling L2 cache.\n"); + adjust_cr(0x04000000, 0x04000000); + } + + mode = "ARMv6"; + outer_cache.inv_range = tauros2_inv_range; + outer_cache.clean_range = tauros2_clean_range; + outer_cache.flush_range = tauros2_flush_range; + } +#endif + +#ifdef CONFIG_CPU_32v7 + /* + * Check whether this CPU has support for the v7 hierarchical + * cache ops. (PJ4 is in its v7 personality mode if the MMFR3 + * register indicates support for the v7 hierarchical cache + * ops.) + * + * (Although strictly speaking there may exist CPUs that + * implement the v7 cache ops but are only ARMv6 CPUs (due to + * not complying with all of the other ARMv7 requirements), + * there are no real-life examples of Tauros2 being used on + * such CPUs as of yet.) + */ + if (cpuid_scheme() && (read_mmfr3() & 0xf) == 1) { + u32 actlr; + + /* + * When Tauros2 is used in an ARMv7 system, the L2 + * enable bit is located in the Auxiliary System Control + * Register (which is the only register allowed by the + * ARMv7 spec to contain fine-grained cache control bits). + */ + actlr = read_actlr(); + if (!(actlr & 0x00000002)) { + printk(KERN_INFO "Tauros2: Enabling L2 cache.\n"); + write_actlr(actlr | 0x00000002); + } + + mode = "ARMv7"; + } +#endif + + if (mode == NULL) { + printk(KERN_CRIT "Tauros2: Unable to detect CPU mode.\n"); + return; + } + + printk(KERN_INFO "Tauros2: L2 cache support initialised " + "in %s mode.\n", mode); +} diff --git a/arch/arm/mm/cache-v3.S b/arch/arm/mm/cache-v3.S new file mode 100644 index 00000000..c2301f22 --- /dev/null +++ b/arch/arm/mm/cache-v3.S @@ -0,0 +1,133 @@ +/* + * linux/arch/arm/mm/cache-v3.S + * + * Copyright (C) 1997-2002 Russell king + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/page.h> +#include "proc-macros.S" + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(v3_flush_icache_all) + mov pc, lr +ENDPROC(v3_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + * + * - mm - mm_struct describing address space + */ +ENTRY(v3_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(v3_flush_kern_cache_all) + /* FALLTHROUGH */ + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - flags - vma_area_struct flags describing address space + */ +ENTRY(v3_flush_user_cache_range) + mov ip, #0 + mcreq p15, 0, ip, c7, c0, 0 @ flush ID cache + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v3_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v3_coherent_user_range) + mov pc, lr + +/* + * flush_kern_dcache_area(void *page, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(v3_flush_kern_dcache_area) + /* FALLTHROUGH */ + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v3_dma_flush_range) + mov r0, #0 + mcr p15, 0, r0, c7, c0, 0 @ flush ID cache + mov pc, lr + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v3_dma_unmap_area) + teq r2, #DMA_TO_DEVICE + bne v3_dma_flush_range + /* FALLTHROUGH */ + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v3_dma_map_area) + mov pc, lr +ENDPROC(v3_dma_unmap_area) +ENDPROC(v3_dma_map_area) + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v3 diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S new file mode 100644 index 00000000..fd9bb7ad --- /dev/null +++ b/arch/arm/mm/cache-v4.S @@ -0,0 +1,145 @@ +/* + * linux/arch/arm/mm/cache-v4.S + * + * Copyright (C) 1997-2002 Russell king + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/page.h> +#include "proc-macros.S" + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(v4_flush_icache_all) + mov pc, lr +ENDPROC(v4_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + * + * - mm - mm_struct describing address space + */ +ENTRY(v4_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(v4_flush_kern_cache_all) +#ifdef CONFIG_CPU_CP15 + mov r0, #0 + mcr p15, 0, r0, c7, c7, 0 @ flush ID cache + mov pc, lr +#else + /* FALLTHROUGH */ +#endif + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - flags - vma_area_struct flags describing address space + */ +ENTRY(v4_flush_user_cache_range) +#ifdef CONFIG_CPU_CP15 + mov ip, #0 + mcreq p15, 0, ip, c7, c7, 0 @ flush ID cache + mov pc, lr +#else + /* FALLTHROUGH */ +#endif + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4_coherent_user_range) + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(v4_flush_kern_dcache_area) + /* FALLTHROUGH */ + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4_dma_flush_range) +#ifdef CONFIG_CPU_CP15 + mov r0, #0 + mcr p15, 0, r0, c7, c7, 0 @ flush ID cache +#endif + mov pc, lr + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4_dma_unmap_area) + teq r2, #DMA_TO_DEVICE + bne v4_dma_flush_range + /* FALLTHROUGH */ + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4_dma_map_area) + mov pc, lr +ENDPROC(v4_dma_unmap_area) +ENDPROC(v4_dma_map_area) + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v4 diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S new file mode 100644 index 00000000..4f2c1415 --- /dev/null +++ b/arch/arm/mm/cache-v4wb.S @@ -0,0 +1,257 @@ +/* + * linux/arch/arm/mm/cache-v4wb.S + * + * Copyright (C) 1997-2002 Russell king + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/memory.h> +#include <asm/page.h> +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The total size of the data cache. + */ +#if defined(CONFIG_CPU_SA110) +# define CACHE_DSIZE 16384 +#elif defined(CONFIG_CPU_SA1100) +# define CACHE_DSIZE 8192 +#else +# error Unknown cache size +#endif + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + * + * Size Clean (ticks) Dirty (ticks) + * 4096 21 20 21 53 55 54 + * 8192 40 41 40 106 100 102 + * 16384 77 77 76 140 140 138 + * 32768 150 149 150 214 216 212 <--- + * 65536 296 297 296 351 358 361 + * 131072 591 591 591 656 657 651 + * Whole 132 136 132 221 217 207 <--- + */ +#define CACHE_DLIMIT (CACHE_DSIZE * 4) + + .data +flush_base: + .long FLUSH_BASE + .text + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(v4wb_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(v4wb_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular address + * space. + */ +ENTRY(v4wb_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(v4wb_flush_kern_cache_all) + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache +__flush_whole_cache: + ldr r3, =flush_base + ldr r1, [r3, #0] + eor r1, r1, #CACHE_DSIZE + str r1, [r3, #0] + add r2, r1, #CACHE_DSIZE +1: ldr r3, [r1], #32 + cmp r1, r2 + blo 1b +#ifdef FLUSH_BASE_MINICACHE + add r2, r2, #FLUSH_BASE_MINICACHE - FLUSH_BASE + sub r1, r2, #512 @ only 512 bytes +1: ldr r3, [r1], #32 + cmp r1, r2 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive, page aligned) + * - end - end address (exclusive, page aligned) + * - flags - vma_area_struct flags describing address space + */ +ENTRY(v4wb_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + tst r2, #VM_EXEC @ executable region? + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + + cmp r3, #CACHE_DLIMIT @ total size >= limit? + bhs __flush_whole_cache @ flush whole D cache + +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(v4wb_flush_kern_dcache_area) + add r1, r0, r1 + /* fall through */ + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4wb_coherent_kern_range) + /* fall through */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4wb_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +v4wb_dma_inv_range: + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean (write back) the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +v4wb_dma_clean_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * This is actually the same as v4wb_coherent_kern_range() + */ + .globl v4wb_dma_flush_range + .set v4wb_dma_flush_range, v4wb_coherent_kern_range + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4wb_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq v4wb_dma_clean_range + bcs v4wb_dma_inv_range + b v4wb_dma_flush_range +ENDPROC(v4wb_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4wb_dma_unmap_area) + mov pc, lr +ENDPROC(v4wb_dma_unmap_area) + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v4wb diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S new file mode 100644 index 00000000..4d7b4676 --- /dev/null +++ b/arch/arm/mm/cache-v4wt.S @@ -0,0 +1,201 @@ +/* + * linux/arch/arm/mm/cache-v4wt.S + * + * Copyright (C) 1997-2002 Russell king + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARMv4 write through cache operations support. + * + * We assume that the write buffer is not enabled. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/page.h> +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 8 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + * + * *** This needs benchmarking + */ +#define CACHE_DLIMIT 16384 + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(v4wt_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(v4wt_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(v4wt_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(v4wt_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive, page aligned) + * - end - end address (exclusive, page aligned) + * - flags - vma_area_struct flags describing address space + */ +ENTRY(v4wt_flush_user_cache_range) + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4wt_coherent_kern_range) + /* FALLTRHOUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4wt_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(v4wt_flush_kern_dcache_area) + mov r2, #0 + mcr p15, 0, r2, c7, c5, 0 @ invalidate I cache + add r1, r0, r1 + /* fallthrough */ + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +v4wt_dma_inv_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ + .globl v4wt_dma_flush_range + .equ v4wt_dma_flush_range, v4wt_dma_inv_range + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4wt_dma_unmap_area) + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + bne v4wt_dma_inv_range + /* FALLTHROUGH */ + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4wt_dma_map_area) + mov pc, lr +ENDPROC(v4wt_dma_unmap_area) +ENDPROC(v4wt_dma_map_area) + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v4wt diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S new file mode 100644 index 00000000..2edb6f67 --- /dev/null +++ b/arch/arm/mm/cache-v6.S @@ -0,0 +1,351 @@ +/* + * linux/arch/arm/mm/cache-v6.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This is the "shell" of the ARMv6 processor support. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/unwind.h> + +#include "proc-macros.S" + +#define HARVARD_CACHE +#define CACHE_LINE_SIZE 32 +#define D_CACHE_LINE_SIZE 32 +#define BTB_FLUSH_SIZE 8 + +/* + * v6_flush_icache_all() + * + * Flush the whole I-cache. + * + * ARM1136 erratum 411920 - Invalidate Instruction Cache operation can fail. + * This erratum is present in 1136, 1156 and 1176. It does not affect the + * MPCore. + * + * Registers: + * r0 - set to 0 + * r1 - corrupted + */ +ENTRY(v6_flush_icache_all) + mov r0, #0 +#ifdef CONFIG_ARM_ERRATA_411920 + mrs r1, cpsr + cpsid ifa @ disable interrupts + mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache + mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache + mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache + mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache + msr cpsr_cx, r1 @ restore interrupts + .rept 11 @ ARM Ltd recommends at least + nop @ 11 NOPs + .endr +#else + mcr p15, 0, r0, c7, c5, 0 @ invalidate I-cache +#endif + mov pc, lr +ENDPROC(v6_flush_icache_all) + +/* + * v6_flush_cache_all() + * + * Flush the entire cache. + * + * It is assumed that: + */ +ENTRY(v6_flush_kern_cache_all) + mov r0, #0 +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c14, 0 @ D cache clean+invalidate +#ifndef CONFIG_ARM_ERRATA_411920 + mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate +#else + b v6_flush_icache_all +#endif +#else + mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate +#endif + mov pc, lr + +/* + * v6_flush_cache_all() + * + * Flush all TLB entries in a particular address space + * + * - mm - mm_struct describing address space + */ +ENTRY(v6_flush_user_cache_all) + /*FALLTHROUGH*/ + +/* + * v6_flush_cache_range(start, end, flags) + * + * Flush a range of TLB entries in the specified address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - flags - vm_area_struct flags describing address space + * + * It is assumed that: + * - we have a VIPT cache. + */ +ENTRY(v6_flush_user_cache_range) + mov pc, lr + +/* + * v6_coherent_kern_range(start,end) + * + * Ensure that the I and D caches are coherent within specified + * region. This is typically used when code has been written to + * a memory region, and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + * + * It is assumed that: + * - the Icache does not read data from the write buffer + */ +ENTRY(v6_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * v6_coherent_user_range(start,end) + * + * Ensure that the I and D caches are coherent within specified + * region. This is typically used when code has been written to + * a memory region, and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + * + * It is assumed that: + * - the Icache does not read data from the write buffer + */ +ENTRY(v6_coherent_user_range) + UNWIND(.fnstart ) +#ifdef HARVARD_CACHE + bic r0, r0, #CACHE_LINE_SIZE - 1 +1: + USER( mcr p15, 0, r0, c7, c10, 1 ) @ clean D line + add r0, r0, #CACHE_LINE_SIZE +2: + cmp r0, r1 + blo 1b +#endif + mov r0, #0 +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer +#ifndef CONFIG_ARM_ERRATA_411920 + mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate +#else + b v6_flush_icache_all +#endif +#else + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB +#endif + mov pc, lr + +/* + * Fault handling for the cache operation above. If the virtual address in r0 + * isn't mapped, just try the next page. + */ +9001: + mov r0, r0, lsr #12 + mov r0, r0, lsl #12 + add r0, r0, #4096 + b 2b + UNWIND(.fnend ) +ENDPROC(v6_coherent_user_range) +ENDPROC(v6_coherent_kern_range) + +/* + * v6_flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure that the data held in the page kaddr is written back + * to the page in question. + * + * - addr - kernel address + * - size - region size + */ +ENTRY(v6_flush_kern_dcache_area) + add r1, r0, r1 + bic r0, r0, #D_CACHE_LINE_SIZE - 1 +1: +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line +#else + mcr p15, 0, r0, c7, c15, 1 @ clean & invalidate unified line +#endif + add r0, r0, #D_CACHE_LINE_SIZE + cmp r0, r1 + blo 1b +#ifdef HARVARD_CACHE + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 +#endif + mov pc, lr + + +/* + * v6_dma_inv_range(start,end) + * + * Invalidate the data cache within the specified region; we will + * be performing a DMA operation in this region and we want to + * purge old data in the cache. + * + * - start - virtual start address of region + * - end - virtual end address of region + */ +v6_dma_inv_range: +#ifdef CONFIG_DMA_CACHE_RWFO + ldrb r2, [r0] @ read for ownership + strb r2, [r0] @ write for ownership +#endif + tst r0, #D_CACHE_LINE_SIZE - 1 + bic r0, r0, #D_CACHE_LINE_SIZE - 1 +#ifdef HARVARD_CACHE + mcrne p15, 0, r0, c7, c10, 1 @ clean D line +#else + mcrne p15, 0, r0, c7, c11, 1 @ clean unified line +#endif + tst r1, #D_CACHE_LINE_SIZE - 1 +#ifdef CONFIG_DMA_CACHE_RWFO + ldrneb r2, [r1, #-1] @ read for ownership + strneb r2, [r1, #-1] @ write for ownership +#endif + bic r1, r1, #D_CACHE_LINE_SIZE - 1 +#ifdef HARVARD_CACHE + mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D line +#else + mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line +#endif +1: +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c6, 1 @ invalidate D line +#else + mcr p15, 0, r0, c7, c7, 1 @ invalidate unified line +#endif + add r0, r0, #D_CACHE_LINE_SIZE + cmp r0, r1 +#ifdef CONFIG_DMA_CACHE_RWFO + ldrlo r2, [r0] @ read for ownership + strlo r2, [r0] @ write for ownership +#endif + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * v6_dma_clean_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +v6_dma_clean_range: + bic r0, r0, #D_CACHE_LINE_SIZE - 1 +1: +#ifdef CONFIG_DMA_CACHE_RWFO + ldr r2, [r0] @ read for ownership +#endif +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c10, 1 @ clean D line +#else + mcr p15, 0, r0, c7, c11, 1 @ clean unified line +#endif + add r0, r0, #D_CACHE_LINE_SIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +/* + * v6_dma_flush_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(v6_dma_flush_range) +#ifdef CONFIG_CACHE_FLUSH_RANGE_LIMIT + sub r2, r1, r0 + cmp r2, #CONFIG_CACHE_FLUSH_RANGE_LIMIT + bhi v6_dma_flush_dcache_all +#endif +#ifdef CONFIG_DMA_CACHE_RWFO + ldrb r2, [r0] @ read for ownership + strb r2, [r0] @ write for ownership +#endif + bic r0, r0, #D_CACHE_LINE_SIZE - 1 +1: +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line +#else + mcr p15, 0, r0, c7, c15, 1 @ clean & invalidate line +#endif + add r0, r0, #D_CACHE_LINE_SIZE + cmp r0, r1 +#ifdef CONFIG_DMA_CACHE_RWFO + ldrlob r2, [r0] @ read for ownership + strlob r2, [r0] @ write for ownership +#endif + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr + +#ifdef CONFIG_CACHE_FLUSH_RANGE_LIMIT +v6_dma_flush_dcache_all: + mov r0, #0 +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c14, 0 @ D cache clean+invalidate +#else + mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr +#endif + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v6_dma_map_area) + add r1, r1, r0 + teq r2, #DMA_FROM_DEVICE + beq v6_dma_inv_range +#ifndef CONFIG_DMA_CACHE_RWFO + b v6_dma_clean_range +#else + teq r2, #DMA_TO_DEVICE + beq v6_dma_clean_range + b v6_dma_flush_range +#endif +ENDPROC(v6_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v6_dma_unmap_area) +#ifndef CONFIG_DMA_CACHE_RWFO + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + bne v6_dma_inv_range +#endif + mov pc, lr +ENDPROC(v6_dma_unmap_area) + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v6 diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S new file mode 100644 index 00000000..a655d3da --- /dev/null +++ b/arch/arm/mm/cache-v7.S @@ -0,0 +1,355 @@ +/* + * linux/arch/arm/mm/cache-v7.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Copyright (C) 2005 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This is the "shell" of the ARMv7 processor support. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/unwind.h> + +#include "proc-macros.S" + +/* + * v7_flush_icache_all() + * + * Flush the whole I-cache. + * + * Registers: + * r0 - set to 0 + */ +ENTRY(v7_flush_icache_all) + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate + mov pc, lr +ENDPROC(v7_flush_icache_all) + +/* + * v7_flush_dcache_all() + * + * Flush the whole D-cache. + * + * Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode) + * + * - mm - mm_struct describing address space + */ +ENTRY(v7_flush_dcache_all) + dmb @ ensure ordering with previous memory accesses + mrc p15, 1, r0, c0, c0, 1 @ read clidr + ands r3, r0, #0x7000000 @ extract loc from clidr + mov r3, r3, lsr #23 @ left align loc bit field + beq finished @ if loc is 0, then no need to clean + mov r10, #0 @ start clean at cache level 0 +loop1: + add r2, r10, r10, lsr #1 @ work out 3x current cache level + mov r1, r0, lsr r2 @ extract cache type bits from clidr + and r1, r1, #7 @ mask of the bits for current cache only + cmp r1, #2 @ see what cache we have at this level + blt skip @ skip if no cache, or just i-cache +#ifdef CONFIG_PREEMPT + save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic +#endif + mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr + isb @ isb to sych the new cssr&csidr + mrc p15, 1, r1, c0, c0, 0 @ read the new csidr +#ifdef CONFIG_PREEMPT + restore_irqs_notrace r9 +#endif + and r2, r1, #7 @ extract the length of the cache lines + add r2, r2, #4 @ add 4 (line length offset) + ldr r4, =0x3ff + ands r4, r4, r1, lsr #3 @ find maximum number on the way size + clz r5, r4 @ find bit position of way size increment + ldr r7, =0x7fff + ands r7, r7, r1, lsr #13 @ extract max number of the index size +loop2: + mov r9, r4 @ create working copy of max way size +loop3: + ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11 + THUMB( lsl r6, r9, r5 ) + THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11 + ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11 + THUMB( lsl r6, r7, r2 ) + THUMB( orr r11, r11, r6 ) @ factor index number into r11 + mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way + subs r9, r9, #1 @ decrement the way + bge loop3 + subs r7, r7, #1 @ decrement the index + bge loop2 +skip: + add r10, r10, #2 @ increment cache number + cmp r3, r10 + bgt loop1 +finished: + mov r10, #0 @ swith back to cache level 0 + mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr + dsb + isb + mov pc, lr +ENDPROC(v7_flush_dcache_all) + +/* + * v7_flush_cache_all() + * + * Flush the entire cache system. + * The data cache flush is now achieved using atomic clean / invalidates + * working outwards from L1 cache. This is done using Set/Way based cache + * maintenance instructions. + * The instruction cache can still be invalidated back to the point of + * unification in a single instruction. + * + */ +ENTRY(v7_flush_kern_cache_all) + ARM( stmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( stmfd sp!, {r4-r7, r9-r11, lr} ) + bl v7_flush_dcache_all + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate + ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) + THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) + mov pc, lr +ENDPROC(v7_flush_kern_cache_all) + +/* + * v7_flush_cache_all() + * + * Flush all TLB entries in a particular address space + * + * - mm - mm_struct describing address space + */ +ENTRY(v7_flush_user_cache_all) + /*FALLTHROUGH*/ + +/* + * v7_flush_cache_range(start, end, flags) + * + * Flush a range of TLB entries in the specified address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - flags - vm_area_struct flags describing address space + * + * It is assumed that: + * - we have a VIPT cache. + */ +ENTRY(v7_flush_user_cache_range) + mov pc, lr +ENDPROC(v7_flush_user_cache_all) +ENDPROC(v7_flush_user_cache_range) + +/* + * v7_coherent_kern_range(start,end) + * + * Ensure that the I and D caches are coherent within specified + * region. This is typically used when code has been written to + * a memory region, and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + * + * It is assumed that: + * - the Icache does not read data from the write buffer + */ +ENTRY(v7_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * v7_coherent_user_range(start,end) + * + * Ensure that the I and D caches are coherent within specified + * region. This is typically used when code has been written to + * a memory region, and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + * + * It is assumed that: + * - the Icache does not read data from the write buffer + */ +ENTRY(v7_coherent_user_range) + UNWIND(.fnstart ) + dcache_line_size r2, r3 + sub r3, r2, #1 + bic r12, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif +1: + USER( mcr p15, 0, r12, c7, c11, 1 ) @ clean D line to the point of unification + add r12, r12, r2 + cmp r12, r1 + blo 1b + dsb + icache_line_size r2, r3 + sub r3, r2, #1 + bic r12, r0, r3 +2: + USER( mcr p15, 0, r12, c7, c5, 1 ) @ invalidate I line + add r12, r12, r2 + cmp r12, r1 + blo 2b +3: + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 6) @ invalidate BTB Inner Shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 6) @ invalidate BTB + dsb + isb + mov pc, lr + +/* + * Fault handling for the cache operation above. If the virtual address in r0 + * isn't mapped, just try the next page. + */ +9001: + mov r12, r12, lsr #12 + mov r12, r12, lsl #12 + add r12, r12, #4096 + b 3b + UNWIND(.fnend ) +ENDPROC(v7_coherent_kern_range) +ENDPROC(v7_coherent_user_range) + +/* + * v7_flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure that the data held in the page kaddr is written back + * to the page in question. + * + * - addr - kernel address + * - size - region size + */ +ENTRY(v7_flush_kern_dcache_area) + dcache_line_size r2, r3 + add r1, r0, r1 + sub r3, r2, #1 + bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif +1: + mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line / unified line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb + mov pc, lr +ENDPROC(v7_flush_kern_dcache_area) + +/* + * v7_dma_inv_range(start,end) + * + * Invalidate the data cache within the specified region; we will + * be performing a DMA operation in this region and we want to + * purge old data in the cache. + * + * - start - virtual start address of region + * - end - virtual end address of region + */ +v7_dma_inv_range: + dcache_line_size r2, r3 + sub r3, r2, #1 + tst r0, r3 + bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif + mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line + + tst r1, r3 + bic r1, r1, r3 + mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D / U line +1: + mcr p15, 0, r0, c7, c6, 1 @ invalidate D / U line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb + mov pc, lr +ENDPROC(v7_dma_inv_range) + +/* + * v7_dma_clean_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +v7_dma_clean_range: + dcache_line_size r2, r3 + sub r3, r2, #1 + bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif +1: + mcr p15, 0, r0, c7, c10, 1 @ clean D / U line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb + mov pc, lr +ENDPROC(v7_dma_clean_range) + +/* + * v7_dma_flush_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(v7_dma_flush_range) + dcache_line_size r2, r3 + sub r3, r2, #1 + bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif +1: + mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb + mov pc, lr +ENDPROC(v7_dma_flush_range) + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v7_dma_map_area) + add r1, r1, r0 + teq r2, #DMA_FROM_DEVICE + beq v7_dma_inv_range + b v7_dma_clean_range +ENDPROC(v7_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v7_dma_unmap_area) + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + bne v7_dma_inv_range + mov pc, lr +ENDPROC(v7_dma_unmap_area) + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v7 diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c new file mode 100644 index 00000000..6c3edeb6 --- /dev/null +++ b/arch/arm/mm/cache-xsc3l2.c @@ -0,0 +1,220 @@ +/* + * arch/arm/mm/cache-xsc3l2.c - XScale3 L2 cache controller support + * + * Copyright (C) 2007 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/init.h> +#include <linux/highmem.h> +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/cacheflush.h> + +#define CR_L2 (1 << 26) + +#define CACHE_LINE_SIZE 32 +#define CACHE_LINE_SHIFT 5 +#define CACHE_WAY_PER_SET 8 + +#define CACHE_WAY_SIZE(l2ctype) (8192 << (((l2ctype) >> 8) & 0xf)) +#define CACHE_SET_SIZE(l2ctype) (CACHE_WAY_SIZE(l2ctype) >> CACHE_LINE_SHIFT) + +static inline int xsc3_l2_present(void) +{ + unsigned long l2ctype; + + __asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype)); + + return !!(l2ctype & 0xf8); +} + +static inline void xsc3_l2_clean_mva(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c11, 1" : : "r" (addr)); +} + +static inline void xsc3_l2_inv_mva(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c7, 1" : : "r" (addr)); +} + +static inline void xsc3_l2_inv_all(void) +{ + unsigned long l2ctype, set_way; + int set, way; + + __asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype)); + + for (set = 0; set < CACHE_SET_SIZE(l2ctype); set++) { + for (way = 0; way < CACHE_WAY_PER_SET; way++) { + set_way = (way << 29) | (set << 5); + __asm__("mcr p15, 1, %0, c7, c11, 2" : : "r"(set_way)); + } + } + + dsb(); +} + +static inline void l2_unmap_va(unsigned long va) +{ +#ifdef CONFIG_HIGHMEM + if (va != -1) + kunmap_atomic((void *)va); +#endif +} + +static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va) +{ +#ifdef CONFIG_HIGHMEM + unsigned long va = prev_va & PAGE_MASK; + unsigned long pa_offset = pa << (32 - PAGE_SHIFT); + if (unlikely(pa_offset < (prev_va << (32 - PAGE_SHIFT)))) { + /* + * Switching to a new page. Because cache ops are + * using virtual addresses only, we must put a mapping + * in place for it. + */ + l2_unmap_va(prev_va); + va = (unsigned long)kmap_atomic_pfn(pa >> PAGE_SHIFT); + } + return va + (pa_offset >> (32 - PAGE_SHIFT)); +#else + return __phys_to_virt(pa); +#endif +} + +static void xsc3_l2_inv_range(unsigned long start, unsigned long end) +{ + unsigned long vaddr; + + if (start == 0 && end == -1ul) { + xsc3_l2_inv_all(); + return; + } + + vaddr = -1; /* to force the first mapping */ + + /* + * Clean and invalidate partial first cache line. + */ + if (start & (CACHE_LINE_SIZE - 1)) { + vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); + start = (start | (CACHE_LINE_SIZE - 1)) + 1; + } + + /* + * Invalidate all full cache lines between 'start' and 'end'. + */ + while (start < (end & ~(CACHE_LINE_SIZE - 1))) { + vaddr = l2_map_va(start, vaddr); + xsc3_l2_inv_mva(vaddr); + start += CACHE_LINE_SIZE; + } + + /* + * Clean and invalidate partial last cache line. + */ + if (start < end) { + vaddr = l2_map_va(start, vaddr); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); + } + + l2_unmap_va(vaddr); + + dsb(); +} + +static void xsc3_l2_clean_range(unsigned long start, unsigned long end) +{ + unsigned long vaddr; + + vaddr = -1; /* to force the first mapping */ + + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + vaddr = l2_map_va(start, vaddr); + xsc3_l2_clean_mva(vaddr); + start += CACHE_LINE_SIZE; + } + + l2_unmap_va(vaddr); + + dsb(); +} + +/* + * optimize L2 flush all operation by set/way format + */ +static inline void xsc3_l2_flush_all(void) +{ + unsigned long l2ctype, set_way; + int set, way; + + __asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype)); + + for (set = 0; set < CACHE_SET_SIZE(l2ctype); set++) { + for (way = 0; way < CACHE_WAY_PER_SET; way++) { + set_way = (way << 29) | (set << 5); + __asm__("mcr p15, 1, %0, c7, c15, 2" : : "r"(set_way)); + } + } + + dsb(); +} + +static void xsc3_l2_flush_range(unsigned long start, unsigned long end) +{ + unsigned long vaddr; + + if (start == 0 && end == -1ul) { + xsc3_l2_flush_all(); + return; + } + + vaddr = -1; /* to force the first mapping */ + + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + vaddr = l2_map_va(start, vaddr); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); + start += CACHE_LINE_SIZE; + } + + l2_unmap_va(vaddr); + + dsb(); +} + +static int __init xsc3_l2_init(void) +{ + if (!cpu_is_xsc3() || !xsc3_l2_present()) + return 0; + + if (get_cr() & CR_L2) { + pr_info("XScale3 L2 cache enabled.\n"); + xsc3_l2_inv_all(); + + outer_cache.inv_range = xsc3_l2_inv_range; + outer_cache.clean_range = xsc3_l2_clean_range; + outer_cache.flush_range = xsc3_l2_flush_range; + } + + return 0; +} +core_initcall(xsc3_l2_init); diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c new file mode 100644 index 00000000..806cc4f6 --- /dev/null +++ b/arch/arm/mm/context.c @@ -0,0 +1,171 @@ +/* + * linux/arch/arm/mm/context.c + * + * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/percpu.h> + +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> + +static DEFINE_RAW_SPINLOCK(cpu_asid_lock); +unsigned int cpu_last_asid = ASID_FIRST_VERSION; + +#ifdef CONFIG_ARM_LPAE +void cpu_set_reserved_ttbr0(void) +{ + unsigned long ttbl = __pa(swapper_pg_dir); + unsigned long ttbh = 0; + + /* + * Set TTBR0 to swapper_pg_dir which contains only global entries. The + * ASID is set to 0. + */ + asm volatile( + " mcrr p15, 0, %0, %1, c2 @ set TTBR0\n" + : + : "r" (ttbl), "r" (ttbh)); + isb(); +} +#else +void cpu_set_reserved_ttbr0(void) +{ + u32 ttb; + /* Copy TTBR1 into TTBR0 */ + asm volatile( + " mrc p15, 0, %0, c2, c0, 1 @ read TTBR1\n" + " mcr p15, 0, %0, c2, c0, 0 @ set TTBR0\n" + : "=r" (ttb)); + isb(); +} +#endif + +/* + * We fork()ed a process, and we need a new context for the child + * to run in. + */ +void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + mm->context.id = 0; + raw_spin_lock_init(&mm->context.id_lock); +} + +static void flush_context(void) +{ + cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + if (icache_is_vivt_asid_tagged()) { + __flush_icache_all(); + dsb(); + } +} + +#ifdef CONFIG_SMP + +static void set_mm_context(struct mm_struct *mm, unsigned int asid) +{ + unsigned long flags; + + /* + * Locking needed for multi-threaded applications where the + * same mm->context.id could be set from different CPUs during + * the broadcast. This function is also called via IPI so the + * mm->context.id_lock has to be IRQ-safe. + */ + raw_spin_lock_irqsave(&mm->context.id_lock, flags); + if (likely((mm->context.id ^ cpu_last_asid) >> ASID_BITS)) { + /* + * Old version of ASID found. Set the new one and + * reset mm_cpumask(mm). + */ + mm->context.id = asid; + cpumask_clear(mm_cpumask(mm)); + } + raw_spin_unlock_irqrestore(&mm->context.id_lock, flags); + + /* + * Set the mm_cpumask(mm) bit for the current CPU. + */ + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); +} + +/* + * Reset the ASID on the current CPU. This function call is broadcast + * from the CPU handling the ASID rollover and holding cpu_asid_lock. + */ +static void reset_context(void *info) +{ + unsigned int asid; + unsigned int cpu = smp_processor_id(); + struct mm_struct *mm = current->active_mm; + + smp_rmb(); + asid = cpu_last_asid + cpu + 1; + + flush_context(); + set_mm_context(mm, asid); + + /* set the new ASID */ + cpu_switch_mm(mm->pgd, mm); +} + +#else + +static inline void set_mm_context(struct mm_struct *mm, unsigned int asid) +{ + mm->context.id = asid; + cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id())); +} + +#endif + +void __new_context(struct mm_struct *mm) +{ + unsigned int asid; + + raw_spin_lock(&cpu_asid_lock); +#ifdef CONFIG_SMP + /* + * Check the ASID again, in case the change was broadcast from + * another CPU before we acquired the lock. + */ + if (unlikely(((mm->context.id ^ cpu_last_asid) >> ASID_BITS) == 0)) { + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + raw_spin_unlock(&cpu_asid_lock); + return; + } +#endif + /* + * At this point, it is guaranteed that the current mm (with + * an old ASID) isn't active on any other CPU since the ASIDs + * are changed simultaneously via IPI. + */ + asid = ++cpu_last_asid; + if (asid == 0) + asid = cpu_last_asid = ASID_FIRST_VERSION; + + /* + * If we've used up all our ASIDs, we need + * to start a new version and flush the TLB. + */ + if (unlikely((asid & ~ASID_MASK) == 0)) { + asid = cpu_last_asid + smp_processor_id() + 1; + flush_context(); +#ifdef CONFIG_SMP + smp_wmb(); + smp_call_function(reset_context, NULL, 1); +#endif + cpu_last_asid += NR_CPUS; + } + + set_mm_context(mm, asid); + raw_spin_unlock(&cpu_asid_lock); +} diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c new file mode 100644 index 00000000..d130a5ec --- /dev/null +++ b/arch/arm/mm/copypage-fa.c @@ -0,0 +1,86 @@ +/* + * linux/arch/arm/lib/copypage-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt> + * + * Based on copypage-v4wb.S: + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/init.h> +#include <linux/highmem.h> + +/* + * Faraday optimised copy_user_page + */ +static void __naked +fa_copy_user_page(void *kto, const void *kfrom) +{ + asm("\ + stmfd sp!, {r4, lr} @ 2\n\ + mov r2, %0 @ 1\n\ +1: ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + stmia r0, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add r0, r0, #16 @ 1\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + stmia r0, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add r0, r0, #16 @ 1\n\ + subs r2, r2, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r2, c7, c10, 4 @ 1 drain WB\n\ + ldmfd sp!, {r4, pc} @ 3" + : + : "I" (PAGE_SIZE / 32)); +} + +void fa_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); + fa_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom); + kunmap_atomic(kto); +} + +/* + * Faraday optimised clear_user_page + * + * Same story as above. + */ +void fa_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile("\ + mov r1, %2 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: stmia %0, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add %0, %0, #16 @ 1\n\ + stmia %0, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add %0, %0, #16 @ 1\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 32) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns fa_user_fns __initdata = { + .cpu_clear_user_highpage = fa_clear_user_highpage, + .cpu_copy_user_highpage = fa_copy_user_highpage, +}; diff --git a/arch/arm/mm/copypage-feroceon.c b/arch/arm/mm/copypage-feroceon.c new file mode 100644 index 00000000..49ee0c1a --- /dev/null +++ b/arch/arm/mm/copypage-feroceon.c @@ -0,0 +1,112 @@ +/* + * linux/arch/arm/mm/copypage-feroceon.S + * + * Copyright (C) 2008 Marvell Semiconductors + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This handles copy_user_highpage and clear_user_page on Feroceon + * more optimally than the generic implementations. + */ +#include <linux/init.h> +#include <linux/highmem.h> + +static void __naked +feroceon_copy_user_page(void *kto, const void *kfrom) +{ + asm("\ + stmfd sp!, {r4-r9, lr} \n\ + mov ip, %2 \n\ +1: mov lr, r1 \n\ + ldmia r1!, {r2 - r9} \n\ + pld [lr, #32] \n\ + pld [lr, #64] \n\ + pld [lr, #96] \n\ + pld [lr, #128] \n\ + pld [lr, #160] \n\ + pld [lr, #192] \n\ + pld [lr, #224] \n\ + stmia r0, {r2 - r9} \n\ + ldmia r1!, {r2 - r9} \n\ + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ + add r0, r0, #32 \n\ + stmia r0, {r2 - r9} \n\ + ldmia r1!, {r2 - r9} \n\ + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ + add r0, r0, #32 \n\ + stmia r0, {r2 - r9} \n\ + ldmia r1!, {r2 - r9} \n\ + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ + add r0, r0, #32 \n\ + stmia r0, {r2 - r9} \n\ + ldmia r1!, {r2 - r9} \n\ + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ + add r0, r0, #32 \n\ + stmia r0, {r2 - r9} \n\ + ldmia r1!, {r2 - r9} \n\ + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ + add r0, r0, #32 \n\ + stmia r0, {r2 - r9} \n\ + ldmia r1!, {r2 - r9} \n\ + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ + add r0, r0, #32 \n\ + stmia r0, {r2 - r9} \n\ + ldmia r1!, {r2 - r9} \n\ + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ + add r0, r0, #32 \n\ + stmia r0, {r2 - r9} \n\ + subs ip, ip, #(32 * 8) \n\ + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ + add r0, r0, #32 \n\ + bne 1b \n\ + mcr p15, 0, ip, c7, c10, 4 @ drain WB\n\ + ldmfd sp!, {r4-r9, pc}" + : + : "r" (kto), "r" (kfrom), "I" (PAGE_SIZE)); +} + +void feroceon_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); + flush_cache_page(vma, vaddr, page_to_pfn(from)); + feroceon_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom); + kunmap_atomic(kto); +} + +void feroceon_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile ("\ + mov r1, %2 \n\ + mov r2, #0 \n\ + mov r3, #0 \n\ + mov r4, #0 \n\ + mov r5, #0 \n\ + mov r6, #0 \n\ + mov r7, #0 \n\ + mov ip, #0 \n\ + mov lr, #0 \n\ +1: stmia %0, {r2-r7, ip, lr} \n\ + subs r1, r1, #1 \n\ + mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\ + add %0, %0, #32 \n\ + bne 1b \n\ + mcr p15, 0, r1, c7, c10, 4 @ drain WB" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 32) + : "r1", "r2", "r3", "r4", "r5", "r6", "r7", "ip", "lr"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns feroceon_user_fns __initdata = { + .cpu_clear_user_highpage = feroceon_clear_user_highpage, + .cpu_copy_user_highpage = feroceon_copy_user_highpage, +}; + diff --git a/arch/arm/mm/copypage-v3.c b/arch/arm/mm/copypage-v3.c new file mode 100644 index 00000000..3935bddd --- /dev/null +++ b/arch/arm/mm/copypage-v3.c @@ -0,0 +1,81 @@ +/* + * linux/arch/arm/mm/copypage-v3.c + * + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/init.h> +#include <linux/highmem.h> + +/* + * ARMv3 optimised copy_user_highpage + * + * FIXME: do we need to handle cache stuff... + */ +static void __naked +v3_copy_user_page(void *kto, const void *kfrom) +{ + asm("\n\ + stmfd sp!, {r4, lr} @ 2\n\ + mov r2, %2 @ 1\n\ + ldmia %0!, {r3, r4, ip, lr} @ 4+1\n\ +1: stmia %1!, {r3, r4, ip, lr} @ 4\n\ + ldmia %0!, {r3, r4, ip, lr} @ 4+1\n\ + stmia %1!, {r3, r4, ip, lr} @ 4\n\ + ldmia %0!, {r3, r4, ip, lr} @ 4+1\n\ + stmia %1!, {r3, r4, ip, lr} @ 4\n\ + ldmia %0!, {r3, r4, ip, lr} @ 4\n\ + subs r2, r2, #1 @ 1\n\ + stmia %1!, {r3, r4, ip, lr} @ 4\n\ + ldmneia %0!, {r3, r4, ip, lr} @ 4\n\ + bne 1b @ 1\n\ + ldmfd sp!, {r4, pc} @ 3" + : + : "r" (kfrom), "r" (kto), "I" (PAGE_SIZE / 64)); +} + +void v3_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); + v3_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom); + kunmap_atomic(kto); +} + +/* + * ARMv3 optimised clear_user_page + * + * FIXME: do we need to handle cache stuff... + */ +void v3_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile("\n\ + mov r1, %2 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 64) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns v3_user_fns __initdata = { + .cpu_clear_user_highpage = v3_clear_user_highpage, + .cpu_copy_user_highpage = v3_copy_user_highpage, +}; diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c new file mode 100644 index 00000000..1267e641 --- /dev/null +++ b/arch/arm/mm/copypage-v4mc.c @@ -0,0 +1,115 @@ +/* + * linux/arch/arm/lib/copypage-armv4mc.S + * + * Copyright (C) 1995-2005 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This handles the mini data cache, as found on SA11x0 and XScale + * processors. When we copy a user page page, we map it in such a way + * that accesses to this page will not touch the main data cache, but + * will be cached in the mini data cache. This prevents us thrashing + * the main data cache on page faults. + */ +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/highmem.h> + +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> + +#include "mm.h" + +#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ + L_PTE_MT_MINICACHE) + +static DEFINE_RAW_SPINLOCK(minicache_lock); + +/* + * ARMv4 mini-dcache optimised copy_user_highpage + * + * We flush the destination cache lines just before we write the data into the + * corresponding address. Since the Dcache is read-allocate, this removes the + * Dcache aliasing issue. The writes will be forwarded to the write buffer, + * and merged as appropriate. + * + * Note: We rely on all ARMv4 processors implementing the "invalidate D line" + * instruction. If your processor does not supply this, you have to write your + * own copy_user_highpage that does the right thing. + */ +static void __naked +mc_copy_user_page(void *from, void *to) +{ + asm volatile( + "stmfd sp!, {r4, lr} @ 2\n\ + mov r4, %2 @ 1\n\ + ldmia %0!, {r2, r3, ip, lr} @ 4\n\ +1: mcr p15, 0, %1, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %1!, {r2, r3, ip, lr} @ 4\n\ + ldmia %0!, {r2, r3, ip, lr} @ 4+1\n\ + stmia %1!, {r2, r3, ip, lr} @ 4\n\ + ldmia %0!, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %1, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %1!, {r2, r3, ip, lr} @ 4\n\ + ldmia %0!, {r2, r3, ip, lr} @ 4\n\ + subs r4, r4, #1 @ 1\n\ + stmia %1!, {r2, r3, ip, lr} @ 4\n\ + ldmneia %0!, {r2, r3, ip, lr} @ 4\n\ + bne 1b @ 1\n\ + ldmfd sp!, {r4, pc} @ 3" + : + : "r" (from), "r" (to), "I" (PAGE_SIZE / 64)); +} + +void v4_mc_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto = kmap_atomic(to); + + if (!test_and_set_bit(PG_dcache_clean, &from->flags)) + __flush_dcache_page(page_mapping(from), from); + + raw_spin_lock(&minicache_lock); + + set_top_pte(COPYPAGE_MINICACHE, mk_pte(from, minicache_pgprot)); + + mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto); + + raw_spin_unlock(&minicache_lock); + + kunmap_atomic(kto); +} + +/* + * ARMv4 optimised clear_user_page + */ +void v4_mc_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile("\ + mov r1, %2 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 64) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns v4_mc_user_fns __initdata = { + .cpu_clear_user_highpage = v4_mc_clear_user_highpage, + .cpu_copy_user_highpage = v4_mc_copy_user_highpage, +}; diff --git a/arch/arm/mm/copypage-v4wb.c b/arch/arm/mm/copypage-v4wb.c new file mode 100644 index 00000000..067d0fdd --- /dev/null +++ b/arch/arm/mm/copypage-v4wb.c @@ -0,0 +1,95 @@ +/* + * linux/arch/arm/mm/copypage-v4wb.c + * + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/init.h> +#include <linux/highmem.h> + +/* + * ARMv4 optimised copy_user_highpage + * + * We flush the destination cache lines just before we write the data into the + * corresponding address. Since the Dcache is read-allocate, this removes the + * Dcache aliasing issue. The writes will be forwarded to the write buffer, + * and merged as appropriate. + * + * Note: We rely on all ARMv4 processors implementing the "invalidate D line" + * instruction. If your processor does not supply this, you have to write your + * own copy_user_highpage that does the right thing. + */ +static void __naked +v4wb_copy_user_page(void *kto, const void *kfrom) +{ + asm("\ + stmfd sp!, {r4, lr} @ 2\n\ + mov r2, %2 @ 1\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ +1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia r0!, {r3, r4, ip, lr} @ 4\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4+1\n\ + stmia r0!, {r3, r4, ip, lr} @ 4\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia r0!, {r3, r4, ip, lr} @ 4\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + subs r2, r2, #1 @ 1\n\ + stmia r0!, {r3, r4, ip, lr} @ 4\n\ + ldmneia r1!, {r3, r4, ip, lr} @ 4\n\ + bne 1b @ 1\n\ + mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB\n\ + ldmfd sp!, {r4, pc} @ 3" + : + : "r" (kto), "r" (kfrom), "I" (PAGE_SIZE / 64)); +} + +void v4wb_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); + flush_cache_page(vma, vaddr, page_to_pfn(from)); + v4wb_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom); + kunmap_atomic(kto); +} + +/* + * ARMv4 optimised clear_user_page + * + * Same story as above. + */ +void v4wb_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile("\ + mov r1, %2 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 64) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns v4wb_user_fns __initdata = { + .cpu_clear_user_highpage = v4wb_clear_user_highpage, + .cpu_copy_user_highpage = v4wb_copy_user_highpage, +}; diff --git a/arch/arm/mm/copypage-v4wt.c b/arch/arm/mm/copypage-v4wt.c new file mode 100644 index 00000000..b85c5da2 --- /dev/null +++ b/arch/arm/mm/copypage-v4wt.c @@ -0,0 +1,88 @@ +/* + * linux/arch/arm/mm/copypage-v4wt.S + * + * Copyright (C) 1995-1999 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This is for CPUs with a writethrough cache and 'flush ID cache' is + * the only supported cache operation. + */ +#include <linux/init.h> +#include <linux/highmem.h> + +/* + * ARMv4 optimised copy_user_highpage + * + * Since we have writethrough caches, we don't have to worry about + * dirty data in the cache. However, we do have to ensure that + * subsequent reads are up to date. + */ +static void __naked +v4wt_copy_user_page(void *kto, const void *kfrom) +{ + asm("\ + stmfd sp!, {r4, lr} @ 2\n\ + mov r2, %2 @ 1\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ +1: stmia r0!, {r3, r4, ip, lr} @ 4\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4+1\n\ + stmia r0!, {r3, r4, ip, lr} @ 4\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + stmia r0!, {r3, r4, ip, lr} @ 4\n\ + ldmia r1!, {r3, r4, ip, lr} @ 4\n\ + subs r2, r2, #1 @ 1\n\ + stmia r0!, {r3, r4, ip, lr} @ 4\n\ + ldmneia r1!, {r3, r4, ip, lr} @ 4\n\ + bne 1b @ 1\n\ + mcr p15, 0, r2, c7, c7, 0 @ flush ID cache\n\ + ldmfd sp!, {r4, pc} @ 3" + : + : "r" (kto), "r" (kfrom), "I" (PAGE_SIZE / 64)); +} + +void v4wt_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); + v4wt_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom); + kunmap_atomic(kto); +} + +/* + * ARMv4 optimised clear_user_page + * + * Same story as above. + */ +void v4wt_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile("\ + mov r1, %2 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r2, c7, c7, 0 @ flush ID cache" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 64) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns v4wt_user_fns __initdata = { + .cpu_clear_user_highpage = v4wt_clear_user_highpage, + .cpu_copy_user_highpage = v4wt_copy_user_highpage, +}; diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c new file mode 100644 index 00000000..b9bcc9d7 --- /dev/null +++ b/arch/arm/mm/copypage-v6.c @@ -0,0 +1,140 @@ +/* + * linux/arch/arm/mm/copypage-v6.c + * + * Copyright (C) 2002 Deep Blue Solutions Ltd, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/highmem.h> + +#include <asm/pgtable.h> +#include <asm/shmparam.h> +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> +#include <asm/cachetype.h> + +#include "mm.h" + +#if SHMLBA > 16384 +#error FIX ME +#endif + +static DEFINE_RAW_SPINLOCK(v6_lock); + +/* + * Copy the user page. No aliasing to deal with so we can just + * attack the kernel's existing mapping of these pages. + */ +static void v6_copy_user_highpage_nonaliasing(struct page *to, + struct page *from, unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kfrom = kmap_atomic(from); + kto = kmap_atomic(to); + copy_page(kto, kfrom); + kunmap_atomic(kto); + kunmap_atomic(kfrom); +} + +/* + * Clear the user page. No aliasing to deal with so we can just + * attack the kernel's existing mapping of this page. + */ +static void v6_clear_user_highpage_nonaliasing(struct page *page, unsigned long vaddr) +{ + void *kaddr = kmap_atomic(page); + clear_page(kaddr); + kunmap_atomic(kaddr); +} + +/* + * Discard data in the kernel mapping for the new page. + * FIXME: needs this MCRR to be supported. + */ +static void discard_old_kernel_data(void *kto) +{ + __asm__("mcrr p15, 0, %1, %0, c6 @ 0xec401f06" + : + : "r" (kto), + "r" ((unsigned long)kto + PAGE_SIZE - L1_CACHE_BYTES) + : "cc"); +} + +/* + * Copy the page, taking account of the cache colour. + */ +static void v6_copy_user_highpage_aliasing(struct page *to, + struct page *from, unsigned long vaddr, struct vm_area_struct *vma) +{ + unsigned int offset = CACHE_COLOUR(vaddr); + unsigned long kfrom, kto; + + if (!test_and_set_bit(PG_dcache_clean, &from->flags)) + __flush_dcache_page(page_mapping(from), from); + + /* FIXME: not highmem safe */ + discard_old_kernel_data(page_address(to)); + + /* + * Now copy the page using the same cache colour as the + * pages ultimate destination. + */ + raw_spin_lock(&v6_lock); + + kfrom = COPYPAGE_V6_FROM + (offset << PAGE_SHIFT); + kto = COPYPAGE_V6_TO + (offset << PAGE_SHIFT); + + set_top_pte(kfrom, mk_pte(from, PAGE_KERNEL)); + set_top_pte(kto, mk_pte(to, PAGE_KERNEL)); + + copy_page((void *)kto, (void *)kfrom); + + raw_spin_unlock(&v6_lock); +} + +/* + * Clear the user page. We need to deal with the aliasing issues, + * so remap the kernel page into the same cache colour as the user + * page. + */ +static void v6_clear_user_highpage_aliasing(struct page *page, unsigned long vaddr) +{ + unsigned long to = COPYPAGE_V6_TO + (CACHE_COLOUR(vaddr) << PAGE_SHIFT); + + /* FIXME: not highmem safe */ + discard_old_kernel_data(page_address(page)); + + /* + * Now clear the page using the same cache colour as + * the pages ultimate destination. + */ + raw_spin_lock(&v6_lock); + + set_top_pte(to, mk_pte(page, PAGE_KERNEL)); + clear_page((void *)to); + + raw_spin_unlock(&v6_lock); +} + +struct cpu_user_fns v6_user_fns __initdata = { + .cpu_clear_user_highpage = v6_clear_user_highpage_nonaliasing, + .cpu_copy_user_highpage = v6_copy_user_highpage_nonaliasing, +}; + +static int __init v6_userpage_init(void) +{ + if (cache_is_vipt_aliasing()) { + cpu_user.cpu_clear_user_highpage = v6_clear_user_highpage_aliasing; + cpu_user.cpu_copy_user_highpage = v6_copy_user_highpage_aliasing; + } + + return 0; +} + +core_initcall(v6_userpage_init); diff --git a/arch/arm/mm/copypage-xsc3.c b/arch/arm/mm/copypage-xsc3.c new file mode 100644 index 00000000..03a2042a --- /dev/null +++ b/arch/arm/mm/copypage-xsc3.c @@ -0,0 +1,114 @@ +/* + * linux/arch/arm/mm/copypage-xsc3.S + * + * Copyright (C) 2004 Intel Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Adapted for 3rd gen XScale core, no more mini-dcache + * Author: Matt Gilbert (matthew.m.gilbert@intel.com) + */ +#include <linux/init.h> +#include <linux/highmem.h> + +/* + * General note: + * We don't really want write-allocate cache behaviour for these functions + * since that will just eat through 8K of the cache. + */ + +/* + * XSC3 optimised copy_user_highpage + * r0 = destination + * r1 = source + * + * The source page may have some clean entries in the cache already, but we + * can safely ignore them - break_cow() will flush them out of the cache + * if we eventually end up using our copied page. + * + */ +static void __naked +xsc3_mc_copy_user_page(void *kto, const void *kfrom) +{ + asm("\ + stmfd sp!, {r4, r5, lr} \n\ + mov lr, %2 \n\ + \n\ + pld [r1, #0] \n\ + pld [r1, #32] \n\ +1: pld [r1, #64] \n\ + pld [r1, #96] \n\ + \n\ +2: ldrd r2, [r1], #8 \n\ + mov ip, r0 \n\ + ldrd r4, [r1], #8 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate\n\ + strd r2, [r0], #8 \n\ + ldrd r2, [r1], #8 \n\ + strd r4, [r0], #8 \n\ + ldrd r4, [r1], #8 \n\ + strd r2, [r0], #8 \n\ + strd r4, [r0], #8 \n\ + ldrd r2, [r1], #8 \n\ + mov ip, r0 \n\ + ldrd r4, [r1], #8 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate\n\ + strd r2, [r0], #8 \n\ + ldrd r2, [r1], #8 \n\ + subs lr, lr, #1 \n\ + strd r4, [r0], #8 \n\ + ldrd r4, [r1], #8 \n\ + strd r2, [r0], #8 \n\ + strd r4, [r0], #8 \n\ + bgt 1b \n\ + beq 2b \n\ + \n\ + ldmfd sp!, {r4, r5, pc}" + : + : "r" (kto), "r" (kfrom), "I" (PAGE_SIZE / 64 - 1)); +} + +void xsc3_mc_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); + flush_cache_page(vma, vaddr, page_to_pfn(from)); + xsc3_mc_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom); + kunmap_atomic(kto); +} + +/* + * XScale optimised clear_user_page + * r0 = destination + * r1 = virtual user address of ultimate destination page + */ +void xsc3_mc_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile ("\ + mov r1, %2 \n\ + mov r2, #0 \n\ + mov r3, #0 \n\ +1: mcr p15, 0, %0, c7, c6, 1 @ invalidate line\n\ + strd r2, [%0], #8 \n\ + strd r2, [%0], #8 \n\ + strd r2, [%0], #8 \n\ + strd r2, [%0], #8 \n\ + subs r1, r1, #1 \n\ + bne 1b" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 32) + : "r1", "r2", "r3"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns xsc3_mc_user_fns __initdata = { + .cpu_clear_user_highpage = xsc3_mc_clear_user_highpage, + .cpu_copy_user_highpage = xsc3_mc_copy_user_highpage, +}; diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c new file mode 100644 index 00000000..0fb85025 --- /dev/null +++ b/arch/arm/mm/copypage-xscale.c @@ -0,0 +1,135 @@ +/* + * linux/arch/arm/lib/copypage-xscale.S + * + * Copyright (C) 1995-2005 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This handles the mini data cache, as found on SA11x0 and XScale + * processors. When we copy a user page page, we map it in such a way + * that accesses to this page will not touch the main data cache, but + * will be cached in the mini data cache. This prevents us thrashing + * the main data cache on page faults. + */ +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/highmem.h> + +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> + +#include "mm.h" + +#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ + L_PTE_MT_MINICACHE) + +static DEFINE_RAW_SPINLOCK(minicache_lock); + +/* + * XScale mini-dcache optimised copy_user_highpage + * + * We flush the destination cache lines just before we write the data into the + * corresponding address. Since the Dcache is read-allocate, this removes the + * Dcache aliasing issue. The writes will be forwarded to the write buffer, + * and merged as appropriate. + */ +static void __naked +mc_copy_user_page(void *from, void *to) +{ + /* + * Strangely enough, best performance is achieved + * when prefetching destination as well. (NP) + */ + asm volatile( + "stmfd sp!, {r4, r5, lr} \n\ + mov lr, %2 \n\ + pld [r0, #0] \n\ + pld [r0, #32] \n\ + pld [r1, #0] \n\ + pld [r1, #32] \n\ +1: pld [r0, #64] \n\ + pld [r0, #96] \n\ + pld [r1, #64] \n\ + pld [r1, #96] \n\ +2: ldrd r2, [r0], #8 \n\ + ldrd r4, [r0], #8 \n\ + mov ip, r1 \n\ + strd r2, [r1], #8 \n\ + ldrd r2, [r0], #8 \n\ + strd r4, [r1], #8 \n\ + ldrd r4, [r0], #8 \n\ + strd r2, [r1], #8 \n\ + strd r4, [r1], #8 \n\ + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ + ldrd r2, [r0], #8 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ + ldrd r4, [r0], #8 \n\ + mov ip, r1 \n\ + strd r2, [r1], #8 \n\ + ldrd r2, [r0], #8 \n\ + strd r4, [r1], #8 \n\ + ldrd r4, [r0], #8 \n\ + strd r2, [r1], #8 \n\ + strd r4, [r1], #8 \n\ + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ + subs lr, lr, #1 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ + bgt 1b \n\ + beq 2b \n\ + ldmfd sp!, {r4, r5, pc} " + : + : "r" (from), "r" (to), "I" (PAGE_SIZE / 64 - 1)); +} + +void xscale_mc_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto = kmap_atomic(to); + + if (!test_and_set_bit(PG_dcache_clean, &from->flags)) + __flush_dcache_page(page_mapping(from), from); + + raw_spin_lock(&minicache_lock); + + set_top_pte(COPYPAGE_MINICACHE, mk_pte(from, minicache_pgprot)); + + mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto); + + raw_spin_unlock(&minicache_lock); + + kunmap_atomic(kto); +} + +/* + * XScale optimised clear_user_page + */ +void +xscale_mc_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile( + "mov r1, %2 \n\ + mov r2, #0 \n\ + mov r3, #0 \n\ +1: mov ip, %0 \n\ + strd r2, [%0], #8 \n\ + strd r2, [%0], #8 \n\ + strd r2, [%0], #8 \n\ + strd r2, [%0], #8 \n\ + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ + subs r1, r1, #1 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ + bne 1b" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 32) + : "r1", "r2", "r3", "ip"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns xscale_mc_user_fns __initdata = { + .cpu_clear_user_highpage = xscale_mc_clear_user_highpage, + .cpu_copy_user_highpage = xscale_mc_copy_user_highpage, +}; diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c new file mode 100644 index 00000000..cb5821be --- /dev/null +++ b/arch/arm/mm/dma-mapping.c @@ -0,0 +1,735 @@ +/* + * linux/arch/arm/mm/dma-mapping.c + * + * Copyright (C) 2000-2004 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * DMA uncached mapping support. + */ +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/gfp.h> +#include <linux/errno.h> +#include <linux/list.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/highmem.h> +#include <linux/slab.h> + +#include <asm/memory.h> +#include <asm/highmem.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <asm/sizes.h> +#include <asm/mach/arch.h> + +#include "mm.h" + +static u64 get_coherent_dma_mask(struct device *dev) +{ + u64 mask = (u64)arm_dma_limit; + + if (dev) { + mask = dev->coherent_dma_mask; + + /* + * Sanity check the DMA mask - it must be non-zero, and + * must be able to be satisfied by a DMA allocation. + */ + if (mask == 0) { + dev_warn(dev, "coherent DMA mask is unset\n"); + return 0; + } + + if ((~mask) & (u64)arm_dma_limit) { + dev_warn(dev, "coherent DMA mask %#llx is smaller " + "than system GFP_DMA mask %#llx\n", + mask, (u64)arm_dma_limit); + return 0; + } + } + + return mask; +} + +/* + * Allocate a DMA buffer for 'dev' of size 'size' using the + * specified gfp mask. Note that 'size' must be page aligned. + */ +static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gfp) +{ + unsigned long order = get_order(size); + struct page *page, *p, *e; + void *ptr; + u64 mask = get_coherent_dma_mask(dev); + +#ifdef CONFIG_DMA_API_DEBUG + u64 limit = (mask + 1) & ~mask; + if (limit && size >= limit) { + dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", + size, mask); + return NULL; + } +#endif + + if (!mask) + return NULL; + + if (mask < 0xffffffffULL) + gfp |= GFP_DMA; + + page = alloc_pages(gfp, order); + if (!page) + return NULL; + + /* + * Now split the huge page and free the excess pages + */ + split_page(page, order); + for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) + __free_page(p); + + /* + * Ensure that the allocated pages are zeroed, and that any data + * lurking in the kernel direct-mapped region is invalidated. + */ + ptr = page_address(page); + memset(ptr, 0, size); + dmac_flush_range(ptr, ptr + size); + outer_flush_range(__pa(ptr), __pa(ptr) + size); + + return page; +} + +/* + * Free a DMA buffer. 'size' must be page aligned. + */ +static void __dma_free_buffer(struct page *page, size_t size) +{ + struct page *e = page + (size >> PAGE_SHIFT); + + while (page < e) { + __free_page(page); + page++; + } +} + +#ifdef CONFIG_MMU + +#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - consistent_base) >> PAGE_SHIFT) +#define CONSISTENT_PTE_INDEX(x) (((unsigned long)(x) - consistent_base) >> PMD_SHIFT) + +/* + * These are the page tables (2MB each) covering uncached, DMA consistent allocations + */ +static pte_t **consistent_pte; + +#define DEFAULT_CONSISTENT_DMA_SIZE SZ_4M + +unsigned long consistent_base = CONSISTENT_END - DEFAULT_CONSISTENT_DMA_SIZE; + +void __init init_consistent_dma_size(unsigned long size) +{ + unsigned long base = CONSISTENT_END - ALIGN(size, SZ_2M); + + BUG_ON(consistent_pte); /* Check we're called before DMA region init */ + BUG_ON(base < VMALLOC_END); + + /* Grow region to accommodate specified size */ + if (base < consistent_base) + consistent_base = base; +} + +#include "vmregion.h" + +static struct arm_vmregion_head consistent_head = { + .vm_lock = __SPIN_LOCK_UNLOCKED(&consistent_head.vm_lock), + .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), + .vm_end = CONSISTENT_END, +}; + +#ifdef CONFIG_HUGETLB_PAGE +#error ARM Coherent DMA allocator does not (yet) support huge TLB +#endif + +/* + * Initialise the consistent memory allocation. + */ +static int __init consistent_init(void) +{ + int ret = 0; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int i = 0; + unsigned long base = consistent_base; + unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; + + consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); + if (!consistent_pte) { + pr_err("%s: no memory\n", __func__); + return -ENOMEM; + } + + pr_debug("DMA memory: 0x%08lx - 0x%08lx:\n", base, CONSISTENT_END); + consistent_head.vm_start = base; + + do { + pgd = pgd_offset(&init_mm, base); + + pud = pud_alloc(&init_mm, pgd, base); + if (!pud) { + printk(KERN_ERR "%s: no pud tables\n", __func__); + ret = -ENOMEM; + break; + } + + pmd = pmd_alloc(&init_mm, pud, base); + if (!pmd) { + printk(KERN_ERR "%s: no pmd tables\n", __func__); + ret = -ENOMEM; + break; + } + WARN_ON(!pmd_none(*pmd)); + + pte = pte_alloc_kernel(pmd, base); + if (!pte) { + printk(KERN_ERR "%s: no pte tables\n", __func__); + ret = -ENOMEM; + break; + } + + consistent_pte[i++] = pte; + base += PMD_SIZE; + } while (base < CONSISTENT_END); + + return ret; +} + +core_initcall(consistent_init); + +static void * +__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, + const void *caller) +{ + struct arm_vmregion *c; + size_t align; + int bit; + + if (!consistent_pte) { + printk(KERN_ERR "%s: not initialised\n", __func__); + dump_stack(); + return NULL; + } + + /* + * Align the virtual region allocation - maximum alignment is + * a section size, minimum is a page size. This helps reduce + * fragmentation of the DMA space, and also prevents allocations + * smaller than a section from crossing a section boundary. + */ + bit = fls(size - 1); + if (bit > SECTION_SHIFT) + bit = SECTION_SHIFT; + align = 1 << bit; + + /* + * Allocate a virtual address in the consistent mapping region. + */ + c = arm_vmregion_alloc(&consistent_head, align, size, + gfp & ~(__GFP_DMA | __GFP_HIGHMEM), caller); + if (c) { + pte_t *pte; + int idx = CONSISTENT_PTE_INDEX(c->vm_start); + u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); + + pte = consistent_pte[idx] + off; + c->vm_pages = page; + + do { + BUG_ON(!pte_none(*pte)); + + set_pte_ext(pte, mk_pte(page, prot), 0); + page++; + pte++; + off++; + if (off >= PTRS_PER_PTE) { + off = 0; + pte = consistent_pte[++idx]; + } + } while (size -= PAGE_SIZE); + + dsb(); + + return (void *)c->vm_start; + } + return NULL; +} + +static void __dma_free_remap(void *cpu_addr, size_t size) +{ + struct arm_vmregion *c; + unsigned long addr; + pte_t *ptep; + int idx; + u32 off; + + c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); + if (!c) { + printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", + __func__, cpu_addr); + dump_stack(); + return; + } + + if ((c->vm_end - c->vm_start) != size) { + printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", + __func__, c->vm_end - c->vm_start, size); + dump_stack(); + size = c->vm_end - c->vm_start; + } + + idx = CONSISTENT_PTE_INDEX(c->vm_start); + off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); + ptep = consistent_pte[idx] + off; + addr = c->vm_start; + do { + pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); + + ptep++; + addr += PAGE_SIZE; + off++; + if (off >= PTRS_PER_PTE) { + off = 0; + ptep = consistent_pte[++idx]; + } + + if (pte_none(pte) || !pte_present(pte)) + printk(KERN_CRIT "%s: bad page in kernel page table\n", + __func__); + } while (size -= PAGE_SIZE); + + flush_tlb_kernel_range(c->vm_start, c->vm_end); + + arm_vmregion_free(&consistent_head, c); +} + +#else /* !CONFIG_MMU */ + +#define __dma_alloc_remap(page, size, gfp, prot, c) page_address(page) +#define __dma_free_remap(addr, size) do { } while (0) + +#endif /* CONFIG_MMU */ + +static void * +__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, + pgprot_t prot, const void *caller) +{ + struct page *page; + void *addr; + + /* + * Following is a work-around (a.k.a. hack) to prevent pages + * with __GFP_COMP being passed to split_page() which cannot + * handle them. The real problem is that this flag probably + * should be 0 on ARM as it is not supported on this + * platform; see CONFIG_HUGETLBFS. + */ + gfp &= ~(__GFP_COMP); + + *handle = ~0; + size = PAGE_ALIGN(size); + + page = __dma_alloc_buffer(dev, size, gfp); + if (!page) + return NULL; + + if (!arch_is_coherent()) + addr = __dma_alloc_remap(page, size, gfp, prot, caller); + else + addr = page_address(page); + + if (addr) + *handle = pfn_to_dma(dev, page_to_pfn(page)); + else + __dma_free_buffer(page, size); + + return addr; +} + +/* + * Allocate DMA-coherent memory space and return both the kernel remapped + * virtual and bus address for that space. + */ +void * +dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) +{ + void *memory; + + if (dma_alloc_from_coherent(dev, size, handle, &memory)) + return memory; + + return __dma_alloc(dev, size, handle, gfp, + pgprot_dmacoherent(pgprot_kernel), + __builtin_return_address(0)); +} +EXPORT_SYMBOL(dma_alloc_coherent); + +/* + * Allocate a writecombining region, in much the same way as + * dma_alloc_coherent above. + */ +void * +dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) +{ + return __dma_alloc(dev, size, handle, gfp, + pgprot_writecombine(pgprot_kernel), + __builtin_return_address(0)); +} +EXPORT_SYMBOL(dma_alloc_writecombine); + +static int dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size) +{ + int ret = -ENXIO; +#ifdef CONFIG_MMU + unsigned long user_size, kern_size; + struct arm_vmregion *c; + + user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + + c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); + if (c) { + unsigned long off = vma->vm_pgoff; + + kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT; + + if (off < kern_size && + user_size <= (kern_size - off)) { + ret = remap_pfn_range(vma, vma->vm_start, + page_to_pfn(c->vm_pages) + off, + user_size << PAGE_SHIFT, + vma->vm_page_prot); + } + } +#endif /* CONFIG_MMU */ + + return ret; +} + +int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size) +{ + vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot); + return dma_mmap(dev, vma, cpu_addr, dma_addr, size); +} +EXPORT_SYMBOL(dma_mmap_coherent); + +int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size) +{ + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + return dma_mmap(dev, vma, cpu_addr, dma_addr, size); +} +EXPORT_SYMBOL(dma_mmap_writecombine); + +/* + * free a page as defined by the above mapping. + * Must not be called with IRQs disabled. + */ +void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) +{ + WARN_ON(irqs_disabled()); + + if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) + return; + + size = PAGE_ALIGN(size); + + if (!arch_is_coherent()) + __dma_free_remap(cpu_addr, size); + + __dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size); +} +EXPORT_SYMBOL(dma_free_coherent); + +/* + * Make an area consistent for devices. + * Note: Drivers should NOT use this function directly, as it will break + * platforms with CONFIG_DMABOUNCE. + * Use the driver DMA support - see dma-mapping.h (dma_sync_*) + */ +void ___dma_single_cpu_to_dev(const void *kaddr, size_t size, + enum dma_data_direction dir) +{ + unsigned long paddr; + + BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1)); + + dmac_map_area(kaddr, size, dir); + + paddr = __pa(kaddr); + if (dir == DMA_FROM_DEVICE) { + outer_inv_range(paddr, paddr + size); + } else { + outer_clean_range(paddr, paddr + size); + } + /* FIXME: non-speculating: flush on bidirectional mappings? */ +} +EXPORT_SYMBOL(___dma_single_cpu_to_dev); + +void ___dma_single_dev_to_cpu(const void *kaddr, size_t size, + enum dma_data_direction dir) +{ + BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1)); + + /* FIXME: non-speculating: not required */ + /* don't bother invalidating if DMA to device */ + if (dir != DMA_TO_DEVICE) { + unsigned long paddr = __pa(kaddr); + outer_inv_range(paddr, paddr + size); + } + + dmac_unmap_area(kaddr, size, dir); +} +EXPORT_SYMBOL(___dma_single_dev_to_cpu); + +static void dma_cache_maint_page(struct page *page, unsigned long offset, + size_t size, enum dma_data_direction dir, + void (*op)(const void *, size_t, int)) +{ + /* + * A single sg entry may refer to multiple physically contiguous + * pages. But we still need to process highmem pages individually. + * If highmem is not configured then the bulk of this loop gets + * optimized out. + */ + size_t left = size; + do { + size_t len = left; + void *vaddr; + + if (PageHighMem(page)) { + if (len + offset > PAGE_SIZE) { + if (offset >= PAGE_SIZE) { + page += offset / PAGE_SIZE; + offset %= PAGE_SIZE; + } + len = PAGE_SIZE - offset; + } + vaddr = kmap_high_get(page); + if (vaddr) { + vaddr += offset; + op(vaddr, len, dir); + kunmap_high(page); + } else if (cache_is_vipt()) { + /* unmapped pages might still be cached */ + vaddr = kmap_atomic(page); + op(vaddr + offset, len, dir); + kunmap_atomic(vaddr); + } + } else { + vaddr = page_address(page) + offset; + op(vaddr, len, dir); + } + offset = 0; + page++; + left -= len; + } while (left); +} + +void ___dma_page_cpu_to_dev(struct page *page, unsigned long off, + size_t size, enum dma_data_direction dir) +{ + unsigned long paddr; + + dma_cache_maint_page(page, off, size, dir, dmac_map_area); + + paddr = page_to_phys(page) + off; + if (dir == DMA_FROM_DEVICE) { + outer_inv_range(paddr, paddr + size); + } else { + outer_clean_range(paddr, paddr + size); + } + /* FIXME: non-speculating: flush on bidirectional mappings? */ +} +EXPORT_SYMBOL(___dma_page_cpu_to_dev); + +void ___dma_page_dev_to_cpu(struct page *page, unsigned long off, + size_t size, enum dma_data_direction dir) +{ + unsigned long paddr = page_to_phys(page) + off; + + /* FIXME: non-speculating: not required */ + /* don't bother invalidating if DMA to device */ + if (dir != DMA_TO_DEVICE) + outer_inv_range(paddr, paddr + size); + + dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); + + /* + * Mark the D-cache clean for this page to avoid extra flushing. + */ + if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) + set_bit(PG_dcache_clean, &page->flags); +} +EXPORT_SYMBOL(___dma_page_dev_to_cpu); + +/** + * dma_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * This is the scatter-gather version of the dma_map_single interface. + * Here the scatter gather list elements are each tagged with the + * appropriate dma address and length. They are obtained via + * sg_dma_{address,length}. + * + * Device ownership issues as mentioned for dma_map_single are the same + * here. + */ +int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir) +{ + struct scatterlist *s; + int i, j; + + BUG_ON(!valid_dma_direction(dir)); + + for_each_sg(sg, s, nents, i) { + s->dma_address = __dma_map_page(dev, sg_page(s), s->offset, + s->length, dir); + if (dma_mapping_error(dev, s->dma_address)) + goto bad_mapping; + } + debug_dma_map_sg(dev, sg, nents, nents, dir); + return nents; + + bad_mapping: + for_each_sg(sg, s, i, j) + __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); + return 0; +} +EXPORT_SYMBOL(dma_map_sg); + +/** + * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). + */ +void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + debug_dma_unmap_sg(dev, sg, nents, dir); + + for_each_sg(sg, s, nents, i) + __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); +} +EXPORT_SYMBOL(dma_unmap_sg); + +/** + * dma_sync_sg_for_cpu + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + if (!dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0, + sg_dma_len(s), dir)) + continue; + + __dma_page_dev_to_cpu(sg_page(s), s->offset, + s->length, dir); + } + + debug_dma_sync_sg_for_cpu(dev, sg, nents, dir); +} +EXPORT_SYMBOL(dma_sync_sg_for_cpu); + +/** + * dma_sync_sg_for_device + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0, + sg_dma_len(s), dir)) + continue; + + __dma_page_cpu_to_dev(sg_page(s), s->offset, + s->length, dir); + } + + debug_dma_sync_sg_for_device(dev, sg, nents, dir); +} +EXPORT_SYMBOL(dma_sync_sg_for_device); + +/* + * Return whether the given device DMA address mask can be supported + * properly. For example, if your device can only drive the low 24-bits + * during bus mastering, then you would pass 0x00ffffff as the mask + * to this function. + */ +int dma_supported(struct device *dev, u64 mask) +{ + if (mask < (u64)arm_dma_limit) + return 0; + return 1; +} +EXPORT_SYMBOL(dma_supported); + +int dma_set_mask(struct device *dev, u64 dma_mask) +{ + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + +#ifndef CONFIG_DMABOUNCE + *dev->dma_mask = dma_mask; +#endif + + return 0; +} +EXPORT_SYMBOL(dma_set_mask); + +#define PREALLOC_DMA_DEBUG_ENTRIES 4096 + +static int __init dma_debug_do_init(void) +{ +#ifdef CONFIG_MMU + arm_vmregion_create_proc("dma-mappings", &consistent_head); +#endif + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + return 0; +} +fs_initcall(dma_debug_do_init); diff --git a/arch/arm/mm/extable.c b/arch/arm/mm/extable.c new file mode 100644 index 00000000..9d285626 --- /dev/null +++ b/arch/arm/mm/extable.c @@ -0,0 +1,16 @@ +/* + * linux/arch/arm/mm/extable.c + */ +#include <linux/module.h> +#include <linux/uaccess.h> + +int fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + fixup = search_exception_tables(instruction_pointer(regs)); + if (fixup) + regs->ARM_pc = fixup->fixup; + + return fixup != NULL; +} diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c new file mode 100644 index 00000000..7599e262 --- /dev/null +++ b/arch/arm/mm/fault-armv.c @@ -0,0 +1,270 @@ +/* + * linux/arch/arm/mm/fault-armv.c + * + * Copyright (C) 1995 Linus Torvalds + * Modifications for ARM processor (c) 1995-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/bitops.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/pagemap.h> +#include <linux/gfp.h> + +#include <asm/bugs.h> +#include <asm/cacheflush.h> +#include <asm/cachetype.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> + +#include "mm.h" + +static pteval_t shared_pte_mask = L_PTE_MT_BUFFERABLE; + +#if __LINUX_ARM_ARCH__ < 6 +/* + * We take the easy way out of this problem - we make the + * PTE uncacheable. However, we leave the write buffer on. + * + * Note that the pte lock held when calling update_mmu_cache must also + * guard the pte (somewhere else in the same mm) that we modify here. + * Therefore those configurations which might call adjust_pte (those + * without CONFIG_CPU_CACHE_VIPT) cannot support split page_table_lock. + */ +static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address, + unsigned long pfn, pte_t *ptep) +{ + pte_t entry = *ptep; + int ret; + + /* + * If this page is present, it's actually being shared. + */ + ret = pte_present(entry); + + /* + * If this page isn't present, or is already setup to + * fault (ie, is old), we can safely ignore any issues. + */ + if (ret && (pte_val(entry) & L_PTE_MT_MASK) != shared_pte_mask) { + flush_cache_page(vma, address, pfn); + outer_flush_range((pfn << PAGE_SHIFT), + (pfn << PAGE_SHIFT) + PAGE_SIZE); + pte_val(entry) &= ~L_PTE_MT_MASK; + pte_val(entry) |= shared_pte_mask; + set_pte_at(vma->vm_mm, address, ptep, entry); + flush_tlb_page(vma, address); + } + + return ret; +} + +#if USE_SPLIT_PTLOCKS +/* + * If we are using split PTE locks, then we need to take the page + * lock here. Otherwise we are using shared mm->page_table_lock + * which is already locked, thus cannot take it. + */ +static inline void do_pte_lock(spinlock_t *ptl) +{ + /* + * Use nested version here to indicate that we are already + * holding one similar spinlock. + */ + spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); +} + +static inline void do_pte_unlock(spinlock_t *ptl) +{ + spin_unlock(ptl); +} +#else /* !USE_SPLIT_PTLOCKS */ +static inline void do_pte_lock(spinlock_t *ptl) {} +static inline void do_pte_unlock(spinlock_t *ptl) {} +#endif /* USE_SPLIT_PTLOCKS */ + +static int adjust_pte(struct vm_area_struct *vma, unsigned long address, + unsigned long pfn) +{ + spinlock_t *ptl; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int ret; + + pgd = pgd_offset(vma->vm_mm, address); + if (pgd_none_or_clear_bad(pgd)) + return 0; + + pud = pud_offset(pgd, address); + if (pud_none_or_clear_bad(pud)) + return 0; + + pmd = pmd_offset(pud, address); + if (pmd_none_or_clear_bad(pmd)) + return 0; + + /* + * This is called while another page table is mapped, so we + * must use the nested version. This also means we need to + * open-code the spin-locking. + */ + ptl = pte_lockptr(vma->vm_mm, pmd); + pte = pte_offset_map(pmd, address); + do_pte_lock(ptl); + + ret = do_adjust_pte(vma, address, pfn, pte); + + do_pte_unlock(ptl); + pte_unmap(pte); + + return ret; +} + +static void +make_coherent(struct address_space *mapping, struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, unsigned long pfn) +{ + struct mm_struct *mm = vma->vm_mm; + struct vm_area_struct *mpnt; + struct prio_tree_iter iter; + unsigned long offset; + pgoff_t pgoff; + int aliases = 0; + + pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT); + + /* + * If we have any shared mappings that are in the same mm + * space, then we need to handle them specially to maintain + * cache coherency. + */ + flush_dcache_mmap_lock(mapping); + vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { + /* + * If this VMA is not in our MM, we can ignore it. + * Note that we intentionally mask out the VMA + * that we are fixing up. + */ + if (mpnt->vm_mm != mm || mpnt == vma) + continue; + if (!(mpnt->vm_flags & VM_MAYSHARE)) + continue; + offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; + aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn); + } + flush_dcache_mmap_unlock(mapping); + if (aliases) + do_adjust_pte(vma, addr, pfn, ptep); +} + +/* + * Take care of architecture specific things when placing a new PTE into + * a page table, or changing an existing PTE. Basically, there are two + * things that we need to take care of: + * + * 1. If PG_dcache_clean is not set for the page, we need to ensure + * that any cache entries for the kernels virtual memory + * range are written back to the page. + * 2. If we have multiple shared mappings of the same space in + * an object, we need to deal with the cache aliasing issues. + * + * Note that the pte lock will be held. + */ +void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep) +{ + unsigned long pfn = pte_pfn(*ptep); + struct address_space *mapping; + struct page *page; + + if (!pfn_valid(pfn)) + return; + + /* + * The zero page is never written to, so never has any dirty + * cache lines, and therefore never needs to be flushed. + */ + page = pfn_to_page(pfn); + if (page == ZERO_PAGE(0)) + return; + + mapping = page_mapping(page); + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + __flush_dcache_page(mapping, page); + if (mapping) { + if (cache_is_vivt()) + make_coherent(mapping, vma, addr, ptep, pfn); + else if (vma->vm_flags & VM_EXEC) + __flush_icache_all(); + } +} +#endif /* __LINUX_ARM_ARCH__ < 6 */ + +/* + * Check whether the write buffer has physical address aliasing + * issues. If it has, we need to avoid them for the case where + * we have several shared mappings of the same object in user + * space. + */ +static int __init check_writebuffer(unsigned long *p1, unsigned long *p2) +{ + register unsigned long zero = 0, one = 1, val; + + local_irq_disable(); + mb(); + *p1 = one; + mb(); + *p2 = zero; + mb(); + val = *p1; + mb(); + local_irq_enable(); + return val != zero; +} + +void __init check_writebuffer_bugs(void) +{ + struct page *page; + const char *reason; + unsigned long v = 1; + + printk(KERN_INFO "CPU: Testing write buffer coherency: "); + + page = alloc_page(GFP_KERNEL); + if (page) { + unsigned long *p1, *p2; + pgprot_t prot = __pgprot_modify(PAGE_KERNEL, + L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE); + + p1 = vmap(&page, 1, VM_IOREMAP, prot); + p2 = vmap(&page, 1, VM_IOREMAP, prot); + + if (p1 && p2) { + v = check_writebuffer(p1, p2); + reason = "enabling work-around"; + } else { + reason = "unable to map memory\n"; + } + + vunmap(p1); + vunmap(p2); + put_page(page); + } else { + reason = "unable to grab page\n"; + } + + if (v) { + printk("failed, %s\n", reason); + shared_pte_mask = L_PTE_MT_UNCACHED; + } else { + printk("ok\n"); + } +} diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c new file mode 100644 index 00000000..5bb48356 --- /dev/null +++ b/arch/arm/mm/fault.c @@ -0,0 +1,616 @@ +/* + * linux/arch/arm/mm/fault.c + * + * Copyright (C) 1995 Linus Torvalds + * Modifications for ARM processor (c) 1995-2004 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/hardirq.h> +#include <linux/init.h> +#include <linux/kprobes.h> +#include <linux/uaccess.h> +#include <linux/page-flags.h> +#include <linux/sched.h> +#include <linux/highmem.h> +#include <linux/perf_event.h> + +#include <asm/exception.h> +#include <asm/pgtable.h> +#include <asm/system_misc.h> +#include <asm/system_info.h> +#include <asm/tlbflush.h> + +#include "fault.h" + +#ifdef CONFIG_MMU + +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr) +{ + int ret = 0; + + if (!user_mode(regs)) { + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, fsr)) + ret = 1; + preempt_enable(); + } + + return ret; +} +#else +static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr) +{ + return 0; +} +#endif + +/* + * This is useful to dump out the page tables associated with + * 'addr' in mm 'mm'. + */ +void show_pte(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + + if (!mm) + mm = &init_mm; + + printk(KERN_ALERT "pgd = %p\n", mm->pgd); + pgd = pgd_offset(mm, addr); + printk(KERN_ALERT "[%08lx] *pgd=%08llx", + addr, (long long)pgd_val(*pgd)); + + do { + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if (pgd_none(*pgd)) + break; + + if (pgd_bad(*pgd)) { + printk("(bad)"); + break; + } + + pud = pud_offset(pgd, addr); + if (PTRS_PER_PUD != 1) + printk(", *pud=%08llx", (long long)pud_val(*pud)); + + if (pud_none(*pud)) + break; + + if (pud_bad(*pud)) { + printk("(bad)"); + break; + } + + pmd = pmd_offset(pud, addr); + if (PTRS_PER_PMD != 1) + printk(", *pmd=%08llx", (long long)pmd_val(*pmd)); + + if (pmd_none(*pmd)) + break; + + if (pmd_bad(*pmd)) { + printk("(bad)"); + break; + } + + /* We must not map this if we have highmem enabled */ + if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT))) + break; + + pte = pte_offset_map(pmd, addr); + printk(", *pte=%08llx", (long long)pte_val(*pte)); +#ifndef CONFIG_ARM_LPAE + printk(", *ppte=%08llx", + (long long)pte_val(pte[PTE_HWTABLE_PTRS])); +#endif + pte_unmap(pte); + } while(0); + + printk("\n"); +} +#else /* CONFIG_MMU */ +void show_pte(struct mm_struct *mm, unsigned long addr) +{ } +#endif /* CONFIG_MMU */ + +/* + * Oops. The kernel tried to access some page that wasn't present. + */ +static void +__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ + /* + * Are we prepared to handle this kernel fault? + */ + if (fixup_exception(regs)) + return; + + /* + * No handler, we'll have to terminate things with extreme prejudice. + */ + bust_spinlocks(1); + printk(KERN_ALERT + "Unable to handle kernel %s at virtual address %08lx\n", + (addr < PAGE_SIZE) ? "NULL pointer dereference" : + "paging request", addr); + + show_pte(mm, addr); + die("Oops", regs, fsr); + bust_spinlocks(0); + do_exit(SIGKILL); +} + +/* + * Something tried to access memory that isn't in our memory map.. + * User mode accesses just cause a SIGSEGV + */ +static void +__do_user_fault(struct task_struct *tsk, unsigned long addr, + unsigned int fsr, unsigned int sig, int code, + struct pt_regs *regs) +{ + struct siginfo si; + +#ifdef CONFIG_DEBUG_USER + if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) || + ((user_debug & UDBG_BUS) && (sig == SIGBUS))) { + printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n", + tsk->comm, sig, addr, fsr); + show_pte(tsk->mm, addr); + show_regs(regs); + } +#endif + + tsk->thread.address = addr; + tsk->thread.error_code = fsr; + tsk->thread.trap_no = 14; + si.si_signo = sig; + si.si_errno = 0; + si.si_code = code; + si.si_addr = (void __user *)addr; + force_sig_info(sig, &si, tsk); +} + +void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->active_mm; + + /* + * If we are in kernel mode at this point, we + * have no context to handle this fault with. + */ + if (user_mode(regs)) + __do_user_fault(tsk, addr, fsr, SIGSEGV, SEGV_MAPERR, regs); + else + __do_kernel_fault(mm, addr, fsr, regs); +} + +#ifdef CONFIG_MMU +#define VM_FAULT_BADMAP 0x010000 +#define VM_FAULT_BADACCESS 0x020000 + +/* + * Check that the permissions on the VMA allow for the fault which occurred. + * If we encountered a write fault, we must have write permission, otherwise + * we allow any permission. + */ +static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) +{ + unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + + if (fsr & FSR_WRITE) + mask = VM_WRITE; + if (fsr & FSR_LNX_PF) + mask = VM_EXEC; + + return vma->vm_flags & mask ? false : true; +} + +static int __kprobes +__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, + unsigned int flags, struct task_struct *tsk) +{ + struct vm_area_struct *vma; + int fault; + + vma = find_vma(mm, addr); + fault = VM_FAULT_BADMAP; + if (unlikely(!vma)) + goto out; + if (unlikely(vma->vm_start > addr)) + goto check_stack; + + /* + * Ok, we have a good vm_area for this + * memory access, so we can handle it. + */ +good_area: + if (access_error(fsr, vma)) { + fault = VM_FAULT_BADACCESS; + goto out; + } + + return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags); + +check_stack: + /* Don't allow expansion below FIRST_USER_ADDRESS */ + if (vma->vm_flags & VM_GROWSDOWN && + addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr)) + goto good_area; +out: + return fault; +} + +static int __kprobes +do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + struct task_struct *tsk; + struct mm_struct *mm; + int fault, sig, code; + int write = fsr & FSR_WRITE; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | + (write ? FAULT_FLAG_WRITE : 0); + + if (notify_page_fault(regs, fsr)) + return 0; + + tsk = current; + mm = tsk->mm; + + /* Enable interrupts if they were enabled in the parent context. */ + if (interrupts_enabled(regs)) + local_irq_enable(); + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. + */ + if (in_atomic() || !mm) + goto no_context; + + /* + * As per x86, we may deadlock here. However, since the kernel only + * validly references user space from well defined areas of the code, + * we can bug out early if this is from code which shouldn't. + */ + if (!down_read_trylock(&mm->mmap_sem)) { + if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc)) + goto no_context; +retry: + down_read(&mm->mmap_sem); + } else { + /* + * The above down_read_trylock() might have succeeded in + * which case, we'll have missed the might_sleep() from + * down_read() + */ + might_sleep(); +#ifdef CONFIG_DEBUG_VM + if (!user_mode(regs) && + !search_exception_tables(regs->ARM_pc)) + goto no_context; +#endif + } + + fault = __do_page_fault(mm, addr, fsr, flags, tsk); + + /* If we need to retry but a fatal signal is pending, handle the + * signal first. We do not need to release the mmap_sem because + * it would already be released in __lock_page_or_retry in + * mm/filemap.c. */ + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + return 0; + + /* + * Major/minor page fault accounting is only done on the + * initial attempt. If we go through a retry, it is extremely + * likely that the page will be found in page cache at that point. + */ + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + tsk->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, addr); + } else { + tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, addr); + } + if (fault & VM_FAULT_RETRY) { + /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk + * of starvation. */ + flags &= ~FAULT_FLAG_ALLOW_RETRY; + goto retry; + } + } + + up_read(&mm->mmap_sem); + + /* + * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR + */ + if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) + return 0; + + if (fault & VM_FAULT_OOM) { + /* + * We ran out of memory, call the OOM killer, and return to + * userspace (which will retry the fault, or kill us if we + * got oom-killed) + */ + pagefault_out_of_memory(); + return 0; + } + + /* + * If we are in kernel mode at this point, we + * have no context to handle this fault with. + */ + if (!user_mode(regs)) + goto no_context; + + if (fault & VM_FAULT_SIGBUS) { + /* + * We had some memory, but were unable to + * successfully fix up this page fault. + */ + sig = SIGBUS; + code = BUS_ADRERR; + } else { + /* + * Something tried to access memory that + * isn't in our memory map.. + */ + sig = SIGSEGV; + code = fault == VM_FAULT_BADACCESS ? + SEGV_ACCERR : SEGV_MAPERR; + } + + __do_user_fault(tsk, addr, fsr, sig, code, regs); + return 0; + +no_context: + __do_kernel_fault(mm, addr, fsr, regs); + return 0; +} +#else /* CONFIG_MMU */ +static int +do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + return 0; +} +#endif /* CONFIG_MMU */ + +/* + * First Level Translation Fault Handler + * + * We enter here because the first level page table doesn't contain + * a valid entry for the address. + * + * If the address is in kernel space (>= TASK_SIZE), then we are + * probably faulting in the vmalloc() area. + * + * If the init_task's first level page tables contains the relevant + * entry, we copy the it to this task. If not, we send the process + * a signal, fixup the exception, or oops the kernel. + * + * NOTE! We MUST NOT take any locks for this case. We may be in an + * interrupt or a critical region, and should only copy the information + * from the master page table, nothing more. + */ +#ifdef CONFIG_MMU +static int __kprobes +do_translation_fault(unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ + unsigned int index; + pgd_t *pgd, *pgd_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + + if (addr < TASK_SIZE) + return do_page_fault(addr, fsr, regs); + + if (user_mode(regs)) + goto bad_area; + + index = pgd_index(addr); + + /* + * FIXME: CP15 C1 is write only on ARMv3 architectures. + */ + pgd = cpu_get_pgd() + index; + pgd_k = init_mm.pgd + index; + + if (pgd_none(*pgd_k)) + goto bad_area; + if (!pgd_present(*pgd)) + set_pgd(pgd, *pgd_k); + + pud = pud_offset(pgd, addr); + pud_k = pud_offset(pgd_k, addr); + + if (pud_none(*pud_k)) + goto bad_area; + if (!pud_present(*pud)) + set_pud(pud, *pud_k); + + pmd = pmd_offset(pud, addr); + pmd_k = pmd_offset(pud_k, addr); + +#ifdef CONFIG_ARM_LPAE + /* + * Only one hardware entry per PMD with LPAE. + */ + index = 0; +#else + /* + * On ARM one Linux PGD entry contains two hardware entries (see page + * tables layout in pgtable.h). We normally guarantee that we always + * fill both L1 entries. But create_mapping() doesn't follow the rule. + * It can create inidividual L1 entries, so here we have to call + * pmd_none() check for the entry really corresponded to address, not + * for the first of pair. + */ + index = (addr >> SECTION_SHIFT) & 1; +#endif + if (pmd_none(pmd_k[index])) + goto bad_area; + + copy_pmd(pmd, pmd_k); + return 0; + +bad_area: + do_bad_area(addr, fsr, regs); + return 0; +} +#else /* CONFIG_MMU */ +static int +do_translation_fault(unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ + return 0; +} +#endif /* CONFIG_MMU */ + +/* + * Some section permission faults need to be handled gracefully. + * They can happen due to a __{get,put}_user during an oops. + */ +static int +do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + do_bad_area(addr, fsr, regs); + return 0; +} + +/* + * This abort handler always returns "fault". + */ +static int +do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + return 1; +} + +struct fsr_info { + int (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs); + int sig; + int code; + const char *name; +}; + +/* FSR definition */ +#ifdef CONFIG_ARM_LPAE +#include "fsr-3level.c" +#else +#include "fsr-2level.c" +#endif + +void __init +hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), + int sig, int code, const char *name) +{ + if (nr < 0 || nr >= ARRAY_SIZE(fsr_info)) + BUG(); + + fsr_info[nr].fn = fn; + fsr_info[nr].sig = sig; + fsr_info[nr].code = code; + fsr_info[nr].name = name; +} + +/* + * Dispatch a data abort to the relevant handler. + */ +asmlinkage void __exception +do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + const struct fsr_info *inf = fsr_info + fsr_fs(fsr); + struct siginfo info; + + if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs)) + return; + + printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n", + inf->name, fsr, addr); + + info.si_signo = inf->sig; + info.si_errno = 0; + info.si_code = inf->code; + info.si_addr = (void __user *)addr; + arm_notify_die("", regs, &info, fsr, 0); +} + +void __init +hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), + int sig, int code, const char *name) +{ + if (nr < 0 || nr >= ARRAY_SIZE(ifsr_info)) + BUG(); + + ifsr_info[nr].fn = fn; + ifsr_info[nr].sig = sig; + ifsr_info[nr].code = code; + ifsr_info[nr].name = name; +} + +asmlinkage void __exception +do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) +{ + const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr); + struct siginfo info; + + if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs)) + return; + + printk(KERN_ALERT "Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n", + inf->name, ifsr, addr); + + info.si_signo = inf->sig; + info.si_errno = 0; + info.si_code = inf->code; + info.si_addr = (void __user *)addr; + arm_notify_die("", regs, &info, ifsr, 0); +} + +#ifndef CONFIG_ARM_LPAE +static int __init exceptions_init(void) +{ + if (cpu_architecture() >= CPU_ARCH_ARMv6) { + hook_fault_code(4, do_translation_fault, SIGSEGV, SEGV_MAPERR, + "I-cache maintenance fault"); + } + + if (cpu_architecture() >= CPU_ARCH_ARMv7) { + /* + * TODO: Access flag faults introduced in ARMv6K. + * Runtime check for 'K' extension is needed + */ + hook_fault_code(3, do_bad, SIGSEGV, SEGV_MAPERR, + "section access flag fault"); + hook_fault_code(6, do_bad, SIGSEGV, SEGV_MAPERR, + "section access flag fault"); + } + + return 0; +} + +arch_initcall(exceptions_init); +#endif diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h new file mode 100644 index 00000000..cf08bdfb --- /dev/null +++ b/arch/arm/mm/fault.h @@ -0,0 +1,28 @@ +#ifndef __ARCH_ARM_FAULT_H +#define __ARCH_ARM_FAULT_H + +/* + * Fault status register encodings. We steal bit 31 for our own purposes. + */ +#define FSR_LNX_PF (1 << 31) +#define FSR_WRITE (1 << 11) +#define FSR_FS4 (1 << 10) +#define FSR_FS3_0 (15) +#define FSR_FS5_0 (0x3f) + +#ifdef CONFIG_ARM_LPAE +static inline int fsr_fs(unsigned int fsr) +{ + return fsr & FSR_FS5_0; +} +#else +static inline int fsr_fs(unsigned int fsr) +{ + return (fsr & FSR_FS3_0) | (fsr & FSR_FS4) >> 6; +} +#endif + +void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs); +unsigned long search_exception_table(unsigned long addr); + +#endif /* __ARCH_ARM_FAULT_H */ diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c new file mode 100644 index 00000000..77458548 --- /dev/null +++ b/arch/arm/mm/flush.c @@ -0,0 +1,341 @@ +/* + * linux/arch/arm/mm/flush.c + * + * Copyright (C) 1995-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/highmem.h> + +#include <asm/cacheflush.h> +#include <asm/cachetype.h> +#include <asm/highmem.h> +#include <asm/smp_plat.h> +#include <asm/tlbflush.h> + +#include "mm.h" + +#ifdef CONFIG_CPU_CACHE_VIPT + +static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr) +{ + unsigned long to = FLUSH_ALIAS_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT); + const int zero = 0; + + set_top_pte(to, pfn_pte(pfn, PAGE_KERNEL)); + + asm( "mcrr p15, 0, %1, %0, c14\n" + " mcr p15, 0, %2, c7, c10, 4" + : + : "r" (to), "r" (to + PAGE_SIZE - L1_CACHE_BYTES), "r" (zero) + : "cc"); +} + +static void flush_icache_alias(unsigned long pfn, unsigned long vaddr, unsigned long len) +{ + unsigned long va = FLUSH_ALIAS_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT); + unsigned long offset = vaddr & (PAGE_SIZE - 1); + unsigned long to; + + set_top_pte(va, pfn_pte(pfn, PAGE_KERNEL)); + to = va + offset; + flush_icache_range(to, to + len); +} + +void flush_cache_mm(struct mm_struct *mm) +{ + if (cache_is_vivt()) { + vivt_flush_cache_mm(mm); + return; + } + + if (cache_is_vipt_aliasing()) { + asm( "mcr p15, 0, %0, c7, c14, 0\n" + " mcr p15, 0, %0, c7, c10, 4" + : + : "r" (0) + : "cc"); + } +} + +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + if (cache_is_vivt()) { + vivt_flush_cache_range(vma, start, end); + return; + } + + if (cache_is_vipt_aliasing()) { + asm( "mcr p15, 0, %0, c7, c14, 0\n" + " mcr p15, 0, %0, c7, c10, 4" + : + : "r" (0) + : "cc"); + } + + if (vma->vm_flags & VM_EXEC) + __flush_icache_all(); +} + +void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn) +{ + if (cache_is_vivt()) { + vivt_flush_cache_page(vma, user_addr, pfn); + return; + } + + if (cache_is_vipt_aliasing()) { + flush_pfn_alias(pfn, user_addr); + __flush_icache_all(); + } + + if (vma->vm_flags & VM_EXEC && icache_is_vivt_asid_tagged()) + __flush_icache_all(); +} + +#else +#define flush_pfn_alias(pfn,vaddr) do { } while (0) +#define flush_icache_alias(pfn,vaddr,len) do { } while (0) +#endif + +static void flush_ptrace_access_other(void *args) +{ + __flush_icache_all(); +} + +static +void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, + unsigned long uaddr, void *kaddr, unsigned long len) +{ + if (cache_is_vivt()) { + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) { + unsigned long addr = (unsigned long)kaddr; + __cpuc_coherent_kern_range(addr, addr + len); + } + return; + } + + if (cache_is_vipt_aliasing()) { + flush_pfn_alias(page_to_pfn(page), uaddr); + __flush_icache_all(); + return; + } + + /* VIPT non-aliasing D-cache */ + if (vma->vm_flags & VM_EXEC) { + unsigned long addr = (unsigned long)kaddr; + if (icache_is_vipt_aliasing()) + flush_icache_alias(page_to_pfn(page), uaddr, len); + else + __cpuc_coherent_kern_range(addr, addr + len); + if (cache_ops_need_broadcast()) + smp_call_function(flush_ptrace_access_other, + NULL, 1); + } +} + +/* + * Copy user data from/to a page which is mapped into a different + * processes address space. Really, we want to allow our "user + * space" model to handle this. + * + * Note that this code needs to run on the current CPU. + */ +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long uaddr, void *dst, const void *src, + unsigned long len) +{ +#ifdef CONFIG_SMP + preempt_disable(); +#endif + memcpy(dst, src, len); + flush_ptrace_access(vma, page, uaddr, dst, len); +#ifdef CONFIG_SMP + preempt_enable(); +#endif +} + +void __flush_dcache_page(struct address_space *mapping, struct page *page) +{ + /* + * Writeback any data associated with the kernel mapping of this + * page. This ensures that data in the physical page is mutually + * coherent with the kernels mapping. + */ + if (!PageHighMem(page)) { + __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); + } else { + void *addr = kmap_high_get(page); + if (addr) { + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_high(page); + } else if (cache_is_vipt()) { + /* unmapped pages might still be cached */ + addr = kmap_atomic(page); + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_atomic(addr); + } + } + + /* + * If this is a page cache page, and we have an aliasing VIPT cache, + * we only need to do one flush - which would be at the relevant + * userspace colour, which is congruent with page->index. + */ + if (mapping && cache_is_vipt_aliasing()) + flush_pfn_alias(page_to_pfn(page), + page->index << PAGE_CACHE_SHIFT); +} + +static void __flush_dcache_aliases(struct address_space *mapping, struct page *page) +{ + struct mm_struct *mm = current->active_mm; + struct vm_area_struct *mpnt; + struct prio_tree_iter iter; + pgoff_t pgoff; + + /* + * There are possible user space mappings of this page: + * - VIVT cache: we need to also write back and invalidate all user + * data in the current VM view associated with this page. + * - aliasing VIPT: we only need to find one mapping of this page. + */ + pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + + flush_dcache_mmap_lock(mapping); + vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { + unsigned long offset; + + /* + * If this VMA is not in our MM, we can ignore it. + */ + if (mpnt->vm_mm != mm) + continue; + if (!(mpnt->vm_flags & VM_MAYSHARE)) + continue; + offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; + flush_cache_page(mpnt, mpnt->vm_start + offset, page_to_pfn(page)); + } + flush_dcache_mmap_unlock(mapping); +} + +#if __LINUX_ARM_ARCH__ >= 6 +void __sync_icache_dcache(pte_t pteval) +{ + unsigned long pfn; + struct page *page; + struct address_space *mapping; + + if (!pte_present_user(pteval)) + return; + if (cache_is_vipt_nonaliasing() && !pte_exec(pteval)) + /* only flush non-aliasing VIPT caches for exec mappings */ + return; + pfn = pte_pfn(pteval); + if (!pfn_valid(pfn)) + return; + + page = pfn_to_page(pfn); + if (cache_is_vipt_aliasing()) + mapping = page_mapping(page); + else + mapping = NULL; + + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + __flush_dcache_page(mapping, page); + + if (pte_exec(pteval)) + __flush_icache_all(); +} +#endif + +/* + * Ensure cache coherency between kernel mapping and userspace mapping + * of this page. + * + * We have three cases to consider: + * - VIPT non-aliasing cache: fully coherent so nothing required. + * - VIVT: fully aliasing, so we need to handle every alias in our + * current VM view. + * - VIPT aliasing: need to handle one alias in our current VM view. + * + * If we need to handle aliasing: + * If the page only exists in the page cache and there are no user + * space mappings, we can be lazy and remember that we may have dirty + * kernel cache lines for later. Otherwise, we assume we have + * aliasing mappings. + * + * Note that we disable the lazy flush for SMP configurations where + * the cache maintenance operations are not automatically broadcasted. + */ +void flush_dcache_page(struct page *page) +{ + struct address_space *mapping; + + /* + * The zero page is never written to, so never has any dirty + * cache lines, and therefore never needs to be flushed. + */ + if (page == ZERO_PAGE(0)) + return; + + mapping = page_mapping(page); + + if (!cache_ops_need_broadcast() && + mapping && !mapping_mapped(mapping)) + clear_bit(PG_dcache_clean, &page->flags); + else { + __flush_dcache_page(mapping, page); + if (mapping && cache_is_vivt()) + __flush_dcache_aliases(mapping, page); + else if (mapping) + __flush_icache_all(); + set_bit(PG_dcache_clean, &page->flags); + } +} +EXPORT_SYMBOL(flush_dcache_page); + +/* + * Flush an anonymous page so that users of get_user_pages() + * can safely access the data. The expected sequence is: + * + * get_user_pages() + * -> flush_anon_page + * memcpy() to/from page + * if written to page, flush_dcache_page() + */ +void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) +{ + unsigned long pfn; + + /* VIPT non-aliasing caches need do nothing */ + if (cache_is_vipt_nonaliasing()) + return; + + /* + * Write back and invalidate userspace mapping. + */ + pfn = page_to_pfn(page); + if (cache_is_vivt()) { + flush_cache_page(vma, vmaddr, pfn); + } else { + /* + * For aliasing VIPT, we can flush an alias of the + * userspace address only. + */ + flush_pfn_alias(pfn, vmaddr); + __flush_icache_all(); + } + + /* + * Invalidate kernel mapping. No data should be contained + * in this mapping of the page. FIXME: this is overkill + * since we actually ask for a write-back and invalidate. + */ + __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); +} diff --git a/arch/arm/mm/fsr-2level.c b/arch/arm/mm/fsr-2level.c new file mode 100644 index 00000000..18ca74c0 --- /dev/null +++ b/arch/arm/mm/fsr-2level.c @@ -0,0 +1,78 @@ +static struct fsr_info fsr_info[] = { + /* + * The following are the standard ARMv3 and ARMv4 aborts. ARMv5 + * defines these to be "precise" aborts. + */ + { do_bad, SIGSEGV, 0, "vector exception" }, + { do_bad, SIGBUS, BUS_ADRALN, "alignment exception" }, + { do_bad, SIGKILL, 0, "terminal exception" }, + { do_bad, SIGBUS, BUS_ADRALN, "alignment exception" }, + { do_bad, SIGBUS, 0, "external abort on linefetch" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "section translation fault" }, + { do_bad, SIGBUS, 0, "external abort on linefetch" }, + { do_page_fault, SIGSEGV, SEGV_MAPERR, "page translation fault" }, + { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "section domain fault" }, + { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "page domain fault" }, + { do_bad, SIGBUS, 0, "external abort on translation" }, + { do_sect_fault, SIGSEGV, SEGV_ACCERR, "section permission fault" }, + { do_bad, SIGBUS, 0, "external abort on translation" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "page permission fault" }, + /* + * The following are "imprecise" aborts, which are signalled by bit + * 10 of the FSR, and may not be recoverable. These are only + * supported if the CPU abort handler supports bit 10. + */ + { do_bad, SIGBUS, 0, "unknown 16" }, + { do_bad, SIGBUS, 0, "unknown 17" }, + { do_bad, SIGBUS, 0, "unknown 18" }, + { do_bad, SIGBUS, 0, "unknown 19" }, + { do_bad, SIGBUS, 0, "lock abort" }, /* xscale */ + { do_bad, SIGBUS, 0, "unknown 21" }, + { do_bad, SIGBUS, BUS_OBJERR, "imprecise external abort" }, /* xscale */ + { do_bad, SIGBUS, 0, "unknown 23" }, + { do_bad, SIGBUS, 0, "dcache parity error" }, /* xscale */ + { do_bad, SIGBUS, 0, "unknown 25" }, + { do_bad, SIGBUS, 0, "unknown 26" }, + { do_bad, SIGBUS, 0, "unknown 27" }, + { do_bad, SIGBUS, 0, "unknown 28" }, + { do_bad, SIGBUS, 0, "unknown 29" }, + { do_bad, SIGBUS, 0, "unknown 30" }, + { do_bad, SIGBUS, 0, "unknown 31" }, +}; + +static struct fsr_info ifsr_info[] = { + { do_bad, SIGBUS, 0, "unknown 0" }, + { do_bad, SIGBUS, 0, "unknown 1" }, + { do_bad, SIGBUS, 0, "debug event" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "section access flag fault" }, + { do_bad, SIGBUS, 0, "unknown 4" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "section translation fault" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "page access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_MAPERR, "page translation fault" }, + { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "section domain fault" }, + { do_bad, SIGBUS, 0, "unknown 10" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "page domain fault" }, + { do_bad, SIGBUS, 0, "external abort on translation" }, + { do_sect_fault, SIGSEGV, SEGV_ACCERR, "section permission fault" }, + { do_bad, SIGBUS, 0, "external abort on translation" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "page permission fault" }, + { do_bad, SIGBUS, 0, "unknown 16" }, + { do_bad, SIGBUS, 0, "unknown 17" }, + { do_bad, SIGBUS, 0, "unknown 18" }, + { do_bad, SIGBUS, 0, "unknown 19" }, + { do_bad, SIGBUS, 0, "unknown 20" }, + { do_bad, SIGBUS, 0, "unknown 21" }, + { do_bad, SIGBUS, 0, "unknown 22" }, + { do_bad, SIGBUS, 0, "unknown 23" }, + { do_bad, SIGBUS, 0, "unknown 24" }, + { do_bad, SIGBUS, 0, "unknown 25" }, + { do_bad, SIGBUS, 0, "unknown 26" }, + { do_bad, SIGBUS, 0, "unknown 27" }, + { do_bad, SIGBUS, 0, "unknown 28" }, + { do_bad, SIGBUS, 0, "unknown 29" }, + { do_bad, SIGBUS, 0, "unknown 30" }, + { do_bad, SIGBUS, 0, "unknown 31" }, +}; diff --git a/arch/arm/mm/fsr-3level.c b/arch/arm/mm/fsr-3level.c new file mode 100644 index 00000000..05a4e943 --- /dev/null +++ b/arch/arm/mm/fsr-3level.c @@ -0,0 +1,68 @@ +static struct fsr_info fsr_info[] = { + { do_bad, SIGBUS, 0, "unknown 0" }, + { do_bad, SIGBUS, 0, "unknown 1" }, + { do_bad, SIGBUS, 0, "unknown 2" }, + { do_bad, SIGBUS, 0, "unknown 3" }, + { do_bad, SIGBUS, 0, "reserved translation fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, + { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, + { do_bad, SIGBUS, 0, "reserved access flag fault" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, + { do_bad, SIGBUS, 0, "reserved permission fault" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, + { do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, + { do_bad, SIGBUS, 0, "synchronous external abort" }, + { do_bad, SIGBUS, 0, "asynchronous external abort" }, + { do_bad, SIGBUS, 0, "unknown 18" }, + { do_bad, SIGBUS, 0, "unknown 19" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous parity error" }, + { do_bad, SIGBUS, 0, "asynchronous parity error" }, + { do_bad, SIGBUS, 0, "unknown 26" }, + { do_bad, SIGBUS, 0, "unknown 27" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "unknown 32" }, + { do_bad, SIGBUS, BUS_ADRALN, "alignment fault" }, + { do_bad, SIGBUS, 0, "debug event" }, + { do_bad, SIGBUS, 0, "unknown 35" }, + { do_bad, SIGBUS, 0, "unknown 36" }, + { do_bad, SIGBUS, 0, "unknown 37" }, + { do_bad, SIGBUS, 0, "unknown 38" }, + { do_bad, SIGBUS, 0, "unknown 39" }, + { do_bad, SIGBUS, 0, "unknown 40" }, + { do_bad, SIGBUS, 0, "unknown 41" }, + { do_bad, SIGBUS, 0, "unknown 42" }, + { do_bad, SIGBUS, 0, "unknown 43" }, + { do_bad, SIGBUS, 0, "unknown 44" }, + { do_bad, SIGBUS, 0, "unknown 45" }, + { do_bad, SIGBUS, 0, "unknown 46" }, + { do_bad, SIGBUS, 0, "unknown 47" }, + { do_bad, SIGBUS, 0, "unknown 48" }, + { do_bad, SIGBUS, 0, "unknown 49" }, + { do_bad, SIGBUS, 0, "unknown 50" }, + { do_bad, SIGBUS, 0, "unknown 51" }, + { do_bad, SIGBUS, 0, "implementation fault (lockdown abort)" }, + { do_bad, SIGBUS, 0, "unknown 53" }, + { do_bad, SIGBUS, 0, "unknown 54" }, + { do_bad, SIGBUS, 0, "unknown 55" }, + { do_bad, SIGBUS, 0, "unknown 56" }, + { do_bad, SIGBUS, 0, "unknown 57" }, + { do_bad, SIGBUS, 0, "implementation fault (coprocessor abort)" }, + { do_bad, SIGBUS, 0, "unknown 59" }, + { do_bad, SIGBUS, 0, "unknown 60" }, + { do_bad, SIGBUS, 0, "unknown 61" }, + { do_bad, SIGBUS, 0, "unknown 62" }, + { do_bad, SIGBUS, 0, "unknown 63" }, +}; + +#define ifsr_info fsr_info diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c new file mode 100644 index 00000000..21b9e1bf --- /dev/null +++ b/arch/arm/mm/highmem.c @@ -0,0 +1,137 @@ +/* + * arch/arm/mm/highmem.c -- ARM highmem support + * + * Author: Nicolas Pitre + * Created: september 8, 2008 + * Copyright: Marvell Semiconductors Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/highmem.h> +#include <linux/interrupt.h> +#include <asm/fixmap.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include "mm.h" + +void *kmap(struct page *page) +{ + might_sleep(); + if (!PageHighMem(page)) + return page_address(page); + return kmap_high(page); +} +EXPORT_SYMBOL(kmap); + +void kunmap(struct page *page) +{ + BUG_ON(in_interrupt()); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} +EXPORT_SYMBOL(kunmap); + +void *kmap_atomic(struct page *page) +{ + unsigned int idx; + unsigned long vaddr; + void *kmap; + int type; + + pagefault_disable(); + if (!PageHighMem(page)) + return page_address(page); + +#ifdef CONFIG_DEBUG_HIGHMEM + /* + * There is no cache coherency issue when non VIVT, so force the + * dedicated kmap usage for better debugging purposes in that case. + */ + if (!cache_is_vivt()) + kmap = NULL; + else +#endif + kmap = kmap_high_get(page); + if (kmap) + return kmap; + + type = kmap_atomic_idx_push(); + + idx = type + KM_TYPE_NR * smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +#ifdef CONFIG_DEBUG_HIGHMEM + /* + * With debugging enabled, kunmap_atomic forces that entry to 0. + * Make sure it was indeed properly unmapped. + */ + BUG_ON(!pte_none(get_top_pte(vaddr))); +#endif + /* + * When debugging is off, kunmap_atomic leaves the previous mapping + * in place, so the contained TLB flush ensures the TLB is updated + * with the new mapping. + */ + set_top_pte(vaddr, mk_pte(page, kmap_prot)); + + return (void *)vaddr; +} +EXPORT_SYMBOL(kmap_atomic); + +void __kunmap_atomic(void *kvaddr) +{ + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + int idx, type; + + if (kvaddr >= (void *)FIXADDR_START) { + type = kmap_atomic_idx(); + idx = type + KM_TYPE_NR * smp_processor_id(); + + if (cache_is_vivt()) + __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); + set_top_pte(vaddr, __pte(0)); +#else + (void) idx; /* to kill a warning */ +#endif + kmap_atomic_idx_pop(); + } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { + /* this address was obtained through kmap_high_get() */ + kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); + } + pagefault_enable(); +} +EXPORT_SYMBOL(__kunmap_atomic); + +void *kmap_atomic_pfn(unsigned long pfn) +{ + unsigned long vaddr; + int idx, type; + + pagefault_disable(); + + type = kmap_atomic_idx_push(); + idx = type + KM_TYPE_NR * smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(!pte_none(get_top_pte(vaddr))); +#endif + set_top_pte(vaddr, pfn_pte(pfn, kmap_prot)); + + return (void *)vaddr; +} + +struct page *kmap_atomic_to_page(const void *ptr) +{ + unsigned long vaddr = (unsigned long)ptr; + + if (vaddr < FIXADDR_START) + return virt_to_page(ptr); + + return pte_page(get_top_pte(vaddr)); +} diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c new file mode 100644 index 00000000..ab88ed4f --- /dev/null +++ b/arch/arm/mm/idmap.c @@ -0,0 +1,114 @@ +#include <linux/kernel.h> + +#include <asm/cputype.h> +#include <asm/idmap.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/sections.h> +#include <asm/system_info.h> + +pgd_t *idmap_pgd; + +#ifdef CONFIG_ARM_LPAE +static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, + unsigned long prot) +{ + pmd_t *pmd; + unsigned long next; + + if (pud_none_or_clear_bad(pud) || (pud_val(*pud) & L_PGD_SWAPPER)) { + pmd = pmd_alloc_one(&init_mm, addr); + if (!pmd) { + pr_warning("Failed to allocate identity pmd.\n"); + return; + } + pud_populate(&init_mm, pud, pmd); + pmd += pmd_index(addr); + } else + pmd = pmd_offset(pud, addr); + + do { + next = pmd_addr_end(addr, end); + *pmd = __pmd((addr & PMD_MASK) | prot); + flush_pmd_entry(pmd); + } while (pmd++, addr = next, addr != end); +} +#else /* !CONFIG_ARM_LPAE */ +static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, + unsigned long prot) +{ + pmd_t *pmd = pmd_offset(pud, addr); + + addr = (addr & PMD_MASK) | prot; + pmd[0] = __pmd(addr); + addr += SECTION_SIZE; + pmd[1] = __pmd(addr); + flush_pmd_entry(pmd); +} +#endif /* CONFIG_ARM_LPAE */ + +static void idmap_add_pud(pgd_t *pgd, unsigned long addr, unsigned long end, + unsigned long prot) +{ + pud_t *pud = pud_offset(pgd, addr); + unsigned long next; + + do { + next = pud_addr_end(addr, end); + idmap_add_pmd(pud, addr, next, prot); + } while (pud++, addr = next, addr != end); +} + +static void identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned long end) +{ + unsigned long prot, next; + + prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF; + if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale()) + prot |= PMD_BIT4; + + pgd += pgd_index(addr); + do { + next = pgd_addr_end(addr, end); + idmap_add_pud(pgd, addr, next, prot); + } while (pgd++, addr = next, addr != end); +} + +extern char __idmap_text_start[], __idmap_text_end[]; + +static int __init init_static_idmap(void) +{ + phys_addr_t idmap_start, idmap_end; + + idmap_pgd = pgd_alloc(&init_mm); + if (!idmap_pgd) + return -ENOMEM; + + /* Add an identity mapping for the physical address of the section. */ + idmap_start = virt_to_phys((void *)__idmap_text_start); + idmap_end = virt_to_phys((void *)__idmap_text_end); + + pr_info("Setting up static identity map for 0x%llx - 0x%llx\n", + (long long)idmap_start, (long long)idmap_end); + identity_mapping_add(idmap_pgd, idmap_start, idmap_end); + + return 0; +} +early_initcall(init_static_idmap); + +/* + * In order to soft-boot, we need to switch to a 1:1 mapping for the + * cpu_reset functions. This will then ensure that we have predictable + * results when turning off the mmu. + */ +void setup_mm_for_reboot(void) +{ + /* Clean and invalidate L1. */ + flush_cache_all(); + + /* Switch to the identity mapping. */ + cpu_switch_mm(idmap_pgd, &init_mm); + + /* Flush the TLB. */ + local_flush_tlb_all(); +} diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c new file mode 100644 index 00000000..b7f9a479 --- /dev/null +++ b/arch/arm/mm/init.c @@ -0,0 +1,775 @@ +/* + * linux/arch/arm/mm/init.c + * + * Copyright (C) 1995-2005 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/swap.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/mman.h> +#include <linux/export.h> +#include <linux/nodemask.h> +#include <linux/initrd.h> +#include <linux/of_fdt.h> +#include <linux/highmem.h> +#include <linux/gfp.h> +#include <linux/memblock.h> +#include <linux/suspend.h> +#include <asm/mach-types.h> +#include <asm/memblock.h> +#include <asm/prom.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/sizes.h> +#include <asm/tlb.h> +#include <asm/fixmap.h> + +#include <asm/mach/arch.h> +#include <asm/mach/map.h> + +#include "mm.h" + +static unsigned long phys_initrd_start __initdata = 0; +static unsigned long phys_initrd_size __initdata = 0; + +static int __init early_initrd(char *p) +{ + unsigned long start, size; + char *endp; + + start = memparse(p, &endp); + if (*endp == ',') { + size = memparse(endp + 1, NULL); + + phys_initrd_start = start; + phys_initrd_size = size; + } + return 0; +} +early_param("initrd", early_initrd); + +static int __init parse_tag_initrd(const struct tag *tag) +{ + printk(KERN_WARNING "ATAG_INITRD is deprecated; " + "please update your bootloader.\n"); + phys_initrd_start = __virt_to_phys(tag->u.initrd.start); + phys_initrd_size = tag->u.initrd.size; + return 0; +} + +__tagtable(ATAG_INITRD, parse_tag_initrd); + +static int __init parse_tag_initrd2(const struct tag *tag) +{ + phys_initrd_start = tag->u.initrd.start; + phys_initrd_size = tag->u.initrd.size; + return 0; +} + +__tagtable(ATAG_INITRD2, parse_tag_initrd2); + +#ifdef CONFIG_OF_FLATTREE +void __init early_init_dt_setup_initrd_arch(unsigned long start, unsigned long end) +{ + phys_initrd_start = start; + phys_initrd_size = end - start; +} +#endif /* CONFIG_OF_FLATTREE */ + +/* + * This keeps memory configuration data used by a couple memory + * initialization functions, as well as show_mem() for the skipping + * of holes in the memory map. It is populated by arm_add_memory(). + */ +struct meminfo meminfo; + +void show_mem(unsigned int filter) +{ + int free = 0, total = 0, reserved = 0; + int shared = 0, cached = 0, slab = 0, i; + struct meminfo * mi = &meminfo; + + printk("Mem-info:\n"); + show_free_areas(filter); + + for_each_bank (i, mi) { + struct membank *bank = &mi->bank[i]; + unsigned int pfn1, pfn2; + struct page *page, *end; + + pfn1 = bank_pfn_start(bank); + pfn2 = bank_pfn_end(bank); + + page = pfn_to_page(pfn1); + end = pfn_to_page(pfn2 - 1) + 1; + + do { + total++; + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (PageSlab(page)) + slab++; + else if (!page_count(page)) + free++; + else + shared += page_count(page) - 1; + page++; + } while (page < end); + } + + printk("%d pages of RAM\n", total); + printk("%d free pages\n", free); + printk("%d reserved pages\n", reserved); + printk("%d slab pages\n", slab); + printk("%d pages shared\n", shared); + printk("%d pages swap cached\n", cached); +} + +static void __init find_limits(unsigned long *min, unsigned long *max_low, + unsigned long *max_high) +{ + struct meminfo *mi = &meminfo; + int i; + + /* This assumes the meminfo array is properly sorted */ + *min = bank_pfn_start(&mi->bank[0]); + for_each_bank (i, mi) + if (mi->bank[i].highmem) + break; + *max_low = bank_pfn_end(&mi->bank[i - 1]); + *max_high = bank_pfn_end(&mi->bank[mi->nr_banks - 1]); +} + +static void __init arm_bootmem_init(unsigned long start_pfn, + unsigned long end_pfn) +{ + struct memblock_region *reg; + unsigned int boot_pages; + phys_addr_t bitmap; + pg_data_t *pgdat; + /*u32 uboot_start; + u32 uboot_end; */ + /* + * Allocate the bootmem bitmap page. This must be in a region + * of memory which has already been mapped. + */ + boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn); + bitmap = memblock_alloc_base(boot_pages << PAGE_SHIFT, L1_CACHE_BYTES, + __pfn_to_phys(end_pfn)); + + /* + * Initialise the bootmem allocator, handing the + * memory banks over to bootmem. + */ + node_set_online(0); + pgdat = NODE_DATA(0); + init_bootmem_node(pgdat, __phys_to_pfn(bitmap), start_pfn, end_pfn); + + /* Free the lowmem regions from memblock into bootmem. */ + for_each_memblock(memory, reg) { + unsigned long start = memblock_region_memory_base_pfn(reg); + unsigned long end = memblock_region_memory_end_pfn(reg); + + if (end >= end_pfn) + end = end_pfn; + if (start >= end) + break; + + free_bootmem(__pfn_to_phys(start), (end - start) << PAGE_SHIFT); + } + + /* Reserve the lowmem memblock reserved regions in bootmem. */ + for_each_memblock(reserved, reg) { + unsigned long start = memblock_region_reserved_base_pfn(reg); + unsigned long end = memblock_region_reserved_end_pfn(reg); + + if (end >= end_pfn) + end = end_pfn; + if (start >= end) + break; + + reserve_bootmem(__pfn_to_phys(start), + (end - start) << PAGE_SHIFT, BOOTMEM_DEFAULT); + } + + #if 0/*not needed in kernel resume path.*/ + /* + FIXME: This is hard-coded for WM3498 EVB. + Mark u-boot used memory section as nosave. + Thus, there will be no data needs to be restore to this area + so that u-boot code/data will not be overwritten during restoring + hibernation image from u-boot cmd_swsusp.c. + */ + uboot_start = 0x3C00000; //start: 60MB + uboot_end = 0x4100000; //end: 65MB + reserve_bootmem_node(pgdat, uboot_start, uboot_end - uboot_start, + BOOTMEM_DEFAULT);//Lch + register_nosave_region(uboot_start>>PAGE_SHIFT,uboot_end>>PAGE_SHIFT);//Lch + #endif +} + +#ifdef CONFIG_ZONE_DMA + +unsigned long arm_dma_zone_size __read_mostly; +EXPORT_SYMBOL(arm_dma_zone_size); + +/* + * The DMA mask corresponding to the maximum bus address allocatable + * using GFP_DMA. The default here places no restriction on DMA + * allocations. This must be the smallest DMA mask in the system, + * so a successful GFP_DMA allocation will always satisfy this. + */ +u32 arm_dma_limit; + +static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long *hole, + unsigned long dma_size) +{ + if (size[0] <= dma_size) + return; + + size[ZONE_NORMAL] = size[0] - dma_size; + size[ZONE_DMA] = dma_size; + hole[ZONE_NORMAL] = hole[0]; + hole[ZONE_DMA] = 0; +} +#endif + +static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, + unsigned long max_high) +{ + unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; + struct memblock_region *reg; + + /* + * initialise the zones. + */ + memset(zone_size, 0, sizeof(zone_size)); + + /* + * The memory size has already been determined. If we need + * to do anything fancy with the allocation of this memory + * to the zones, now is the time to do it. + */ + zone_size[0] = max_low - min; +#ifdef CONFIG_HIGHMEM + zone_size[ZONE_HIGHMEM] = max_high - max_low; +#endif + + /* + * Calculate the size of the holes. + * holes = node_size - sum(bank_sizes) + */ + memcpy(zhole_size, zone_size, sizeof(zhole_size)); + for_each_memblock(memory, reg) { + unsigned long start = memblock_region_memory_base_pfn(reg); + unsigned long end = memblock_region_memory_end_pfn(reg); + + if (start < max_low) { + unsigned long low_end = min(end, max_low); + zhole_size[0] -= low_end - start; + } +#ifdef CONFIG_HIGHMEM + if (end > max_low) { + unsigned long high_start = max(start, max_low); + zhole_size[ZONE_HIGHMEM] -= end - high_start; + } +#endif + } + +#ifdef CONFIG_ZONE_DMA + /* + * Adjust the sizes according to any special requirements for + * this machine type. + */ + if (arm_dma_zone_size) { + arm_adjust_dma_zone(zone_size, zhole_size, + arm_dma_zone_size >> PAGE_SHIFT); + arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1; + } else + arm_dma_limit = 0xffffffff; +#endif + + free_area_init_node(0, zone_size, min, zhole_size); +} + +#ifdef CONFIG_HAVE_ARCH_PFN_VALID +int pfn_valid(unsigned long pfn) +{ + return memblock_is_memory(__pfn_to_phys(pfn)); +} +EXPORT_SYMBOL(pfn_valid); +#endif + +#ifndef CONFIG_SPARSEMEM +static void __init arm_memory_present(void) +{ +} +#else +static void __init arm_memory_present(void) +{ + struct memblock_region *reg; + + for_each_memblock(memory, reg) + memory_present(0, memblock_region_memory_base_pfn(reg), + memblock_region_memory_end_pfn(reg)); +} +#endif + +static bool arm_memblock_steal_permitted = true; + +phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) +{ + phys_addr_t phys; + + BUG_ON(!arm_memblock_steal_permitted); + + phys = memblock_alloc(size, align); + memblock_free(phys, size); + memblock_remove(phys, size); + + return phys; +} + +void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) +{ + int i; + + for (i = 0; i < mi->nr_banks; i++) + memblock_add(mi->bank[i].start, mi->bank[i].size); + + /* Register the kernel text, kernel data and initrd with memblock. */ +#ifdef CONFIG_XIP_KERNEL + memblock_reserve(__pa(_sdata), _end - _sdata); +#else + memblock_reserve(__pa(_stext), _end - _stext); +#endif +#ifdef CONFIG_BLK_DEV_INITRD + if (phys_initrd_size && + !memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) { + pr_err("INITRD: 0x%08lx+0x%08lx is not a memory region - disabling initrd\n", + phys_initrd_start, phys_initrd_size); + phys_initrd_start = phys_initrd_size = 0; + } + if (phys_initrd_size && + memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) { + pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region - disabling initrd\n", + phys_initrd_start, phys_initrd_size); + phys_initrd_start = phys_initrd_size = 0; + } + if (phys_initrd_size) { + memblock_reserve(phys_initrd_start, phys_initrd_size); + + /* Now convert initrd to virtual addresses */ + initrd_start = __phys_to_virt(phys_initrd_start); + initrd_end = initrd_start + phys_initrd_size; + } +#endif + + arm_mm_memblock_reserve(); + arm_dt_memblock_reserve(); + + /* reserve any platform specific memblock areas */ + if (mdesc->reserve) + mdesc->reserve(); + + arm_memblock_steal_permitted = false; + memblock_allow_resize(); + memblock_dump_all(); +} + +void __init bootmem_init(void) +{ + unsigned long min, max_low, max_high; + + max_low = max_high = 0; + + find_limits(&min, &max_low, &max_high); + + arm_bootmem_init(min, max_low); + + /* + * Sparsemem tries to allocate bootmem in memory_present(), + * so must be done after the fixed reservations + */ + arm_memory_present(); + + /* + * sparse_init() needs the bootmem allocator up and running. + */ + sparse_init(); + + /* + * Now free the memory - free_area_init_node needs + * the sparse mem_map arrays initialized by sparse_init() + * for memmap_init_zone(), otherwise all PFNs are invalid. + */ + arm_bootmem_free(min, max_low, max_high); + + /* + * This doesn't seem to be used by the Linux memory manager any + * more, but is used by ll_rw_block. If we can get rid of it, we + * also get rid of some of the stuff above as well. + * + * Note: max_low_pfn and max_pfn reflect the number of _pages_ in + * the system, not the maximum PFN. + */ + max_low_pfn = max_low - PHYS_PFN_OFFSET; + max_pfn = max_high - PHYS_PFN_OFFSET; +} + +static inline int free_area(unsigned long pfn, unsigned long end, char *s) +{ + unsigned int pages = 0, size = (end - pfn) << (PAGE_SHIFT - 10); + + for (; pfn < end; pfn++) { + struct page *page = pfn_to_page(pfn); + ClearPageReserved(page); + init_page_count(page); + __free_page(page); + pages++; + } + + if (size && s) + printk(KERN_INFO "Freeing %s memory: %dK\n", s, size); + + return pages; +} + +/* + * Poison init memory with an undefined instruction (ARM) or a branch to an + * undefined instruction (Thumb). + */ +static inline void poison_init_mem(void *s, size_t count) +{ + u32 *p = (u32 *)s; + for (; count != 0; count -= 4) + *p++ = 0xe7fddef0; +} + +static inline void +free_memmap(unsigned long start_pfn, unsigned long end_pfn) +{ + struct page *start_pg, *end_pg; + unsigned long pg, pgend; + + /* + * Convert start_pfn/end_pfn to a struct page pointer. + */ + start_pg = pfn_to_page(start_pfn - 1) + 1; + end_pg = pfn_to_page(end_pfn - 1) + 1; + + /* + * Convert to physical addresses, and + * round start upwards and end downwards. + */ + pg = (unsigned long)PAGE_ALIGN(__pa(start_pg)); + pgend = (unsigned long)__pa(end_pg) & PAGE_MASK; + + /* + * If there are free pages between these, + * free the section of the memmap array. + */ + if (pg < pgend) + free_bootmem(pg, pgend - pg); +} + +/* + * The mem_map array can get very big. Free the unused area of the memory map. + */ +static void __init free_unused_memmap(struct meminfo *mi) +{ + unsigned long bank_start, prev_bank_end = 0; + unsigned int i; + + /* + * This relies on each bank being in address order. + * The banks are sorted previously in bootmem_init(). + */ + for_each_bank(i, mi) { + struct membank *bank = &mi->bank[i]; + + bank_start = bank_pfn_start(bank); + +#ifdef CONFIG_SPARSEMEM + /* + * Take care not to free memmap entries that don't exist + * due to SPARSEMEM sections which aren't present. + */ + bank_start = min(bank_start, + ALIGN(prev_bank_end, PAGES_PER_SECTION)); +#else + /* + * Align down here since the VM subsystem insists that the + * memmap entries are valid from the bank start aligned to + * MAX_ORDER_NR_PAGES. + */ + bank_start = round_down(bank_start, MAX_ORDER_NR_PAGES); +#endif + /* + * If we had a previous bank, and there is a space + * between the current bank and the previous, free it. + */ + if (prev_bank_end && prev_bank_end < bank_start) + free_memmap(prev_bank_end, bank_start); + + /* + * Align up here since the VM subsystem insists that the + * memmap entries are valid from the bank end aligned to + * MAX_ORDER_NR_PAGES. + */ + prev_bank_end = ALIGN(bank_pfn_end(bank), MAX_ORDER_NR_PAGES); + } + +#ifdef CONFIG_SPARSEMEM + if (!IS_ALIGNED(prev_bank_end, PAGES_PER_SECTION)) + free_memmap(prev_bank_end, + ALIGN(prev_bank_end, PAGES_PER_SECTION)); +#endif +} + +static void __init free_highpages(void) +{ +#ifdef CONFIG_HIGHMEM + unsigned long max_low = max_low_pfn + PHYS_PFN_OFFSET; + struct memblock_region *mem, *res; + + /* set highmem page free */ + for_each_memblock(memory, mem) { + unsigned long start = memblock_region_memory_base_pfn(mem); + unsigned long end = memblock_region_memory_end_pfn(mem); + + /* Ignore complete lowmem entries */ + if (end <= max_low) + continue; + + /* Truncate partial highmem entries */ + if (start < max_low) + start = max_low; + + /* Find and exclude any reserved regions */ + for_each_memblock(reserved, res) { + unsigned long res_start, res_end; + + res_start = memblock_region_reserved_base_pfn(res); + res_end = memblock_region_reserved_end_pfn(res); + + if (res_end < start) + continue; + if (res_start < start) + res_start = start; + if (res_start > end) + res_start = end; + if (res_end > end) + res_end = end; + if (res_start != start) + totalhigh_pages += free_area(start, res_start, + NULL); + start = res_end; + if (start == end) + break; + } + + /* And now free anything which remains */ + if (start < end) + totalhigh_pages += free_area(start, end, NULL); + } + totalram_pages += totalhigh_pages; +#endif +} + +/* + * mem_init() marks the free areas in the mem_map and tells us how much + * memory is free. This is done after various parts of the system have + * claimed their memory after the kernel image. + */ +void __init mem_init(void) +{ + unsigned long reserved_pages, free_pages; + struct memblock_region *reg; + int i; +#ifdef CONFIG_HAVE_TCM + /* These pointers are filled in on TCM detection */ + extern u32 dtcm_end; + extern u32 itcm_end; +#endif + + max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map; + + /* this will put all unused low memory onto the freelists */ + free_unused_memmap(&meminfo); + + totalram_pages += free_all_bootmem(); + +#ifdef CONFIG_SA1111 + /* now that our DMA memory is actually so designated, we can free it */ + totalram_pages += free_area(PHYS_PFN_OFFSET, + __phys_to_pfn(__pa(swapper_pg_dir)), NULL); +#endif + + free_highpages(); + + reserved_pages = free_pages = 0; + + for_each_bank(i, &meminfo) { + struct membank *bank = &meminfo.bank[i]; + unsigned int pfn1, pfn2; + struct page *page, *end; + + pfn1 = bank_pfn_start(bank); + pfn2 = bank_pfn_end(bank); + + page = pfn_to_page(pfn1); + end = pfn_to_page(pfn2 - 1) + 1; + + do { + if (PageReserved(page)) + reserved_pages++; + else if (!page_count(page)) + free_pages++; + page++; + } while (page < end); + } + + /* + * Since our memory may not be contiguous, calculate the + * real number of pages we have in this system + */ + printk(KERN_INFO "Memory:"); + num_physpages = 0; + for_each_memblock(memory, reg) { + unsigned long pages = memblock_region_memory_end_pfn(reg) - + memblock_region_memory_base_pfn(reg); + num_physpages += pages; + printk(" %ldMB", pages >> (20 - PAGE_SHIFT)); + } + printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT)); + + printk(KERN_NOTICE "Memory: %luk/%luk available, %luk reserved, %luK highmem\n", + nr_free_pages() << (PAGE_SHIFT-10), + free_pages << (PAGE_SHIFT-10), + reserved_pages << (PAGE_SHIFT-10), + totalhigh_pages << (PAGE_SHIFT-10)); + +#define MLK(b, t) b, t, ((t) - (b)) >> 10 +#define MLM(b, t) b, t, ((t) - (b)) >> 20 +#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K) + + printk(KERN_NOTICE "Virtual kernel memory layout:\n" + " vector : 0x%08lx - 0x%08lx (%4ld kB)\n" +#ifdef CONFIG_HAVE_TCM + " DTCM : 0x%08lx - 0x%08lx (%4ld kB)\n" + " ITCM : 0x%08lx - 0x%08lx (%4ld kB)\n" +#endif + " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" + " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" +#ifdef CONFIG_HIGHMEM + " pkmap : 0x%08lx - 0x%08lx (%4ld MB)\n" +#endif +#ifdef CONFIG_MODULES + " modules : 0x%08lx - 0x%08lx (%4ld MB)\n" +#endif + " .text : 0x%p" " - 0x%p" " (%4d kB)\n" + " .init : 0x%p" " - 0x%p" " (%4d kB)\n" + " .data : 0x%p" " - 0x%p" " (%4d kB)\n" + " .bss : 0x%p" " - 0x%p" " (%4d kB)\n", + + MLK(UL(CONFIG_VECTORS_BASE), UL(CONFIG_VECTORS_BASE) + + (PAGE_SIZE)), +#ifdef CONFIG_HAVE_TCM + MLK(DTCM_OFFSET, (unsigned long) dtcm_end), + MLK(ITCM_OFFSET, (unsigned long) itcm_end), +#endif + MLK(FIXADDR_START, FIXADDR_TOP), + MLM(VMALLOC_START, VMALLOC_END), + MLM(PAGE_OFFSET, (unsigned long)high_memory), +#ifdef CONFIG_HIGHMEM + MLM(PKMAP_BASE, (PKMAP_BASE) + (LAST_PKMAP) * + (PAGE_SIZE)), +#endif +#ifdef CONFIG_MODULES + MLM(MODULES_VADDR, MODULES_END), +#endif + + MLK_ROUNDUP(_text, _etext), + MLK_ROUNDUP(__init_begin, __init_end), + MLK_ROUNDUP(_sdata, _edata), + MLK_ROUNDUP(__bss_start, __bss_stop)); + +#undef MLK +#undef MLM +#undef MLK_ROUNDUP + + /* + * Check boundaries twice: Some fundamental inconsistencies can + * be detected at build time already. + */ +#ifdef CONFIG_MMU + BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); + BUG_ON(TASK_SIZE > MODULES_VADDR); +#endif + +#ifdef CONFIG_HIGHMEM + BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET); + BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET); +#endif + + if (PAGE_SIZE >= 16384 && num_physpages <= 128) { + extern int sysctl_overcommit_memory; + /* + * On a machine this small we won't get + * anywhere without overcommit, so turn + * it on by default. + */ + sysctl_overcommit_memory = OVERCOMMIT_ALWAYS; + } +} + +void free_initmem(void) +{ +#ifdef CONFIG_HAVE_TCM + extern char __tcm_start, __tcm_end; + + poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start); + totalram_pages += free_area(__phys_to_pfn(__pa(&__tcm_start)), + __phys_to_pfn(__pa(&__tcm_end)), + "TCM link"); +#endif + + poison_init_mem(__init_begin, __init_end - __init_begin); + if (!machine_is_integrator() && !machine_is_cintegrator()) + totalram_pages += free_area(__phys_to_pfn(__pa(__init_begin)), + __phys_to_pfn(__pa(__init_end)), + "init"); +} + +#ifdef CONFIG_BLK_DEV_INITRD + +static int keep_initrd; + +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (!keep_initrd) { + poison_init_mem((void *)start, PAGE_ALIGN(end) - start); + totalram_pages += free_area(__phys_to_pfn(__pa(start)), + __phys_to_pfn(__pa(end)), + "initrd"); + } +} + +static int __init keepinitrd_setup(char *__unused) +{ + keep_initrd = 1; + return 1; +} + +__setup("keepinitrd", keepinitrd_setup); +#endif diff --git a/arch/arm/mm/iomap.c b/arch/arm/mm/iomap.c new file mode 100644 index 00000000..46142083 --- /dev/null +++ b/arch/arm/mm/iomap.c @@ -0,0 +1,42 @@ +/* + * linux/arch/arm/mm/iomap.c + * + * Map IO port and PCI memory spaces so that {read,write}[bwl] can + * be used to access this memory. + */ +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/ioport.h> +#include <linux/io.h> + +unsigned long vga_base; +EXPORT_SYMBOL(vga_base); + +#ifdef __io +void __iomem *ioport_map(unsigned long port, unsigned int nr) +{ + return __io(port); +} +EXPORT_SYMBOL(ioport_map); + +void ioport_unmap(void __iomem *addr) +{ +} +EXPORT_SYMBOL(ioport_unmap); +#endif + +#ifdef CONFIG_PCI +unsigned long pcibios_min_io = 0x1000; +EXPORT_SYMBOL(pcibios_min_io); + +unsigned long pcibios_min_mem = 0x01000000; +EXPORT_SYMBOL(pcibios_min_mem); + +void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ + if ((unsigned long)addr >= VMALLOC_START && + (unsigned long)addr < VMALLOC_END) + iounmap(addr); +} +EXPORT_SYMBOL(pci_iounmap); +#endif diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c new file mode 100644 index 00000000..4f55f506 --- /dev/null +++ b/arch/arm/mm/ioremap.c @@ -0,0 +1,385 @@ +/* + * linux/arch/arm/mm/ioremap.c + * + * Re-map IO memory to kernel address space so that we can access it. + * + * (C) Copyright 1995 1996 Linus Torvalds + * + * Hacked for ARM by Phil Blundell <philb@gnu.org> + * Hacked to allow all architectures to build, and various cleanups + * by Russell King + * + * This allows a driver to remap an arbitrary region of bus memory into + * virtual space. One should *only* use readl, writel, memcpy_toio and + * so on with such remapped areas. + * + * Because the ARM only has a 32-bit address space we can't address the + * whole of the (physical) PCI space at once. PCI huge-mode addressing + * allows us to circumvent this restriction by splitting PCI space into + * two 2GB chunks and mapping only one at a time into processor memory. + * We use MMU protection domains to trap any attempt to access the bank + * that is not currently mapped. (This isn't fully implemented yet.) + */ +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/io.h> + +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/cacheflush.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> +#include <asm/sizes.h> +#include <asm/system_info.h> + +#include <asm/mach/map.h> +#include "mm.h" + +int ioremap_page(unsigned long virt, unsigned long phys, + const struct mem_type *mtype) +{ + return ioremap_page_range(virt, virt + PAGE_SIZE, phys, + __pgprot(mtype->prot_pte)); +} +EXPORT_SYMBOL(ioremap_page); + +void __check_kvm_seq(struct mm_struct *mm) +{ + unsigned int seq; + + do { + seq = init_mm.context.kvm_seq; + memcpy(pgd_offset(mm, VMALLOC_START), + pgd_offset_k(VMALLOC_START), + sizeof(pgd_t) * (pgd_index(VMALLOC_END) - + pgd_index(VMALLOC_START))); + mm->context.kvm_seq = seq; + } while (seq != init_mm.context.kvm_seq); +} + +#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) +/* + * Section support is unsafe on SMP - If you iounmap and ioremap a region, + * the other CPUs will not see this change until their next context switch. + * Meanwhile, (eg) if an interrupt comes in on one of those other CPUs + * which requires the new ioremap'd region to be referenced, the CPU will + * reference the _old_ region. + * + * Note that get_vm_area_caller() allocates a guard 4K page, so we need to + * mask the size back to 1MB aligned or we will overflow in the loop below. + */ +static void unmap_area_sections(unsigned long virt, unsigned long size) +{ + unsigned long addr = virt, end = virt + (size & ~(SZ_1M - 1)); + pgd_t *pgd; + pud_t *pud; + pmd_t *pmdp; + + flush_cache_vunmap(addr, end); + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmdp = pmd_offset(pud, addr); + do { + pmd_t pmd = *pmdp; + + if (!pmd_none(pmd)) { + /* + * Clear the PMD from the page table, and + * increment the kvm sequence so others + * notice this change. + * + * Note: this is still racy on SMP machines. + */ + pmd_clear(pmdp); + init_mm.context.kvm_seq++; + + /* + * Free the page table, if there was one. + */ + if ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_TABLE) + pte_free_kernel(&init_mm, pmd_page_vaddr(pmd)); + } + + addr += PMD_SIZE; + pmdp += 2; + } while (addr < end); + + /* + * Ensure that the active_mm is up to date - we want to + * catch any use-after-iounmap cases. + */ + if (current->active_mm->context.kvm_seq != init_mm.context.kvm_seq) + __check_kvm_seq(current->active_mm); + + flush_tlb_kernel_range(virt, end); +} + +static int +remap_area_sections(unsigned long virt, unsigned long pfn, + size_t size, const struct mem_type *type) +{ + unsigned long addr = virt, end = virt + size; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + /* + * Remove and free any PTE-based mapping, and + * sync the current kernel mapping. + */ + unmap_area_sections(virt, size); + + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + do { + pmd[0] = __pmd(__pfn_to_phys(pfn) | type->prot_sect); + pfn += SZ_1M >> PAGE_SHIFT; + pmd[1] = __pmd(__pfn_to_phys(pfn) | type->prot_sect); + pfn += SZ_1M >> PAGE_SHIFT; + flush_pmd_entry(pmd); + + addr += PMD_SIZE; + pmd += 2; + } while (addr < end); + + return 0; +} + +static int +remap_area_supersections(unsigned long virt, unsigned long pfn, + size_t size, const struct mem_type *type) +{ + unsigned long addr = virt, end = virt + size; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + /* + * Remove and free any PTE-based mapping, and + * sync the current kernel mapping. + */ + unmap_area_sections(virt, size); + + pgd = pgd_offset_k(virt); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + do { + unsigned long super_pmd_val, i; + + super_pmd_val = __pfn_to_phys(pfn) | type->prot_sect | + PMD_SECT_SUPER; + super_pmd_val |= ((pfn >> (32 - PAGE_SHIFT)) & 0xf) << 20; + + for (i = 0; i < 8; i++) { + pmd[0] = __pmd(super_pmd_val); + pmd[1] = __pmd(super_pmd_val); + flush_pmd_entry(pmd); + + addr += PMD_SIZE; + pmd += 2; + } + + pfn += SUPERSECTION_SIZE >> PAGE_SHIFT; + } while (addr < end); + + return 0; +} +#endif + +void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, + unsigned long offset, size_t size, unsigned int mtype, void *caller) +{ + const struct mem_type *type; + int err; + unsigned long addr; + struct vm_struct * area; + +#ifndef CONFIG_ARM_LPAE + /* + * High mappings must be supersection aligned + */ + if (pfn >= 0x100000 && (__pfn_to_phys(pfn) & ~SUPERSECTION_MASK)) + return NULL; +#endif + + type = get_mem_type(mtype); + if (!type) + return NULL; + + /* + * Page align the mapping size, taking account of any offset. + */ + size = PAGE_ALIGN(offset + size); + + /* + * Try to reuse one of the static mapping whenever possible. + */ + read_lock(&vmlist_lock); + for (area = vmlist; area; area = area->next) { + if (!size || (sizeof(phys_addr_t) == 4 && pfn >= 0x100000)) + break; + if (!(area->flags & VM_ARM_STATIC_MAPPING)) + continue; + if ((area->flags & VM_ARM_MTYPE_MASK) != VM_ARM_MTYPE(mtype)) + continue; + if (__phys_to_pfn(area->phys_addr) > pfn || + __pfn_to_phys(pfn) + size-1 > area->phys_addr + area->size-1) + continue; + /* we can drop the lock here as we know *area is static */ + read_unlock(&vmlist_lock); + addr = (unsigned long)area->addr; + addr += __pfn_to_phys(pfn) - area->phys_addr; + return (void __iomem *) (offset + addr); + } + read_unlock(&vmlist_lock); + + /* + * Don't allow RAM to be mapped - this causes problems with ARMv6+ + */ + if (WARN_ON(pfn_valid(pfn))) + return NULL; + + area = get_vm_area_caller(size, VM_IOREMAP, caller); + if (!area) + return NULL; + addr = (unsigned long)area->addr; + +#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) + if (DOMAIN_IO == 0 && + (((cpu_architecture() >= CPU_ARCH_ARMv6) && (get_cr() & CR_XP)) || + cpu_is_xsc3()) && pfn >= 0x100000 && + !((__pfn_to_phys(pfn) | size | addr) & ~SUPERSECTION_MASK)) { + area->flags |= VM_ARM_SECTION_MAPPING; + err = remap_area_supersections(addr, pfn, size, type); + } else if (!((__pfn_to_phys(pfn) | size | addr) & ~PMD_MASK)) { + area->flags |= VM_ARM_SECTION_MAPPING; + err = remap_area_sections(addr, pfn, size, type); + } else +#endif + err = ioremap_page_range(addr, addr + size, __pfn_to_phys(pfn), + __pgprot(type->prot_pte)); + + if (err) { + vunmap((void *)addr); + return NULL; + } + + flush_cache_vmap(addr, addr + size); + return (void __iomem *) (offset + addr); +} + +void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size, + unsigned int mtype, void *caller) +{ + unsigned long last_addr; + unsigned long offset = phys_addr & ~PAGE_MASK; + unsigned long pfn = __phys_to_pfn(phys_addr); + + /* + * Don't allow wraparound or zero size + */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; + + return __arm_ioremap_pfn_caller(pfn, offset, size, mtype, + caller); +} + +/* + * Remap an arbitrary physical address space into the kernel virtual + * address space. Needed when the kernel wants to access high addresses + * directly. + * + * NOTE! We need to allow non-page-aligned mappings too: we will obviously + * have to convert them into an offset in a page-aligned mapping, but the + * caller shouldn't need to know that small detail. + */ +void __iomem * +__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, + unsigned int mtype) +{ + return __arm_ioremap_pfn_caller(pfn, offset, size, mtype, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(__arm_ioremap_pfn); + +void __iomem * (*arch_ioremap_caller)(unsigned long, size_t, + unsigned int, void *) = + __arm_ioremap_caller; + +void __iomem * +__arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype) +{ + return arch_ioremap_caller(phys_addr, size, mtype, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(__arm_ioremap); + +/* + * Remap an arbitrary physical address space into the kernel virtual + * address space as memory. Needed when the kernel wants to execute + * code in external memory. This is needed for reprogramming source + * clocks that would affect normal memory for example. Please see + * CONFIG_GENERIC_ALLOCATOR for allocating external memory. + */ +void __iomem * +__arm_ioremap_exec(unsigned long phys_addr, size_t size, bool cached) +{ + unsigned int mtype; + + if (cached) + mtype = MT_MEMORY; + else + mtype = MT_MEMORY_NONCACHED; + + return __arm_ioremap_caller(phys_addr, size, mtype, + __builtin_return_address(0)); +} + +void __iounmap(volatile void __iomem *io_addr) +{ + void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr); + struct vm_struct *vm; + + read_lock(&vmlist_lock); + for (vm = vmlist; vm; vm = vm->next) { + if (vm->addr > addr) + break; + if (!(vm->flags & VM_IOREMAP)) + continue; + /* If this is a static mapping we must leave it alone */ + if ((vm->flags & VM_ARM_STATIC_MAPPING) && + (vm->addr <= addr) && (vm->addr + vm->size > addr)) { + read_unlock(&vmlist_lock); + return; + } +#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) + /* + * If this is a section based mapping we need to handle it + * specially as the VM subsystem does not know how to handle + * such a beast. + */ + if ((vm->addr == addr) && + (vm->flags & VM_ARM_SECTION_MAPPING)) { + unmap_area_sections((unsigned long)vm->addr, vm->size); + break; + } +#endif + } + read_unlock(&vmlist_lock); + + vunmap(addr); +} + +void (*arch_iounmap)(volatile void __iomem *) = __iounmap; + +void __arm_iounmap(volatile void __iomem *io_addr) +{ + arch_iounmap(io_addr); +} +EXPORT_SYMBOL(__arm_iounmap); diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h new file mode 100644 index 00000000..27f4a619 --- /dev/null +++ b/arch/arm/mm/mm.h @@ -0,0 +1,71 @@ +#ifdef CONFIG_MMU + +/* the upper-most page table pointer */ +extern pmd_t *top_pmd; + +/* + * 0xffff8000 to 0xffffffff is reserved for any ARM architecture + * specific hacks for copying pages efficiently, while 0xffff4000 + * is reserved for VIPT aliasing flushing by generic code. + * + * Note that we don't allow VIPT aliasing caches with SMP. + */ +#define COPYPAGE_MINICACHE 0xffff8000 +#define COPYPAGE_V6_FROM 0xffff8000 +#define COPYPAGE_V6_TO 0xffffc000 +/* PFN alias flushing, for VIPT caches */ +#define FLUSH_ALIAS_START 0xffff4000 + +static inline void set_top_pte(unsigned long va, pte_t pte) +{ + pte_t *ptep = pte_offset_kernel(top_pmd, va); + set_pte_ext(ptep, pte, 0); + local_flush_tlb_kernel_page(va); +} + +static inline pte_t get_top_pte(unsigned long va) +{ + pte_t *ptep = pte_offset_kernel(top_pmd, va); + return *ptep; +} + +static inline pmd_t *pmd_off_k(unsigned long virt) +{ + return pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt); +} + +struct mem_type { + pteval_t prot_pte; + pmdval_t prot_l1; + pmdval_t prot_sect; + unsigned int domain; +}; + +const struct mem_type *get_mem_type(unsigned int type); + +extern void __flush_dcache_page(struct address_space *mapping, struct page *page); + +/* + * ARM specific vm_struct->flags bits. + */ + +/* (super)section-mapped I/O regions used by ioremap()/iounmap() */ +#define VM_ARM_SECTION_MAPPING 0x80000000 + +/* permanent static mappings from iotable_init() */ +#define VM_ARM_STATIC_MAPPING 0x40000000 + +/* mapping type (attributes) for permanent static mappings */ +#define VM_ARM_MTYPE(mt) ((mt) << 20) +#define VM_ARM_MTYPE_MASK (0x1f << 20) + +#endif + +#ifdef CONFIG_ZONE_DMA +extern u32 arm_dma_limit; +#else +#define arm_dma_limit ((u32)~0) +#endif + +void __init bootmem_init(void); +void arm_mm_memblock_reserve(void); diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c new file mode 100644 index 00000000..ce8cb197 --- /dev/null +++ b/arch/arm/mm/mmap.c @@ -0,0 +1,322 @@ +/* + * linux/arch/arm/mm/mmap.c + */ +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/shm.h> +#include <linux/sched.h> +#include <linux/io.h> +#include <linux/personality.h> +#include <linux/random.h> +#include <asm/cachetype.h> + +static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr, + unsigned long pgoff) +{ + unsigned long base = addr & ~(SHMLBA-1); + unsigned long off = (pgoff << PAGE_SHIFT) & (SHMLBA-1); + + if (base + off <= addr) + return base + off; + + return base - off; +} + +#define COLOUR_ALIGN(addr,pgoff) \ + ((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \ + (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1))) + +/* gap between mmap and stack */ +#define MIN_GAP (128*1024*1024UL) +#define MAX_GAP ((TASK_SIZE)/6*5) + +static int mmap_is_legacy(void) +{ + if (current->personality & ADDR_COMPAT_LAYOUT) + return 1; + + if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + return 1; + + return sysctl_legacy_va_layout; +} + +static unsigned long mmap_base(unsigned long rnd) +{ + unsigned long gap = rlimit(RLIMIT_STACK); + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return PAGE_ALIGN(TASK_SIZE - gap - rnd); +} + +/* + * We need to ensure that shared mappings are correctly aligned to + * avoid aliasing issues with VIPT caches. We need to ensure that + * a specific page of an object is always mapped at a multiple of + * SHMLBA bytes. + * + * We unconditionally provide this function for all cases, however + * in the VIVT case, we optimise out the alignment rules. + */ +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start_addr; + int do_align = 0; + int aliasing = cache_is_vipt_aliasing(); + + /* + * We only need to do colour alignment if either the I or D + * caches alias. + */ + if (aliasing) + do_align = filp || (flags & MAP_SHARED); + + /* + * We enforce the MAP_FIXED case. + */ + if (flags & MAP_FIXED) { + if (aliasing && flags & MAP_SHARED && + (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) + return -EINVAL; + return addr; + } + + if (len > TASK_SIZE) + return -ENOMEM; + + if (addr) { + if (do_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + if (len > mm->cached_hole_size) { + start_addr = addr = mm->free_area_cache; + } else { + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + } + +full_search: + if (do_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (TASK_SIZE - len < addr) { + /* + * Start a new search - just in case we missed + * some holes. + */ + if (start_addr != TASK_UNMAPPED_BASE) { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; + } + if (!vma || addr + len <= vma->vm_start) { + /* + * Remember the place where we stopped the search: + */ + mm->free_area_cache = addr + len; + return addr; + } + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + addr = vma->vm_end; + if (do_align) + addr = COLOUR_ALIGN(addr, pgoff); + } +} + +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long addr = addr0; + int do_align = 0; + int aliasing = cache_is_vipt_aliasing(); + + /* + * We only need to do colour alignment if either the I or D + * caches alias. + */ + if (aliasing) + do_align = filp || (flags & MAP_SHARED); + + /* requested length too big for entire address space */ + if (len > TASK_SIZE) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (aliasing && flags & MAP_SHARED && + (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) + return -EINVAL; + return addr; + } + + /* requesting a specific address */ + if (addr) { + if (do_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + /* check if free_area_cache is useful for us */ + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested address hole */ + addr = mm->free_area_cache; + if (do_align) { + unsigned long base = COLOUR_ALIGN_DOWN(addr - len, pgoff); + addr = base + len; + } + + /* make sure it can fit in the remaining address space */ + if (addr > len) { + vma = find_vma(mm, addr-len); + if (!vma || addr <= vma->vm_start) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr-len); + } + + if (mm->mmap_base < len) + goto bottomup; + + addr = mm->mmap_base - len; + if (do_align) + addr = COLOUR_ALIGN_DOWN(addr, pgoff); + + do { + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (!vma || addr+len <= vma->vm_start) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + + /* remember the largest hole we saw so far */ + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = vma->vm_start - len; + if (do_align) + addr = COLOUR_ALIGN_DOWN(addr, pgoff); + } while (len < vma->vm_start); + +bottomup: + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->cached_hole_size = ~0UL; + mm->free_area_cache = TASK_UNMAPPED_BASE; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + + return addr; +} + +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + unsigned long random_factor = 0UL; + + /* 8 bits of randomness in 20 address space bits */ + if ((current->flags & PF_RANDOMIZE) && + !(current->personality & ADDR_NO_RANDOMIZE)) + random_factor = (get_random_int() % (1 << 8)) << PAGE_SHIFT; + + if (mmap_is_legacy()) { + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + mm->mmap_base = mmap_base(random_factor); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +} + +/* + * You really shouldn't be using read() or write() on /dev/mem. This + * might go away in the future. + */ +int valid_phys_addr_range(unsigned long addr, size_t size) +{ + if (addr < PHYS_OFFSET) + return 0; + if (addr + size > __pa(high_memory - 1) + 1) + return 0; + + return 1; +} + +/* + * We don't use supersection mappings for mmap() on /dev/mem, which + * means that we can't map the memory area above the 4G barrier into + * userspace. + */ +int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) +{ + return !(pfn + (size >> PAGE_SHIFT) > 0x00100000); +} + +#ifdef CONFIG_STRICT_DEVMEM + +#include <linux/ioport.h> + +/* + * devmem_is_allowed() checks to see if /dev/mem access to a certain + * address is valid. The argument is a physical page number. + * We mimic x86 here by disallowing access to system RAM as well as + * device-exclusive MMIO regions. This effectively disable read()/write() + * on /dev/mem. + */ +int devmem_is_allowed(unsigned long pfn) +{ + if (iomem_is_exclusive(pfn << PAGE_SHIFT)) + return 0; + if (!page_is_ram(pfn)) + return 1; + return 0; +} + +#endif diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c new file mode 100644 index 00000000..2dacda29 --- /dev/null +++ b/arch/arm/mm/mmu.c @@ -0,0 +1,1250 @@ +/* + * linux/arch/arm/mm/mmu.c + * + * Copyright (C) 1995-2005 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/mman.h> +#include <linux/nodemask.h> +#include <linux/memblock.h> +#include <linux/fs.h> +#include <linux/vmalloc.h> + +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/sections.h> +#include <asm/cachetype.h> +#include <asm/setup.h> +#include <asm/sizes.h> +#include <asm/smp_plat.h> +#include <asm/tlb.h> +#include <asm/highmem.h> +#include <asm/system_info.h> +#include <asm/traps.h> + +#include <asm/mach/arch.h> +#include <asm/mach/map.h> + +#include "mm.h" + +/* + * empty_zero_page is a special page that is used for + * zero-initialized data and COW. + */ +struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); + +/* + * The pmd table for the upper-most set of pages. + */ +pmd_t *top_pmd; + +#define CPOLICY_UNCACHED 0 +#define CPOLICY_BUFFERED 1 +#define CPOLICY_WRITETHROUGH 2 +#define CPOLICY_WRITEBACK 3 +#define CPOLICY_WRITEALLOC 4 + +static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK; +static unsigned int ecc_mask __initdata = 0; +pgprot_t pgprot_user; +pgprot_t pgprot_kernel; + +EXPORT_SYMBOL(pgprot_user); +EXPORT_SYMBOL(pgprot_kernel); + +struct cachepolicy { + const char policy[16]; + unsigned int cr_mask; + pmdval_t pmd; + pteval_t pte; +}; + +static struct cachepolicy cache_policies[] __initdata = { + { + .policy = "uncached", + .cr_mask = CR_W|CR_C, + .pmd = PMD_SECT_UNCACHED, + .pte = L_PTE_MT_UNCACHED, + }, { + .policy = "buffered", + .cr_mask = CR_C, + .pmd = PMD_SECT_BUFFERED, + .pte = L_PTE_MT_BUFFERABLE, + }, { + .policy = "writethrough", + .cr_mask = 0, + .pmd = PMD_SECT_WT, + .pte = L_PTE_MT_WRITETHROUGH, + }, { + .policy = "writeback", + .cr_mask = 0, + .pmd = PMD_SECT_WB, + .pte = L_PTE_MT_WRITEBACK, + }, { + .policy = "writealloc", + .cr_mask = 0, + .pmd = PMD_SECT_WBWA, + .pte = L_PTE_MT_WRITEALLOC, + } +}; + +/* + * These are useful for identifying cache coherency + * problems by allowing the cache or the cache and + * writebuffer to be turned off. (Note: the write + * buffer should not be on and the cache off). + */ +static int __init early_cachepolicy(char *p) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cache_policies); i++) { + int len = strlen(cache_policies[i].policy); + + if (memcmp(p, cache_policies[i].policy, len) == 0) { + cachepolicy = i; + cr_alignment &= ~cache_policies[i].cr_mask; + cr_no_alignment &= ~cache_policies[i].cr_mask; + break; + } + } + if (i == ARRAY_SIZE(cache_policies)) + printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n"); + /* + * This restriction is partly to do with the way we boot; it is + * unpredictable to have memory mapped using two different sets of + * memory attributes (shared, type, and cache attribs). We can not + * change these attributes once the initial assembly has setup the + * page tables. + */ + if (cpu_architecture() >= CPU_ARCH_ARMv6) { + printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n"); + cachepolicy = CPOLICY_WRITEBACK; + } + flush_cache_all(); + set_cr(cr_alignment); + return 0; +} +early_param("cachepolicy", early_cachepolicy); + +static int __init early_nocache(char *__unused) +{ + char *p = "buffered"; + printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p); + early_cachepolicy(p); + return 0; +} +early_param("nocache", early_nocache); + +static int __init early_nowrite(char *__unused) +{ + char *p = "uncached"; + printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p); + early_cachepolicy(p); + return 0; +} +early_param("nowb", early_nowrite); + +#ifndef CONFIG_ARM_LPAE +static int __init early_ecc(char *p) +{ + if (memcmp(p, "on", 2) == 0) + ecc_mask = PMD_PROTECTION; + else if (memcmp(p, "off", 3) == 0) + ecc_mask = 0; + return 0; +} +early_param("ecc", early_ecc); +#endif + +static int __init noalign_setup(char *__unused) +{ + cr_alignment &= ~CR_A; + cr_no_alignment &= ~CR_A; + set_cr(cr_alignment); + return 1; +} +__setup("noalign", noalign_setup); + +#ifndef CONFIG_SMP +void adjust_cr(unsigned long mask, unsigned long set) +{ + unsigned long flags; + + mask &= ~CR_A; + + set &= mask; + + local_irq_save(flags); + + cr_no_alignment = (cr_no_alignment & ~mask) | set; + cr_alignment = (cr_alignment & ~mask) | set; + + set_cr((get_cr() & ~mask) | set); + + local_irq_restore(flags); +} +#endif + +#define PROT_PTE_DEVICE L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN +#define PROT_SECT_DEVICE PMD_TYPE_SECT|PMD_SECT_AP_WRITE + +static struct mem_type mem_types[] = { + [MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED | + L_PTE_SHARED, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE | PMD_SECT_S, + .domain = DOMAIN_IO, + }, + [MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE, + .domain = DOMAIN_IO, + }, + [MT_DEVICE_CACHED] = { /* ioremap_cached */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB, + .domain = DOMAIN_IO, + }, + [MT_DEVICE_WC] = { /* ioremap_wc */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE, + .domain = DOMAIN_IO, + }, + [MT_UNCACHED] = { + .prot_pte = PROT_PTE_DEVICE, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, + .domain = DOMAIN_IO, + }, + [MT_CACHECLEAN] = { + .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, + .domain = DOMAIN_KERNEL, + }, +#ifndef CONFIG_ARM_LPAE + [MT_MINICLEAN] = { + .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE, + .domain = DOMAIN_KERNEL, + }, +#endif + [MT_LOW_VECTORS] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_RDONLY, + .prot_l1 = PMD_TYPE_TABLE, + .domain = DOMAIN_USER, + }, + [MT_HIGH_VECTORS] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_USER | L_PTE_RDONLY, + .prot_l1 = PMD_TYPE_TABLE, + .domain = DOMAIN_USER, + }, + [MT_MEMORY] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, + .domain = DOMAIN_KERNEL, + }, + [MT_ROM] = { + .prot_sect = PMD_TYPE_SECT, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_NONCACHED] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_MT_BUFFERABLE, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_DTCM] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_ITCM] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, + .prot_l1 = PMD_TYPE_TABLE, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_SO] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_MT_UNCACHED, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S | + PMD_SECT_UNCACHED | PMD_SECT_XN, + .domain = DOMAIN_KERNEL, + }, +}; + +const struct mem_type *get_mem_type(unsigned int type) +{ + return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL; +} +EXPORT_SYMBOL(get_mem_type); + +/* + * Adjust the PMD section entries according to the CPU in use. + */ +static void __init build_mem_type_table(void) +{ + struct cachepolicy *cp; + unsigned int cr = get_cr(); + pteval_t user_pgprot, kern_pgprot, vecs_pgprot; + int cpu_arch = cpu_architecture(); + int i; + + if (cpu_arch < CPU_ARCH_ARMv6) { +#if defined(CONFIG_CPU_DCACHE_DISABLE) + if (cachepolicy > CPOLICY_BUFFERED) + cachepolicy = CPOLICY_BUFFERED; +#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH) + if (cachepolicy > CPOLICY_WRITETHROUGH) + cachepolicy = CPOLICY_WRITETHROUGH; +#endif + } + if (cpu_arch < CPU_ARCH_ARMv5) { + if (cachepolicy >= CPOLICY_WRITEALLOC) + cachepolicy = CPOLICY_WRITEBACK; + ecc_mask = 0; + } + if (is_smp()) +#if defined(CONFIG_ARCH_WMT_SMP_CACHEPOLICY_WRITEBACK) + cachepolicy = CPOLICY_WRITEBACK; +#else + cachepolicy = CPOLICY_WRITEALLOC; +#endif + + /* + * Strip out features not present on earlier architectures. + * Pre-ARMv5 CPUs don't have TEX bits. Pre-ARMv6 CPUs or those + * without extended page tables don't have the 'Shared' bit. + */ + if (cpu_arch < CPU_ARCH_ARMv5) + for (i = 0; i < ARRAY_SIZE(mem_types); i++) + mem_types[i].prot_sect &= ~PMD_SECT_TEX(7); + if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3()) + for (i = 0; i < ARRAY_SIZE(mem_types); i++) + mem_types[i].prot_sect &= ~PMD_SECT_S; + + /* + * ARMv5 and lower, bit 4 must be set for page tables (was: cache + * "update-able on write" bit on ARM610). However, Xscale and + * Xscale3 require this bit to be cleared. + */ + if (cpu_is_xscale() || cpu_is_xsc3()) { + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { + mem_types[i].prot_sect &= ~PMD_BIT4; + mem_types[i].prot_l1 &= ~PMD_BIT4; + } + } else if (cpu_arch < CPU_ARCH_ARMv6) { + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { + if (mem_types[i].prot_l1) + mem_types[i].prot_l1 |= PMD_BIT4; + if (mem_types[i].prot_sect) + mem_types[i].prot_sect |= PMD_BIT4; + } + } + + /* + * Mark the device areas according to the CPU/architecture. + */ + if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) { + if (!cpu_is_xsc3()) { + /* + * Mark device regions on ARMv6+ as execute-never + * to prevent speculative instruction fetches. + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN; + } + if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { + /* + * For ARMv7 with TEX remapping, + * - shared device is SXCB=1100 + * - nonshared device is SXCB=0100 + * - write combine device mem is SXCB=0001 + * (Uncached Normal memory) + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1); + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE; + } else if (cpu_is_xsc3()) { + /* + * For Xscale3, + * - shared device is TEXCB=00101 + * - nonshared device is TEXCB=01000 + * - write combine device mem is TEXCB=00100 + * (Inner/Outer Uncacheable in xsc3 parlance) + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); + } else { + /* + * For ARMv6 and ARMv7 without TEX remapping, + * - shared device is TEXCB=00001 + * - nonshared device is TEXCB=01000 + * - write combine device mem is TEXCB=00100 + * (Uncached Normal in ARMv6 parlance). + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); + } + } else { + /* + * On others, write combining is "Uncached/Buffered" + */ + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE; + } + + /* + * Now deal with the memory-type mappings + */ + cp = &cache_policies[cachepolicy]; + vecs_pgprot = kern_pgprot = user_pgprot = cp->pte; + + /* + * Only use write-through for non-SMP systems + */ + if (!is_smp() && cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH) + vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte; + + /* + * Enable CPU-specific coherency if supported. + * (Only available on XSC3 at the moment.) + */ + if (arch_is_coherent() && cpu_is_xsc3()) { + mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; + } + /* + * ARMv6 and above have extended page tables. + */ + if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) { +#ifndef CONFIG_ARM_LPAE + /* + * Mark cache clean areas and XIP ROM read only + * from SVC mode and no access from userspace. + */ + mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; +#endif + + if (is_smp()) { + /* + * Mark memory with the "shared" attribute + * for SMP systems + */ + user_pgprot |= L_PTE_SHARED; + kern_pgprot |= L_PTE_SHARED; + vecs_pgprot |= L_PTE_SHARED; + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S; + mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED; + mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S; + mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; + } + } + + /* + * Non-cacheable Normal - intended for memory areas that must + * not cause dirty cache line writebacks when used + */ + if (cpu_arch >= CPU_ARCH_ARMv6) { + if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { + /* Non-cacheable Normal is XCB = 001 */ + mem_types[MT_MEMORY_NONCACHED].prot_sect |= + PMD_SECT_BUFFERED; + } else { + /* For both ARMv6 and non-TEX-remapping ARMv7 */ + mem_types[MT_MEMORY_NONCACHED].prot_sect |= + PMD_SECT_TEX(1); + } + } else { + mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE; + } + +#ifdef CONFIG_ARM_LPAE + /* + * Do not generate access flag faults for the kernel mappings. + */ + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { + mem_types[i].prot_pte |= PTE_EXT_AF; + if (mem_types[i].prot_sect) + mem_types[i].prot_sect |= PMD_SECT_AF; + } + kern_pgprot |= PTE_EXT_AF; + vecs_pgprot |= PTE_EXT_AF; +#endif + + for (i = 0; i < 16; i++) { + unsigned long v = pgprot_val(protection_map[i]); + protection_map[i] = __pgprot(v | user_pgprot); + } + + mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot; + mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot; + + pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot); + pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | + L_PTE_DIRTY | kern_pgprot); + + mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask; + mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; + mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask; + mem_types[MT_ROM].prot_sect |= cp->pmd; + + switch (cp->pmd) { + case PMD_SECT_WT: + mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT; + break; + case PMD_SECT_WB: + case PMD_SECT_WBWA: + mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB; + break; + } + printk("Memory policy: ECC %sabled, Data cache %s\n", + ecc_mask ? "en" : "dis", cp->policy); + + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { + struct mem_type *t = &mem_types[i]; + if (t->prot_l1) + t->prot_l1 |= PMD_DOMAIN(t->domain); + if (t->prot_sect) + t->prot_sect |= PMD_DOMAIN(t->domain); + } +} + +#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ + if (!pfn_valid(pfn)) + return pgprot_noncached(vma_prot); + else if (file->f_flags & O_SYNC) + return pgprot_writecombine(vma_prot); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); +#endif + +#define vectors_base() (vectors_high() ? 0xffff0000 : 0) + +static void __init *early_alloc_aligned(unsigned long sz, unsigned long align) +{ + void *ptr = __va(memblock_alloc(sz, align)); + memset(ptr, 0, sz); + return ptr; +} + +static void __init *early_alloc(unsigned long sz) +{ + return early_alloc_aligned(sz, sz); +} + +static pte_t * __init early_pte_alloc(pmd_t *pmd) +{ + if (pmd_none(*pmd) || pmd_bad(*pmd)) + return early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE); + return pmd_page_vaddr(*pmd); +} + +static void __init early_pte_install(pmd_t *pmd, pte_t *pte, unsigned long prot) +{ + __pmd_populate(pmd, __pa(pte), prot); + BUG_ON(pmd_bad(*pmd)); +} + +static pte_t * __init early_pte_alloc_and_install(pmd_t *pmd, + unsigned long addr, unsigned long prot) +{ + if (pmd_none(*pmd)) { + pte_t *pte = early_pte_alloc(pmd); + early_pte_install(pmd, pte, prot); + } + BUG_ON(pmd_bad(*pmd)); + return pte_offset_kernel(pmd, addr); +} + +static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, + unsigned long end, unsigned long pfn, + const struct mem_type *type) +{ + pte_t *start_pte = early_pte_alloc(pmd); + pte_t *pte = start_pte + pte_index(addr); + + /* If replacing a section mapping, the whole section must be replaced */ + BUG_ON(pmd_bad(*pmd) && ((addr | end) & ~PMD_MASK)); + + do { + set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0); + pfn++; + } while (pte++, addr += PAGE_SIZE, addr != end); + early_pte_install(pmd, start_pte, type->prot_l1); +} + +static void __init alloc_init_section(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type, + bool force_pages) +{ + pmd_t *pmd = pmd_offset(pud, addr); + + /* + * Try a section mapping - end, addr and phys must all be aligned + * to a section boundary. Note that PMDs refer to the individual + * L1 entries, whereas PGDs refer to a group of L1 entries making + * up one logical pointer to an L2 table. + */ + if (((addr | end | phys) & ~SECTION_MASK) == 0 && !force_pages) { + pmd_t *p = pmd; + +#ifndef CONFIG_ARM_LPAE + if (addr & SECTION_SIZE) + pmd++; +#endif + + do { + *pmd = __pmd(phys | type->prot_sect); + phys += SECTION_SIZE; + } while (pmd++, addr += SECTION_SIZE, addr != end); + + flush_pmd_entry(p); + } else { + /* + * No need to loop; pte's aren't interested in the + * individual L1 entries. + */ + alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type); + } +} + +static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, + unsigned long end, unsigned long phys, const struct mem_type *type, + bool force_pages) +{ + pud_t *pud = pud_offset(pgd, addr); + unsigned long next; + + do { + next = pud_addr_end(addr, end); + alloc_init_section(pud, addr, next, phys, type, force_pages); + phys += next - addr; + } while (pud++, addr = next, addr != end); +} + +#ifndef CONFIG_ARM_LPAE +static void __init create_36bit_mapping(struct map_desc *md, + const struct mem_type *type) +{ + unsigned long addr, length, end; + phys_addr_t phys; + pgd_t *pgd; + + addr = md->virtual; + phys = __pfn_to_phys(md->pfn); + length = PAGE_ALIGN(md->length); + + if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) { + printk(KERN_ERR "MM: CPU does not support supersection " + "mapping for 0x%08llx at 0x%08lx\n", + (long long)__pfn_to_phys((u64)md->pfn), addr); + return; + } + + /* N.B. ARMv6 supersections are only defined to work with domain 0. + * Since domain assignments can in fact be arbitrary, the + * 'domain == 0' check below is required to insure that ARMv6 + * supersections are only allocated for domain 0 regardless + * of the actual domain assignments in use. + */ + if (type->domain) { + printk(KERN_ERR "MM: invalid domain in supersection " + "mapping for 0x%08llx at 0x%08lx\n", + (long long)__pfn_to_phys((u64)md->pfn), addr); + return; + } + + if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) { + printk(KERN_ERR "MM: cannot create mapping for 0x%08llx" + " at 0x%08lx invalid alignment\n", + (long long)__pfn_to_phys((u64)md->pfn), addr); + return; + } + + /* + * Shift bits [35:32] of address into bits [23:20] of PMD + * (See ARMv6 spec). + */ + phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20); + + pgd = pgd_offset_k(addr); + end = addr + length; + do { + pud_t *pud = pud_offset(pgd, addr); + pmd_t *pmd = pmd_offset(pud, addr); + int i; + + for (i = 0; i < 16; i++) + *pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER); + + addr += SUPERSECTION_SIZE; + phys += SUPERSECTION_SIZE; + pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT; + } while (addr != end); +} +#endif /* !CONFIG_ARM_LPAE */ + +/* + * Create the page directory entries and any necessary + * page tables for the mapping specified by `md'. We + * are able to cope here with varying sizes and address + * offsets, and we take full advantage of sections and + * supersections. + */ +static void __init create_mapping(struct map_desc *md, bool force_pages) +{ + unsigned long addr, length, end; + phys_addr_t phys; + const struct mem_type *type; + pgd_t *pgd; + + if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) { + printk(KERN_WARNING "BUG: not creating mapping for 0x%08llx" + " at 0x%08lx in user region\n", + (long long)__pfn_to_phys((u64)md->pfn), md->virtual); + return; + } + + if ((md->type == MT_DEVICE || md->type == MT_ROM) && + md->virtual >= PAGE_OFFSET && + (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) { + printk(KERN_WARNING "BUG: mapping for 0x%08llx" + " at 0x%08lx out of vmalloc space\n", + (long long)__pfn_to_phys((u64)md->pfn), md->virtual); + } + + type = &mem_types[md->type]; + +#ifndef CONFIG_ARM_LPAE + /* + * Catch 36-bit addresses + */ + if (md->pfn >= 0x100000) { + create_36bit_mapping(md, type); + return; + } +#endif + + addr = md->virtual & PAGE_MASK; + phys = __pfn_to_phys(md->pfn); + length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK)); + + if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) { + printk(KERN_WARNING "BUG: map for 0x%08llx at 0x%08lx can not " + "be mapped using pages, ignoring.\n", + (long long)__pfn_to_phys(md->pfn), addr); + return; + } + + pgd = pgd_offset_k(addr); + end = addr + length; + do { + unsigned long next = pgd_addr_end(addr, end); + + alloc_init_pud(pgd, addr, next, phys, type, force_pages); + + phys += next - addr; + addr = next; + } while (pgd++, addr != end); +} + +/* + * Create the architecture specific mappings + */ +void __init iotable_init(struct map_desc *io_desc, int nr) +{ + struct map_desc *md; + struct vm_struct *vm; + + if (!nr) + return; + + vm = early_alloc_aligned(sizeof(*vm) * nr, __alignof__(*vm)); + + for (md = io_desc; nr; md++, nr--) { + create_mapping(md, false); + vm->addr = (void *)(md->virtual & PAGE_MASK); + vm->size = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK)); + vm->phys_addr = __pfn_to_phys(md->pfn); + vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING; + vm->flags |= VM_ARM_MTYPE(md->type); + vm->caller = iotable_init; + vm_area_add_early(vm++); + } +} + +#ifndef CONFIG_ARM_LPAE + +/* + * The Linux PMD is made of two consecutive section entries covering 2MB + * (see definition in include/asm/pgtable-2level.h). However a call to + * create_mapping() may optimize static mappings by using individual + * 1MB section mappings. This leaves the actual PMD potentially half + * initialized if the top or bottom section entry isn't used, leaving it + * open to problems if a subsequent ioremap() or vmalloc() tries to use + * the virtual space left free by that unused section entry. + * + * Let's avoid the issue by inserting dummy vm entries covering the unused + * PMD halves once the static mappings are in place. + */ + +static void __init pmd_empty_section_gap(unsigned long addr) +{ + struct vm_struct *vm; + + vm = early_alloc_aligned(sizeof(*vm), __alignof__(*vm)); + vm->addr = (void *)addr; + vm->size = SECTION_SIZE; + vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING; + vm->caller = pmd_empty_section_gap; + vm_area_add_early(vm); +} + +static void __init fill_pmd_gaps(void) +{ + struct vm_struct *vm; + unsigned long addr, next = 0; + pmd_t *pmd; + + /* we're still single threaded hence no lock needed here */ + for (vm = vmlist; vm; vm = vm->next) { + if (!(vm->flags & VM_ARM_STATIC_MAPPING)) + continue; + addr = (unsigned long)vm->addr; + if (addr < next) + continue; + + /* + * Check if this vm starts on an odd section boundary. + * If so and the first section entry for this PMD is free + * then we block the corresponding virtual address. + */ + if ((addr & ~PMD_MASK) == SECTION_SIZE) { + pmd = pmd_off_k(addr); + if (pmd_none(*pmd)) + pmd_empty_section_gap(addr & PMD_MASK); + } + + /* + * Then check if this vm ends on an odd section boundary. + * If so and the second section entry for this PMD is empty + * then we block the corresponding virtual address. + */ + addr += vm->size; + if ((addr & ~PMD_MASK) == SECTION_SIZE) { + pmd = pmd_off_k(addr) + 1; + if (pmd_none(*pmd)) + pmd_empty_section_gap(addr); + } + + /* no need to look at any vm entry until we hit the next PMD */ + next = (addr + PMD_SIZE - 1) & PMD_MASK; + } +} + +#else +#define fill_pmd_gaps() do { } while (0) +#endif + +static void * __initdata vmalloc_min = + (void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET); + +/* + * vmalloc=size forces the vmalloc area to be exactly 'size' + * bytes. This can be used to increase (or decrease) the vmalloc + * area - the default is 240m. + */ +static int __init early_vmalloc(char *arg) +{ + unsigned long vmalloc_reserve = memparse(arg, NULL); + + if (vmalloc_reserve < SZ_16M) { + vmalloc_reserve = SZ_16M; + printk(KERN_WARNING + "vmalloc area too small, limiting to %luMB\n", + vmalloc_reserve >> 20); + } + + if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) { + vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M); + printk(KERN_WARNING + "vmalloc area is too big, limiting to %luMB\n", + vmalloc_reserve >> 20); + } + + vmalloc_min = (void *)(VMALLOC_END - vmalloc_reserve); + return 0; +} +early_param("vmalloc", early_vmalloc); + +static phys_addr_t lowmem_limit __initdata = 0; + +void __init sanity_check_meminfo(void) +{ + int i, j, highmem = 0; + + for (i = 0, j = 0; i < meminfo.nr_banks; i++) { + struct membank *bank = &meminfo.bank[j]; + *bank = meminfo.bank[i]; + + if (bank->start > ULONG_MAX) + highmem = 1; + +#ifdef CONFIG_HIGHMEM + if (__va(bank->start) >= vmalloc_min || + __va(bank->start) < (void *)PAGE_OFFSET) + highmem = 1; + + bank->highmem = highmem; + + /* + * Split those memory banks which are partially overlapping + * the vmalloc area greatly simplifying things later. + */ + if (!highmem && __va(bank->start) < vmalloc_min && + bank->size > vmalloc_min - __va(bank->start)) { + if (meminfo.nr_banks >= NR_BANKS) { + printk(KERN_CRIT "NR_BANKS too low, " + "ignoring high memory\n"); + } else { + memmove(bank + 1, bank, + (meminfo.nr_banks - i) * sizeof(*bank)); + meminfo.nr_banks++; + i++; + bank[1].size -= vmalloc_min - __va(bank->start); + bank[1].start = __pa(vmalloc_min - 1) + 1; + bank[1].highmem = highmem = 1; + j++; + } + bank->size = vmalloc_min - __va(bank->start); + } +#else + bank->highmem = highmem; + + /* + * Highmem banks not allowed with !CONFIG_HIGHMEM. + */ + if (highmem) { + printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx " + "(!CONFIG_HIGHMEM).\n", + (unsigned long long)bank->start, + (unsigned long long)bank->start + bank->size - 1); + continue; + } + + /* + * Check whether this memory bank would entirely overlap + * the vmalloc area. + */ + if (__va(bank->start) >= vmalloc_min || + __va(bank->start) < (void *)PAGE_OFFSET) { + printk(KERN_NOTICE "Ignoring RAM at %.8llx-%.8llx " + "(vmalloc region overlap).\n", + (unsigned long long)bank->start, + (unsigned long long)bank->start + bank->size - 1); + continue; + } + + /* + * Check whether this memory bank would partially overlap + * the vmalloc area. + */ + if (__va(bank->start + bank->size) > vmalloc_min || + __va(bank->start + bank->size) < __va(bank->start)) { + unsigned long newsize = vmalloc_min - __va(bank->start); + printk(KERN_NOTICE "Truncating RAM at %.8llx-%.8llx " + "to -%.8llx (vmalloc region overlap).\n", + (unsigned long long)bank->start, + (unsigned long long)bank->start + bank->size - 1, + (unsigned long long)bank->start + newsize - 1); + bank->size = newsize; + } +#endif + if (!bank->highmem && bank->start + bank->size > lowmem_limit) + lowmem_limit = bank->start + bank->size; + + j++; + } +#ifdef CONFIG_HIGHMEM + if (highmem) { + const char *reason = NULL; + + if (cache_is_vipt_aliasing()) { + /* + * Interactions between kmap and other mappings + * make highmem support with aliasing VIPT caches + * rather difficult. + */ + reason = "with VIPT aliasing cache"; + } + if (reason) { + printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n", + reason); + while (j > 0 && meminfo.bank[j - 1].highmem) + j--; + } + } +#endif + meminfo.nr_banks = j; + high_memory = __va(lowmem_limit - 1) + 1; + memblock_set_current_limit(lowmem_limit); +} + +static inline void prepare_page_table(void) +{ + unsigned long addr; + phys_addr_t end; + + /* + * Clear out all the mappings below the kernel image. + */ + for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); + +#ifdef CONFIG_XIP_KERNEL + /* The XIP kernel is mapped in the module area -- skip over it */ + addr = ((unsigned long)_etext + PMD_SIZE - 1) & PMD_MASK; +#endif + for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); + + /* + * Find the end of the first block of lowmem. + */ + end = memblock.memory.regions[0].base + memblock.memory.regions[0].size; + if (end >= lowmem_limit) + end = lowmem_limit; + + /* + * Clear out all the kernel space mappings, except for the first + * memory bank, up to the vmalloc region. + */ + for (addr = __phys_to_virt(end); + addr < VMALLOC_START; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); +} + +#ifdef CONFIG_ARM_LPAE +/* the first page is reserved for pgd */ +#define SWAPPER_PG_DIR_SIZE (PAGE_SIZE + \ + PTRS_PER_PGD * PTRS_PER_PMD * sizeof(pmd_t)) +#else +#define SWAPPER_PG_DIR_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) +#endif + +/* + * Reserve the special regions of memory + */ +void __init arm_mm_memblock_reserve(void) +{ + /* + * Reserve the page tables. These are already in use, + * and can only be in node 0. + */ + memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE); + +#ifdef CONFIG_SA1111 + /* + * Because of the SA1111 DMA bug, we want to preserve our + * precious DMA-able memory... + */ + memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET); +#endif +} + +/* + * Set up the device mappings. Since we clear out the page tables for all + * mappings above VMALLOC_START, we will remove any debug device mappings. + * This means you have to be careful how you debug this function, or any + * called function. This means you can't use any function or debugging + * method which may touch any device, otherwise the kernel _will_ crash. + */ +static void __init devicemaps_init(struct machine_desc *mdesc) +{ + struct map_desc map; + unsigned long addr; + void *vectors; + + /* + * Allocate the vector page early. + */ + vectors = early_alloc(PAGE_SIZE); + + early_trap_init(vectors); + + for (addr = VMALLOC_START; addr; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); + + /* + * Map the kernel if it is XIP. + * It is always first in the modulearea. + */ +#ifdef CONFIG_XIP_KERNEL + map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK); + map.virtual = MODULES_VADDR; + map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK; + map.type = MT_ROM; + create_mapping(&map); +#endif + + /* + * Map the cache flushing regions. + */ +#ifdef FLUSH_BASE + map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS); + map.virtual = FLUSH_BASE; + map.length = SZ_1M; + map.type = MT_CACHECLEAN; + create_mapping(&map); +#endif +#ifdef FLUSH_BASE_MINICACHE + map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M); + map.virtual = FLUSH_BASE_MINICACHE; + map.length = SZ_1M; + map.type = MT_MINICLEAN; + create_mapping(&map); +#endif + + /* + * Create a mapping for the machine vectors at the high-vectors + * location (0xffff0000). If we aren't using high-vectors, also + * create a mapping at the low-vectors virtual address. + */ + map.pfn = __phys_to_pfn(virt_to_phys(vectors)); + map.virtual = 0xffff0000; + map.length = PAGE_SIZE; + map.type = MT_HIGH_VECTORS; + create_mapping(&map, false); + + if (!vectors_high()) { + map.virtual = 0; + map.type = MT_LOW_VECTORS; + create_mapping(&map, false); + } + + /* + * Ask the machine support to map in the statically mapped devices. + */ + if (mdesc->map_io) + mdesc->map_io(); + fill_pmd_gaps(); + + /* + * Finally flush the caches and tlb to ensure that we're in a + * consistent state wrt the writebuffer. This also ensures that + * any write-allocated cache lines in the vector page are written + * back. After this point, we can start to touch devices again. + */ + local_flush_tlb_all(); + flush_cache_all(); +} + +static void __init kmap_init(void) +{ +#ifdef CONFIG_HIGHMEM + pkmap_page_table = early_pte_alloc_and_install(pmd_off_k(PKMAP_BASE), + PKMAP_BASE, _PAGE_KERNEL_TABLE); +#endif +} + + +static void __init map_lowmem(void) +{ + struct memblock_region *reg; + phys_addr_t start; + phys_addr_t end; + struct map_desc map; + + /* Map all the lowmem memory banks. */ + for_each_memblock(memory, reg) { + start = reg->base; + end = start + reg->size; + + if (end > lowmem_limit) + end = lowmem_limit; + if (start >= end) + break; + + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = end - start; + map.type = MT_MEMORY; + + create_mapping(&map, false); + } + +#ifdef CONFIG_DEBUG_RODATA + start = __pa(_stext) & PMD_MASK; + end = ALIGN(__pa(__end_rodata), PMD_SIZE); + + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = end - start; + map.type = MT_MEMORY; + + create_mapping(&map, true); +#endif +} + +/* + * paging_init() sets up the page tables, initialises the zone memory + * maps, and sets up the zero page, bad page and bad page tables. + */ +void __init paging_init(struct machine_desc *mdesc) +{ + void *zero_page; + + memblock_set_current_limit(lowmem_limit); + + build_mem_type_table(); + prepare_page_table(); + map_lowmem(); + devicemaps_init(mdesc); + kmap_init(); + + top_pmd = pmd_off_k(0xffff0000); + + /* allocate the zero page. */ + zero_page = early_alloc(PAGE_SIZE); + + bootmem_init(); + + empty_zero_page = virt_to_page(zero_page); + __flush_dcache_page(NULL, empty_zero_page); +} diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c new file mode 100644 index 00000000..d51225f9 --- /dev/null +++ b/arch/arm/mm/nommu.c @@ -0,0 +1,104 @@ +/* + * linux/arch/arm/mm/nommu.c + * + * ARM uCLinux supporting functions. + */ +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/io.h> +#include <linux/memblock.h> + +#include <asm/cacheflush.h> +#include <asm/sections.h> +#include <asm/page.h> +#include <asm/setup.h> +#include <asm/traps.h> +#include <asm/mach/arch.h> + +#include "mm.h" + +void __init arm_mm_memblock_reserve(void) +{ + /* + * Register the exception vector page. + * some architectures which the DRAM is the exception vector to trap, + * alloc_page breaks with error, although it is not NULL, but "0." + */ + memblock_reserve(CONFIG_VECTORS_BASE, PAGE_SIZE); +} + +void __init sanity_check_meminfo(void) +{ + phys_addr_t end = bank_phys_end(&meminfo.bank[meminfo.nr_banks - 1]); + high_memory = __va(end - 1) + 1; +} + +/* + * paging_init() sets up the page tables, initialises the zone memory + * maps, and sets up the zero page, bad page and bad page tables. + */ +void __init paging_init(struct machine_desc *mdesc) +{ + early_trap_init((void *)CONFIG_VECTORS_BASE); + bootmem_init(); +} + +/* + * We don't need to do anything here for nommu machines. + */ +void setup_mm_for_reboot(void) +{ +} + +void flush_dcache_page(struct page *page) +{ + __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); +} +EXPORT_SYMBOL(flush_dcache_page); + +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long uaddr, void *dst, const void *src, + unsigned long len) +{ + memcpy(dst, src, len); + if (vma->vm_flags & VM_EXEC) + __cpuc_coherent_user_range(uaddr, uaddr + len); +} + +void __iomem *__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, + size_t size, unsigned int mtype) +{ + if (pfn >= (0x100000000ULL >> PAGE_SHIFT)) + return NULL; + return (void __iomem *) (offset + (pfn << PAGE_SHIFT)); +} +EXPORT_SYMBOL(__arm_ioremap_pfn); + +void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, + size_t size, unsigned int mtype, void *caller) +{ + return __arm_ioremap_pfn(pfn, offset, size, mtype); +} + +void __iomem *__arm_ioremap(unsigned long phys_addr, size_t size, + unsigned int mtype) +{ + return (void __iomem *)phys_addr; +} +EXPORT_SYMBOL(__arm_ioremap); + +void __iomem * (*arch_ioremap_caller)(unsigned long, size_t, unsigned int, void *); + +void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size, + unsigned int mtype, void *caller) +{ + return __arm_ioremap(phys_addr, size, mtype); +} + +void (*arch_iounmap)(volatile void __iomem *); + +void __arm_iounmap(volatile void __iomem *addr) +{ +} +EXPORT_SYMBOL(__arm_iounmap); diff --git a/arch/arm/mm/pabort-legacy.S b/arch/arm/mm/pabort-legacy.S new file mode 100644 index 00000000..8bbff025 --- /dev/null +++ b/arch/arm/mm/pabort-legacy.S @@ -0,0 +1,21 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Function: legacy_pabort + * + * Params : r2 = pt_regs + * : r4 = address of aborted instruction + * : r5 = psr for parent context + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current prefetch abort. + */ + + .align 5 +ENTRY(legacy_pabort) + mov r0, r4 + mov r1, #5 + b do_PrefetchAbort +ENDPROC(legacy_pabort) diff --git a/arch/arm/mm/pabort-v6.S b/arch/arm/mm/pabort-v6.S new file mode 100644 index 00000000..9627646c --- /dev/null +++ b/arch/arm/mm/pabort-v6.S @@ -0,0 +1,21 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Function: v6_pabort + * + * Params : r2 = pt_regs + * : r4 = address of aborted instruction + * : r5 = psr for parent context + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current prefetch abort. + */ + + .align 5 +ENTRY(v6_pabort) + mov r0, r4 + mrc p15, 0, r1, c5, c0, 1 @ get IFSR + b do_PrefetchAbort +ENDPROC(v6_pabort) diff --git a/arch/arm/mm/pabort-v7.S b/arch/arm/mm/pabort-v7.S new file mode 100644 index 00000000..875761f4 --- /dev/null +++ b/arch/arm/mm/pabort-v7.S @@ -0,0 +1,21 @@ +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Function: v7_pabort + * + * Params : r2 = pt_regs + * : r4 = address of aborted instruction + * : r5 = psr for parent context + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current prefetch abort. + */ + + .align 5 +ENTRY(v7_pabort) + mrc p15, 0, r0, c6, c0, 2 @ get IFAR + mrc p15, 0, r1, c5, c0, 1 @ get IFSR + b do_PrefetchAbort +ENDPROC(v7_pabort) diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c new file mode 100644 index 00000000..0acb089d --- /dev/null +++ b/arch/arm/mm/pgd.c @@ -0,0 +1,159 @@ +/* + * linux/arch/arm/mm/pgd.c + * + * Copyright (C) 1998-2005 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/mm.h> +#include <linux/gfp.h> +#include <linux/highmem.h> +#include <linux/slab.h> + +#include <asm/cp15.h> +#include <asm/pgalloc.h> +#include <asm/page.h> +#include <asm/tlbflush.h> + +#include "mm.h" + +#ifdef CONFIG_ARM_LPAE +#define __pgd_alloc() kmalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL) +#define __pgd_free(pgd) kfree(pgd) +#else +#define __pgd_alloc() (pgd_t *)__get_free_pages(GFP_KERNEL, 2) +#define __pgd_free(pgd) free_pages((unsigned long)pgd, 2) +#endif + +/* + * need to get a 16k page for level 1 + */ +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *new_pgd, *init_pgd; + pud_t *new_pud, *init_pud; + pmd_t *new_pmd, *init_pmd; + pte_t *new_pte, *init_pte; + + new_pgd = __pgd_alloc(); + if (!new_pgd) + goto no_pgd; + + memset(new_pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); + + /* + * Copy over the kernel and IO PGD entries + */ + init_pgd = pgd_offset_k(0); + memcpy(new_pgd + USER_PTRS_PER_PGD, init_pgd + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + + clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t)); + +#ifdef CONFIG_ARM_LPAE + /* + * Allocate PMD table for modules and pkmap mappings. + */ + new_pud = pud_alloc(mm, new_pgd + pgd_index(MODULES_VADDR), + MODULES_VADDR); + if (!new_pud) + goto no_pud; + + new_pmd = pmd_alloc(mm, new_pud, 0); + if (!new_pmd) + goto no_pmd; +#endif + + if (!vectors_high()) { + /* + * On ARM, first page must always be allocated since it + * contains the machine vectors. The vectors are always high + * with LPAE. + */ + new_pud = pud_alloc(mm, new_pgd, 0); + if (!new_pud) + goto no_pud; + + new_pmd = pmd_alloc(mm, new_pud, 0); + if (!new_pmd) + goto no_pmd; + + new_pte = pte_alloc_map(mm, NULL, new_pmd, 0); + if (!new_pte) + goto no_pte; + + init_pud = pud_offset(init_pgd, 0); + init_pmd = pmd_offset(init_pud, 0); + init_pte = pte_offset_map(init_pmd, 0); + set_pte_ext(new_pte, *init_pte, 0); + pte_unmap(init_pte); + pte_unmap(new_pte); + } + + return new_pgd; + +no_pte: + pmd_free(mm, new_pmd); +no_pmd: + pud_free(mm, new_pud); +no_pud: + __pgd_free(new_pgd); +no_pgd: + return NULL; +} + +void pgd_free(struct mm_struct *mm, pgd_t *pgd_base) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pgtable_t pte; + + if (!pgd_base) + return; + + pgd = pgd_base + pgd_index(0); + if (pgd_none_or_clear_bad(pgd)) + goto no_pgd; + + pud = pud_offset(pgd, 0); + if (pud_none_or_clear_bad(pud)) + goto no_pud; + + pmd = pmd_offset(pud, 0); + if (pmd_none_or_clear_bad(pmd)) + goto no_pmd; + + pte = pmd_pgtable(*pmd); + pmd_clear(pmd); + pte_free(mm, pte); +no_pmd: + pud_clear(pud); + pmd_free(mm, pmd); +no_pud: + pgd_clear(pgd); + pud_free(mm, pud); +no_pgd: +#ifdef CONFIG_ARM_LPAE + /* + * Free modules/pkmap or identity pmd tables. + */ + for (pgd = pgd_base; pgd < pgd_base + PTRS_PER_PGD; pgd++) { + if (pgd_none_or_clear_bad(pgd)) + continue; + if (pgd_val(*pgd) & L_PGD_SWAPPER) + continue; + pud = pud_offset(pgd, 0); + if (pud_none_or_clear_bad(pud)) + continue; + pmd = pmd_offset(pud, 0); + pud_clear(pud); + pmd_free(mm, pmd); + pgd_clear(pgd); + pud_free(mm, pud); + } +#endif + __pgd_free(pgd_base); +} diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S new file mode 100644 index 00000000..23495134 --- /dev/null +++ b/arch/arm/mm/proc-arm1020.S @@ -0,0 +1,529 @@ +/* + * linux/arch/arm/mm/proc-arm1020.S: MMU functions for ARM1020 + * + * Copyright (C) 2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * These are the low level assembler for performing cache and TLB + * functions on the arm1020. + * + * CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define MAX_AREA_SIZE 32768 + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 16 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + */ +#define CACHE_DLIMIT 32768 + + .text +/* + * cpu_arm1020_proc_init() + */ +ENTRY(cpu_arm1020_proc_init) + mov pc, lr + +/* + * cpu_arm1020_proc_fin() + */ +ENTRY(cpu_arm1020_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_arm1020_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm1020_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_arm1020_reset) + .popsection + +/* + * cpu_arm1020_do_idle() + */ + .align 5 +ENTRY(cpu_arm1020_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + +/* ================================= CACHE ================================ */ + + .align 5 + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm1020_flush_icache_all) +#ifndef CONFIG_CPU_ICACHE_DISABLE + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache +#endif + mov pc, lr +ENDPROC(arm1020_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(arm1020_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm1020_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + mcr p15, 0, ip, c7, c10, 4 @ drain WB + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 15 to 0 +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags for this space + */ +ENTRY(arm1020_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, ip, c7, c10, 4 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + mcr p15, 0, ip, c7, c10, 4 @ drain WB + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1020_coherent_kern_range) + /* FALLTRHOUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1020_coherent_user_range) + mov ip, #0 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcr p15, 0, ip, c7, c10, 4 +1: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#endif +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm1020_flush_kern_dcache_area) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + mcr p15, 0, ip, c7, c10, 4 @ drain WB + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm1020_dma_inv_range: + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, ip, c7, c10, 4 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, ip, c7, c10, 4 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry + mcrne p15, 0, ip, c7, c10, 4 @ drain WB +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm1020_dma_clean_range: + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, ip, c7, c10, 4 @ drain WB + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1020_dma_flush_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 + mcr p15, 0, ip, c7, c10, 4 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + mcr p15, 0, ip, c7, c10, 4 @ drain WB + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1020_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm1020_dma_clean_range + bcs arm1020_dma_inv_range + b arm1020_dma_flush_range +ENDPROC(arm1020_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1020_dma_unmap_area) + mov pc, lr +ENDPROC(arm1020_dma_unmap_area) + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm1020 + + .align 5 +ENTRY(cpu_arm1020_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov ip, #0 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, ip, c7, c10, 4 @ drain WB + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm1020_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm1020_switch_mm) +#ifdef CONFIG_MMU +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r3, c7, c10, 4 + mov r1, #0xF @ 16 segments +1: mov r3, #0x3F @ 64 entries +2: mov ip, r3, LSL #26 @ shift up entry + orr ip, ip, r1, LSL #5 @ shift in/up index + mcr p15, 0, ip, c7, c14, 2 @ Clean & Inval DCache entry + mov ip, #0 + mcr p15, 0, ip, c7, c10, 4 + subs r3, r3, #1 + cmp r3, #0 + bge 2b @ entries 3F to 0 + subs r1, r1, #1 + cmp r1, #0 + bge 1b @ segments 15 to 0 + +#endif + mov r1, #0 +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache +#endif + mcr p15, 0, r1, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs +#endif /* CONFIG_MMU */ + mov pc, lr + +/* + * cpu_arm1020_set_pte(ptep, pte) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm1020_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 4 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif /* CONFIG_MMU */ + mov pc, lr + + __CPUINIT + + .type __arm1020_setup, #function +__arm1020_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + + adr r5, arm1020_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .R.. .... .... .... +#endif + mov pc, lr + .size __arm1020_setup, . - __arm1020_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 1001 ..11 0101 + */ + .type arm1020_crval, #object +arm1020_crval: + crval clear=0x0000593f, mmuset=0x00003935, ucset=0x00001930 + + __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm1020, dabort=v4t_early_abort, pabort=legacy_pabort + + + .section ".rodata" + + string cpu_arch_name, "armv5t" + string cpu_elf_name, "v5" + + .type cpu_arm1020_name, #object +cpu_arm1020_name: + .ascii "ARM1020" +#ifndef CONFIG_CPU_ICACHE_DISABLE + .ascii "i" +#endif +#ifndef CONFIG_CPU_DCACHE_DISABLE + .ascii "d" +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + .ascii "(wt)" +#else + .ascii "(wb)" +#endif +#endif +#ifndef CONFIG_CPU_BPREDICT_DISABLE + .ascii "B" +#endif +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + .ascii "RR" +#endif + .ascii "\0" + .size cpu_arm1020_name, . - cpu_arm1020_name + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __arm1020_proc_info,#object +__arm1020_proc_info: + .long 0x4104a200 @ ARM 1020T (Architecture v5T) + .long 0xff0ffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm1020_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm1020_name + .long arm1020_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm1020_cache_fns + .size __arm1020_proc_info, . - __arm1020_proc_info diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S new file mode 100644 index 00000000..c244b06c --- /dev/null +++ b/arch/arm/mm/proc-arm1020e.S @@ -0,0 +1,489 @@ +/* + * linux/arch/arm/mm/proc-arm1020e.S: MMU functions for ARM1020 + * + * Copyright (C) 2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * These are the low level assembler for performing cache and TLB + * functions on the arm1020e. + * + * CONFIG_CPU_ARM1020_CPU_IDLE -> nohlt + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define MAX_AREA_SIZE 32768 + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 16 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + */ +#define CACHE_DLIMIT 32768 + + .text +/* + * cpu_arm1020e_proc_init() + */ +ENTRY(cpu_arm1020e_proc_init) + mov pc, lr + +/* + * cpu_arm1020e_proc_fin() + */ +ENTRY(cpu_arm1020e_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_arm1020e_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm1020e_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_arm1020e_reset) + .popsection + +/* + * cpu_arm1020e_do_idle() + */ + .align 5 +ENTRY(cpu_arm1020e_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + +/* ================================= CACHE ================================ */ + + .align 5 + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm1020e_flush_icache_all) +#ifndef CONFIG_CPU_ICACHE_DISABLE + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache +#endif + mov pc, lr +ENDPROC(arm1020e_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(arm1020e_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm1020e_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 15 to 0 +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags for this space + */ +ENTRY(arm1020e_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +#ifndef CONFIG_CPU_DCACHE_DISABLE +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1020e_coherent_kern_range) + /* FALLTHROUGH */ +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1020e_coherent_user_range) + mov ip, #0 + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm1020e_flush_kern_dcache_area) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm1020e_dma_inv_range: + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm1020e_dma_clean_range: + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1020e_dma_flush_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1020e_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm1020e_dma_clean_range + bcs arm1020e_dma_inv_range + b arm1020e_dma_flush_range +ENDPROC(arm1020e_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1020e_dma_unmap_area) + mov pc, lr +ENDPROC(arm1020e_dma_unmap_area) + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm1020e + + .align 5 +ENTRY(cpu_arm1020e_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov ip, #0 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm1020e_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm1020e_switch_mm) +#ifdef CONFIG_MMU +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r3, c7, c10, 4 + mov r1, #0xF @ 16 segments +1: mov r3, #0x3F @ 64 entries +2: mov ip, r3, LSL #26 @ shift up entry + orr ip, ip, r1, LSL #5 @ shift in/up index + mcr p15, 0, ip, c7, c14, 2 @ Clean & Inval DCache entry + mov ip, #0 + subs r3, r3, #1 + cmp r3, #0 + bge 2b @ entries 3F to 0 + subs r1, r1, #1 + cmp r1, #0 + bge 1b @ segments 15 to 0 + +#endif + mov r1, #0 +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache +#endif + mcr p15, 0, r1, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs +#endif + mov pc, lr + +/* + * cpu_arm1020e_set_pte(ptep, pte) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm1020e_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif +#endif /* CONFIG_MMU */ + mov pc, lr + + __CPUINIT + + .type __arm1020e_setup, #function +__arm1020e_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + adr r5, arm1020e_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .R.. .... .... .... +#endif + mov pc, lr + .size __arm1020e_setup, . - __arm1020e_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 1001 ..11 0101 + */ + .type arm1020e_crval, #object +arm1020e_crval: + crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001930 + + __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm1020e, dabort=v4t_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_arm1020e_name, "ARM1020E" + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __arm1020e_proc_info,#object +__arm1020e_proc_info: + .long 0x4105a200 @ ARM 1020TE (Architecture v5TE) + .long 0xff0ffff0 + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm1020e_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP + .long cpu_arm1020e_name + .long arm1020e_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm1020e_cache_fns + .size __arm1020e_proc_info, . - __arm1020e_proc_info diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S new file mode 100644 index 00000000..38fe22ef --- /dev/null +++ b/arch/arm/mm/proc-arm1022.S @@ -0,0 +1,472 @@ +/* + * linux/arch/arm/mm/proc-arm1022.S: MMU functions for ARM1022E + * + * Copyright (C) 2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * + * These are the low level assembler for performing cache and TLB + * functions on the ARM1022E. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define MAX_AREA_SIZE 32768 + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 16 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + */ +#define CACHE_DLIMIT 32768 + + .text +/* + * cpu_arm1022_proc_init() + */ +ENTRY(cpu_arm1022_proc_init) + mov pc, lr + +/* + * cpu_arm1022_proc_fin() + */ +ENTRY(cpu_arm1022_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_arm1022_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm1022_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_arm1022_reset) + .popsection + +/* + * cpu_arm1022_do_idle() + */ + .align 5 +ENTRY(cpu_arm1022_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + +/* ================================= CACHE ================================ */ + + .align 5 + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm1022_flush_icache_all) +#ifndef CONFIG_CPU_ICACHE_DISABLE + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache +#endif + mov pc, lr +ENDPROC(arm1022_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(arm1022_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm1022_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 15 to 0 +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags for this space + */ +ENTRY(arm1022_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +#ifndef CONFIG_CPU_DCACHE_DISABLE +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1022_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1022_coherent_user_range) + mov ip, #0 + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm1022_flush_kern_dcache_area) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm1022_dma_inv_range: + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm1022_dma_clean_range: + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1022_dma_flush_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1022_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm1022_dma_clean_range + bcs arm1022_dma_inv_range + b arm1022_dma_flush_range +ENDPROC(arm1022_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1022_dma_unmap_area) + mov pc, lr +ENDPROC(arm1022_dma_unmap_area) + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm1022 + + .align 5 +ENTRY(cpu_arm1022_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov ip, #0 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm1022_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm1022_switch_mm) +#ifdef CONFIG_MMU +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 15 to 0 +#endif + mov r1, #0 +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache +#endif + mcr p15, 0, r1, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs +#endif + mov pc, lr + +/* + * cpu_arm1022_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm1022_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif +#endif /* CONFIG_MMU */ + mov pc, lr + + __CPUINIT + + .type __arm1022_setup, #function +__arm1022_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + adr r5, arm1022_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .R.............. +#endif + mov pc, lr + .size __arm1022_setup, . - __arm1022_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 1001 ..11 0101 + * + */ + .type arm1022_crval, #object +arm1022_crval: + crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001930 + + __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm1022, dabort=v4t_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_arm1022_name, "ARM1022" + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __arm1022_proc_info,#object +__arm1022_proc_info: + .long 0x4105a220 @ ARM 1022E (v5TE) + .long 0xff0ffff0 + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm1022_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP + .long cpu_arm1022_name + .long arm1022_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm1022_cache_fns + .size __arm1022_proc_info, . - __arm1022_proc_info diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S new file mode 100644 index 00000000..3eb9c3c2 --- /dev/null +++ b/arch/arm/mm/proc-arm1026.S @@ -0,0 +1,467 @@ +/* + * linux/arch/arm/mm/proc-arm1026.S: MMU functions for ARM1026EJ-S + * + * Copyright (C) 2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * + * These are the low level assembler for performing cache and TLB + * functions on the ARM1026EJ-S. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define MAX_AREA_SIZE 32768 + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 16 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + */ +#define CACHE_DLIMIT 32768 + + .text +/* + * cpu_arm1026_proc_init() + */ +ENTRY(cpu_arm1026_proc_init) + mov pc, lr + +/* + * cpu_arm1026_proc_fin() + */ +ENTRY(cpu_arm1026_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_arm1026_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm1026_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_arm1026_reset) + .popsection + +/* + * cpu_arm1026_do_idle() + */ + .align 5 +ENTRY(cpu_arm1026_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + +/* ================================= CACHE ================================ */ + + .align 5 + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm1026_flush_icache_all) +#ifndef CONFIG_CPU_ICACHE_DISABLE + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache +#endif + mov pc, lr +ENDPROC(arm1026_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(arm1026_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm1026_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifndef CONFIG_CPU_DCACHE_DISABLE +1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate + bne 1b +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags for this space + */ +ENTRY(arm1026_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +#ifndef CONFIG_CPU_DCACHE_DISABLE +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1026_coherent_kern_range) + /* FALLTHROUGH */ +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1026_coherent_user_range) + mov ip, #0 + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm1026_flush_kern_dcache_area) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm1026_dma_inv_range: + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm1026_dma_clean_range: + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1026_dma_flush_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1026_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm1026_dma_clean_range + bcs arm1026_dma_inv_range + b arm1026_dma_flush_range +ENDPROC(arm1026_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1026_dma_unmap_area) + mov pc, lr +ENDPROC(arm1026_dma_unmap_area) + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm1026 + + .align 5 +ENTRY(cpu_arm1026_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov ip, #0 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm1026_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm1026_switch_mm) +#ifdef CONFIG_MMU + mov r1, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE +1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate + bne 1b +#endif +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache +#endif + mcr p15, 0, r1, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs +#endif + mov pc, lr + +/* + * cpu_arm1026_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm1026_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif +#endif /* CONFIG_MMU */ + mov pc, lr + + + __CPUINIT + + .type __arm1026_setup, #function +__arm1026_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 + mcr p15, 0, r4, c2, c0 @ load page table pointer +#endif +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #4 @ explicitly disable writeback + mcr p15, 7, r0, c15, c0, 0 +#endif + adr r5, arm1026_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .R.. .... .... .... +#endif + mov pc, lr + .size __arm1026_setup, . - __arm1026_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 1001 ..11 0101 + * + */ + .type arm1026_crval, #object +arm1026_crval: + crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001934 + + __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm1026, dabort=v5t_early_abort, pabort=legacy_pabort + + .section .rodata + + string cpu_arch_name, "armv5tej" + string cpu_elf_name, "v5" + .align + string cpu_arm1026_name, "ARM1026EJ-S" + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __arm1026_proc_info,#object +__arm1026_proc_info: + .long 0x4106a260 @ ARM 1026EJ-S (v5TEJ) + .long 0xff0ffff0 + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm1026_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA + .long cpu_arm1026_name + .long arm1026_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm1026_cache_fns + .size __arm1026_proc_info, . - __arm1026_proc_info diff --git a/arch/arm/mm/proc-arm6_7.S b/arch/arm/mm/proc-arm6_7.S new file mode 100644 index 00000000..4fbeb5b8 --- /dev/null +++ b/arch/arm/mm/proc-arm6_7.S @@ -0,0 +1,327 @@ +/* + * linux/arch/arm/mm/proc-arm6,7.S + * + * Copyright (C) 1997-2000 Russell King + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * These are the low level assembler for performing cache and TLB + * functions on the ARM610 & ARM710. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + +ENTRY(cpu_arm6_dcache_clean_area) +ENTRY(cpu_arm7_dcache_clean_area) + mov pc, lr + +/* + * Function: arm6_7_data_abort () + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Purpose : obtain information about current aborted instruction + * + * Returns : r4-r5, r10-r11, r13 preserved + */ + +ENTRY(cpu_arm7_data_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + ldr r8, [r4] @ read arm instruction + tst r8, #1 << 20 @ L = 0 -> write? + orreq r1, r1, #1 << 11 @ yes. + and r7, r8, #15 << 24 + add pc, pc, r7, lsr #22 @ Now branch to the relevant processing routine + nop + +/* 0 */ b .data_unknown +/* 1 */ b do_DataAbort @ swp +/* 2 */ b .data_unknown +/* 3 */ b .data_unknown +/* 4 */ b .data_arm_lateldrpostconst @ ldr rd, [rn], #m +/* 5 */ b .data_arm_lateldrpreconst @ ldr rd, [rn, #m] +/* 6 */ b .data_arm_lateldrpostreg @ ldr rd, [rn], rm +/* 7 */ b .data_arm_lateldrprereg @ ldr rd, [rn, rm] +/* 8 */ b .data_arm_ldmstm @ ldm*a rn, <rlist> +/* 9 */ b .data_arm_ldmstm @ ldm*b rn, <rlist> +/* a */ b .data_unknown +/* b */ b .data_unknown +/* c */ b do_DataAbort @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m +/* d */ b do_DataAbort @ ldc rd, [rn, #m] +/* e */ b .data_unknown +/* f */ +.data_unknown: @ Part of jumptable + mov r0, r4 + mov r1, r8 + b baddataabort + +ENTRY(cpu_arm6_data_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + ldr r8, [r4] @ read arm instruction + tst r8, #1 << 20 @ L = 0 -> write? + orreq r1, r1, #1 << 11 @ yes. + and r7, r8, #14 << 24 + teq r7, #8 << 24 @ was it ldm/stm + bne do_DataAbort + +.data_arm_ldmstm: + tst r8, #1 << 21 @ check writeback bit + beq do_DataAbort @ no writeback -> no fixup + mov r7, #0x11 + orr r7, r7, #0x1100 + and r6, r8, r7 + and r9, r8, r7, lsl #1 + add r6, r6, r9, lsr #1 + and r9, r8, r7, lsl #2 + add r6, r6, r9, lsr #2 + and r9, r8, r7, lsl #3 + add r6, r6, r9, lsr #3 + add r6, r6, r6, lsr #8 + add r6, r6, r6, lsr #4 + and r6, r6, #15 @ r6 = no. of registers to transfer. + and r9, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' + tst r8, #1 << 23 @ Check U bit + subne r7, r7, r6, lsl #2 @ Undo increment + addeq r7, r7, r6, lsl #2 @ Undo decrement + str r7, [r2, r9, lsr #14] @ Put register 'Rn' + b do_DataAbort + +.data_arm_apply_r6_and_rn: + and r9, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' + tst r8, #1 << 23 @ Check U bit + subne r7, r7, r6 @ Undo incrmenet + addeq r7, r7, r6 @ Undo decrement + str r7, [r2, r9, lsr #14] @ Put register 'Rn' + b do_DataAbort + +.data_arm_lateldrpreconst: + tst r8, #1 << 21 @ check writeback bit + beq do_DataAbort @ no writeback -> no fixup +.data_arm_lateldrpostconst: + movs r6, r8, lsl #20 @ Get offset + beq do_DataAbort @ zero -> no fixup + and r9, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' + tst r8, #1 << 23 @ Check U bit + subne r7, r7, r6, lsr #20 @ Undo increment + addeq r7, r7, r6, lsr #20 @ Undo decrement + str r7, [r2, r9, lsr #14] @ Put register 'Rn' + b do_DataAbort + +.data_arm_lateldrprereg: + tst r8, #1 << 21 @ check writeback bit + beq do_DataAbort @ no writeback -> no fixup +.data_arm_lateldrpostreg: + and r7, r8, #15 @ Extract 'm' from instruction + ldr r6, [r2, r7, lsl #2] @ Get register 'Rm' + mov r9, r8, lsr #7 @ get shift count + ands r9, r9, #31 + and r7, r8, #0x70 @ get shift type + orreq r7, r7, #8 @ shift count = 0 + add pc, pc, r7 + nop + + mov r6, r6, lsl r9 @ 0: LSL #!0 + b .data_arm_apply_r6_and_rn + b .data_arm_apply_r6_and_rn @ 1: LSL #0 + nop + b .data_unknown @ 2: MUL? + nop + b .data_unknown @ 3: MUL? + nop + mov r6, r6, lsr r9 @ 4: LSR #!0 + b .data_arm_apply_r6_and_rn + mov r6, r6, lsr #32 @ 5: LSR #32 + b .data_arm_apply_r6_and_rn + b .data_unknown @ 6: MUL? + nop + b .data_unknown @ 7: MUL? + nop + mov r6, r6, asr r9 @ 8: ASR #!0 + b .data_arm_apply_r6_and_rn + mov r6, r6, asr #32 @ 9: ASR #32 + b .data_arm_apply_r6_and_rn + b .data_unknown @ A: MUL? + nop + b .data_unknown @ B: MUL? + nop + mov r6, r6, ror r9 @ C: ROR #!0 + b .data_arm_apply_r6_and_rn + mov r6, r6, rrx @ D: RRX + b .data_arm_apply_r6_and_rn + b .data_unknown @ E: MUL? + nop + b .data_unknown @ F: MUL? + +/* + * Function: arm6_7_proc_init (void) + * : arm6_7_proc_fin (void) + * + * Notes : This processor does not require these + */ +ENTRY(cpu_arm6_proc_init) +ENTRY(cpu_arm7_proc_init) + mov pc, lr + +ENTRY(cpu_arm6_proc_fin) +ENTRY(cpu_arm7_proc_fin) + mov r0, #0x31 @ ....S..DP...M + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +ENTRY(cpu_arm6_do_idle) +ENTRY(cpu_arm7_do_idle) + mov pc, lr + +/* + * Function: arm6_7_switch_mm(unsigned long pgd_phys) + * Params : pgd_phys Physical address of page table + * Purpose : Perform a task switch, saving the old processes state, and restoring + * the new. + */ +ENTRY(cpu_arm6_switch_mm) +ENTRY(cpu_arm7_switch_mm) +#ifdef CONFIG_MMU + mov r1, #0 + mcr p15, 0, r1, c7, c0, 0 @ flush cache + mcr p15, 0, r0, c2, c0, 0 @ update page table ptr + mcr p15, 0, r1, c5, c0, 0 @ flush TLBs +#endif + mov pc, lr + +/* + * Function: arm6_7_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext) + * Params : r0 = Address to set + * : r1 = value to set + * Purpose : Set a PTE and flush it out of any WB cache + */ + .align 5 +ENTRY(cpu_arm6_set_pte_ext) +ENTRY(cpu_arm7_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext wc_disable=0 +#endif /* CONFIG_MMU */ + mov pc, lr + +/* + * Function: _arm6_7_reset + * Params : r0 = address to jump to + * Notes : This sets up everything for a reset + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm6_reset) +ENTRY(cpu_arm7_reset) + mov r1, #0 + mcr p15, 0, r1, c7, c0, 0 @ flush cache +#ifdef CONFIG_MMU + mcr p15, 0, r1, c5, c0, 0 @ flush TLB +#endif + mov r1, #0x30 + mcr p15, 0, r1, c1, c0, 0 @ turn off MMU etc + mov pc, r0 +ENDPROC(cpu_arm6_reset) +ENDPROC(cpu_arm7_reset) + .popsection + + __CPUINIT + + .type __arm6_setup, #function +__arm6_setup: mov r0, #0 + mcr p15, 0, r0, c7, c0 @ flush caches on v3 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c5, c0 @ flush TLBs on v3 + mov r0, #0x3d @ . ..RS BLDP WCAM + orr r0, r0, #0x100 @ . ..01 0011 1101 +#else + mov r0, #0x3c @ . ..RS BLDP WCA. +#endif + mov pc, lr + .size __arm6_setup, . - __arm6_setup + + .type __arm7_setup, #function +__arm7_setup: mov r0, #0 + mcr p15, 0, r0, c7, c0 @ flush caches on v3 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c5, c0 @ flush TLBs on v3 + mcr p15, 0, r0, c3, c0 @ load domain access register + mov r0, #0x7d @ . ..RS BLDP WCAM + orr r0, r0, #0x100 @ . ..01 0111 1101 +#else + mov r0, #0x7c @ . ..RS BLDP WCA. +#endif + mov pc, lr + .size __arm7_setup, . - __arm7_setup + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm6, dabort=cpu_arm6_data_abort, pabort=legacy_pabort + define_processor_functions arm7, dabort=cpu_arm7_data_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv3" + string cpu_elf_name, "v3" + string cpu_arm6_name, "ARM6" + string cpu_arm610_name, "ARM610" + string cpu_arm7_name, "ARM7" + string cpu_arm710_name, "ARM710" + + .align + + .section ".proc.info.init", #alloc, #execinstr + +.macro arm67_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \ + cpu_mm_mmu_flags:req, cpu_flush:req, cpu_proc_funcs:req + .type __\name\()_proc_info, #object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long \cpu_mm_mmu_flags + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b \cpu_flush + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_26BIT + .long \cpu_name + .long \cpu_proc_funcs + .long v3_tlb_fns + .long v3_user_fns + .long v3_cache_fns + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + arm67_proc_info arm6, 0x41560600, 0xfffffff0, cpu_arm6_name, \ + 0x00000c1e, __arm6_setup, arm6_processor_functions + arm67_proc_info arm610, 0x41560610, 0xfffffff0, cpu_arm610_name, \ + 0x00000c1e, __arm6_setup, arm6_processor_functions + arm67_proc_info arm7, 0x41007000, 0xffffff00, cpu_arm7_name, \ + 0x00000c1e, __arm7_setup, arm7_processor_functions + arm67_proc_info arm710, 0x41007100, 0xfff8ff00, cpu_arm710_name, \ + PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ, \ + __arm7_setup, arm7_processor_functions diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S new file mode 100644 index 00000000..0ac908c7 --- /dev/null +++ b/arch/arm/mm/proc-arm720.S @@ -0,0 +1,221 @@ +/* + * linux/arch/arm/mm/proc-arm720.S: MMU functions for ARM720 + * + * Copyright (C) 2000 Steve Hill (sjhill@cotw.com) + * Rob Scott (rscott@mtrob.fdns.net) + * Copyright (C) 2000 ARM Limited, Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2004. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * These are the low level assembler for performing cache and TLB + * functions on the ARM720T. The ARM720T has a writethrough IDC + * cache, so we don't need to clean it. + * + * Changelog: + * 05-09-2000 SJH Created by moving 720 specific functions + * out of 'proc-arm6,7.S' per RMK discussion + * 07-25-2000 SJH Added idle function. + * 08-25-2000 DBS Updated for integration of ARM Ltd version. + * 04-20-2004 HSC modified for non-paged memory management mode. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + +/* + * Function: arm720_proc_init (void) + * : arm720_proc_fin (void) + * + * Notes : This processor does not require these + */ +ENTRY(cpu_arm720_dcache_clean_area) +ENTRY(cpu_arm720_proc_init) + mov pc, lr + +ENTRY(cpu_arm720_proc_fin) + mrc p15, 0, r0, c1, c0, 0 + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * Function: arm720_proc_do_idle(void) + * Params : r0 = unused + * Purpose : put the processor in proper idle mode + */ +ENTRY(cpu_arm720_do_idle) + mov pc, lr + +/* + * Function: arm720_switch_mm(unsigned long pgd_phys) + * Params : pgd_phys Physical address of page table + * Purpose : Perform a task switch, saving the old process' state and restoring + * the new. + */ +ENTRY(cpu_arm720_switch_mm) +#ifdef CONFIG_MMU + mov r1, #0 + mcr p15, 0, r1, c7, c7, 0 @ invalidate cache + mcr p15, 0, r0, c2, c0, 0 @ update page table ptr + mcr p15, 0, r1, c8, c7, 0 @ flush TLB (v4) +#endif + mov pc, lr + +/* + * Function: arm720_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext) + * Params : r0 = Address to set + * : r1 = value to set + * Purpose : Set a PTE and flush it out of any WB cache + */ + .align 5 +ENTRY(cpu_arm720_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext wc_disable=0 +#endif + mov pc, lr + +/* + * Function: arm720_reset + * Params : r0 = address to jump to + * Notes : This sets up everything for a reset + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm720_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate cache +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ flush TLB (v4) +#endif + mrc p15, 0, ip, c1, c0, 0 @ get ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x2100 @ ..v....s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_arm720_reset) + .popsection + + __CPUINIT + + .type __arm710_setup, #function +__arm710_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7, 0 @ invalidate caches +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7, 0 @ flush TLB (v4) +#endif + mrc p15, 0, r0, c1, c0 @ get control register + ldr r5, arm710_cr1_clear + bic r0, r0, r5 + ldr r5, arm710_cr1_set + orr r0, r0, r5 + mov pc, lr @ __ret (head.S) + .size __arm710_setup, . - __arm710_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .... 0001 ..11 1101 + * + */ + .type arm710_cr1_clear, #object + .type arm710_cr1_set, #object +arm710_cr1_clear: + .word 0x0f3f +arm710_cr1_set: + .word 0x013d + + .type __arm720_setup, #function +__arm720_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7, 0 @ invalidate caches +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7, 0 @ flush TLB (v4) +#endif + adr r5, arm720_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register + bic r0, r0, r5 + orr r0, r0, r6 + mov pc, lr @ __ret (head.S) + .size __arm720_setup, . - __arm720_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..1. 1001 ..11 1101 + * + */ + .type arm720_crval, #object +arm720_crval: + crval clear=0x00002f3f, mmuset=0x0000213d, ucset=0x00000130 + + __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm720, dabort=v4t_late_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm710_name, "ARM710T" + string cpu_arm720_name, "ARM720T" + + .align + +/* + * See <asm/procinfo.h> for a definition of this structure. + */ + + .section ".proc.info.init", #alloc, #execinstr + +.macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b \cpu_flush @ cpu_flush + .long cpu_arch_name @ arch_name + .long cpu_elf_name @ elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB @ elf_hwcap + .long \cpu_name + .long arm720_processor_functions + .long v4_tlb_fns + .long v4wt_user_fns + .long v4_cache_fns + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + arm720_proc_info arm710, 0x41807100, 0xffffff00, cpu_arm710_name, __arm710_setup + arm720_proc_info arm720, 0x41807200, 0xffffff00, cpu_arm720_name, __arm720_setup diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S new file mode 100644 index 00000000..dc5de5d5 --- /dev/null +++ b/arch/arm/mm/proc-arm740.S @@ -0,0 +1,149 @@ +/* + * linux/arch/arm/mm/arm740.S: utility functions for ARM740 + * + * Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + + .text +/* + * cpu_arm740_proc_init() + * cpu_arm740_do_idle() + * cpu_arm740_dcache_clean_area() + * cpu_arm740_switch_mm() + * + * These are not required. + */ +ENTRY(cpu_arm740_proc_init) +ENTRY(cpu_arm740_do_idle) +ENTRY(cpu_arm740_dcache_clean_area) +ENTRY(cpu_arm740_switch_mm) + mov pc, lr + +/* + * cpu_arm740_proc_fin() + */ +ENTRY(cpu_arm740_proc_fin) + mrc p15, 0, r0, c1, c0, 0 + bic r0, r0, #0x3f000000 @ bank/f/lock/s + bic r0, r0, #0x0000000c @ w-buffer/cache + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_arm740_reset(loc) + * Params : r0 = address to jump to + * Notes : This sets up everything for a reset + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm740_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c0, 0 @ invalidate cache + mrc p15, 0, ip, c1, c0, 0 @ get ctrl register + bic ip, ip, #0x0000000c @ ............wc.. + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_arm740_reset) + .popsection + + __CPUINIT + + .type __arm740_setup, #function +__arm740_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c0, 0 @ invalidate caches + + mcr p15, 0, r0, c6, c3 @ disable area 3~7 + mcr p15, 0, r0, c6, c4 + mcr p15, 0, r0, c6, c5 + mcr p15, 0, r0, c6, c6 + mcr p15, 0, r0, c6, c7 + + mov r0, #0x0000003F @ base = 0, size = 4GB + mcr p15, 0, r0, c6, c0 @ set area 0, default + + ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM + ldr r1, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) + mov r2, #10 @ 11 is the minimum (4KB) +1: add r2, r2, #1 @ area size *= 2 + mov r1, r1, lsr #1 + bne 1b @ count not zero r-shift + orr r0, r0, r2, lsl #1 @ the area register value + orr r0, r0, #1 @ set enable bit + mcr p15, 0, r0, c6, c1 @ set area 1, RAM + + ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH + ldr r1, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) + mov r2, #10 @ 11 is the minimum (4KB) +1: add r2, r2, #1 @ area size *= 2 + mov r1, r1, lsr #1 + bne 1b @ count not zero r-shift + orr r0, r0, r2, lsl #1 @ the area register value + orr r0, r0, #1 @ set enable bit + mcr p15, 0, r0, c6, c2 @ set area 2, ROM/FLASH + + mov r0, #0x06 + mcr p15, 0, r0, c2, c0 @ Region 1&2 cacheable +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #0x00 @ disable whole write buffer +#else + mov r0, #0x02 @ Region 1 write bufferred +#endif + mcr p15, 0, r0, c3, c0 + + mov r0, #0x10000 + sub r0, r0, #1 @ r0 = 0xffff + mcr p15, 0, r0, c5, c0 @ all read/write access + + mrc p15, 0, r0, c1, c0 @ get control register + bic r0, r0, #0x3F000000 @ set to standard caching mode + @ need some benchmark + orr r0, r0, #0x0000000d @ MPU/Cache/WB + + mov pc, lr + + .size __arm740_setup, . - __arm740_setup + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm740, dabort=v4t_late_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + + string cpu_arch_name, "armv4" + string cpu_elf_name, "v4" + string cpu_arm740_name, "ARM740T" + + .align + + .section ".proc.info.init", #alloc, #execinstr + .type __arm740_proc_info,#object +__arm740_proc_info: + .long 0x41807400 + .long 0xfffffff0 + .long 0 + b __arm740_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT + .long cpu_arm740_name + .long arm740_processor_functions + .long 0 + .long 0 + .long v3_cache_fns @ cache model + .size __arm740_proc_info, . - __arm740_proc_info diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S new file mode 100644 index 00000000..6ddea3e4 --- /dev/null +++ b/arch/arm/mm/proc-arm7tdmi.S @@ -0,0 +1,116 @@ +/* + * linux/arch/arm/mm/proc-arm7tdmi.S: utility functions for ARM7TDMI + * + * Copyright (C) 2003-2006 Hyok S. Choi <hyok.choi@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + + .text +/* + * cpu_arm7tdmi_proc_init() + * cpu_arm7tdmi_do_idle() + * cpu_arm7tdmi_dcache_clean_area() + * cpu_arm7tdmi_switch_mm() + * + * These are not required. + */ +ENTRY(cpu_arm7tdmi_proc_init) +ENTRY(cpu_arm7tdmi_do_idle) +ENTRY(cpu_arm7tdmi_dcache_clean_area) +ENTRY(cpu_arm7tdmi_switch_mm) + mov pc, lr + +/* + * cpu_arm7tdmi_proc_fin() + */ +ENTRY(cpu_arm7tdmi_proc_fin) + mov pc, lr + +/* + * Function: cpu_arm7tdmi_reset(loc) + * Params : loc(r0) address to jump to + * Purpose : Sets up everything for a reset and jump to the location for soft reset. + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm7tdmi_reset) + mov pc, r0 +ENDPROC(cpu_arm7tdmi_reset) + .popsection + + __CPUINIT + + .type __arm7tdmi_setup, #function +__arm7tdmi_setup: + mov pc, lr + .size __arm7tdmi_setup, . - __arm7tdmi_setup + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm7tdmi, dabort=v4t_late_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm7tdmi_name, "ARM7TDMI" + string cpu_triscenda7_name, "Triscend-A7x" + string cpu_at91_name, "Atmel-AT91M40xxx" + string cpu_s3c3410_name, "Samsung-S3C3410" + string cpu_s3c44b0x_name, "Samsung-S3C44B0x" + string cpu_s3c4510b_name, "Samsung-S3C4510B" + string cpu_s3c4530_name, "Samsung-S3C4530" + string cpu_netarm_name, "NETARM" + + .align + + .section ".proc.info.init", #alloc, #execinstr + +.macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \ + extra_hwcaps=0 + .type __\name\()_proc_info, #object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long 0 + .long 0 + b __arm7tdmi_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_26BIT | ( \extra_hwcaps ) + .long \cpu_name + .long arm7tdmi_processor_functions + .long 0 + .long 0 + .long v4_cache_fns + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + arm7tdmi_proc_info arm7tdmi, 0x41007700, 0xfff8ff00, \ + cpu_arm7tdmi_name + arm7tdmi_proc_info triscenda7, 0x0001d2ff, 0x0001ffff, \ + cpu_triscenda7_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info at91, 0x14000040, 0xfff000e0, \ + cpu_at91_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info s3c4510b, 0x36365000, 0xfffff000, \ + cpu_s3c4510b_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info s3c4530, 0x4c000000, 0xfff000e0, \ + cpu_s3c4530_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info s3c3410, 0x34100000, 0xffff0000, \ + cpu_s3c3410_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info s3c44b0x, 0x44b00000, 0xffff0000, \ + cpu_s3c44b0x_name, extra_hwcaps=HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S new file mode 100644 index 00000000..cb941ae9 --- /dev/null +++ b/arch/arm/mm/proc-arm920.S @@ -0,0 +1,478 @@ +/* + * linux/arch/arm/mm/proc-arm920.S: MMU functions for ARM920 + * + * Copyright (C) 1999,2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * These are the low level assembler for performing cache and TLB + * functions on the arm920. + * + * CONFIG_CPU_ARM920_CPU_IDLE -> nohlt + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 8 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + */ +#define CACHE_DLIMIT 65536 + + + .text +/* + * cpu_arm920_proc_init() + */ +ENTRY(cpu_arm920_proc_init) + mov pc, lr + +/* + * cpu_arm920_proc_fin() + */ +ENTRY(cpu_arm920_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_arm920_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm920_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_arm920_reset) + .popsection + +/* + * cpu_arm920_do_idle() + */ + .align 5 +ENTRY(cpu_arm920_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + + +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm920_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(arm920_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(arm920_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm920_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 8 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 7 to 0 + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags for address space + */ +ENTRY(arm920_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm920_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm920_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm920_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm920_dma_inv_range: + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm920_dma_clean_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm920_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm920_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm920_dma_clean_range + bcs arm920_dma_inv_range + b arm920_dma_flush_range +ENDPROC(arm920_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm920_dma_unmap_area) + mov pc, lr +ENDPROC(arm920_dma_unmap_area) + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm920 +#endif + + +ENTRY(cpu_arm920_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm920_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm920_switch_mm) +#ifdef CONFIG_MMU + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else +@ && 'Clean & Invalidate whole DCache' +@ && Re-written to use Index Ops. +@ && Uses registers r1, r3 and ip + + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 8 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 7 to 0 +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mov pc, lr + +/* + * cpu_arm920_set_pte(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm920_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + mov pc, lr + +/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ +.globl cpu_arm920_suspend_size +.equ cpu_arm920_suspend_size, 4 * 3 +#ifdef CONFIG_PM_SLEEP +ENTRY(cpu_arm920_do_suspend) + stmfd sp!, {r4 - r6, lr} + mrc p15, 0, r4, c13, c0, 0 @ PID + mrc p15, 0, r5, c3, c0, 0 @ Domain ID + mrc p15, 0, r6, c1, c0, 0 @ Control register + stmia r0, {r4 - r6} + ldmfd sp!, {r4 - r6, pc} +ENDPROC(cpu_arm920_do_suspend) + +ENTRY(cpu_arm920_do_resume) + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs + mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches + ldmia r0, {r4 - r6} + mcr p15, 0, r4, c13, c0, 0 @ PID + mcr p15, 0, r5, c3, c0, 0 @ Domain ID + mcr p15, 0, r1, c2, c0, 0 @ TTB address + mov r0, r6 @ control register + b cpu_resume_mmu +ENDPROC(cpu_arm920_do_resume) +#endif + + __CPUINIT + + .type __arm920_setup, #function +__arm920_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + adr r5, arm920_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 + mov pc, lr + .size __arm920_setup, . - __arm920_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..11 0001 ..11 0101 + * + */ + .type arm920_crval, #object +arm920_crval: + crval clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130 + + __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm920, dabort=v4t_early_abort, pabort=legacy_pabort, suspend=1 + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm920_name, "ARM920T" + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __arm920_proc_info,#object +__arm920_proc_info: + .long 0x41009200 + .long 0xff00fff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm920_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm920_name + .long arm920_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + .long arm920_cache_fns +#else + .long v4wt_cache_fns +#endif + .size __arm920_proc_info, . - __arm920_proc_info diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S new file mode 100644 index 00000000..4ec0e074 --- /dev/null +++ b/arch/arm/mm/proc-arm922.S @@ -0,0 +1,456 @@ +/* + * linux/arch/arm/mm/proc-arm922.S: MMU functions for ARM922 + * + * Copyright (C) 1999,2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * Copyright (C) 2001 Altera Corporation + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * These are the low level assembler for performing cache and TLB + * functions on the arm922. + * + * CONFIG_CPU_ARM922_CPU_IDLE -> nohlt + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 4 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. (I think this should + * be 32768). + */ +#define CACHE_DLIMIT 8192 + + + .text +/* + * cpu_arm922_proc_init() + */ +ENTRY(cpu_arm922_proc_init) + mov pc, lr + +/* + * cpu_arm922_proc_fin() + */ +ENTRY(cpu_arm922_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_arm922_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm922_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_arm922_reset) + .popsection + +/* + * cpu_arm922_do_idle() + */ + .align 5 +ENTRY(cpu_arm922_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + + +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm922_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(arm922_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ +ENTRY(arm922_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm922_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 8 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 7 to 0 + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + */ +ENTRY(arm922_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm922_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm922_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm922_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm922_dma_inv_range: + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm922_dma_clean_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm922_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm922_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm922_dma_clean_range + bcs arm922_dma_inv_range + b arm922_dma_flush_range +ENDPROC(arm922_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm922_dma_unmap_area) + mov pc, lr +ENDPROC(arm922_dma_unmap_area) + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm922 +#endif + + +ENTRY(cpu_arm922_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm922_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm922_switch_mm) +#ifdef CONFIG_MMU + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else +@ && 'Clean & Invalidate whole DCache' +@ && Re-written to use Index Ops. +@ && Uses registers r1, r3 and ip + + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 7 to 0 +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mov pc, lr + +/* + * cpu_arm922_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm922_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif /* CONFIG_MMU */ + mov pc, lr + + __CPUINIT + + .type __arm922_setup, #function +__arm922_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + adr r5, arm922_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 + mov pc, lr + .size __arm922_setup, . - __arm922_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..11 0001 ..11 0101 + * + */ + .type arm922_crval, #object +arm922_crval: + crval clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130 + + __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm922, dabort=v4t_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm922_name, "ARM922T" + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __arm922_proc_info,#object +__arm922_proc_info: + .long 0x41009220 + .long 0xff00fff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm922_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm922_name + .long arm922_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + .long arm922_cache_fns +#else + .long v4wt_cache_fns +#endif + .size __arm922_proc_info, . - __arm922_proc_info diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S new file mode 100644 index 00000000..9dccd9a3 --- /dev/null +++ b/arch/arm/mm/proc-arm925.S @@ -0,0 +1,523 @@ +/* + * linux/arch/arm/mm/arm925.S: MMU functions for ARM925 + * + * Copyright (C) 1999,2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * Copyright (C) 2002 RidgeRun, Inc. + * Copyright (C) 2002-2003 MontaVista Software, Inc. + * + * Update for Linux-2.6 and cache flush improvements + * Copyright (C) 2004 Nokia Corporation by Tony Lindgren <tony@atomide.com> + * + * hacked for non-paged-MM by Hyok S. Choi, 2004. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * These are the low level assembler for performing cache and TLB + * functions on the arm925. + * + * CONFIG_CPU_ARM925_CPU_IDLE -> nohlt + * + * Some additional notes based on deciphering the TI TRM on OMAP-5910: + * + * NOTE1: The TI925T Configuration Register bit "D-cache clean and flush + * entry mode" must be 0 to flush the entries in both segments + * at once. This is the default value. See TRM 2-20 and 2-24 for + * more information. + * + * NOTE2: Default is the "D-cache clean and flush entry mode". It looks + * like the "Transparent mode" must be on for partial cache flushes + * to work in this mode. This mode only works with 16-bit external + * memory. See TRM 2-24 for more information. + * + * NOTE3: Write-back cache flushing seems to be flakey with devices using + * direct memory access, such as USB OHCI. The workaround is to use + * write-through cache with CONFIG_CPU_DCACHE_WRITETHROUGH (this is + * the default for OMAP-1510). + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 16 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 2 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 256 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + */ +#define CACHE_DLIMIT 8192 + + .text +/* + * cpu_arm925_proc_init() + */ +ENTRY(cpu_arm925_proc_init) + mov pc, lr + +/* + * cpu_arm925_proc_fin() + */ +ENTRY(cpu_arm925_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_arm925_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm925_reset) + /* Send software reset to MPU and DSP */ + mov ip, #0xff000000 + orr ip, ip, #0x00fe0000 + orr ip, ip, #0x0000ce00 + mov r4, #1 + strh r4, [ip, #0x10] +ENDPROC(cpu_arm925_reset) + .popsection + + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 + +/* + * cpu_arm925_do_idle() + * + * Called with IRQs disabled + */ + .align 10 +ENTRY(cpu_arm925_do_idle) + mov r0, #0 + mrc p15, 0, r1, c1, c0, 0 @ Read control register + mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer + bic r2, r1, #1 << 12 + mcr p15, 0, r2, c1, c0, 0 @ Disable I cache + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable + mov pc, lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm925_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(arm925_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ +ENTRY(arm925_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm925_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else + /* Flush entries in both segments at once, see NOTE1 above */ + mov r3, #(CACHE_DENTRIES - 1) << 4 @ 256 entries in segment +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 4 + bcs 2b @ entries 255 to 0 +#endif + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + */ +ENTRY(arm925_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bgt __flush_whole_cache +1: tst r2, #VM_EXEC +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#else + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#endif + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm925_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm925_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm925_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm925_dma_inv_range: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +#endif + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm925_dma_clean_range: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm925_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry +#else + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm925_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm925_dma_clean_range + bcs arm925_dma_inv_range + b arm925_dma_flush_range +ENDPROC(arm925_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm925_dma_unmap_area) + mov pc, lr +ENDPROC(arm925_dma_unmap_area) + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm925 + +ENTRY(cpu_arm925_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm925_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm925_switch_mm) +#ifdef CONFIG_MMU + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else + /* Flush entries in bothe segments at once, see NOTE1 above */ + mov r3, #(CACHE_DENTRIES - 1) << 4 @ 256 entries in segment +2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index + subs r3, r3, #1 << 4 + bcs 2b @ entries 255 to 0 +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mov pc, lr + +/* + * cpu_arm925_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm925_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif /* CONFIG_MMU */ + mov pc, lr + + __CPUINIT + + .type __arm925_setup, #function +__arm925_setup: + mov r0, #0 +#if defined(CONFIG_CPU_ICACHE_STREAMING_DISABLE) + orr r0,r0,#1 << 7 +#endif + + /* Transparent on, D-cache clean & flush mode. See NOTE2 above */ + orr r0,r0,#1 << 1 @ transparent mode on + mcr p15, 0, r0, c15, c1, 0 @ write TI config register + + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #4 @ disable write-back on caches explicitly + mcr p15, 7, r0, c15, c0, 0 +#endif + + adr r5, arm925_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .1.. .... .... .... +#endif + mov pc, lr + .size __arm925_setup, . - __arm925_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 0001 ..11 1101 + * + */ + .type arm925_crval, #object +arm925_crval: + crval clear=0x00007f3f, mmuset=0x0000313d, ucset=0x00001130 + + __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm925, dabort=v4t_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm925_name, "ARM925T" + + .align + + .section ".proc.info.init", #alloc, #execinstr + +.macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm925_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm925_name + .long arm925_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm925_cache_fns + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + arm925_proc_info arm925, 0x54029250, 0xfffffff0, cpu_arm925_name + arm925_proc_info arm915, 0x54029150, 0xfffffff0, cpu_arm925_name diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S new file mode 100644 index 00000000..820259b8 --- /dev/null +++ b/arch/arm/mm/proc-arm926.S @@ -0,0 +1,500 @@ +/* + * linux/arch/arm/mm/proc-arm926.S: MMU functions for ARM926EJ-S + * + * Copyright (C) 1999-2001 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * These are the low level assembler for performing cache and TLB + * functions on the arm926. + * + * CONFIG_CPU_ARM926_CPU_IDLE -> nohlt + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define CACHE_DLIMIT 16384 + +/* + * the cache line size of the I and D cache + */ +#define CACHE_DLINESIZE 32 + + .text +/* + * cpu_arm926_proc_init() + */ +ENTRY(cpu_arm926_proc_init) + mov pc, lr + +/* + * cpu_arm926_proc_fin() + */ +ENTRY(cpu_arm926_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_arm926_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm926_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_arm926_reset) + .popsection + +/* + * cpu_arm926_do_idle() + * + * Called with IRQs disabled + */ + .align 10 +ENTRY(cpu_arm926_do_idle) + mov r0, #0 + mrc p15, 0, r1, c1, c0, 0 @ Read control register + mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer + bic r2, r1, #1 << 12 + mrs r3, cpsr @ Disable FIQs while Icache + orr ip, r3, #PSR_F_BIT @ is disabled + msr cpsr_c, ip + mcr p15, 0, r2, c1, c0, 0 @ Disable I cache + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable + msr cpsr_c, r3 @ Restore FIQ state + mov pc, lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm926_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(arm926_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ +ENTRY(arm926_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm926_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else +1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate + bne 1b +#endif + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + */ +ENTRY(arm926_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bgt __flush_whole_cache +1: tst r2, #VM_EXEC +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#else + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#endif + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm926_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm926_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm926_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm926_dma_inv_range: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +#endif + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm926_dma_clean_range: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm926_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry +#else + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm926_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm926_dma_clean_range + bcs arm926_dma_inv_range + b arm926_dma_flush_range +ENDPROC(arm926_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm926_dma_unmap_area) + mov pc, lr +ENDPROC(arm926_dma_unmap_area) + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm926 + +ENTRY(cpu_arm926_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm926_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm926_switch_mm) +#ifdef CONFIG_MMU + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else +@ && 'Clean & Invalidate whole DCache' +1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate + bne 1b +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mov pc, lr + +/* + * cpu_arm926_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm926_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + mov pc, lr + +/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ +.globl cpu_arm926_suspend_size +.equ cpu_arm926_suspend_size, 4 * 3 +#ifdef CONFIG_PM_SLEEP +ENTRY(cpu_arm926_do_suspend) + stmfd sp!, {r4 - r6, lr} + mrc p15, 0, r4, c13, c0, 0 @ PID + mrc p15, 0, r5, c3, c0, 0 @ Domain ID + mrc p15, 0, r6, c1, c0, 0 @ Control register + stmia r0, {r4 - r6} + ldmfd sp!, {r4 - r6, pc} +ENDPROC(cpu_arm926_do_suspend) + +ENTRY(cpu_arm926_do_resume) + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs + mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches + ldmia r0, {r4 - r6} + mcr p15, 0, r4, c13, c0, 0 @ PID + mcr p15, 0, r5, c3, c0, 0 @ Domain ID + mcr p15, 0, r1, c2, c0, 0 @ TTB address + mov r0, r6 @ control register + b cpu_resume_mmu +ENDPROC(cpu_arm926_do_resume) +#endif + + __CPUINIT + + .type __arm926_setup, #function +__arm926_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + + +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #4 @ disable write-back on caches explicitly + mcr p15, 7, r0, c15, c0, 0 +#endif + + adr r5, arm926_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .1.. .... .... .... +#endif + mov pc, lr + .size __arm926_setup, . - __arm926_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 0001 ..11 0101 + * + */ + .type arm926_crval, #object +arm926_crval: + crval clear=0x00007f3f, mmuset=0x00003135, ucset=0x00001134 + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm926, dabort=v5tj_early_abort, pabort=legacy_pabort, suspend=1 + + .section ".rodata" + + string cpu_arch_name, "armv5tej" + string cpu_elf_name, "v5" + string cpu_arm926_name, "ARM926EJ-S" + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __arm926_proc_info,#object +__arm926_proc_info: + .long 0x41069260 @ ARM926EJ-S (v5TEJ) + .long 0xff0ffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __arm926_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA + .long cpu_arm926_name + .long arm926_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm926_cache_fns + .size __arm926_proc_info, . - __arm926_proc_info diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S new file mode 100644 index 00000000..9fdc0a17 --- /dev/null +++ b/arch/arm/mm/proc-arm940.S @@ -0,0 +1,373 @@ +/* + * linux/arch/arm/mm/arm940.S: utility functions for ARM940T + * + * Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* ARM940T has a 4KB DCache comprising 256 lines of 4 words */ +#define CACHE_DLINESIZE 16 +#define CACHE_DSEGMENTS 4 +#define CACHE_DENTRIES 64 + + .text +/* + * cpu_arm940_proc_init() + * cpu_arm940_switch_mm() + * + * These are not required. + */ +ENTRY(cpu_arm940_proc_init) +ENTRY(cpu_arm940_switch_mm) + mov pc, lr + +/* + * cpu_arm940_proc_fin() + */ +ENTRY(cpu_arm940_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x00001000 @ i-cache + bic r0, r0, #0x00000004 @ d-cache + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_arm940_reset(loc) + * Params : r0 = address to jump to + * Notes : This sets up everything for a reset + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm940_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ flush I cache + mcr p15, 0, ip, c7, c6, 0 @ flush D cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x00000005 @ .............c.p + bic ip, ip, #0x00001000 @ i-cache + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_arm940_reset) + .popsection + +/* + * cpu_arm940_do_idle() + */ + .align 5 +ENTRY(cpu_arm940_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm940_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(arm940_flush_icache_all) + +/* + * flush_user_cache_all() + */ +ENTRY(arm940_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm940_flush_kern_cache_all) + mov r2, #VM_EXEC + /* FALLTHROUGH */ + +/* + * flush_user_cache_range(start, end, flags) + * + * There is no efficient way to flush a range of cache entries + * in the specified address range. Thus, flushes all. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + */ +ENTRY(arm940_flush_user_cache_range) + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ flush D cache +#else + mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean/flush D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 4 + bcs 1b @ segments 3 to 0 +#endif + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm940_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm940_coherent_user_range) + /* FALLTHROUGH */ + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm940_flush_kern_dcache_area) + mov ip, #0 + mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean/flush D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 4 + bcs 1b @ segments 7 to 0 + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * There is no efficient way to invalidate a specifid virtual + * address range. Thus, invalidates all. + * + * - start - virtual start address + * - end - virtual end address + */ +arm940_dma_inv_range: + mov ip, #0 + mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c6, 2 @ flush D entry + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 4 + bcs 1b @ segments 7 to 0 + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * There is no efficient way to clean a specifid virtual + * address range. Thus, cleans all. + * + * - start - virtual start address + * - end - virtual end address + */ +arm940_dma_clean_range: +ENTRY(cpu_arm940_dcache_clean_area) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c10, 2 @ clean D entry + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 4 + bcs 1b @ segments 7 to 0 +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * There is no efficient way to clean and invalidate a specifid + * virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm940_dma_flush_range) + mov ip, #0 + mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r3, c7, c14, 2 @ clean/flush D entry +#else + mcr p15, 0, r3, c7, c6, 2 @ invalidate D entry +#endif + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 4 + bcs 1b @ segments 7 to 0 + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm940_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm940_dma_clean_range + bcs arm940_dma_inv_range + b arm940_dma_flush_range +ENDPROC(arm940_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm940_dma_unmap_area) + mov pc, lr +ENDPROC(arm940_dma_unmap_area) + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm940 + + __CPUINIT + + .type __arm940_setup, #function +__arm940_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c6, 0 @ invalidate D cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + + mcr p15, 0, r0, c6, c3, 0 @ disable data area 3~7 + mcr p15, 0, r0, c6, c4, 0 + mcr p15, 0, r0, c6, c5, 0 + mcr p15, 0, r0, c6, c6, 0 + mcr p15, 0, r0, c6, c7, 0 + + mcr p15, 0, r0, c6, c3, 1 @ disable instruction area 3~7 + mcr p15, 0, r0, c6, c4, 1 + mcr p15, 0, r0, c6, c5, 1 + mcr p15, 0, r0, c6, c6, 1 + mcr p15, 0, r0, c6, c7, 1 + + mov r0, #0x0000003F @ base = 0, size = 4GB + mcr p15, 0, r0, c6, c0, 0 @ set area 0, default + mcr p15, 0, r0, c6, c0, 1 + + ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM + ldr r1, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) + mov r2, #10 @ 11 is the minimum (4KB) +1: add r2, r2, #1 @ area size *= 2 + mov r1, r1, lsr #1 + bne 1b @ count not zero r-shift + orr r0, r0, r2, lsl #1 @ the area register value + orr r0, r0, #1 @ set enable bit + mcr p15, 0, r0, c6, c1, 0 @ set area 1, RAM + mcr p15, 0, r0, c6, c1, 1 + + ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH + ldr r1, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) + mov r2, #10 @ 11 is the minimum (4KB) +1: add r2, r2, #1 @ area size *= 2 + mov r1, r1, lsr #1 + bne 1b @ count not zero r-shift + orr r0, r0, r2, lsl #1 @ the area register value + orr r0, r0, #1 @ set enable bit + mcr p15, 0, r0, c6, c2, 0 @ set area 2, ROM/FLASH + mcr p15, 0, r0, c6, c2, 1 + + mov r0, #0x06 + mcr p15, 0, r0, c2, c0, 0 @ Region 1&2 cacheable + mcr p15, 0, r0, c2, c0, 1 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #0x00 @ disable whole write buffer +#else + mov r0, #0x02 @ Region 1 write bufferred +#endif + mcr p15, 0, r0, c3, c0, 0 + + mov r0, #0x10000 + sub r0, r0, #1 @ r0 = 0xffff + mcr p15, 0, r0, c5, c0, 0 @ all read/write access + mcr p15, 0, r0, c5, c0, 1 + + mrc p15, 0, r0, c1, c0 @ get control register + orr r0, r0, #0x00001000 @ I-cache + orr r0, r0, #0x00000005 @ MPU/D-cache + + mov pc, lr + + .size __arm940_setup, . - __arm940_setup + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm940, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm940_name, "ARM940T" + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __arm940_proc_info,#object +__arm940_proc_info: + .long 0x41009400 + .long 0xff00fff0 + .long 0 + b __arm940_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm940_name + .long arm940_processor_functions + .long 0 + .long 0 + .long arm940_cache_fns + .size __arm940_proc_info, . - __arm940_proc_info + diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S new file mode 100644 index 00000000..f684cfed --- /dev/null +++ b/arch/arm/mm/proc-arm946.S @@ -0,0 +1,427 @@ +/* + * linux/arch/arm/mm/arm946.S: utility functions for ARM946E-S + * + * Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com) + * + * (Many of cache codes are from proc-arm926.S) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * ARM946E-S is synthesizable to have 0KB to 1MB sized D-Cache, + * comprising 256 lines of 32 bytes (8 words). + */ +#define CACHE_DSIZE (CONFIG_CPU_DCACHE_SIZE) /* typically 8KB. */ +#define CACHE_DLINESIZE 32 /* fixed */ +#define CACHE_DSEGMENTS 4 /* fixed */ +#define CACHE_DENTRIES (CACHE_DSIZE / CACHE_DSEGMENTS / CACHE_DLINESIZE) +#define CACHE_DLIMIT (CACHE_DSIZE * 4) /* benchmark needed */ + + .text +/* + * cpu_arm946_proc_init() + * cpu_arm946_switch_mm() + * + * These are not required. + */ +ENTRY(cpu_arm946_proc_init) +ENTRY(cpu_arm946_switch_mm) + mov pc, lr + +/* + * cpu_arm946_proc_fin() + */ +ENTRY(cpu_arm946_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x00001000 @ i-cache + bic r0, r0, #0x00000004 @ d-cache + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_arm946_reset(loc) + * Params : r0 = address to jump to + * Notes : This sets up everything for a reset + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm946_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ flush I cache + mcr p15, 0, ip, c7, c6, 0 @ flush D cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x00000005 @ .............c.p + bic ip, ip, #0x00001000 @ i-cache + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_arm946_reset) + .popsection + +/* + * cpu_arm946_do_idle() + */ + .align 5 +ENTRY(cpu_arm946_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm946_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(arm946_flush_icache_all) + +/* + * flush_user_cache_all() + */ +ENTRY(arm946_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm946_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ flush D cache +#else + mov r1, #(CACHE_DSEGMENTS - 1) << 29 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 4 @ n entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean/flush D index + subs r3, r3, #1 << 4 + bcs 2b @ entries n to 0 + subs r1, r1, #1 << 29 + bcs 1b @ segments 3 to 0 +#endif + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ flush I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + * (same as arm926) + */ +ENTRY(arm946_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +1: tst r2, #VM_EXEC +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#else + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#endif + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm946_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + * (same as arm926) + */ +ENTRY(arm946_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + * (same as arm926) + */ +ENTRY(arm946_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * (same as arm926) + */ +arm946_dma_inv_range: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +#endif + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as arm926) + */ +arm946_dma_clean_range: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as arm926) + */ +ENTRY(arm946_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry +#else + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm946_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm946_dma_clean_range + bcs arm946_dma_inv_range + b arm946_dma_flush_range +ENDPROC(arm946_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm946_dma_unmap_area) + mov pc, lr +ENDPROC(arm946_dma_unmap_area) + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm946 + +ENTRY(cpu_arm946_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + __CPUINIT + + .type __arm946_setup, #function +__arm946_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c6, 0 @ invalidate D cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + + mcr p15, 0, r0, c6, c3, 0 @ disable memory region 3~7 + mcr p15, 0, r0, c6, c4, 0 + mcr p15, 0, r0, c6, c5, 0 + mcr p15, 0, r0, c6, c6, 0 + mcr p15, 0, r0, c6, c7, 0 + + mov r0, #0x0000003F @ base = 0, size = 4GB + mcr p15, 0, r0, c6, c0, 0 @ set region 0, default + + ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM + ldr r1, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) + mov r2, #10 @ 11 is the minimum (4KB) +1: add r2, r2, #1 @ area size *= 2 + mov r1, r1, lsr #1 + bne 1b @ count not zero r-shift + orr r0, r0, r2, lsl #1 @ the region register value + orr r0, r0, #1 @ set enable bit + mcr p15, 0, r0, c6, c1, 0 @ set region 1, RAM + + ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH + ldr r1, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) + mov r2, #10 @ 11 is the minimum (4KB) +1: add r2, r2, #1 @ area size *= 2 + mov r1, r1, lsr #1 + bne 1b @ count not zero r-shift + orr r0, r0, r2, lsl #1 @ the region register value + orr r0, r0, #1 @ set enable bit + mcr p15, 0, r0, c6, c2, 0 @ set region 2, ROM/FLASH + + mov r0, #0x06 + mcr p15, 0, r0, c2, c0, 0 @ region 1,2 d-cacheable + mcr p15, 0, r0, c2, c0, 1 @ region 1,2 i-cacheable +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #0x00 @ disable whole write buffer +#else + mov r0, #0x02 @ region 1 write bufferred +#endif + mcr p15, 0, r0, c3, c0, 0 + +/* + * Access Permission Settings for future permission control by PU. + * + * priv. user + * region 0 (whole) rw -- : b0001 + * region 1 (RAM) rw rw : b0011 + * region 2 (FLASH) rw r- : b0010 + * region 3~7 (none) -- -- : b0000 + */ + mov r0, #0x00000031 + orr r0, r0, #0x00000200 + mcr p15, 0, r0, c5, c0, 2 @ set data access permission + mcr p15, 0, r0, c5, c0, 3 @ set inst. access permission + + mrc p15, 0, r0, c1, c0 @ get control register + orr r0, r0, #0x00001000 @ I-cache + orr r0, r0, #0x00000005 @ MPU/D-cache +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x00004000 @ .1.. .... .... .... +#endif + mov pc, lr + + .size __arm946_setup, . - __arm946_setup + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm946, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5t" + string cpu_arm946_name, "ARM946E-S" + + .align + + .section ".proc.info.init", #alloc, #execinstr + .type __arm946_proc_info,#object +__arm946_proc_info: + .long 0x41009460 + .long 0xff00fff0 + .long 0 + .long 0 + b __arm946_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm946_name + .long arm946_processor_functions + .long 0 + .long 0 + .long arm946_cache_fns + .size __arm946_proc_info, . - __arm946_proc_info + diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S new file mode 100644 index 00000000..8881391d --- /dev/null +++ b/arch/arm/mm/proc-arm9tdmi.S @@ -0,0 +1,97 @@ +/* + * linux/arch/arm/mm/proc-arm9tdmi.S: utility functions for ARM9TDMI + * + * Copyright (C) 2003-2006 Hyok S. Choi <hyok.choi@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + + .text +/* + * cpu_arm9tdmi_proc_init() + * cpu_arm9tdmi_do_idle() + * cpu_arm9tdmi_dcache_clean_area() + * cpu_arm9tdmi_switch_mm() + * + * These are not required. + */ +ENTRY(cpu_arm9tdmi_proc_init) +ENTRY(cpu_arm9tdmi_do_idle) +ENTRY(cpu_arm9tdmi_dcache_clean_area) +ENTRY(cpu_arm9tdmi_switch_mm) + mov pc, lr + +/* + * cpu_arm9tdmi_proc_fin() + */ +ENTRY(cpu_arm9tdmi_proc_fin) + mov pc, lr + +/* + * Function: cpu_arm9tdmi_reset(loc) + * Params : loc(r0) address to jump to + * Purpose : Sets up everything for a reset and jump to the location for soft reset. + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm9tdmi_reset) + mov pc, r0 +ENDPROC(cpu_arm9tdmi_reset) + .popsection + + __CPUINIT + + .type __arm9tdmi_setup, #function +__arm9tdmi_setup: + mov pc, lr + .size __arm9tdmi_setup, . - __arm9tdmi_setup + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm9tdmi, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm9tdmi_name, "ARM9TDMI" + string cpu_p2001_name, "P2001" + + .align + + .section ".proc.info.init", #alloc, #execinstr + +.macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req + .type __\name\()_proc_info, #object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long 0 + .long 0 + b __arm9tdmi_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT + .long \cpu_name + .long arm9tdmi_processor_functions + .long 0 + .long 0 + .long v4_cache_fns + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + arm9tdmi_proc_info arm9tdmi, 0x41009900, 0xfff8ff00, cpu_arm9tdmi_name + arm9tdmi_proc_info p2001, 0x41029000, 0xffffffff, cpu_p2001_name diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S new file mode 100644 index 00000000..d217e979 --- /dev/null +++ b/arch/arm/mm/proc-fa526.S @@ -0,0 +1,221 @@ +/* + * linux/arch/arm/mm/proc-fa526.S: MMU functions for FA526 + * + * Written by : Luke Lee + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * + * These are the low level assembler for performing cache and TLB + * functions on the fa526. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + +#define CACHE_DLINESIZE 16 + + .text +/* + * cpu_fa526_proc_init() + */ +ENTRY(cpu_fa526_proc_init) + mov pc, lr + +/* + * cpu_fa526_proc_fin() + */ +ENTRY(cpu_fa526_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + nop + nop + mov pc, lr + +/* + * cpu_fa526_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 4 + .pushsection .idmap.text, "ax" +ENTRY(cpu_fa526_reset) +/* TODO: Use CP8 if possible... */ + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + bic ip, ip, #0x0800 @ BTB off + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + nop + nop + mov pc, r0 +ENDPROC(cpu_fa526_reset) + .popsection + +/* + * cpu_fa526_do_idle() + */ + .align 4 +ENTRY(cpu_fa526_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + + +ENTRY(cpu_fa526_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_fa526_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 4 +ENTRY(cpu_fa526_switch_mm) +#ifdef CONFIG_MMU + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else + mcr p15, 0, ip, c7, c14, 0 @ clean and invalidate whole D cache +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c5, 6 @ invalidate BTB since mm changed + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + mcr p15, 0, ip, c7, c5, 4 @ prefetch flush + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB +#endif + mov pc, lr + +/* + * cpu_fa526_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 4 +ENTRY(cpu_fa526_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + mov pc, lr + + __CPUINIT + + .type __fa526_setup, #function +__fa526_setup: + /* On return of this routine, r0 must carry correct flags for CFG register */ + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + mcr p15, 0, r0, c7, c5, 5 @ invalidate IScratchpad RAM + + mov r0, #1 + mcr p15, 0, r0, c1, c1, 0 @ turn-on ECR + + mov r0, #0 + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB All + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + + mov r0, #0x1f @ Domains 0, 1 = manager, 2 = client + mcr p15, 0, r0, c3, c0 @ load domain access register + + mrc p15, 0, r0, c1, c0 @ get control register v4 + ldr r5, fa526_cr1_clear + bic r0, r0, r5 + ldr r5, fa526_cr1_set + orr r0, r0, r5 + mov pc, lr + .size __fa526_setup, . - __fa526_setup + + /* + * .RVI ZFRS BLDP WCAM + * ..11 1001 .111 1101 + * + */ + .type fa526_cr1_clear, #object + .type fa526_cr1_set, #object +fa526_cr1_clear: + .word 0x3f3f +fa526_cr1_set: + .word 0x397D + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions fa526, dabort=v4_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv4" + string cpu_elf_name, "v4" + string cpu_fa526_name, "FA526" + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __fa526_proc_info,#object +__fa526_proc_info: + .long 0x66015261 + .long 0xff01fff1 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __fa526_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF + .long cpu_fa526_name + .long fa526_processor_functions + .long fa_tlb_fns + .long fa_user_fns + .long fa_cache_fns + .size __fa526_proc_info, . - __fa526_proc_info diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S new file mode 100644 index 00000000..ba3c5005 --- /dev/null +++ b/arch/arm/mm/proc-feroceon.S @@ -0,0 +1,597 @@ +/* + * linux/arch/arm/mm/proc-feroceon.S: MMU functions for Feroceon + * + * Heavily based on proc-arm926.S + * Maintainer: Assaf Hoffman <hoffman@marvell.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define CACHE_DLIMIT 16384 + +/* + * the cache line size of the I and D cache + */ +#define CACHE_DLINESIZE 32 + + .bss + .align 3 +__cache_params_loc: + .space 8 + + .text +__cache_params: + .word __cache_params_loc + +/* + * cpu_feroceon_proc_init() + */ +ENTRY(cpu_feroceon_proc_init) + mrc p15, 0, r0, c0, c0, 1 @ read cache type register + ldr r1, __cache_params + mov r2, #(16 << 5) + tst r0, #(1 << 16) @ get way + mov r0, r0, lsr #18 @ get cache size order + movne r3, #((4 - 1) << 30) @ 4-way + and r0, r0, #0xf + moveq r3, #0 @ 1-way + mov r2, r2, lsl r0 @ actual cache size + movne r2, r2, lsr #2 @ turned into # of sets + sub r2, r2, #(1 << 5) + stmia r1, {r2, r3} + mov pc, lr + +/* + * cpu_feroceon_proc_fin() + */ +ENTRY(cpu_feroceon_proc_fin) +#if defined(CONFIG_CACHE_FEROCEON_L2) && \ + !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) + mov r0, #0 + mcr p15, 1, r0, c15, c9, 0 @ clean L2 + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_feroceon_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_feroceon_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_feroceon_reset) + .popsection + +/* + * cpu_feroceon_do_idle() + * + * Called with IRQs disabled + */ + .align 5 +ENTRY(cpu_feroceon_do_idle) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mov pc, lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(feroceon_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(feroceon_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ + .align 5 +ENTRY(feroceon_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(feroceon_flush_kern_cache_all) + mov r2, #VM_EXEC + +__flush_whole_cache: + ldr r1, __cache_params + ldmia r1, {r1, r3} +1: orr ip, r1, r3 +2: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way + subs ip, ip, #(1 << 30) @ next way + bcs 2b + subs r1, r1, #(1 << 5) @ next set + bcs 1b + + tst r2, #VM_EXEC + mov ip, #0 + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + */ + .align 5 +ENTRY(feroceon_flush_user_cache_range) + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bgt __flush_whole_cache +1: tst r2, #VM_EXEC + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mov ip, #0 + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ + .align 5 +ENTRY(feroceon_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(feroceon_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ + .align 5 +ENTRY(feroceon_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + .align 5 +ENTRY(feroceon_range_flush_kern_dcache_area) + mrs r2, cpsr + add r1, r0, #PAGE_SZ - CACHE_DLINESIZE @ top addr is inclusive + orr r3, r2, #PSR_I_BIT + msr cpsr_c, r3 @ disable interrupts + mcr p15, 5, r0, c15, c15, 0 @ D clean/inv range start + mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top + msr cpsr_c, r2 @ restore interrupts + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ + .align 5 +feroceon_dma_inv_range: + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + .align 5 +feroceon_range_dma_inv_range: + mrs r2, cpsr + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry + cmp r1, r0 + subne r1, r1, #1 @ top address is inclusive + orr r3, r2, #PSR_I_BIT + msr cpsr_c, r3 @ disable interrupts + mcr p15, 5, r0, c15, c14, 0 @ D inv range start + mcr p15, 5, r1, c15, c14, 1 @ D inv range top + msr cpsr_c, r2 @ restore interrupts + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ + .align 5 +feroceon_dma_clean_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + .align 5 +feroceon_range_dma_clean_range: + mrs r2, cpsr + cmp r1, r0 + subne r1, r1, #1 @ top address is inclusive + orr r3, r2, #PSR_I_BIT + msr cpsr_c, r3 @ disable interrupts + mcr p15, 5, r0, c15, c13, 0 @ D clean range start + mcr p15, 5, r1, c15, c13, 1 @ D clean range top + msr cpsr_c, r2 @ restore interrupts + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ + .align 5 +ENTRY(feroceon_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + .align 5 +ENTRY(feroceon_range_dma_flush_range) + mrs r2, cpsr + cmp r1, r0 + subne r1, r1, #1 @ top address is inclusive + orr r3, r2, #PSR_I_BIT + msr cpsr_c, r3 @ disable interrupts + mcr p15, 5, r0, c15, c15, 0 @ D clean/inv range start + mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top + msr cpsr_c, r2 @ restore interrupts + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(feroceon_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq feroceon_dma_clean_range + bcs feroceon_dma_inv_range + b feroceon_dma_flush_range +ENDPROC(feroceon_dma_map_area) + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(feroceon_range_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq feroceon_range_dma_clean_range + bcs feroceon_range_dma_inv_range + b feroceon_range_dma_flush_range +ENDPROC(feroceon_range_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(feroceon_dma_unmap_area) + mov pc, lr +ENDPROC(feroceon_dma_unmap_area) + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions feroceon + +.macro range_alias basename + .globl feroceon_range_\basename + .type feroceon_range_\basename , %function + .equ feroceon_range_\basename , feroceon_\basename +.endm + +/* + * Most of the cache functions are unchanged for this case. + * Export suitable alias symbols for the unchanged functions: + */ + range_alias flush_icache_all + range_alias flush_user_cache_all + range_alias flush_kern_cache_all + range_alias flush_user_cache_range + range_alias coherent_kern_range + range_alias coherent_user_range + range_alias dma_unmap_area + + define_cache_functions feroceon_range + + .align 5 +ENTRY(cpu_feroceon_dcache_clean_area) +#if defined(CONFIG_CACHE_FEROCEON_L2) && \ + !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) + mov r2, r0 + mov r3, r1 +#endif +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#if defined(CONFIG_CACHE_FEROCEON_L2) && \ + !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) +1: mcr p15, 1, r2, c15, c9, 1 @ clean L2 entry + add r2, r2, #CACHE_DLINESIZE + subs r3, r3, #CACHE_DLINESIZE + bhi 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_feroceon_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_feroceon_switch_mm) +#ifdef CONFIG_MMU + /* + * Note: we wish to call __flush_whole_cache but we need to preserve + * lr to do so. The only way without touching main memory is to + * use r2 which is normally used to test the VM_EXEC flag, and + * compensate locally for the skipped ops if it is not set. + */ + mov r2, lr @ abuse r2 to preserve lr + bl __flush_whole_cache + @ if r2 contains the VM_EXEC bit then the next 2 ops are done already + tst r2, #VM_EXEC + mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcreq p15, 0, ip, c7, c10, 4 @ drain WB + + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mov pc, r2 +#else + mov pc, lr +#endif + +/* + * cpu_feroceon_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_feroceon_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext wc_disable=0 + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#if defined(CONFIG_CACHE_FEROCEON_L2) && \ + !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) + mcr p15, 1, r0, c15, c9, 1 @ clean L2 entry +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + mov pc, lr + + __CPUINIT + + .type __feroceon_setup, #function +__feroceon_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + + adr r5, feroceon_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 + mov pc, lr + .size __feroceon_setup, . - __feroceon_setup + + /* + * B + * R P + * .RVI UFRS BLDP WCAM + * .011 .001 ..11 0101 + * + */ + .type feroceon_crval, #object +feroceon_crval: + crval clear=0x0000773f, mmuset=0x00003135, ucset=0x00001134 + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions feroceon, dabort=v5t_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_feroceon_name, "Feroceon" + string cpu_88fr531_name, "Feroceon 88FR531-vd" + string cpu_88fr571_name, "Feroceon 88FR571-vd" + string cpu_88fr131_name, "Feroceon 88FR131" + + .align + + .section ".proc.info.init", #alloc, #execinstr + +.macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __feroceon_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long \cpu_name + .long feroceon_processor_functions + .long v4wbi_tlb_fns + .long feroceon_user_fns + .long \cache + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + +#ifdef CONFIG_CPU_FEROCEON_OLD_ID + feroceon_proc_info feroceon_old_id, 0x41009260, 0xff00fff0, \ + cpu_name=cpu_feroceon_name, cache=feroceon_cache_fns +#endif + + feroceon_proc_info 88fr531, 0x56055310, 0xfffffff0, cpu_88fr531_name, \ + cache=feroceon_cache_fns + feroceon_proc_info 88fr571, 0x56155710, 0xfffffff0, cpu_88fr571_name, \ + cache=feroceon_range_cache_fns + feroceon_proc_info 88fr131, 0x56251310, 0xfffffff0, cpu_88fr131_name, \ + cache=feroceon_range_cache_fns diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S new file mode 100644 index 00000000..2d8ff3ad --- /dev/null +++ b/arch/arm/mm/proc-macros.S @@ -0,0 +1,325 @@ +/* + * We need constants.h for: + * VMA_VM_MM + * VMA_VM_FLAGS + * VM_EXEC + */ +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> + +/* + * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) + */ + .macro vma_vm_mm, rd, rn + ldr \rd, [\rn, #VMA_VM_MM] + .endm + +/* + * vma_vm_flags - get vma->vm_flags + */ + .macro vma_vm_flags, rd, rn + ldr \rd, [\rn, #VMA_VM_FLAGS] + .endm + + .macro tsk_mm, rd, rn + ldr \rd, [\rn, #TI_TASK] + ldr \rd, [\rd, #TSK_ACTIVE_MM] + .endm + +/* + * act_mm - get current->active_mm + */ + .macro act_mm, rd + bic \rd, sp, #8128 + bic \rd, \rd, #63 + ldr \rd, [\rd, #TI_TASK] + ldr \rd, [\rd, #TSK_ACTIVE_MM] + .endm + +/* + * mmid - get context id from mm pointer (mm->context.id) + */ + .macro mmid, rd, rn + ldr \rd, [\rn, #MM_CONTEXT_ID] + .endm + +/* + * mask_asid - mask the ASID from the context ID + */ + .macro asid, rd, rn + and \rd, \rn, #255 + .endm + + .macro crval, clear, mmuset, ucset +#ifdef CONFIG_MMU + .word \clear + .word \mmuset +#else + .word \clear + .word \ucset +#endif + .endm + +/* + * dcache_line_size - get the minimum D-cache line size from the CTR register + * on ARMv7. + */ + .macro dcache_line_size, reg, tmp + mrc p15, 0, \tmp, c0, c0, 1 @ read ctr + lsr \tmp, \tmp, #16 + and \tmp, \tmp, #0xf @ cache line size encoding + mov \reg, #4 @ bytes per word + mov \reg, \reg, lsl \tmp @ actual cache line size + .endm + +/* + * icache_line_size - get the minimum I-cache line size from the CTR register + * on ARMv7. + */ + .macro icache_line_size, reg, tmp + mrc p15, 0, \tmp, c0, c0, 1 @ read ctr + and \tmp, \tmp, #0xf @ cache line size encoding + mov \reg, #4 @ bytes per word + mov \reg, \reg, lsl \tmp @ actual cache line size + .endm + +/* + * Sanity check the PTE configuration for the code below - which makes + * certain assumptions about how these bits are laid out. + */ +#ifdef CONFIG_MMU +#if L_PTE_SHARED != PTE_EXT_SHARED +#error PTE shared bit mismatch +#endif +#if !defined (CONFIG_ARM_LPAE) && \ + (L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\ + L_PTE_FILE+L_PTE_PRESENT) > L_PTE_SHARED +#error Invalid Linux PTE bit settings +#endif +#endif /* CONFIG_MMU */ + +/* + * The ARMv6 and ARMv7 set_pte_ext translation function. + * + * Permission translation: + * YUWD APX AP1 AP0 SVC User + * 0xxx 0 0 0 no acc no acc + * 100x 1 0 1 r/o no acc + * 10x0 1 0 1 r/o no acc + * 1011 0 0 1 r/w no acc + * 110x 0 1 0 r/w r/o + * 11x0 0 1 0 r/w r/o + * 1111 0 1 1 r/w r/w + * + * If !CONFIG_CPU_USE_DOMAINS, the following permissions are changed: + * 110x 1 1 1 r/o r/o + * 11x0 1 1 1 r/o r/o + */ + .macro armv6_mt_table pfx +\pfx\()_mt_table: + .long 0x00 @ L_PTE_MT_UNCACHED + .long PTE_EXT_TEX(1) @ L_PTE_MT_BUFFERABLE + .long PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK + .long PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED + .long 0x00 @ unused + .long 0x00 @ L_PTE_MT_MINICACHE (not present) + .long PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC + .long 0x00 @ unused + .long PTE_EXT_TEX(1) @ L_PTE_MT_DEV_WC + .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED + .long PTE_EXT_TEX(2) @ L_PTE_MT_DEV_NONSHARED + .long 0x00 @ unused + .long 0x00 @ unused + .long 0x00 @ unused + .endm + + .macro armv6_set_pte_ext pfx + str r1, [r0], #2048 @ linux version + + bic r3, r1, #0x000003fc + bic r3, r3, #PTE_TYPE_MASK + orr r3, r3, r2 + orr r3, r3, #PTE_EXT_AP0 | 2 + + adr ip, \pfx\()_mt_table + and r2, r1, #L_PTE_MT_MASK + ldr r2, [ip, r2] + + eor r1, r1, #L_PTE_DIRTY + tst r1, #L_PTE_DIRTY|L_PTE_RDONLY + orrne r3, r3, #PTE_EXT_APX + + tst r1, #L_PTE_USER + orrne r3, r3, #PTE_EXT_AP1 +#ifdef CONFIG_CPU_USE_DOMAINS + @ allow kernel read/write access to read-only user pages + tstne r3, #PTE_EXT_APX + bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 +#endif + + tst r1, #L_PTE_XN + orrne r3, r3, #PTE_EXT_XN + + orr r3, r3, r2 + + tst r1, #L_PTE_YOUNG + tstne r1, #L_PTE_PRESENT + moveq r3, #0 + + str r3, [r0] + mcr p15, 0, r0, c7, c10, 1 @ flush_pte + .endm + + +/* + * The ARMv3, ARMv4 and ARMv5 set_pte_ext translation function, + * covering most CPUs except Xscale and Xscale 3. + * + * Permission translation: + * YUWD AP SVC User + * 0xxx 0x00 no acc no acc + * 100x 0x00 r/o no acc + * 10x0 0x00 r/o no acc + * 1011 0x55 r/w no acc + * 110x 0xaa r/w r/o + * 11x0 0xaa r/w r/o + * 1111 0xff r/w r/w + */ + .macro armv3_set_pte_ext wc_disable=1 + str r1, [r0], #2048 @ linux version + + eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits + bic r2, r2, #PTE_TYPE_MASK + orr r2, r2, #PTE_TYPE_SMALL + + tst r3, #L_PTE_USER @ user? + orrne r2, r2, #PTE_SMALL_AP_URO_SRW + + tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty? + orreq r2, r2, #PTE_SMALL_AP_UNO_SRW + + tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? + movne r2, #0 + + .if \wc_disable +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + tst r2, #PTE_CACHEABLE + bicne r2, r2, #PTE_BUFFERABLE +#endif + .endif + str r2, [r0] @ hardware version + .endm + + +/* + * Xscale set_pte_ext translation, split into two halves to cope + * with work-arounds. r3 must be preserved by code between these + * two macros. + * + * Permission translation: + * YUWD AP SVC User + * 0xxx 00 no acc no acc + * 100x 00 r/o no acc + * 10x0 00 r/o no acc + * 1011 01 r/w no acc + * 110x 10 r/w r/o + * 11x0 10 r/w r/o + * 1111 11 r/w r/w + */ + .macro xscale_set_pte_ext_prologue + str r1, [r0] @ linux version + + eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits + orr r2, r2, #PTE_TYPE_EXT @ extended page + + tst r3, #L_PTE_USER @ user? + orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w + + tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty? + orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w + @ combined with user -> user r/w + .endm + + .macro xscale_set_pte_ext_epilogue + tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? + movne r2, #0 @ no -> fault + + str r2, [r0, #2048]! @ hardware version + mov ip, #0 + mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + .endm + +.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0 + .type \name\()_processor_functions, #object + .align 2 +ENTRY(\name\()_processor_functions) + .word \dabort + .word \pabort + .word cpu_\name\()_proc_init + .word cpu_\name\()_proc_fin + .word cpu_\name\()_reset + .word cpu_\name\()_do_idle + .word cpu_\name\()_dcache_clean_area + .word cpu_\name\()_switch_mm + + .if \nommu + .word 0 + .else + .word cpu_\name\()_set_pte_ext + .endif + + .if \suspend + .word cpu_\name\()_suspend_size +#ifdef CONFIG_PM_SLEEP + .word cpu_\name\()_do_suspend + .word cpu_\name\()_do_resume +#else + .word 0 + .word 0 +#endif + .else + .word 0 + .word 0 + .word 0 + .endif + + .size \name\()_processor_functions, . - \name\()_processor_functions +.endm + +.macro define_cache_functions name:req + .align 2 + .type \name\()_cache_fns, #object +ENTRY(\name\()_cache_fns) + .long \name\()_flush_icache_all + .long \name\()_flush_kern_cache_all + .long \name\()_flush_user_cache_all + .long \name\()_flush_user_cache_range + .long \name\()_coherent_kern_range + .long \name\()_coherent_user_range + .long \name\()_flush_kern_dcache_area + .long \name\()_dma_map_area + .long \name\()_dma_unmap_area + .long \name\()_dma_flush_range + .size \name\()_cache_fns, . - \name\()_cache_fns +.endm + +.macro define_tlb_functions name:req, flags_up:req, flags_smp + .type \name\()_tlb_fns, #object +ENTRY(\name\()_tlb_fns) + .long \name\()_flush_user_tlb_range + .long \name\()_flush_kern_tlb_range + .ifnb \flags_smp + ALT_SMP(.long \flags_smp ) + ALT_UP(.long \flags_up ) + .else + .long \flags_up + .endif + .size \name\()_tlb_fns, . - \name\()_tlb_fns +.endm diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S new file mode 100644 index 00000000..cdfedc5b --- /dev/null +++ b/arch/arm/mm/proc-mohawk.S @@ -0,0 +1,418 @@ +/* + * linux/arch/arm/mm/proc-mohawk.S: MMU functions for Marvell PJ1 core + * + * PJ1 (codename Mohawk) is a hybrid of the xscale3 and Marvell's own core. + * + * Heavily based on proc-arm926.S and proc-xsc3.S + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be flushed. If the + * area is larger than this, then we flush the whole cache. + */ +#define CACHE_DLIMIT 32768 + +/* + * The cache line size of the L1 D cache. + */ +#define CACHE_DLINESIZE 32 + +/* + * cpu_mohawk_proc_init() + */ +ENTRY(cpu_mohawk_proc_init) + mov pc, lr + +/* + * cpu_mohawk_proc_fin() + */ +ENTRY(cpu_mohawk_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1800 @ ...iz........... + bic r0, r0, #0x0006 @ .............ca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_mohawk_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + * + * (same as arm926) + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_mohawk_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x0007 @ .............cam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_mohawk_reset) + .popsection + +/* + * cpu_mohawk_do_idle() + * + * Called with IRQs disabled + */ + .align 5 +ENTRY(cpu_mohawk_do_idle) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c7, c0, 4 @ wait for interrupt + mov pc, lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(mohawk_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(mohawk_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ +ENTRY(mohawk_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(mohawk_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + mcr p15, 0, ip, c7, c14, 0 @ clean & invalidate all D cache + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 0 @ drain write buffer + mov pc, lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + * + * (same as arm926) + */ +ENTRY(mohawk_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bgt __flush_whole_cache +1: tst r2, #VM_EXEC + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(mohawk_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as arm926) + */ +ENTRY(mohawk_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(mohawk_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +mohawk_dma_inv_range: + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +mohawk_dma_clean_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(mohawk_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: + mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(mohawk_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq mohawk_dma_clean_range + bcs mohawk_dma_inv_range + b mohawk_dma_flush_range +ENDPROC(mohawk_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(mohawk_dma_unmap_area) + mov pc, lr +ENDPROC(mohawk_dma_unmap_area) + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions mohawk + +ENTRY(cpu_mohawk_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + +/* + * cpu_mohawk_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_mohawk_switch_mm) + mov ip, #0 + mcr p15, 0, ip, c7, c14, 0 @ clean & invalidate all D cache + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + orr r0, r0, #0x18 @ cache the page table in L2 + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mov pc, lr + +/* + * cpu_mohawk_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_mohawk_set_pte_ext) + armv3_set_pte_ext + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov pc, lr + + __CPUINIT + + .type __mohawk_setup, #function +__mohawk_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs + orr r4, r4, #0x18 @ cache the page table in L2 + mcr p15, 0, r4, c2, c0, 0 @ load page table pointer + + mov r0, #0 @ don't allow CP access + mcr p15, 0, r0, c15, c1, 0 @ write CP access register + + adr r5, mohawk_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register + bic r0, r0, r5 + orr r0, r0, r6 + mov pc, lr + + .size __mohawk_setup, . - __mohawk_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 1001 ..00 0101 + * + */ + .type mohawk_crval, #object +mohawk_crval: + crval clear=0x00007f3f, mmuset=0x00003905, ucset=0x00001134 + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions mohawk, dabort=v5t_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_mohawk_name, "Marvell 88SV331x" + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __88sv331x_proc_info,#object +__88sv331x_proc_info: + .long 0x56158000 @ Marvell 88SV331x (MOHAWK) + .long 0xfffff000 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __mohawk_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_mohawk_name + .long mohawk_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long mohawk_cache_fns + .size __88sv331x_proc_info, . - __88sv331x_proc_info diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S new file mode 100644 index 00000000..775d70fb --- /dev/null +++ b/arch/arm/mm/proc-sa110.S @@ -0,0 +1,227 @@ +/* + * linux/arch/arm/mm/proc-sa110.S + * + * Copyright (C) 1997-2002 Russell King + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * MMU functions for SA110 + * + * These are the low level assembler for performing cache and TLB + * functions on the StrongARM-110. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <mach/hardware.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + +/* + * the cache line size of the I and D cache + */ +#define DCACHELINESIZE 32 + + .text + +/* + * cpu_sa110_proc_init() + */ +ENTRY(cpu_sa110_proc_init) + mov r0, #0 + mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching + mov pc, lr + +/* + * cpu_sa110_proc_fin() + */ +ENTRY(cpu_sa110_proc_fin) + mov r0, #0 + mcr p15, 0, r0, c15, c2, 2 @ Disable clock switching + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_sa110_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_sa110_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_sa110_reset) + .popsection + +/* + * cpu_sa110_do_idle(type) + * + * Cause the processor to idle + * + * type: call type: + * 0 = slow idle + * 1 = fast idle + * 2 = switch to slow processor clock + * 3 = switch to fast processor clock + */ + .align 5 + +ENTRY(cpu_sa110_do_idle) + mcr p15, 0, ip, c15, c2, 2 @ disable clock switching + ldr r1, =UNCACHEABLE_ADDR @ load from uncacheable loc + ldr r1, [r1, #0] @ force switch to MCLK + mov r0, r0 @ safety + mov r0, r0 @ safety + mov r0, r0 @ safety + mcr p15, 0, r0, c15, c8, 2 @ Wait for interrupt, cache aligned + mov r0, r0 @ safety + mov r0, r0 @ safety + mov r0, r0 @ safety + mcr p15, 0, r0, c15, c1, 2 @ enable clock switching + mov pc, lr + +/* ================================= CACHE ================================ */ + +/* + * cpu_sa110_dcache_clean_area(addr,sz) + * + * Clean the specified entry of any caches such that the MMU + * translation fetches will obtain correct data. + * + * addr: cache-unaligned virtual address + */ + .align 5 +ENTRY(cpu_sa110_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #DCACHELINESIZE + subs r1, r1, #DCACHELINESIZE + bhi 1b + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_sa110_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_sa110_switch_mm) +#ifdef CONFIG_MMU + str lr, [sp, #-4]! + bl v4wb_flush_kern_cache_all @ clears IP + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + ldr pc, [sp], #4 +#else + mov pc, lr +#endif + +/* + * cpu_sa110_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_sa110_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext wc_disable=0 + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + mov pc, lr + + __CPUINIT + + .type __sa110_setup, #function +__sa110_setup: + mov r10, #0 + mcr p15, 0, r10, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r10, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r10, c8, c7 @ invalidate I,D TLBs on v4 +#endif + + adr r5, sa110_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 + mov pc, lr + .size __sa110_setup, . - __sa110_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..01 0001 ..11 1101 + * + */ + .type sa110_crval, #object +sa110_crval: + crval clear=0x00003f3f, mmuset=0x0000113d, ucset=0x00001130 + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions sa110, dabort=v4_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv4" + string cpu_elf_name, "v4" + string cpu_sa110_name, "StrongARM-110" + + .align + + .section ".proc.info.init", #alloc, #execinstr + + .type __sa110_proc_info,#object +__sa110_proc_info: + .long 0x4401a100 + .long 0xfffffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __sa110_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT + .long cpu_sa110_name + .long sa110_processor_functions + .long v4wb_tlb_fns + .long v4wb_user_fns + .long v4wb_cache_fns + .size __sa110_proc_info, . - __sa110_proc_info diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S new file mode 100644 index 00000000..3aa0da11 --- /dev/null +++ b/arch/arm/mm/proc-sa1100.S @@ -0,0 +1,275 @@ +/* + * linux/arch/arm/mm/proc-sa1100.S + * + * Copyright (C) 1997-2002 Russell King + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * MMU functions for SA110 + * + * These are the low level assembler for performing cache and TLB + * functions on the StrongARM-1100 and StrongARM-1110. + * + * Note that SA1100 and SA1110 share everything but their name and CPU ID. + * + * 12-jun-2000, Erik Mouw (J.A.K.Mouw@its.tudelft.nl): + * Flush the read buffer at context switches + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <mach/hardware.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> + +#include "proc-macros.S" + +/* + * the cache line size of the I and D cache + */ +#define DCACHELINESIZE 32 + + .section .text + +/* + * cpu_sa1100_proc_init() + */ +ENTRY(cpu_sa1100_proc_init) + mov r0, #0 + mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching + mcr p15, 0, r0, c9, c0, 5 @ Allow read-buffer operations from userland + mov pc, lr + +/* + * cpu_sa1100_proc_fin() + * + * Prepare the CPU for reset: + * - Disable interrupts + * - Clean and turn off caches. + */ +ENTRY(cpu_sa1100_proc_fin) + mcr p15, 0, ip, c15, c2, 2 @ Disable clock switching + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_sa1100_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_sa1100_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + mov pc, r0 +ENDPROC(cpu_sa1100_reset) + .popsection + +/* + * cpu_sa1100_do_idle(type) + * + * Cause the processor to idle + * + * type: call type: + * 0 = slow idle + * 1 = fast idle + * 2 = switch to slow processor clock + * 3 = switch to fast processor clock + */ + .align 5 +ENTRY(cpu_sa1100_do_idle) + mov r0, r0 @ 4 nop padding + mov r0, r0 + mov r0, r0 + mov r0, r0 @ 4 nop padding + mov r0, r0 + mov r0, r0 + mov r0, #0 + ldr r1, =UNCACHEABLE_ADDR @ ptr to uncacheable address + @ --- aligned to a cache line + mcr p15, 0, r0, c15, c2, 2 @ disable clock switching + ldr r1, [r1, #0] @ force switch to MCLK + mcr p15, 0, r0, c15, c8, 2 @ wait for interrupt + mov r0, r0 @ safety + mcr p15, 0, r0, c15, c1, 2 @ enable clock switching + mov pc, lr + +/* ================================= CACHE ================================ */ + +/* + * cpu_sa1100_dcache_clean_area(addr,sz) + * + * Clean the specified entry of any caches such that the MMU + * translation fetches will obtain correct data. + * + * addr: cache-unaligned virtual address + */ + .align 5 +ENTRY(cpu_sa1100_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #DCACHELINESIZE + subs r1, r1, #DCACHELINESIZE + bhi 1b + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_sa1100_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_sa1100_switch_mm) +#ifdef CONFIG_MMU + str lr, [sp, #-4]! + bl v4wb_flush_kern_cache_all @ clears IP + mcr p15, 0, ip, c9, c0, 0 @ invalidate RB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + ldr pc, [sp], #4 +#else + mov pc, lr +#endif + +/* + * cpu_sa1100_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_sa1100_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext wc_disable=0 + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + mov pc, lr + +.globl cpu_sa1100_suspend_size +.equ cpu_sa1100_suspend_size, 4 * 3 +#ifdef CONFIG_PM_SLEEP +ENTRY(cpu_sa1100_do_suspend) + stmfd sp!, {r4 - r6, lr} + mrc p15, 0, r4, c3, c0, 0 @ domain ID + mrc p15, 0, r5, c13, c0, 0 @ PID + mrc p15, 0, r6, c1, c0, 0 @ control reg + stmia r0, {r4 - r6} @ store cp regs + ldmfd sp!, {r4 - r6, pc} +ENDPROC(cpu_sa1100_do_suspend) + +ENTRY(cpu_sa1100_do_resume) + ldmia r0, {r4 - r6} @ load cp regs + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ flush I+D TLBs + mcr p15, 0, ip, c7, c7, 0 @ flush I&D cache + mcr p15, 0, ip, c9, c0, 0 @ invalidate RB + mcr p15, 0, ip, c9, c0, 5 @ allow user space to use RB + + mcr p15, 0, r4, c3, c0, 0 @ domain ID + mcr p15, 0, r1, c2, c0, 0 @ translation table base addr + mcr p15, 0, r5, c13, c0, 0 @ PID + mov r0, r6 @ control register + b cpu_resume_mmu +ENDPROC(cpu_sa1100_do_resume) +#endif + + __CPUINIT + + .type __sa1100_setup, #function +__sa1100_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + adr r5, sa1100_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 + mov pc, lr + .size __sa1100_setup, . - __sa1100_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..11 0001 ..11 1101 + * + */ + .type sa1100_crval, #object +sa1100_crval: + crval clear=0x00003f3f, mmuset=0x0000313d, ucset=0x00001130 + + __INITDATA + +/* + * SA1100 and SA1110 share the same function calls + */ + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions sa1100, dabort=v4_early_abort, pabort=legacy_pabort, suspend=1 + + .section ".rodata" + + string cpu_arch_name, "armv4" + string cpu_elf_name, "v4" + string cpu_sa1100_name, "StrongARM-1100" + string cpu_sa1110_name, "StrongARM-1110" + + .align + + .section ".proc.info.init", #alloc, #execinstr + +.macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __sa1100_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT + .long \cpu_name + .long sa1100_processor_functions + .long v4wb_tlb_fns + .long v4_mc_user_fns + .long v4wb_cache_fns + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + sa1100_proc_info sa1100, 0x4401a110, 0xfffffff0, cpu_sa1100_name + sa1100_proc_info sa1110, 0x6901b110, 0xfffffff0, cpu_sa1110_name diff --git a/arch/arm/mm/proc-syms.c b/arch/arm/mm/proc-syms.c new file mode 100644 index 00000000..3e6210b4 --- /dev/null +++ b/arch/arm/mm/proc-syms.c @@ -0,0 +1,51 @@ +/* + * linux/arch/arm/mm/proc-syms.c + * + * Copyright (C) 2000-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/mm.h> + +#include <asm/cacheflush.h> +#include <asm/proc-fns.h> +#include <asm/tlbflush.h> +#include <asm/page.h> + +#ifndef MULTI_CPU +EXPORT_SYMBOL(cpu_dcache_clean_area); +EXPORT_SYMBOL(cpu_set_pte_ext); +#else +EXPORT_SYMBOL(processor); +#endif + +#ifndef MULTI_CACHE +EXPORT_SYMBOL(__cpuc_flush_kern_all); +EXPORT_SYMBOL(__cpuc_flush_user_all); +EXPORT_SYMBOL(__cpuc_flush_user_range); +EXPORT_SYMBOL(__cpuc_coherent_kern_range); +EXPORT_SYMBOL(__cpuc_flush_dcache_area); +#else +EXPORT_SYMBOL(cpu_cache); +#endif + +#ifdef CONFIG_MMU +#ifndef MULTI_USER +EXPORT_SYMBOL(__cpu_clear_user_highpage); +EXPORT_SYMBOL(__cpu_copy_user_highpage); +#else +EXPORT_SYMBOL(cpu_user); +#endif +#endif + +/* + * No module should need to touch the TLB (and currently + * no modules do. We export this for "loadkernel" support + * (booting a new kernel from within a running kernel.) + */ +#ifdef MULTI_TLB +EXPORT_SYMBOL(cpu_tlb); +#endif diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S new file mode 100644 index 00000000..5900cd52 --- /dev/null +++ b/arch/arm/mm/proc-v6.S @@ -0,0 +1,295 @@ +/* + * linux/arch/arm/mm/proc-v6.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Modified by Catalin Marinas for noMMU support + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This is the "shell" of the ARMv6 processor support. + */ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> + +#include "proc-macros.S" + +#define D_CACHE_LINE_SIZE 32 + +#define TTB_C (1 << 0) +#define TTB_S (1 << 1) +#define TTB_IMP (1 << 2) +#define TTB_RGN_NC (0 << 3) +#define TTB_RGN_WBWA (1 << 3) +#define TTB_RGN_WT (2 << 3) +#define TTB_RGN_WB (3 << 3) + +#define TTB_FLAGS_UP TTB_RGN_WBWA +#define PMD_FLAGS_UP PMD_SECT_WB +#define TTB_FLAGS_SMP TTB_RGN_WBWA|TTB_S +#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S + +ENTRY(cpu_v6_proc_init) + mov pc, lr + +ENTRY(cpu_v6_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x0006 @ .............ca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_v6_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * - loc - location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_v6_reset) + mrc p15, 0, r1, c1, c0, 0 @ ctrl register + bic r1, r1, #0x1 @ ...............m + mcr p15, 0, r1, c1, c0, 0 @ disable MMU + mov r1, #0 + mcr p15, 0, r1, c7, c5, 4 @ ISB + mov pc, r0 +ENDPROC(cpu_v6_reset) + .popsection + +/* + * cpu_v6_do_idle() + * + * Idle the processor (eg, wait for interrupt). + * + * IRQs are already disabled. + */ +ENTRY(cpu_v6_do_idle) + mov r1, #0 + mcr p15, 0, r1, c7, c10, 4 @ DWB - WFI may enter a low-power mode + mcr p15, 0, r1, c7, c0, 4 @ wait for interrupt + mov pc, lr + +ENTRY(cpu_v6_dcache_clean_area) +#ifndef TLB_CAN_READ_FROM_L1_CACHE +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #D_CACHE_LINE_SIZE + subs r1, r1, #D_CACHE_LINE_SIZE + bhi 1b +#endif + mov pc, lr + +/* + * cpu_arm926_switch_mm(pgd_phys, tsk) + * + * Set the translation table base pointer to be pgd_phys + * + * - pgd_phys - physical address of new TTB + * + * It is assumed that: + * - we are not using split page tables + */ +ENTRY(cpu_v6_switch_mm) +#ifdef CONFIG_MMU + mov r2, #0 + ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id + ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) + ALT_UP(orr r0, r0, #TTB_FLAGS_UP) + mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB + mcr p15, 0, r2, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 + mcr p15, 0, r1, c13, c0, 1 @ set context ID +#endif + mov pc, lr + +/* + * cpu_v6_set_pte_ext(ptep, pte, ext) + * + * Set a level 2 translation table entry. + * + * - ptep - pointer to level 2 translation table entry + * (hardware version is stored at -1024 bytes) + * - pte - PTE value to store + * - ext - value for extended PTE bits + */ + armv6_mt_table cpu_v6 + +ENTRY(cpu_v6_set_pte_ext) +#ifdef CONFIG_MMU + armv6_set_pte_ext cpu_v6 +#endif + mov pc, lr + +/* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */ +.globl cpu_v6_suspend_size +.equ cpu_v6_suspend_size, 4 * 6 +#ifdef CONFIG_PM_SLEEP +ENTRY(cpu_v6_do_suspend) + stmfd sp!, {r4 - r9, lr} + mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID + mrc p15, 0, r5, c3, c0, 0 @ Domain ID + mrc p15, 0, r6, c2, c0, 1 @ Translation table base 1 + mrc p15, 0, r7, c1, c0, 1 @ auxiliary control register + mrc p15, 0, r8, c1, c0, 2 @ co-processor access control + mrc p15, 0, r9, c1, c0, 0 @ control register + stmia r0, {r4 - r9} + ldmfd sp!, {r4- r9, pc} +ENDPROC(cpu_v6_do_suspend) + +ENTRY(cpu_v6_do_resume) + mov ip, #0 + mcr p15, 0, ip, c7, c14, 0 @ clean+invalidate D cache + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c15, 0 @ clean+invalidate cache + mcr p15, 0, ip, c7, c10, 4 @ drain write buffer + mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID + ldmia r0, {r4 - r9} + mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID + mcr p15, 0, r5, c3, c0, 0 @ Domain ID + ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) + ALT_UP(orr r1, r1, #TTB_FLAGS_UP) + mcr p15, 0, r1, c2, c0, 0 @ Translation table base 0 + mcr p15, 0, r6, c2, c0, 1 @ Translation table base 1 + mcr p15, 0, r7, c1, c0, 1 @ auxiliary control register + mcr p15, 0, r8, c1, c0, 2 @ co-processor access control + mcr p15, 0, ip, c2, c0, 2 @ TTB control register + mcr p15, 0, ip, c7, c5, 4 @ ISB + mov r0, r9 @ control register + b cpu_resume_mmu +ENDPROC(cpu_v6_do_resume) +#endif + + string cpu_v6_name, "ARMv6-compatible processor" + + .align + + __CPUINIT + +/* + * __v6_setup + * + * Initialise TLB, Caches, and MMU state ready to switch the MMU + * on. Return in r0 the new CP15 C1 control register setting. + * + * We automatically detect if we have a Harvard cache, and use the + * Harvard cache control instructions insead of the unified cache + * control instructions. + * + * This should be able to cover all ARMv6 cores. + * + * It is assumed that: + * - cache type register is implemented + */ +__v6_setup: +#ifdef CONFIG_SMP + ALT_SMP(mrc p15, 0, r0, c1, c0, 1) @ Enable SMP/nAMP mode + ALT_UP(nop) + orr r0, r0, #0x20 + ALT_SMP(mcr p15, 0, r0, c1, c0, 1) + ALT_UP(nop) +#endif + + mov r0, #0 + mcr p15, 0, r0, c7, c14, 0 @ clean+invalidate D cache + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c15, 0 @ clean+invalidate cache + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7, 0 @ invalidate I + D TLBs + mcr p15, 0, r0, c2, c0, 2 @ TTB control register + ALT_SMP(orr r4, r4, #TTB_FLAGS_SMP) + ALT_UP(orr r4, r4, #TTB_FLAGS_UP) + ALT_SMP(orr r8, r8, #TTB_FLAGS_SMP) + ALT_UP(orr r8, r8, #TTB_FLAGS_UP) + mcr p15, 0, r8, c2, c0, 1 @ load TTB1 +#endif /* CONFIG_MMU */ + adr r5, v6_crval + ldmia r5, {r5, r6} +#ifdef CONFIG_CPU_ENDIAN_BE8 + orr r6, r6, #1 << 25 @ big-endian page tables +#endif + mrc p15, 0, r0, c1, c0, 0 @ read control register + bic r0, r0, r5 @ clear bits them + orr r0, r0, r6 @ set them +#ifdef CONFIG_ARM_ERRATA_364296 + /* + * Workaround for the 364296 ARM1136 r0p2 erratum (possible cache data + * corruption with hit-under-miss enabled). The conditional code below + * (setting the undocumented bit 31 in the auxiliary control register + * and the FI bit in the control register) disables hit-under-miss + * without putting the processor into full low interrupt latency mode. + */ + ldr r6, =0x4107b362 @ id for ARM1136 r0p2 + mrc p15, 0, r5, c0, c0, 0 @ get processor id + teq r5, r6 @ check for the faulty core + mrceq p15, 0, r5, c1, c0, 1 @ load aux control reg + orreq r5, r5, #(1 << 31) @ set the undocumented bit 31 + mcreq p15, 0, r5, c1, c0, 1 @ write aux control reg + orreq r0, r0, #(1 << 21) @ low interrupt latency configuration +#endif + mov pc, lr @ return to head.S:__ret + + /* + * V X F I D LR + * .... ...E PUI. .T.T 4RVI ZFRS BLDP WCAM + * rrrr rrrx xxx0 0101 xxxx xxxx x111 xxxx < forced + * 0 110 0011 1.00 .111 1101 < we want + */ + .type v6_crval, #object +v6_crval: + crval clear=0x01e0fb7f, mmuset=0x00c0387d, ucset=0x00c0187c + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions v6, dabort=v6_early_abort, pabort=v6_pabort, suspend=1 + + .section ".rodata" + + string cpu_arch_name, "armv6" + string cpu_elf_name, "v6" + .align + + .section ".proc.info.init", #alloc, #execinstr + + /* + * Match any ARMv6 processor core. + */ + .type __v6_proc_info, #object +__v6_proc_info: + .long 0x0007b000 + .long 0x0007f000 + ALT_SMP(.long \ + PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | \ + PMD_FLAGS_SMP) + ALT_UP(.long \ + PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | \ + PMD_FLAGS_UP) + .long PMD_TYPE_SECT | \ + PMD_SECT_XN | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __v6_setup + .long cpu_arch_name + .long cpu_elf_name + /* See also feat_v6_fixup() for HWCAP_TLS */ + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA|HWCAP_TLS + .long cpu_v6_name + .long v6_processor_functions + .long v6wbi_tlb_fns + .long v6_user_fns + .long v6_cache_fns + .size __v6_proc_info, . - __v6_proc_info diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S new file mode 100644 index 00000000..42ac069c --- /dev/null +++ b/arch/arm/mm/proc-v7-2level.S @@ -0,0 +1,166 @@ +/* + * arch/arm/mm/proc-v7-2level.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define TTB_S (1 << 1) +#define TTB_RGN_NC (0 << 3) +#define TTB_RGN_OC_WBWA (1 << 3) +#define TTB_RGN_OC_WT (2 << 3) +#define TTB_RGN_OC_WB (3 << 3) +#define TTB_NOS (1 << 5) +#define TTB_IRGN_NC ((0 << 0) | (0 << 6)) +#define TTB_IRGN_WBWA ((0 << 0) | (1 << 6)) +#define TTB_IRGN_WT ((1 << 0) | (0 << 6)) +#define TTB_IRGN_WB ((1 << 0) | (1 << 6)) + +/* PTWs cacheable, inner WB not shareable, outer WB not shareable */ +#define TTB_FLAGS_UP TTB_IRGN_WB|TTB_RGN_OC_WB +#define PMD_FLAGS_UP PMD_SECT_WB + +/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */ +#define TTB_FLAGS_SMP TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA +#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S + +/* + * cpu_v7_switch_mm(pgd_phys, tsk) + * + * Set the translation table base pointer to be pgd_phys + * + * - pgd_phys - physical address of new TTB + * + * It is assumed that: + * - we are not using split page tables + */ +ENTRY(cpu_v7_switch_mm) +#ifdef CONFIG_MMU + mov r2, #0 + ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id + ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) + ALT_UP(orr r0, r0, #TTB_FLAGS_UP) +#ifdef CONFIG_ARM_ERRATA_430973 + mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB +#endif +#ifdef CONFIG_ARM_ERRATA_754322 + dsb +#endif + mcr p15, 0, r1, c13, c0, 1 @ set context ID + isb + mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 + isb +#endif + mov pc, lr +ENDPROC(cpu_v7_switch_mm) + +/* + * cpu_v7_set_pte_ext(ptep, pte) + * + * Set a level 2 translation table entry. + * + * - ptep - pointer to level 2 translation table entry + * (hardware version is stored at +2048 bytes) + * - pte - PTE value to store + * - ext - value for extended PTE bits + */ +ENTRY(cpu_v7_set_pte_ext) +#ifdef CONFIG_MMU + str r1, [r0] @ linux version + + bic r3, r1, #0x000003f0 + bic r3, r3, #PTE_TYPE_MASK + orr r3, r3, r2 + orr r3, r3, #PTE_EXT_AP0 | 2 + + tst r1, #1 << 4 + orrne r3, r3, #PTE_EXT_TEX(1) + + eor r1, r1, #L_PTE_DIRTY + tst r1, #L_PTE_RDONLY | L_PTE_DIRTY + orrne r3, r3, #PTE_EXT_APX + + tst r1, #L_PTE_USER + orrne r3, r3, #PTE_EXT_AP1 +#ifdef CONFIG_CPU_USE_DOMAINS + @ allow kernel read/write access to read-only user pages + tstne r3, #PTE_EXT_APX + bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 +#endif + + tst r1, #L_PTE_XN + orrne r3, r3, #PTE_EXT_XN + + tst r1, #L_PTE_YOUNG + tstne r1, #L_PTE_PRESENT + moveq r3, #0 + + ARM( str r3, [r0, #2048]! ) + THUMB( add r0, r0, #2048 ) + THUMB( str r3, [r0] ) + mcr p15, 0, r0, c7, c10, 1 @ flush_pte +#endif + mov pc, lr +ENDPROC(cpu_v7_set_pte_ext) + + /* + * Memory region attributes with SCTLR.TRE=1 + * + * n = TEX[0],C,B + * TR = PRRR[2n+1:2n] - memory type + * IR = NMRR[2n+1:2n] - inner cacheable property + * OR = NMRR[2n+17:2n+16] - outer cacheable property + * + * n TR IR OR + * UNCACHED 000 00 + * BUFFERABLE 001 10 00 00 + * WRITETHROUGH 010 10 10 10 + * WRITEBACK 011 10 11 11 + * reserved 110 + * WRITEALLOC 111 10 01 01 + * DEV_SHARED 100 01 + * DEV_NONSHARED 100 01 + * DEV_WC 001 10 + * DEV_CACHED 011 10 + * + * Other attributes: + * + * DS0 = PRRR[16] = 0 - device shareable property + * DS1 = PRRR[17] = 1 - device shareable property + * NS0 = PRRR[18] = 0 - normal shareable property + * NS1 = PRRR[19] = 1 - normal shareable property + * NOS = PRRR[24+n] = 1 - not outer shareable + */ +.equ PRRR, 0xff0a81a8 +.equ NMRR, 0x40e040e0 + + /* + * Macro for setting up the TTBRx and TTBCR registers. + * - \ttb0 and \ttb1 updated with the corresponding flags. + */ + .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp + mcr p15, 0, \zero, c2, c0, 2 @ TTB control register + ALT_SMP(orr \ttbr0, \ttbr0, #TTB_FLAGS_SMP) + ALT_UP(orr \ttbr0, \ttbr0, #TTB_FLAGS_UP) + ALT_SMP(orr \ttbr1, \ttbr1, #TTB_FLAGS_SMP) + ALT_UP(orr \ttbr1, \ttbr1, #TTB_FLAGS_UP) + mcr p15, 0, \ttbr1, c2, c0, 1 @ load TTB1 + .endm + + __CPUINIT + + /* AT + * TFR EV X F I D LR S + * .EEE ..EE PUI. .T.T 4RVI ZWRS BLDP WCAM + * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced + * 1 0 110 0011 1100 .111 1101 < we want + */ + .align 2 + .type v7_crval, #object +v7_crval: + crval clear=0x0120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c + + .previous diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S new file mode 100644 index 00000000..8de0f1dd --- /dev/null +++ b/arch/arm/mm/proc-v7-3level.S @@ -0,0 +1,150 @@ +/* + * arch/arm/mm/proc-v7-3level.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Copyright (C) 2011 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * based on arch/arm/mm/proc-v7-2level.S + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define TTB_IRGN_NC (0 << 8) +#define TTB_IRGN_WBWA (1 << 8) +#define TTB_IRGN_WT (2 << 8) +#define TTB_IRGN_WB (3 << 8) +#define TTB_RGN_NC (0 << 10) +#define TTB_RGN_OC_WBWA (1 << 10) +#define TTB_RGN_OC_WT (2 << 10) +#define TTB_RGN_OC_WB (3 << 10) +#define TTB_S (3 << 12) +#define TTB_EAE (1 << 31) + +/* PTWs cacheable, inner WB not shareable, outer WB not shareable */ +#define TTB_FLAGS_UP (TTB_IRGN_WB|TTB_RGN_OC_WB) +#define PMD_FLAGS_UP (PMD_SECT_WB) + +/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */ +#define TTB_FLAGS_SMP (TTB_IRGN_WBWA|TTB_S|TTB_RGN_OC_WBWA) +#define PMD_FLAGS_SMP (PMD_SECT_WBWA|PMD_SECT_S) + +/* + * cpu_v7_switch_mm(pgd_phys, tsk) + * + * Set the translation table base pointer to be pgd_phys (physical address of + * the new TTB). + */ +ENTRY(cpu_v7_switch_mm) +#ifdef CONFIG_MMU + ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id + and r3, r1, #0xff + mov r3, r3, lsl #(48 - 32) @ ASID + mcrr p15, 0, r0, r3, c2 @ set TTB 0 + isb +#endif + mov pc, lr +ENDPROC(cpu_v7_switch_mm) + +/* + * cpu_v7_set_pte_ext(ptep, pte) + * + * Set a level 2 translation table entry. + * - ptep - pointer to level 3 translation table entry + * - pte - PTE value to store (64-bit in r2 and r3) + */ +ENTRY(cpu_v7_set_pte_ext) +#ifdef CONFIG_MMU + tst r2, #L_PTE_PRESENT + beq 1f + tst r3, #1 << (55 - 32) @ L_PTE_DIRTY + orreq r2, #L_PTE_RDONLY +1: strd r2, r3, [r0] + mcr p15, 0, r0, c7, c10, 1 @ flush_pte +#endif + mov pc, lr +ENDPROC(cpu_v7_set_pte_ext) + + /* + * Memory region attributes for LPAE (defined in pgtable-3level.h): + * + * n = AttrIndx[2:0] + * + * n MAIR + * UNCACHED 000 00000000 + * BUFFERABLE 001 01000100 + * DEV_WC 001 01000100 + * WRITETHROUGH 010 10101010 + * WRITEBACK 011 11101110 + * DEV_CACHED 011 11101110 + * DEV_SHARED 100 00000100 + * DEV_NONSHARED 100 00000100 + * unused 101 + * unused 110 + * WRITEALLOC 111 11111111 + */ +.equ PRRR, 0xeeaa4400 @ MAIR0 +.equ NMRR, 0xff000004 @ MAIR1 + + /* + * Macro for setting up the TTBRx and TTBCR registers. + * - \ttbr1 updated. + */ + .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp + ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address + cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET? (branch below) + mrc p15, 0, \tmp, c2, c0, 2 @ TTB control register + orr \tmp, \tmp, #TTB_EAE + ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP) + ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP) + ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP << 16) + ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP << 16) + /* + * TTBR0/TTBR1 split (PAGE_OFFSET): + * 0x40000000: T0SZ = 2, T1SZ = 0 (not used) + * 0x80000000: T0SZ = 0, T1SZ = 1 + * 0xc0000000: T0SZ = 0, T1SZ = 2 + * + * Only use this feature if PHYS_OFFSET <= PAGE_OFFSET, otherwise + * booting secondary CPUs would end up using TTBR1 for the identity + * mapping set up in TTBR0. + */ + bhi 9001f @ PHYS_OFFSET > PAGE_OFFSET? + orr \tmp, \tmp, #(((PAGE_OFFSET >> 30) - 1) << 16) @ TTBCR.T1SZ +#if defined CONFIG_VMSPLIT_2G + /* PAGE_OFFSET == 0x80000000, T1SZ == 1 */ + add \ttbr1, \ttbr1, #1 << 4 @ skip two L1 entries +#elif defined CONFIG_VMSPLIT_3G + /* PAGE_OFFSET == 0xc0000000, T1SZ == 2 */ + add \ttbr1, \ttbr1, #4096 * (1 + 3) @ only L2 used, skip pgd+3*pmd +#endif + /* CONFIG_VMSPLIT_1G does not need TTBR1 adjustment */ +9001: mcr p15, 0, \tmp, c2, c0, 2 @ TTB control register + mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1 + .endm + + __CPUINIT + + /* + * AT + * TFR EV X F IHD LR S + * .EEE ..EE PUI. .TAT 4RVI ZWRS BLDP WCAM + * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced + * 11 0 110 1 0011 1100 .111 1101 < we want + */ + .align 2 + .type v7_crval, #object +v7_crval: + crval clear=0x0120c302, mmuset=0x30c23c7d, ucset=0x00c01c7c + + .previous diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S new file mode 100644 index 00000000..c2e2b66f --- /dev/null +++ b/arch/arm/mm/proc-v7.S @@ -0,0 +1,375 @@ +/* + * linux/arch/arm/mm/proc-v7.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This is the "shell" of the ARMv7 processor support. + */ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> + +#include "proc-macros.S" + +#ifdef CONFIG_ARM_LPAE +#include "proc-v7-3level.S" +#else +#include "proc-v7-2level.S" +#endif + +ENTRY(cpu_v7_proc_init) + mov pc, lr +ENDPROC(cpu_v7_proc_init) + +ENTRY(cpu_v7_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x0006 @ .............ca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr +ENDPROC(cpu_v7_proc_fin) + +/* + * cpu_v7_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * - loc - location to jump to for soft reset + * + * This code must be executed using a flat identity mapping with + * caches disabled. + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_v7_reset) + mrc p15, 0, r1, c1, c0, 0 @ ctrl register + bic r1, r1, #0x1 @ ...............m + THUMB( bic r1, r1, #1 << 30 ) @ SCTLR.TE (Thumb exceptions) + mcr p15, 0, r1, c1, c0, 0 @ disable MMU + isb + mov pc, r0 +ENDPROC(cpu_v7_reset) + .popsection + +/* + * cpu_v7_do_idle() + * + * Idle the processor (eg, wait for interrupt). + * + * IRQs are already disabled. + */ +ENTRY(cpu_v7_do_idle) + dsb @ WFI may enter a low-power mode + wfi + mov pc, lr +ENDPROC(cpu_v7_do_idle) + +ENTRY(cpu_v7_dcache_clean_area) +#ifndef TLB_CAN_READ_FROM_L1_CACHE + dcache_line_size r2, r3 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, r2 + subs r1, r1, r2 + bhi 1b + dsb +#endif + mov pc, lr +ENDPROC(cpu_v7_dcache_clean_area) + + string cpu_v7_name, "ARMv7 Processor" + .align + +/* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */ +.globl cpu_v7_suspend_size +.equ cpu_v7_suspend_size, 4 * 8 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_v7_do_suspend) + stmfd sp!, {r4 - r10, lr} + mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID + mrc p15, 0, r5, c13, c0, 3 @ User r/o thread ID + stmia r0!, {r4 - r5} + mrc p15, 0, r6, c3, c0, 0 @ Domain ID + mrc p15, 0, r7, c2, c0, 1 @ TTB 1 + mrc p15, 0, r11, c2, c0, 2 @ TTB control register + mrc p15, 0, r8, c1, c0, 0 @ Control register + mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register + mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control + stmia r0, {r6 - r11} + ldmfd sp!, {r4 - r10, pc} +ENDPROC(cpu_v7_do_suspend) + +ENTRY(cpu_v7_do_resume) + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID + ldmia r0!, {r4 - r5} + mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID + mcr p15, 0, r5, c13, c0, 3 @ User r/o thread ID + ldmia r0, {r6 - r11} + mcr p15, 0, r6, c3, c0, 0 @ Domain ID +#ifndef CONFIG_ARM_LPAE + ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) + ALT_UP(orr r1, r1, #TTB_FLAGS_UP) +#endif + mcr p15, 0, r1, c2, c0, 0 @ TTB 0 + mcr p15, 0, r7, c2, c0, 1 @ TTB 1 + mcr p15, 0, r11, c2, c0, 2 @ TTB control register + mrc p15, 0, r4, c1, c0, 1 @ Read Auxiliary control register + teq r4, r9 @ Is it already set? + mcrne p15, 0, r9, c1, c0, 1 @ No, so write it + mcr p15, 0, r10, c1, c0, 2 @ Co-processor access control + ldr r4, =PRRR @ PRRR + ldr r5, =NMRR @ NMRR + mcr p15, 0, r4, c10, c2, 0 @ write PRRR + mcr p15, 0, r5, c10, c2, 1 @ write NMRR + isb + dsb + mov r0, r8 @ control register + b cpu_resume_mmu +ENDPROC(cpu_v7_do_resume) +#endif + + __CPUINIT + +/* + * __v7_setup + * + * Initialise TLB, Caches, and MMU state ready to switch the MMU + * on. Return in r0 the new CP15 C1 control register setting. + * + * This should be able to cover all ARMv7 cores. + * + * It is assumed that: + * - cache type register is implemented + */ +__v7_ca5mp_setup: +__v7_ca9mp_setup: + mov r10, #(1 << 0) @ TLB ops broadcasting + b 1f +__v7_ca7mp_setup: +__v7_ca15mp_setup: + mov r10, #0 +1: +#ifdef CONFIG_SMP + ALT_SMP(mrc p15, 0, r0, c1, c0, 1) + ALT_UP(mov r0, #(1 << 6)) @ fake it for UP + tst r0, #(1 << 6) @ SMP/nAMP mode enabled? + orreq r0, r0, #(1 << 6) @ Enable SMP/nAMP mode + orreq r0, r0, r10 @ Enable CPU-specific SMP bits + mcreq p15, 0, r0, c1, c0, 1 +#endif +__v7_setup: + adr r12, __v7_setup_stack @ the local stack + stmia r12, {r0-r5, r7, r9, r11, lr} + bl v7_flush_dcache_all + ldmia r12, {r0-r5, r7, r9, r11, lr} + + mrc p15, 0, r0, c0, c0, 0 @ read main ID register + and r10, r0, #0xff000000 @ ARM? + teq r10, #0x41000000 + bne 3f + and r5, r0, #0x00f00000 @ variant + and r6, r0, #0x0000000f @ revision + orr r6, r6, r5, lsr #20-4 @ combine variant and revision + ubfx r0, r0, #4, #12 @ primary part number + + /* Cortex-A8 Errata */ + ldr r10, =0x00000c08 @ Cortex-A8 primary part number + teq r0, r10 + bne 2f +#ifdef CONFIG_ARM_ERRATA_430973 + teq r5, #0x00100000 @ only present in r1p* + mrceq p15, 0, r10, c1, c0, 1 @ read aux control register + orreq r10, r10, #(1 << 6) @ set IBE to 1 + mcreq p15, 0, r10, c1, c0, 1 @ write aux control register +#endif +#ifdef CONFIG_ARM_ERRATA_458693 + teq r6, #0x20 @ only present in r2p0 + mrceq p15, 0, r10, c1, c0, 1 @ read aux control register + orreq r10, r10, #(1 << 5) @ set L1NEON to 1 + orreq r10, r10, #(1 << 9) @ set PLDNOP to 1 + mcreq p15, 0, r10, c1, c0, 1 @ write aux control register +#endif +#ifdef CONFIG_ARM_ERRATA_460075 + teq r6, #0x20 @ only present in r2p0 + mrceq p15, 1, r10, c9, c0, 2 @ read L2 cache aux ctrl register + tsteq r10, #1 << 22 + orreq r10, r10, #(1 << 22) @ set the Write Allocate disable bit + mcreq p15, 1, r10, c9, c0, 2 @ write the L2 cache aux ctrl register +#endif + b 3f + + /* Cortex-A9 Errata */ +2: ldr r10, =0x00000c09 @ Cortex-A9 primary part number + teq r0, r10 + bne 3f +#ifdef CONFIG_ARM_ERRATA_742230 + cmp r6, #0x22 @ only present up to r2p2 + mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register + orrle r10, r10, #1 << 4 @ set bit #4 + mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_742231 + teq r6, #0x20 @ present in r2p0 + teqne r6, #0x21 @ present in r2p1 + teqne r6, #0x22 @ present in r2p2 + mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register + orreq r10, r10, #1 << 12 @ set bit #12 + orreq r10, r10, #1 << 22 @ set bit #22 + mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_743622 + teq r5, #0x00200000 @ only present in r2p* + mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register + orreq r10, r10, #1 << 6 @ set bit #6 + mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP) + ALT_SMP(cmp r6, #0x30) @ present prior to r3p0 + ALT_UP_B(1f) + mrclt p15, 0, r10, c15, c0, 1 @ read diagnostic register + orrlt r10, r10, #1 << 11 @ set bit #11 + mcrlt p15, 0, r10, c15, c0, 1 @ write diagnostic register +1: +#endif + +3: mov r10, #0 + mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate + dsb +#ifdef CONFIG_MMU + mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs + v7_ttb_setup r10, r4, r8, r5 @ TTBCR, TTBRx setup + ldr r5, =PRRR @ PRRR + ldr r6, =NMRR @ NMRR + mcr p15, 0, r5, c10, c2, 0 @ write PRRR + mcr p15, 0, r6, c10, c2, 1 @ write NMRR +#endif +#ifndef CONFIG_ARM_THUMBEE + mrc p15, 0, r0, c0, c1, 0 @ read ID_PFR0 for ThumbEE + and r0, r0, #(0xf << 12) @ ThumbEE enabled field + teq r0, #(1 << 12) @ check if ThumbEE is present + bne 1f + mov r5, #0 + mcr p14, 6, r5, c1, c0, 0 @ Initialize TEEHBR to 0 + mrc p14, 6, r0, c0, c0, 0 @ load TEECR + orr r0, r0, #1 @ set the 1st bit in order to + mcr p14, 6, r0, c0, c0, 0 @ stop userspace TEEHBR access +1: +#endif + adr r5, v7_crval + ldmia r5, {r5, r6} +#ifdef CONFIG_CPU_ENDIAN_BE8 + orr r6, r6, #1 << 25 @ big-endian page tables +#endif +#ifdef CONFIG_SWP_EMULATE + orr r5, r5, #(1 << 10) @ set SW bit in "clear" + bic r6, r6, #(1 << 10) @ clear it in "mmuset" +#endif + mrc p15, 0, r0, c1, c0, 0 @ read control register + bic r0, r0, r5 @ clear bits them + orr r0, r0, r6 @ set them + THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions + mov pc, lr @ return to head.S:__ret +ENDPROC(__v7_setup) + + .align 2 +__v7_setup_stack: + .space 4 * 11 @ 11 registers + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 + + .section ".rodata" + + string cpu_arch_name, "armv7" + string cpu_elf_name, "v7" + .align + + .section ".proc.info.init", #alloc, #execinstr + + /* + * Standard v7 proc info content + */ +.macro __v7_proc initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0 + ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ + PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags) + ALT_UP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ + PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags) + .long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags + W(b) \initfunc + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \ + HWCAP_EDSP | HWCAP_TLS | \hwcaps + .long cpu_v7_name + .long v7_processor_functions + .long v7wbi_tlb_fns + .long v6_user_fns + .long v7_cache_fns +.endm + +#ifndef CONFIG_ARM_LPAE + /* + * ARM Ltd. Cortex A5 processor. + */ + .type __v7_ca5mp_proc_info, #object +__v7_ca5mp_proc_info: + .long 0x410fc050 + .long 0xff0ffff0 + __v7_proc __v7_ca5mp_setup + .size __v7_ca5mp_proc_info, . - __v7_ca5mp_proc_info + + /* + * ARM Ltd. Cortex A9 processor. + */ + .type __v7_ca9mp_proc_info, #object +__v7_ca9mp_proc_info: + .long 0x410fc090 + .long 0xff0ffff0 + __v7_proc __v7_ca9mp_setup + .size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info +#endif /* CONFIG_ARM_LPAE */ + + /* + * ARM Ltd. Cortex A7 processor. + */ + .type __v7_ca7mp_proc_info, #object +__v7_ca7mp_proc_info: + .long 0x410fc070 + .long 0xff0ffff0 + __v7_proc __v7_ca7mp_setup, hwcaps = HWCAP_IDIV + .size __v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info + + /* + * ARM Ltd. Cortex A15 processor. + */ + .type __v7_ca15mp_proc_info, #object +__v7_ca15mp_proc_info: + .long 0x410fc0f0 + .long 0xff0ffff0 + __v7_proc __v7_ca15mp_setup, hwcaps = HWCAP_IDIV + .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info + + /* + * Match any ARMv7 processor core. + */ + .type __v7_proc_info, #object +__v7_proc_info: + .long 0x000f0000 @ Required ID value + .long 0x000f0000 @ Mask for ID + __v7_proc __v7_setup + .size __v7_proc_info, . - __v7_proc_info diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S new file mode 100644 index 00000000..b0d57869 --- /dev/null +++ b/arch/arm/mm/proc-xsc3.S @@ -0,0 +1,531 @@ +/* + * linux/arch/arm/mm/proc-xsc3.S + * + * Original Author: Matthew Gilbert + * Current Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> + * + * Copyright 2004 (C) Intel Corp. + * Copyright 2005 (C) MontaVista Software, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * MMU functions for the Intel XScale3 Core (XSC3). The XSC3 core is + * an extension to Intel's original XScale core that adds the following + * features: + * + * - ARMv6 Supersections + * - Low Locality Reference pages (replaces mini-cache) + * - 36-bit addressing + * - L2 cache + * - Cache coherency if chipset supports it + * + * Based on original XScale code by Nicolas Pitre. + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable.h> +#include <asm/pgtable-hwdef.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be flushed. If the + * area is larger than this, then we flush the whole cache. + */ +#define MAX_AREA_SIZE 32768 + +/* + * The cache line size of the L1 I, L1 D and unified L2 cache. + */ +#define CACHELINESIZE 32 + +/* + * The size of the L1 D cache. + */ +#define CACHESIZE 32768 + +/* + * This macro is used to wait for a CP15 write and is needed when we + * have to ensure that the last operation to the coprocessor was + * completed before continuing with operation. + */ + .macro cpwait_ret, lr, rd + mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15 + sub pc, \lr, \rd, LSR #32 @ wait for completion and + @ flush instruction pipeline + .endm + +/* + * This macro cleans and invalidates the entire L1 D cache. + */ + + .macro clean_d_cache rd, rs + mov \rd, #0x1f00 + orr \rd, \rd, #0x00e0 +1: mcr p15, 0, \rd, c7, c14, 2 @ clean/invalidate L1 D line + adds \rd, \rd, #0x40000000 + bcc 1b + subs \rd, \rd, #0x20 + bpl 1b + .endm + + .text + +/* + * cpu_xsc3_proc_init() + * + * Nothing too exciting at the moment + */ +ENTRY(cpu_xsc3_proc_init) + mov pc, lr + +/* + * cpu_xsc3_proc_fin() + */ +ENTRY(cpu_xsc3_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1800 @ ...IZ........... + bic r0, r0, #0x0006 @ .............CA. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_xsc3_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_xsc3_reset) + mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE + msr cpsr_c, r1 @ reset CPSR + mrc p15, 0, r1, c1, c0, 0 @ ctrl register + bic r1, r1, #0x3900 @ ..VIZ..S........ + bic r1, r1, #0x0086 @ ........B....CA. + mcr p15, 0, r1, c1, c0, 0 @ ctrl register + mcr p15, 0, ip, c7, c7, 0 @ invalidate L1 caches and BTB + bic r1, r1, #0x0001 @ ...............M + mcr p15, 0, r1, c1, c0, 0 @ ctrl register + @ CAUTION: MMU turned off from this point. We count on the pipeline + @ already containing those two last instructions to survive. + mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs + mov pc, r0 +ENDPROC(cpu_xsc3_reset) + .popsection + +/* + * cpu_xsc3_do_idle() + * + * Cause the processor to idle + * + * For now we do nothing but go to idle mode for every case + * + * XScale supports clock switching, but using idle mode support + * allows external hardware to react to system state changes. + */ + .align 5 + +ENTRY(cpu_xsc3_do_idle) + mov r0, #1 + mcr p14, 0, r0, c7, c0, 0 @ go to idle + mov pc, lr + +/* ================================= CACHE ================================ */ + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(xsc3_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(xsc3_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(xsc3_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(xsc3_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + clean_d_cache r0, r1 + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB + mcrne p15, 0, ip, c7, c10, 4 @ data write barrier + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * flush_user_cache_range(start, end, vm_flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - vma - vma_area_struct describing address space + */ + .align 5 +ENTRY(xsc3_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #MAX_AREA_SIZE + bhs __flush_whole_cache + +1: tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate L1 I line + mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ data write barrier + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the I cache and the D cache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + * + * Note: single I-cache line invalidation isn't used here since + * it also trashes the mini I-cache used by JTAG debuggers. + */ +ENTRY(xsc3_coherent_kern_range) +/* FALLTHROUGH */ +ENTRY(xsc3_coherent_user_range) + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache. + * + * - addr - kernel address + * - size - region size + */ +ENTRY(xsc3_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +xsc3_dma_inv_range: + tst r0, #CACHELINESIZE - 1 + bic r0, r0, #CACHELINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean L1 D line + tst r1, #CACHELINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean L1 D line +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +xsc3_dma_clean_range: + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(xsc3_dma_flush_range) + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xsc3_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq xsc3_dma_clean_range + bcs xsc3_dma_inv_range + b xsc3_dma_flush_range +ENDPROC(xsc3_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xsc3_dma_unmap_area) + mov pc, lr +ENDPROC(xsc3_dma_unmap_area) + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions xsc3 + +ENTRY(cpu_xsc3_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line + add r0, r0, #CACHELINESIZE + subs r1, r1, #CACHELINESIZE + bhi 1b + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_xsc3_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_xsc3_switch_mm) + clean_d_cache r1, r2 + mcr p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + mcr p15, 0, ip, c7, c5, 4 @ prefetch flush + orr r0, r0, #0x18 @ cache the page table in L2 + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs + cpwait_ret lr, ip + +/* + * cpu_xsc3_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ +cpu_xsc3_mt_table: + .long 0x00 @ L_PTE_MT_UNCACHED + .long PTE_EXT_TEX(1) @ L_PTE_MT_BUFFERABLE + .long PTE_EXT_TEX(5) | PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK + .long PTE_EXT_TEX(1) | PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED + .long 0x00 @ unused + .long 0x00 @ L_PTE_MT_MINICACHE (not present) + .long PTE_EXT_TEX(5) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC (not present?) + .long 0x00 @ unused + .long PTE_EXT_TEX(1) @ L_PTE_MT_DEV_WC + .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED + .long PTE_EXT_TEX(2) @ L_PTE_MT_DEV_NONSHARED + .long 0x00 @ unused + .long 0x00 @ unused + .long 0x00 @ unused + + .align 5 +ENTRY(cpu_xsc3_set_pte_ext) + xscale_set_pte_ext_prologue + + tst r1, #L_PTE_SHARED @ shared? + and r1, r1, #L_PTE_MT_MASK + adr ip, cpu_xsc3_mt_table + ldr ip, [ip, r1] + orrne r2, r2, #PTE_EXT_COHERENT @ interlock: mask in coherent bit + bic r2, r2, #0x0c @ clear old C,B bits + orr r2, r2, ip + + xscale_set_pte_ext_epilogue + mov pc, lr + + .ltorg + .align + +.globl cpu_xsc3_suspend_size +.equ cpu_xsc3_suspend_size, 4 * 6 +#ifdef CONFIG_PM_SLEEP +ENTRY(cpu_xsc3_do_suspend) + stmfd sp!, {r4 - r9, lr} + mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode + mrc p15, 0, r5, c15, c1, 0 @ CP access reg + mrc p15, 0, r6, c13, c0, 0 @ PID + mrc p15, 0, r7, c3, c0, 0 @ domain ID + mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg + mrc p15, 0, r9, c1, c0, 0 @ control reg + bic r4, r4, #2 @ clear frequency change bit + stmia r0, {r4 - r9} @ store cp regs + ldmia sp!, {r4 - r9, pc} +ENDPROC(cpu_xsc3_do_suspend) + +ENTRY(cpu_xsc3_do_resume) + ldmia r0, {r4 - r9} @ load cp regs + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB + mcr p15, 0, ip, c7, c10, 4 @ drain write (&fill) buffer + mcr p15, 0, ip, c7, c5, 4 @ flush prefetch buffer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. + mcr p15, 0, r5, c15, c1, 0 @ CP access reg + mcr p15, 0, r6, c13, c0, 0 @ PID + mcr p15, 0, r7, c3, c0, 0 @ domain ID + orr r1, r1, #0x18 @ cache the page table in L2 + mcr p15, 0, r1, c2, c0, 0 @ translation table base addr + mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg + mov r0, r9 @ control register + b cpu_resume_mmu +ENDPROC(cpu_xsc3_do_resume) +#endif + + __CPUINIT + + .type __xsc3_setup, #function +__xsc3_setup: + mov r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE + msr cpsr_c, r0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate L1 caches and BTB + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + mcr p15, 0, ip, c7, c5, 4 @ prefetch flush + mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs + orr r4, r4, #0x18 @ cache the page table in L2 + mcr p15, 0, r4, c2, c0, 0 @ load page table pointer + + mov r0, #1 << 6 @ cp6 access for early sched_clock + mcr p15, 0, r0, c15, c1, 0 @ write CP access register + + mrc p15, 0, r0, c1, c0, 1 @ get auxiliary control reg + and r0, r0, #2 @ preserve bit P bit setting + orr r0, r0, #(1 << 10) @ enable L2 for LLR cache + mcr p15, 0, r0, c1, c0, 1 @ set auxiliary control reg + + adr r5, xsc3_crval + ldmia r5, {r5, r6} + +#ifdef CONFIG_CACHE_XSC3L2 + mrc p15, 1, r0, c0, c0, 1 @ get L2 present information + ands r0, r0, #0xf8 + orrne r6, r6, #(1 << 26) @ enable L2 if present +#endif + + mrc p15, 0, r0, c1, c0, 0 @ get control register + bic r0, r0, r5 @ ..V. ..R. .... ..A. + orr r0, r0, r6 @ ..VI Z..S .... .C.M (mmu) + @ ...I Z..S .... .... (uc) + mov pc, lr + + .size __xsc3_setup, . - __xsc3_setup + + .type xsc3_crval, #object +xsc3_crval: + crval clear=0x04002202, mmuset=0x00003905, ucset=0x00001900 + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions xsc3, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1 + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_xsc3_name, "XScale-V3 based processor" + + .align + + .section ".proc.info.init", #alloc, #execinstr + +.macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __xsc3_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_xsc3_name + .long xsc3_processor_functions + .long v4wbi_tlb_fns + .long xsc3_mc_user_fns + .long xsc3_cache_fns + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + xsc3_proc_info xsc3, 0x69056000, 0xffffe000 + +/* Note: PXA935 changed its implementor ID from Intel to Marvell */ + xsc3_proc_info xsc3_pxa935, 0x56056000, 0xffffe000 diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S new file mode 100644 index 00000000..4ffebaa5 --- /dev/null +++ b/arch/arm/mm/proc-xscale.S @@ -0,0 +1,658 @@ +/* + * linux/arch/arm/mm/proc-xscale.S + * + * Author: Nicolas Pitre + * Created: November 2000 + * Copyright: (C) 2000, 2001 MontaVista Software Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * MMU functions for the Intel XScale CPUs + * + * 2001 Aug 21: + * some contributions by Brett Gaines <brett.w.gaines@intel.com> + * Copyright 2001 by Intel Corp. + * + * 2001 Sep 08: + * Completely revisited, many important fixes + * Nicolas Pitre <nico@fluxnic.net> + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable.h> +#include <asm/pgtable-hwdef.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be flushed. If the area + * is larger than this, then we flush the whole cache + */ +#define MAX_AREA_SIZE 32768 + +/* + * the cache line size of the I and D cache + */ +#define CACHELINESIZE 32 + +/* + * the size of the data cache + */ +#define CACHESIZE 32768 + +/* + * Virtual address used to allocate the cache when flushed + * + * This must be an address range which is _never_ used. It should + * apparently have a mapping in the corresponding page table for + * compatibility with future CPUs that _could_ require it. For instance we + * don't care. + * + * This must be aligned on a 2*CACHESIZE boundary. The code selects one of + * the 2 areas in alternance each time the clean_d_cache macro is used. + * Without this the XScale core exhibits cache eviction problems and no one + * knows why. + * + * Reminder: the vector table is located at 0xffff0000-0xffff0fff. + */ +#define CLEAN_ADDR 0xfffe0000 + +/* + * This macro is used to wait for a CP15 write and is needed + * when we have to ensure that the last operation to the co-pro + * was completed before continuing with operation. + */ + .macro cpwait, rd + mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15 + mov \rd, \rd @ wait for completion + sub pc, pc, #4 @ flush instruction pipeline + .endm + + .macro cpwait_ret, lr, rd + mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15 + sub pc, \lr, \rd, LSR #32 @ wait for completion and + @ flush instruction pipeline + .endm + +/* + * This macro cleans the entire dcache using line allocate. + * The main loop has been unrolled to reduce loop overhead. + * rd and rs are two scratch registers. + */ + .macro clean_d_cache, rd, rs + ldr \rs, =clean_addr + ldr \rd, [\rs] + eor \rd, \rd, #CACHESIZE + str \rd, [\rs] + add \rs, \rd, #CACHESIZE +1: mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line + add \rd, \rd, #CACHELINESIZE + mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line + add \rd, \rd, #CACHELINESIZE + mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line + add \rd, \rd, #CACHELINESIZE + mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line + add \rd, \rd, #CACHELINESIZE + teq \rd, \rs + bne 1b + .endm + + .data +clean_addr: .word CLEAN_ADDR + + .text + +/* + * cpu_xscale_proc_init() + * + * Nothing too exciting at the moment + */ +ENTRY(cpu_xscale_proc_init) + @ enable write buffer coalescing. Some bootloader disable it + mrc p15, 0, r1, c1, c0, 1 + bic r1, r1, #1 + mcr p15, 0, r1, c1, c0, 1 + mov pc, lr + +/* + * cpu_xscale_proc_fin() + */ +ENTRY(cpu_xscale_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1800 @ ...IZ........... + bic r0, r0, #0x0006 @ .............CA. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + mov pc, lr + +/* + * cpu_xscale_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + * + * Beware PXA270 erratum E7. + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_xscale_reset) + mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE + msr cpsr_c, r1 @ reset CPSR + mcr p15, 0, r1, c10, c4, 1 @ unlock I-TLB + mcr p15, 0, r1, c8, c5, 0 @ invalidate I-TLB + mrc p15, 0, r1, c1, c0, 0 @ ctrl register + bic r1, r1, #0x0086 @ ........B....CA. + bic r1, r1, #0x3900 @ ..VIZ..S........ + sub pc, pc, #4 @ flush pipeline + @ *** cache line aligned *** + mcr p15, 0, r1, c1, c0, 0 @ ctrl register + bic r1, r1, #0x0001 @ ...............M + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches & BTB + mcr p15, 0, r1, c1, c0, 0 @ ctrl register + @ CAUTION: MMU turned off from this point. We count on the pipeline + @ already containing those two last instructions to survive. + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mov pc, r0 +ENDPROC(cpu_xscale_reset) + .popsection + +/* + * cpu_xscale_do_idle() + * + * Cause the processor to idle + * + * For now we do nothing but go to idle mode for every case + * + * XScale supports clock switching, but using idle mode support + * allows external hardware to react to system state changes. + */ + .align 5 + +ENTRY(cpu_xscale_do_idle) + mov r0, #1 + mcr p14, 0, r0, c7, c0, 0 @ Go to IDLE + mov pc, lr + +/* ================================= CACHE ================================ */ + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(xscale_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mov pc, lr +ENDPROC(xscale_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(xscale_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(xscale_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + clean_d_cache r0, r1 + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB + mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr + +/* + * flush_user_cache_range(start, end, vm_flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - vma - vma_area_struct describing address space + */ + .align 5 +ENTRY(xscale_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #MAX_AREA_SIZE + bhs __flush_whole_cache + +1: tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ Invalidate I cache line + mcr p15, 0, r0, c7, c10, 1 @ Clean D cache line + mcr p15, 0, r0, c7, c6, 1 @ Invalidate D cache line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 6 @ Invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + * + * Note: single I-cache line invalidation isn't used here since + * it also trashes the mini I-cache used by JTAG debuggers. + */ +ENTRY(xscale_coherent_kern_range) + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(xscale_coherent_user_range) + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ Invalidate I cache entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 6 @ Invalidate BTB + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(xscale_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +xscale_dma_inv_range: + tst r0, #CACHELINESIZE - 1 + bic r0, r0, #CACHELINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHELINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +xscale_dma_clean_range: + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(xscale_dma_flush_range) + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + mov pc, lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xscale_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq xscale_dma_clean_range + bcs xscale_dma_inv_range + b xscale_dma_flush_range +ENDPROC(xscale_dma_map_area) + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xscale_80200_A0_A1_dma_map_area) + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + beq xscale_dma_clean_range + b xscale_dma_flush_range +ENDPROC(xscale_80200_A0_A1_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xscale_dma_unmap_area) + mov pc, lr +ENDPROC(xscale_dma_unmap_area) + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions xscale + +/* + * On stepping A0/A1 of the 80200, invalidating D-cache by line doesn't + * clear the dirty bits, which means that if we invalidate a dirty line, + * the dirty data can still be written back to external memory later on. + * + * The recommended workaround is to always do a clean D-cache line before + * doing an invalidate D-cache line, so on the affected processors, + * dma_inv_range() is implemented as dma_flush_range(). + * + * See erratum #25 of "Intel 80200 Processor Specification Update", + * revision January 22, 2003, available at: + * http://www.intel.com/design/iio/specupdt/273415.htm + */ +.macro a0_alias basename + .globl xscale_80200_A0_A1_\basename + .type xscale_80200_A0_A1_\basename , %function + .equ xscale_80200_A0_A1_\basename , xscale_\basename +.endm + +/* + * Most of the cache functions are unchanged for these processor revisions. + * Export suitable alias symbols for the unchanged functions: + */ + a0_alias flush_icache_all + a0_alias flush_user_cache_all + a0_alias flush_kern_cache_all + a0_alias flush_user_cache_range + a0_alias coherent_kern_range + a0_alias coherent_user_range + a0_alias flush_kern_dcache_area + a0_alias dma_flush_range + a0_alias dma_unmap_area + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions xscale_80200_A0_A1 + +ENTRY(cpu_xscale_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHELINESIZE + subs r1, r1, #CACHELINESIZE + bhi 1b + mov pc, lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_xscale_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_xscale_switch_mm) + clean_d_cache r1, r2 + mcr p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB + mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + cpwait_ret lr, ip + +/* + * cpu_xscale_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + * + * Errata 40: must set memory to write-through for user read-only pages. + */ +cpu_xscale_mt_table: + .long 0x00 @ L_PTE_MT_UNCACHED + .long PTE_BUFFERABLE @ L_PTE_MT_BUFFERABLE + .long PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK + .long PTE_EXT_TEX(1) | PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED + .long 0x00 @ unused + .long PTE_EXT_TEX(1) | PTE_CACHEABLE @ L_PTE_MT_MINICACHE + .long PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC + .long 0x00 @ unused + .long PTE_BUFFERABLE @ L_PTE_MT_DEV_WC + .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED + .long 0x00 @ L_PTE_MT_DEV_NONSHARED + .long 0x00 @ unused + .long 0x00 @ unused + .long 0x00 @ unused + + .align 5 +ENTRY(cpu_xscale_set_pte_ext) + xscale_set_pte_ext_prologue + + @ + @ Erratum 40: must set memory to write-through for user read-only pages + @ + and ip, r1, #(L_PTE_MT_MASK | L_PTE_USER | L_PTE_RDONLY) & ~(4 << 2) + teq ip, #L_PTE_MT_WRITEBACK | L_PTE_USER | L_PTE_RDONLY + + moveq r1, #L_PTE_MT_WRITETHROUGH + and r1, r1, #L_PTE_MT_MASK + adr ip, cpu_xscale_mt_table + ldr ip, [ip, r1] + bic r2, r2, #0x0c + orr r2, r2, ip + + xscale_set_pte_ext_epilogue + mov pc, lr + + .ltorg + .align + +.globl cpu_xscale_suspend_size +.equ cpu_xscale_suspend_size, 4 * 6 +#ifdef CONFIG_PM_SLEEP +ENTRY(cpu_xscale_do_suspend) + stmfd sp!, {r4 - r9, lr} + mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode + mrc p15, 0, r5, c15, c1, 0 @ CP access reg + mrc p15, 0, r6, c13, c0, 0 @ PID + mrc p15, 0, r7, c3, c0, 0 @ domain ID + mrc p15, 0, r8, c1, c1, 0 @ auxiliary control reg + mrc p15, 0, r9, c1, c0, 0 @ control reg + bic r4, r4, #2 @ clear frequency change bit + stmia r0, {r4 - r9} @ store cp regs + ldmfd sp!, {r4 - r9, pc} +ENDPROC(cpu_xscale_do_suspend) + +ENTRY(cpu_xscale_do_resume) + ldmia r0, {r4 - r9} @ load cp regs + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB + mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. + mcr p15, 0, r5, c15, c1, 0 @ CP access reg + mcr p15, 0, r6, c13, c0, 0 @ PID + mcr p15, 0, r7, c3, c0, 0 @ domain ID + mcr p15, 0, r1, c2, c0, 0 @ translation table base addr + mcr p15, 0, r8, c1, c1, 0 @ auxiliary control reg + mov r0, r9 @ control register + b cpu_resume_mmu +ENDPROC(cpu_xscale_do_resume) +#endif + + __CPUINIT + + .type __xscale_setup, #function +__xscale_setup: + mcr p15, 0, ip, c7, c7, 0 @ invalidate I, D caches & BTB + mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I, D TLBs + mov r0, #1 << 6 @ cp6 for IOP3xx and Bulverde + orr r0, r0, #1 << 13 @ Its undefined whether this + mcr p15, 0, r0, c15, c1, 0 @ affects USR or SVC modes + + adr r5, xscale_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0, 0 @ get control register + bic r0, r0, r5 + orr r0, r0, r6 + mov pc, lr + .size __xscale_setup, . - __xscale_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..11 1.01 .... .101 + * + */ + .type xscale_crval, #object +xscale_crval: + crval clear=0x00003b07, mmuset=0x00003905, ucset=0x00001900 + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions xscale, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1 + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + + string cpu_80200_A0_A1_name, "XScale-80200 A0/A1" + string cpu_80200_name, "XScale-80200" + string cpu_80219_name, "XScale-80219" + string cpu_8032x_name, "XScale-IOP8032x Family" + string cpu_8033x_name, "XScale-IOP8033x Family" + string cpu_pxa250_name, "XScale-PXA250" + string cpu_pxa210_name, "XScale-PXA210" + string cpu_ixp42x_name, "XScale-IXP42x Family" + string cpu_ixp43x_name, "XScale-IXP43x Family" + string cpu_ixp46x_name, "XScale-IXP46x Family" + string cpu_ixp2400_name, "XScale-IXP2400" + string cpu_ixp2800_name, "XScale-IXP2800" + string cpu_pxa255_name, "XScale-PXA255" + string cpu_pxa270_name, "XScale-PXA270" + + .align + + .section ".proc.info.init", #alloc, #execinstr + +.macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __xscale_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long \cpu_name + .long xscale_processor_functions + .long v4wbi_tlb_fns + .long xscale_mc_user_fns + .ifb \cache + .long xscale_cache_fns + .else + .long \cache + .endif + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + xscale_proc_info 80200_A0_A1, 0x69052000, 0xfffffffe, cpu_80200_name, \ + cache=xscale_80200_A0_A1_cache_fns + xscale_proc_info 80200, 0x69052000, 0xfffffff0, cpu_80200_name + xscale_proc_info 80219, 0x69052e20, 0xffffffe0, cpu_80219_name + xscale_proc_info 8032x, 0x69052420, 0xfffff7e0, cpu_8032x_name + xscale_proc_info 8033x, 0x69054010, 0xfffffd30, cpu_8033x_name + xscale_proc_info pxa250, 0x69052100, 0xfffff7f0, cpu_pxa250_name + xscale_proc_info pxa210, 0x69052120, 0xfffff3f0, cpu_pxa210_name + xscale_proc_info ixp2400, 0x69054190, 0xfffffff0, cpu_ixp2400_name + xscale_proc_info ixp2800, 0x690541a0, 0xfffffff0, cpu_ixp2800_name + xscale_proc_info ixp42x, 0x690541c0, 0xffffffc0, cpu_ixp42x_name + xscale_proc_info ixp43x, 0x69054040, 0xfffffff0, cpu_ixp43x_name + xscale_proc_info ixp46x, 0x69054200, 0xffffff00, cpu_ixp46x_name + xscale_proc_info pxa255, 0x69052d00, 0xfffffff0, cpu_pxa255_name + xscale_proc_info pxa270, 0x69054110, 0xfffffff0, cpu_pxa270_name diff --git a/arch/arm/mm/rodata.c b/arch/arm/mm/rodata.c new file mode 100644 index 00000000..9a8eb841 --- /dev/null +++ b/arch/arm/mm/rodata.c @@ -0,0 +1,159 @@ +/* + * linux/arch/arm/mm/rodata.c + * + * Copyright (C) 2011 Google, Inc. + * + * Author: Colin Cross <ccross@android.com> + * + * Based on x86 implementation in arch/x86/mm/init_32.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/module.h> + +#include <asm/cache.h> +#include <asm/pgtable.h> +#include <asm/rodata.h> +#include <asm/sections.h> +#include <asm/tlbflush.h> + +#include "mm.h" + +static int kernel_set_to_readonly __read_mostly; + +#ifdef CONFIG_DEBUG_RODATA_TEST +static const int rodata_test_data = 0xC3; + +static noinline void rodata_test(void) +{ + int result; + + pr_info("%s: attempting to write to read-only section:\n", __func__); + + if (*(volatile int *)&rodata_test_data != 0xC3) { + pr_err("read only data changed before test\n"); + return; + } + + /* + * Attempt to to write to rodata_test_data, trapping the expected + * data abort. If the trap executed, result will be 1. If it didn't, + * result will be 0xFF. + */ + asm volatile( + "0: str %[zero], [%[rodata_test_data]]\n" + " mov %[result], #0xFF\n" + " b 2f\n" + "1: mov %[result], #1\n" + "2:\n" + + /* Exception fixup - if store at label 0 faults, jumps to 1 */ + ".pushsection __ex_table, \"a\"\n" + " .long 0b, 1b\n" + ".popsection\n" + + : [result] "=r" (result) + : [rodata_test_data] "r" (&rodata_test_data), [zero] "r" (0) + : "memory" + ); + + if (result == 1) + pr_info("write to read-only section trapped, success\n"); + else + pr_err("write to read-only section NOT trapped, test failed\n"); + + if (*(volatile int *)&rodata_test_data != 0xC3) + pr_err("read only data changed during write\n"); +} +#else +static inline void rodata_test(void) { } +#endif + +static int set_page_attributes(unsigned long virt, int numpages, + pte_t (*f)(pte_t)) +{ + pmd_t *pmd; + pte_t *pte; + unsigned long start = virt; + unsigned long end = virt + (numpages << PAGE_SHIFT); + unsigned long pmd_end; + + while (virt < end) { + pmd = pmd_off_k(virt); + pmd_end = min(ALIGN(virt + 1, PMD_SIZE), end); + + if ((pmd_val(*pmd) & PMD_TYPE_MASK) != PMD_TYPE_TABLE) { + pr_err("%s: pmd %p=%08lx for %08lx not page table\n", + __func__, pmd, pmd_val(*pmd), virt); + virt = pmd_end; + continue; + } + + while (virt < pmd_end) { + pte = pte_offset_kernel(pmd, virt); + set_pte_ext(pte, f(*pte), 0); + virt += PAGE_SIZE; + } + } + + flush_tlb_kernel_range(start, end); + + return 0; +} + +int set_memory_ro(unsigned long virt, int numpages) +{ + return set_page_attributes(virt, numpages, pte_wrprotect); +} +EXPORT_SYMBOL(set_memory_ro); + +int set_memory_rw(unsigned long virt, int numpages) +{ + return set_page_attributes(virt, numpages, pte_mkwrite); +} +EXPORT_SYMBOL(set_memory_rw); + +void set_kernel_text_rw(void) +{ + unsigned long start = PAGE_ALIGN((unsigned long)_text); + unsigned long size = PAGE_ALIGN((unsigned long)__end_rodata) - start; + + if (!kernel_set_to_readonly) + return; + + pr_debug("Set kernel text: %lx - %lx to read-write\n", + start, start + size); + + set_memory_rw(start, size >> PAGE_SHIFT); +} + +void set_kernel_text_ro(void) +{ + unsigned long start = PAGE_ALIGN((unsigned long)_text); + unsigned long size = PAGE_ALIGN((unsigned long)__end_rodata) - start; + + if (!kernel_set_to_readonly) + return; + + pr_info_once("Write protecting the kernel text section %lx - %lx\n", + start, start + size); + + pr_debug("Set kernel text: %lx - %lx to read only\n", + start, start + size); + + set_memory_ro(start, size >> PAGE_SHIFT); +} + +void mark_rodata_ro(void) +{ + kernel_set_to_readonly = 1; + + set_kernel_text_ro(); + + rodata_test(); +} diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S new file mode 100644 index 00000000..d3ddcf9a --- /dev/null +++ b/arch/arm/mm/tlb-fa.S @@ -0,0 +1,69 @@ +/* + * linux/arch/arm/mm/tlb-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt> + * + * Based on tlb-v4wbi.S: + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARM architecture version 4, Faraday variation. + * This assume an unified TLBs, with a write buffer, and branch target buffer (BTB) + * + * Processors: FA520 FA526 FA626 + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/asm-offsets.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + + +/* + * flush_user_tlb_range(start, end, mm) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - range start address + * - end - range end address + * - mm - mm_struct describing address space + */ + .align 4 +ENTRY(fa_flush_user_tlb_range) + vma_vm_mm ip, r2 + act_mm r3 @ get current->active_mm + eors r3, ip, r3 @ == mm ? + movne pc, lr @ no, we dont do anything + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mcr p15, 0, r3, c7, c10, 4 @ data write barrier + mov pc, lr + + +ENTRY(fa_flush_kern_tlb_range) + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mcr p15, 0, r3, c7, c10, 4 @ data write barrier + mcr p15, 0, r3, c7, c5, 4 @ prefetch flush (isb) + mov pc, lr + + __INITDATA + + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions fa, fa_tlb_flags diff --git a/arch/arm/mm/tlb-v3.S b/arch/arm/mm/tlb-v3.S new file mode 100644 index 00000000..d253995e --- /dev/null +++ b/arch/arm/mm/tlb-v3.S @@ -0,0 +1,48 @@ +/* + * linux/arch/arm/mm/tlbv3.S + * + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARM architecture version 3 TLB handling functions. + * + * Processors: ARM610, ARM710. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/asm-offsets.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + + .align 5 +/* + * v3_flush_user_tlb_range(start, end, mm) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - range start address + * - end - range end address + * - mm - mm_struct describing address space + */ + .align 5 +ENTRY(v3_flush_user_tlb_range) + vma_vm_mm r2, r2 + act_mm r3 @ get current->active_mm + teq r2, r3 @ == mm ? + movne pc, lr @ no, we dont do anything +ENTRY(v3_flush_kern_tlb_range) + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c6, c0, 0 @ invalidate TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mov pc, lr + + __INITDATA + + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v3, v3_tlb_flags diff --git a/arch/arm/mm/tlb-v4.S b/arch/arm/mm/tlb-v4.S new file mode 100644 index 00000000..17a025ad --- /dev/null +++ b/arch/arm/mm/tlb-v4.S @@ -0,0 +1,61 @@ +/* + * linux/arch/arm/mm/tlbv4.S + * + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARM architecture version 4 TLB handling functions. + * These assume a split I/D TLBs, and no write buffer. + * + * Processors: ARM720T + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/asm-offsets.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + + .align 5 +/* + * v4_flush_user_tlb_range(start, end, mm) + * + * Invalidate a range of TLB entries in the specified user address space. + * + * - start - range start address + * - end - range end address + * - mm - mm_struct describing address space + */ + .align 5 +ENTRY(v4_flush_user_tlb_range) + vma_vm_mm ip, r2 + act_mm r3 @ get current->active_mm + eors r3, ip, r3 @ == mm ? + movne pc, lr @ no, we dont do anything +.v4_flush_kern_tlb_range: + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c7, 1 @ invalidate TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mov pc, lr + +/* + * v4_flush_kern_tlb_range(start, end) + * + * Invalidate a range of TLB entries in the specified kernel + * address range. + * + * - start - virtual address (may not be aligned) + * - end - virtual address (may not be aligned) + */ +.globl v4_flush_kern_tlb_range +.equ v4_flush_kern_tlb_range, .v4_flush_kern_tlb_range + + __INITDATA + + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v4, v4_tlb_flags diff --git a/arch/arm/mm/tlb-v4wb.S b/arch/arm/mm/tlb-v4wb.S new file mode 100644 index 00000000..c04598fa --- /dev/null +++ b/arch/arm/mm/tlb-v4wb.S @@ -0,0 +1,73 @@ +/* + * linux/arch/arm/mm/tlbv4wb.S + * + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARM architecture version 4 TLB handling functions. + * These assume a split I/D TLBs w/o I TLB entry, with a write buffer. + * + * Processors: SA110 SA1100 SA1110 + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/asm-offsets.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + + .align 5 +/* + * v4wb_flush_user_tlb_range(start, end, mm) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - range start address + * - end - range end address + * - mm - mm_struct describing address space + */ + .align 5 +ENTRY(v4wb_flush_user_tlb_range) + vma_vm_mm ip, r2 + act_mm r3 @ get current->active_mm + eors r3, ip, r3 @ == mm ? + movne pc, lr @ no, we dont do anything + vma_vm_flags r2, r2 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + tst r2, #VM_EXEC + mcrne p15, 0, r3, c8, c5, 0 @ invalidate I TLB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mov pc, lr + +/* + * v4_flush_kern_tlb_range(start, end) + * + * Invalidate a range of TLB entries in the specified kernel + * address range. + * + * - start - virtual address (may not be aligned) + * - end - virtual address (may not be aligned) + */ +ENTRY(v4wb_flush_kern_tlb_range) + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 + mcr p15, 0, r3, c8, c5, 0 @ invalidate I TLB +1: mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mov pc, lr + + __INITDATA + + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v4wb, v4wb_tlb_flags diff --git a/arch/arm/mm/tlb-v4wbi.S b/arch/arm/mm/tlb-v4wbi.S new file mode 100644 index 00000000..1f6062b6 --- /dev/null +++ b/arch/arm/mm/tlb-v4wbi.S @@ -0,0 +1,64 @@ +/* + * linux/arch/arm/mm/tlbv4wbi.S + * + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARM architecture version 4 and version 5 TLB handling functions. + * These assume a split I/D TLBs, with a write buffer. + * + * Processors: ARM920 ARM922 ARM925 ARM926 XScale + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/asm-offsets.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + +/* + * v4wb_flush_user_tlb_range(start, end, mm) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - range start address + * - end - range end address + * - mm - mm_struct describing address space + */ + .align 5 +ENTRY(v4wbi_flush_user_tlb_range) + vma_vm_mm ip, r2 + act_mm r3 @ get current->active_mm + eors r3, ip, r3 @ == mm ? + movne pc, lr @ no, we dont do anything + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + vma_vm_flags r2, r2 + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: tst r2, #VM_EXEC + mcrne p15, 0, r0, c8, c5, 1 @ invalidate I TLB entry + mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mov pc, lr + +ENTRY(v4wbi_flush_kern_tlb_range) + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c5, 1 @ invalidate I TLB entry + mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mov pc, lr + + __INITDATA + + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v4wbi, v4wbi_tlb_flags diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S new file mode 100644 index 00000000..eca07f55 --- /dev/null +++ b/arch/arm/mm/tlb-v6.S @@ -0,0 +1,92 @@ +/* + * linux/arch/arm/mm/tlb-v6.S + * + * Copyright (C) 1997-2002 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARM architecture version 6 TLB handling functions. + * These assume a split I/D TLB. + */ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/page.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + +#define HARVARD_TLB + +/* + * v6wbi_flush_user_tlb_range(start, end, vma) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - vma - vma_struct describing address range + * + * It is assumed that: + * - the "Invalidate single entry" instruction will invalidate + * both the I and the D TLBs on Harvard-style TLBs + */ +ENTRY(v6wbi_flush_user_tlb_range) + vma_vm_mm r3, r2 @ get vma->vm_mm + mov ip, #0 + mmid r3, r3 @ get vm_mm->context.id + mcr p15, 0, ip, c7, c10, 4 @ drain write buffer + mov r0, r0, lsr #PAGE_SHIFT @ align address + mov r1, r1, lsr #PAGE_SHIFT + asid r3, r3 @ mask ASID + orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA + mov r1, r1, lsl #PAGE_SHIFT + vma_vm_flags r2, r2 @ get vma->vm_flags +1: +#ifdef HARVARD_TLB + mcr p15, 0, r0, c8, c6, 1 @ TLB invalidate D MVA (was 1) + tst r2, #VM_EXEC @ Executable area ? + mcrne p15, 0, r0, c8, c5, 1 @ TLB invalidate I MVA (was 1) +#else + mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate MVA (was 1) +#endif + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mcr p15, 0, ip, c7, c10, 4 @ data synchronization barrier + mov pc, lr + +/* + * v6wbi_flush_kern_tlb_range(start,end) + * + * Invalidate a range of kernel TLB entries + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + */ +ENTRY(v6wbi_flush_kern_tlb_range) + mov r2, #0 + mcr p15, 0, r2, c7, c10, 4 @ drain write buffer + mov r0, r0, lsr #PAGE_SHIFT @ align address + mov r1, r1, lsr #PAGE_SHIFT + mov r0, r0, lsl #PAGE_SHIFT + mov r1, r1, lsl #PAGE_SHIFT +1: +#ifdef HARVARD_TLB + mcr p15, 0, r0, c8, c6, 1 @ TLB invalidate D MVA + mcr p15, 0, r0, c8, c5, 1 @ TLB invalidate I MVA +#else + mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate MVA +#endif + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mcr p15, 0, r2, c7, c10, 4 @ data synchronization barrier + mcr p15, 0, r2, c7, c5, 4 @ prefetch flush (isb) + mov pc, lr + + __INIT + + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v6wbi, v6wbi_tlb_flags diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S new file mode 100644 index 00000000..845f461f --- /dev/null +++ b/arch/arm/mm/tlb-v7.S @@ -0,0 +1,83 @@ +/* + * linux/arch/arm/mm/tlb-v7.S + * + * Copyright (C) 1997-2002 Russell King + * Modified for ARMv7 by Catalin Marinas + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * ARM architecture version 6 TLB handling functions. + * These assume a split I/D TLB. + */ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/page.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + +/* + * v7wbi_flush_user_tlb_range(start, end, vma) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - vma - vma_struct describing address range + * + * It is assumed that: + * - the "Invalidate single entry" instruction will invalidate + * both the I and the D TLBs on Harvard-style TLBs + */ +ENTRY(v7wbi_flush_user_tlb_range) + vma_vm_mm r3, r2 @ get vma->vm_mm + mmid r3, r3 @ get vm_mm->context.id + dsb + mov r0, r0, lsr #PAGE_SHIFT @ align address + mov r1, r1, lsr #PAGE_SHIFT + asid r3, r3 @ mask ASID + orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA + mov r1, r1, lsl #PAGE_SHIFT +1: + ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) + ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA + + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + dsb + mov pc, lr +ENDPROC(v7wbi_flush_user_tlb_range) + +/* + * v7wbi_flush_kern_tlb_range(start,end) + * + * Invalidate a range of kernel TLB entries + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + */ +ENTRY(v7wbi_flush_kern_tlb_range) + dsb + mov r0, r0, lsr #PAGE_SHIFT @ align address + mov r1, r1, lsr #PAGE_SHIFT + mov r0, r0, lsl #PAGE_SHIFT + mov r1, r1, lsl #PAGE_SHIFT +1: + ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) + ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + dsb + isb + mov pc, lr +ENDPROC(v7wbi_flush_kern_tlb_range) + + __INIT + + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v7wbi, v7wbi_tlb_flags_up, flags_smp=v7wbi_tlb_flags_smp diff --git a/arch/arm/mm/vmregion.c b/arch/arm/mm/vmregion.c new file mode 100644 index 00000000..a631016e --- /dev/null +++ b/arch/arm/mm/vmregion.c @@ -0,0 +1,205 @@ +#include <linux/fs.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/slab.h> + +#include "vmregion.h" + +/* + * VM region handling support. + * + * This should become something generic, handling VM region allocations for + * vmalloc and similar (ioremap, module space, etc). + * + * I envisage vmalloc()'s supporting vm_struct becoming: + * + * struct vm_struct { + * struct vmregion region; + * unsigned long flags; + * struct page **pages; + * unsigned int nr_pages; + * unsigned long phys_addr; + * }; + * + * get_vm_area() would then call vmregion_alloc with an appropriate + * struct vmregion head (eg): + * + * struct vmregion vmalloc_head = { + * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), + * .vm_start = VMALLOC_START, + * .vm_end = VMALLOC_END, + * }; + * + * However, vmalloc_head.vm_start is variable (typically, it is dependent on + * the amount of RAM found at boot time.) I would imagine that get_vm_area() + * would have to initialise this each time prior to calling vmregion_alloc(). + */ + +struct arm_vmregion * +arm_vmregion_alloc(struct arm_vmregion_head *head, size_t align, + size_t size, gfp_t gfp, const void *caller) +{ + unsigned long start = head->vm_start, addr = head->vm_end; + unsigned long flags; + struct arm_vmregion *c, *new; + + if (head->vm_end - head->vm_start < size) { + printk(KERN_WARNING "%s: allocation too big (requested %#x)\n", + __func__, size); + goto out; + } + + new = kmalloc(sizeof(struct arm_vmregion), gfp); + if (!new) + goto out; + + new->caller = caller; + + spin_lock_irqsave(&head->vm_lock, flags); + + addr = rounddown(addr - size, align); + list_for_each_entry_reverse(c, &head->vm_list, vm_list) { + if (addr >= c->vm_end) + goto found; + addr = rounddown(c->vm_start - size, align); + if (addr < start) + goto nospc; + } + + found: + /* + * Insert this entry after the one we found. + */ + list_add(&new->vm_list, &c->vm_list); + new->vm_start = addr; + new->vm_end = addr + size; + new->vm_active = 1; + + spin_unlock_irqrestore(&head->vm_lock, flags); + return new; + + nospc: + spin_unlock_irqrestore(&head->vm_lock, flags); + kfree(new); + out: + return NULL; +} + +static struct arm_vmregion *__arm_vmregion_find(struct arm_vmregion_head *head, unsigned long addr) +{ + struct arm_vmregion *c; + + list_for_each_entry(c, &head->vm_list, vm_list) { + if (c->vm_active && c->vm_start == addr) + goto out; + } + c = NULL; + out: + return c; +} + +struct arm_vmregion *arm_vmregion_find(struct arm_vmregion_head *head, unsigned long addr) +{ + struct arm_vmregion *c; + unsigned long flags; + + spin_lock_irqsave(&head->vm_lock, flags); + c = __arm_vmregion_find(head, addr); + spin_unlock_irqrestore(&head->vm_lock, flags); + return c; +} + +struct arm_vmregion *arm_vmregion_find_remove(struct arm_vmregion_head *head, unsigned long addr) +{ + struct arm_vmregion *c; + unsigned long flags; + + spin_lock_irqsave(&head->vm_lock, flags); + c = __arm_vmregion_find(head, addr); + if (c) + c->vm_active = 0; + spin_unlock_irqrestore(&head->vm_lock, flags); + return c; +} + +void arm_vmregion_free(struct arm_vmregion_head *head, struct arm_vmregion *c) +{ + unsigned long flags; + + spin_lock_irqsave(&head->vm_lock, flags); + list_del(&c->vm_list); + spin_unlock_irqrestore(&head->vm_lock, flags); + + kfree(c); +} + +#ifdef CONFIG_PROC_FS +static int arm_vmregion_show(struct seq_file *m, void *p) +{ + struct arm_vmregion *c = list_entry(p, struct arm_vmregion, vm_list); + + seq_printf(m, "0x%08lx-0x%08lx %7lu", c->vm_start, c->vm_end, + c->vm_end - c->vm_start); + if (c->caller) + seq_printf(m, " %pS", (void *)c->caller); + seq_putc(m, '\n'); + return 0; +} + +static void *arm_vmregion_start(struct seq_file *m, loff_t *pos) +{ + struct arm_vmregion_head *h = m->private; + spin_lock_irq(&h->vm_lock); + return seq_list_start(&h->vm_list, *pos); +} + +static void *arm_vmregion_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct arm_vmregion_head *h = m->private; + return seq_list_next(p, &h->vm_list, pos); +} + +static void arm_vmregion_stop(struct seq_file *m, void *p) +{ + struct arm_vmregion_head *h = m->private; + spin_unlock_irq(&h->vm_lock); +} + +static const struct seq_operations arm_vmregion_ops = { + .start = arm_vmregion_start, + .stop = arm_vmregion_stop, + .next = arm_vmregion_next, + .show = arm_vmregion_show, +}; + +static int arm_vmregion_open(struct inode *inode, struct file *file) +{ + struct arm_vmregion_head *h = PDE(inode)->data; + int ret = seq_open(file, &arm_vmregion_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = h; + } + return ret; +} + +static const struct file_operations arm_vmregion_fops = { + .open = arm_vmregion_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +int arm_vmregion_create_proc(const char *path, struct arm_vmregion_head *h) +{ + proc_create_data(path, S_IRUSR, NULL, &arm_vmregion_fops, h); + return 0; +} +#else +int arm_vmregion_create_proc(const char *path, struct arm_vmregion_head *h) +{ + return 0; +} +#endif diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h new file mode 100644 index 00000000..162be662 --- /dev/null +++ b/arch/arm/mm/vmregion.h @@ -0,0 +1,32 @@ +#ifndef VMREGION_H +#define VMREGION_H + +#include <linux/spinlock.h> +#include <linux/list.h> + +struct page; + +struct arm_vmregion_head { + spinlock_t vm_lock; + struct list_head vm_list; + unsigned long vm_start; + unsigned long vm_end; +}; + +struct arm_vmregion { + struct list_head vm_list; + unsigned long vm_start; + unsigned long vm_end; + struct page *vm_pages; + int vm_active; + const void *caller; +}; + +struct arm_vmregion *arm_vmregion_alloc(struct arm_vmregion_head *, size_t, size_t, gfp_t, const void *); +struct arm_vmregion *arm_vmregion_find(struct arm_vmregion_head *, unsigned long); +struct arm_vmregion *arm_vmregion_find_remove(struct arm_vmregion_head *, unsigned long); +void arm_vmregion_free(struct arm_vmregion_head *, struct arm_vmregion *); + +int arm_vmregion_create_proc(const char *, struct arm_vmregion_head *); + +#endif |