summaryrefslogtreecommitdiff
path: root/init
diff options
context:
space:
mode:
authorSrikant Patnaik2015-01-11 12:28:04 +0530
committerSrikant Patnaik2015-01-11 12:28:04 +0530
commit871480933a1c28f8a9fed4c4d34d06c439a7a422 (patch)
tree8718f573808810c2a1e8cb8fb6ac469093ca2784 /init
parent9d40ac5867b9aefe0722bc1f110b965ff294d30d (diff)
downloadFOSSEE-netbook-kernel-source-871480933a1c28f8a9fed4c4d34d06c439a7a422.tar.gz
FOSSEE-netbook-kernel-source-871480933a1c28f8a9fed4c4d34d06c439a7a422.tar.bz2
FOSSEE-netbook-kernel-source-871480933a1c28f8a9fed4c4d34d06c439a7a422.zip
Moved, renamed, and deleted files
The original directory structure was scattered and unorganized. Changes are basically to make it look like kernel structure.
Diffstat (limited to 'init')
-rw-r--r--init/Kconfig1445
-rw-r--r--init/Makefile32
-rw-r--r--init/calibrate.c301
-rw-r--r--init/do_mounts.c560
-rw-r--r--init/do_mounts.h76
-rw-r--r--init/do_mounts_initrd.c125
-rw-r--r--init/do_mounts_md.c302
-rw-r--r--init/do_mounts_rd.c340
-rw-r--r--init/initramfs.c611
-rw-r--r--init/main.c894
-rw-r--r--init/noinitramfs.c52
-rw-r--r--init/version.c48
12 files changed, 4786 insertions, 0 deletions
diff --git a/init/Kconfig b/init/Kconfig
new file mode 100644
index 00000000..a7cffc83
--- /dev/null
+++ b/init/Kconfig
@@ -0,0 +1,1445 @@
+config ARCH
+ string
+ option env="ARCH"
+
+config KERNELVERSION
+ string
+ option env="KERNELVERSION"
+
+config DEFCONFIG_LIST
+ string
+ depends on !UML
+ option defconfig_list
+ default "/lib/modules/$UNAME_RELEASE/.config"
+ default "/etc/kernel-config"
+ default "/boot/config-$UNAME_RELEASE"
+ default "$ARCH_DEFCONFIG"
+ default "arch/$ARCH/defconfig"
+
+config CONSTRUCTORS
+ bool
+ depends on !UML
+
+config HAVE_IRQ_WORK
+ bool
+
+config IRQ_WORK
+ bool
+ depends on HAVE_IRQ_WORK
+
+menu "General setup"
+
+config EXPERIMENTAL
+ bool "Prompt for development and/or incomplete code/drivers"
+ ---help---
+ Some of the various things that Linux supports (such as network
+ drivers, file systems, network protocols, etc.) can be in a state
+ of development where the functionality, stability, or the level of
+ testing is not yet high enough for general use. This is usually
+ known as the "alpha-test" phase among developers. If a feature is
+ currently in alpha-test, then the developers usually discourage
+ uninformed widespread use of this feature by the general public to
+ avoid "Why doesn't this work?" type mail messages. However, active
+ testing and use of these systems is welcomed. Just be aware that it
+ may not meet the normal level of reliability or it may fail to work
+ in some special cases. Detailed bug reports from people familiar
+ with the kernel internals are usually welcomed by the developers
+ (before submitting bug reports, please read the documents
+ <file:README>, <file:MAINTAINERS>, <file:REPORTING-BUGS>,
+ <file:Documentation/BUG-HUNTING>, and
+ <file:Documentation/oops-tracing.txt> in the kernel source).
+
+ This option will also make obsoleted drivers available. These are
+ drivers that have been replaced by something else, and/or are
+ scheduled to be removed in a future kernel release.
+
+ Unless you intend to help test and develop a feature or driver that
+ falls into this category, or you have a situation that requires
+ using these features, you should probably say N here, which will
+ cause the configurator to present you with fewer choices. If
+ you say Y here, you will be offered the choice of using features or
+ drivers that are currently considered to be in the alpha-test phase.
+
+config BROKEN
+ bool
+
+config BROKEN_ON_SMP
+ bool
+ depends on BROKEN || !SMP
+ default y
+
+config INIT_ENV_ARG_LIMIT
+ int
+ default 32 if !UML
+ default 128 if UML
+ help
+ Maximum of each of the number of arguments and environment
+ variables passed to init from the kernel command line.
+
+
+config CROSS_COMPILE
+ string "Cross-compiler tool prefix"
+ help
+ Same as running 'make CROSS_COMPILE=prefix-' but stored for
+ default make runs in this kernel build directory. You don't
+ need to set this unless you want the configured kernel build
+ directory to select the cross-compiler automatically.
+
+config LOCALVERSION
+ string "Local version - append to kernel release"
+ help
+ Append an extra string to the end of your kernel version.
+ This will show up when you type uname, for example.
+ The string you set here will be appended after the contents of
+ any files with a filename matching localversion* in your
+ object and source tree, in that order. Your total string can
+ be a maximum of 64 characters.
+
+config LOCALVERSION_AUTO
+ bool "Automatically append version information to the version string"
+ default y
+ help
+ This will try to automatically determine if the current tree is a
+ release tree by looking for git tags that belong to the current
+ top of tree revision.
+
+ A string of the format -gxxxxxxxx will be added to the localversion
+ if a git-based tree is found. The string generated by this will be
+ appended after any matching localversion* files, and after the value
+ set in CONFIG_LOCALVERSION.
+
+ (The actual string used here is the first eight characters produced
+ by running the command:
+
+ $ git rev-parse --verify HEAD
+
+ which is done within the script "scripts/setlocalversion".)
+
+config HAVE_KERNEL_GZIP
+ bool
+
+config HAVE_KERNEL_BZIP2
+ bool
+
+config HAVE_KERNEL_LZMA
+ bool
+
+config HAVE_KERNEL_XZ
+ bool
+
+config HAVE_KERNEL_LZO
+ bool
+
+choice
+ prompt "Kernel compression mode"
+ default KERNEL_GZIP
+ depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO
+ help
+ The linux kernel is a kind of self-extracting executable.
+ Several compression algorithms are available, which differ
+ in efficiency, compression and decompression speed.
+ Compression speed is only relevant when building a kernel.
+ Decompression speed is relevant at each boot.
+
+ If you have any problems with bzip2 or lzma compressed
+ kernels, mail me (Alain Knaff) <alain@knaff.lu>. (An older
+ version of this functionality (bzip2 only), for 2.4, was
+ supplied by Christian Ludwig)
+
+ High compression options are mostly useful for users, who
+ are low on disk space (embedded systems), but for whom ram
+ size matters less.
+
+ If in doubt, select 'gzip'
+
+config KERNEL_GZIP
+ bool "Gzip"
+ depends on HAVE_KERNEL_GZIP
+ help
+ The old and tried gzip compression. It provides a good balance
+ between compression ratio and decompression speed.
+
+config KERNEL_BZIP2
+ bool "Bzip2"
+ depends on HAVE_KERNEL_BZIP2
+ help
+ Its compression ratio and speed is intermediate.
+ Decompression speed is slowest among the three. The kernel
+ size is about 10% smaller with bzip2, in comparison to gzip.
+ Bzip2 uses a large amount of memory. For modern kernels you
+ will need at least 8MB RAM or more for booting.
+
+config KERNEL_LZMA
+ bool "LZMA"
+ depends on HAVE_KERNEL_LZMA
+ help
+ The most recent compression algorithm.
+ Its ratio is best, decompression speed is between the other
+ two. Compression is slowest. The kernel size is about 33%
+ smaller with LZMA in comparison to gzip.
+
+config KERNEL_XZ
+ bool "XZ"
+ depends on HAVE_KERNEL_XZ
+ help
+ XZ uses the LZMA2 algorithm and instruction set specific
+ BCJ filters which can improve compression ratio of executable
+ code. The size of the kernel is about 30% smaller with XZ in
+ comparison to gzip. On architectures for which there is a BCJ
+ filter (i386, x86_64, ARM, IA-64, PowerPC, and SPARC), XZ
+ will create a few percent smaller kernel than plain LZMA.
+
+ The speed is about the same as with LZMA: The decompression
+ speed of XZ is better than that of bzip2 but worse than gzip
+ and LZO. Compression is slow.
+
+config KERNEL_LZO
+ bool "LZO"
+ depends on HAVE_KERNEL_LZO
+ help
+ Its compression ratio is the poorest among the 4. The kernel
+ size is about 10% bigger than gzip; however its speed
+ (both compression and decompression) is the fastest.
+
+endchoice
+
+config DEFAULT_HOSTNAME
+ string "Default hostname"
+ default "(none)"
+ help
+ This option determines the default system hostname before userspace
+ calls sethostname(2). The kernel traditionally uses "(none)" here,
+ but you may wish to use a different default here to make a minimal
+ system more usable with less configuration.
+
+config SWAP
+ bool "Support for paging of anonymous memory (swap)"
+ depends on MMU && BLOCK
+ default y
+ help
+ This option allows you to choose whether you want to have support
+ for so called swap devices or swap files in your kernel that are
+ used to provide more virtual memory than the actual RAM present
+ in your computer. If unsure say Y.
+
+config SYSVIPC
+ bool "System V IPC"
+ ---help---
+ Inter Process Communication is a suite of library functions and
+ system calls which let processes (running programs) synchronize and
+ exchange information. It is generally considered to be a good thing,
+ and some programs won't run unless you say Y here. In particular, if
+ you want to run the DOS emulator dosemu under Linux (read the
+ DOSEMU-HOWTO, available from <http://www.tldp.org/docs.html#howto>),
+ you'll need to say Y here.
+
+ You can find documentation about IPC with "info ipc" and also in
+ section 6.4 of the Linux Programmer's Guide, available from
+ <http://www.tldp.org/guides.html>.
+
+config SYSVIPC_SYSCTL
+ bool
+ depends on SYSVIPC
+ depends on SYSCTL
+ default y
+
+config POSIX_MQUEUE
+ bool "POSIX Message Queues"
+ depends on NET && EXPERIMENTAL
+ ---help---
+ POSIX variant of message queues is a part of IPC. In POSIX message
+ queues every message has a priority which decides about succession
+ of receiving it by a process. If you want to compile and run
+ programs written e.g. for Solaris with use of its POSIX message
+ queues (functions mq_*) say Y here.
+
+ POSIX message queues are visible as a filesystem called 'mqueue'
+ and can be mounted somewhere if you want to do filesystem
+ operations on message queues.
+
+ If unsure, say Y.
+
+config POSIX_MQUEUE_SYSCTL
+ bool
+ depends on POSIX_MQUEUE
+ depends on SYSCTL
+ default y
+
+config BSD_PROCESS_ACCT
+ bool "BSD Process Accounting"
+ help
+ If you say Y here, a user level program will be able to instruct the
+ kernel (via a special system call) to write process accounting
+ information to a file: whenever a process exits, information about
+ that process will be appended to the file by the kernel. The
+ information includes things such as creation time, owning user,
+ command name, memory usage, controlling terminal etc. (the complete
+ list is in the struct acct in <file:include/linux/acct.h>). It is
+ up to the user level program to do useful things with this
+ information. This is generally a good idea, so say Y.
+
+config BSD_PROCESS_ACCT_V3
+ bool "BSD Process Accounting version 3 file format"
+ depends on BSD_PROCESS_ACCT
+ default n
+ help
+ If you say Y here, the process accounting information is written
+ in a new file format that also logs the process IDs of each
+ process and it's parent. Note that this file format is incompatible
+ with previous v0/v1/v2 file formats, so you will need updated tools
+ for processing it. A preliminary version of these tools is available
+ at <http://www.gnu.org/software/acct/>.
+
+config FHANDLE
+ bool "open by fhandle syscalls"
+ select EXPORTFS
+ help
+ If you say Y here, a user level program will be able to map
+ file names to handle and then later use the handle for
+ different file system operations. This is useful in implementing
+ userspace file servers, which now track files using handles instead
+ of names. The handle would remain the same even if file names
+ get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2)
+ syscalls.
+
+config TASKSTATS
+ bool "Export task/process statistics through netlink (EXPERIMENTAL)"
+ depends on NET
+ default n
+ help
+ Export selected statistics for tasks/processes through the
+ generic netlink interface. Unlike BSD process accounting, the
+ statistics are available during the lifetime of tasks/processes as
+ responses to commands. Like BSD accounting, they are sent to user
+ space on task exit.
+
+ Say N if unsure.
+
+config TASK_DELAY_ACCT
+ bool "Enable per-task delay accounting (EXPERIMENTAL)"
+ depends on TASKSTATS
+ help
+ Collect information on time spent by a task waiting for system
+ resources like cpu, synchronous block I/O completion and swapping
+ in pages. Such statistics can help in setting a task's priorities
+ relative to other tasks for cpu, io, rss limits etc.
+
+ Say N if unsure.
+
+config TASK_XACCT
+ bool "Enable extended accounting over taskstats (EXPERIMENTAL)"
+ depends on TASKSTATS
+ help
+ Collect extended task accounting data and send the data
+ to userland for processing over the taskstats interface.
+
+ Say N if unsure.
+
+config TASK_IO_ACCOUNTING
+ bool "Enable per-task storage I/O accounting (EXPERIMENTAL)"
+ depends on TASK_XACCT
+ help
+ Collect information on the number of bytes of storage I/O which this
+ task has caused.
+
+ Say N if unsure.
+
+config AUDIT
+ bool "Auditing support"
+ depends on NET
+ help
+ Enable auditing infrastructure that can be used with another
+ kernel subsystem, such as SELinux (which requires this for
+ logging of avc messages output). Does not do system-call
+ auditing without CONFIG_AUDITSYSCALL.
+
+config AUDITSYSCALL
+ bool "Enable system-call auditing support"
+ depends on AUDIT && (X86 || PPC || S390 || IA64 || UML || SPARC64 || SUPERH || ARM)
+ default y if SECURITY_SELINUX
+ help
+ Enable low-overhead system-call auditing infrastructure that
+ can be used independently or with another kernel subsystem,
+ such as SELinux.
+
+config AUDIT_WATCH
+ def_bool y
+ depends on AUDITSYSCALL
+ select FSNOTIFY
+
+config AUDIT_TREE
+ def_bool y
+ depends on AUDITSYSCALL
+ select FSNOTIFY
+
+config AUDIT_LOGINUID_IMMUTABLE
+ bool "Make audit loginuid immutable"
+ depends on AUDIT
+ help
+ The config option toggles if a task setting its loginuid requires
+ CAP_SYS_AUDITCONTROL or if that task should require no special permissions
+ but should instead only allow setting its loginuid if it was never
+ previously set. On systems which use systemd or a similar central
+ process to restart login services this should be set to true. On older
+ systems in which an admin would typically have to directly stop and
+ start processes this should be set to false. Setting this to true allows
+ one to drop potentially dangerous capabilites from the login tasks,
+ but may not be backwards compatible with older init systems.
+
+source "kernel/irq/Kconfig"
+
+menu "RCU Subsystem"
+
+choice
+ prompt "RCU Implementation"
+ default TREE_RCU
+
+config TREE_RCU
+ bool "Tree-based hierarchical RCU"
+ depends on !PREEMPT && SMP
+ help
+ This option selects the RCU implementation that is
+ designed for very large SMP system with hundreds or
+ thousands of CPUs. It also scales down nicely to
+ smaller systems.
+
+config TREE_PREEMPT_RCU
+ bool "Preemptible tree-based hierarchical RCU"
+ depends on PREEMPT && SMP
+ help
+ This option selects the RCU implementation that is
+ designed for very large SMP systems with hundreds or
+ thousands of CPUs, but for which real-time response
+ is also required. It also scales down nicely to
+ smaller systems.
+
+config TINY_RCU
+ bool "UP-only small-memory-footprint RCU"
+ depends on !PREEMPT && !SMP
+ help
+ This option selects the RCU implementation that is
+ designed for UP systems from which real-time response
+ is not required. This option greatly reduces the
+ memory footprint of RCU.
+
+config TINY_PREEMPT_RCU
+ bool "Preemptible UP-only small-memory-footprint RCU"
+ depends on PREEMPT && !SMP
+ help
+ This option selects the RCU implementation that is designed
+ for real-time UP systems. This option greatly reduces the
+ memory footprint of RCU.
+
+endchoice
+
+config PREEMPT_RCU
+ def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU )
+ help
+ This option enables preemptible-RCU code that is common between
+ the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
+
+config RCU_FANOUT
+ int "Tree-based hierarchical RCU fanout value"
+ range 2 64 if 64BIT
+ range 2 32 if !64BIT
+ depends on TREE_RCU || TREE_PREEMPT_RCU
+ default 64 if 64BIT
+ default 32 if !64BIT
+ help
+ This option controls the fanout of hierarchical implementations
+ of RCU, allowing RCU to work efficiently on machines with
+ large numbers of CPUs. This value must be at least the fourth
+ root of NR_CPUS, which allows NR_CPUS to be insanely large.
+ The default value of RCU_FANOUT should be used for production
+ systems, but if you are stress-testing the RCU implementation
+ itself, small RCU_FANOUT values allow you to test large-system
+ code paths on small(er) systems.
+
+ Select a specific number if testing RCU itself.
+ Take the default if unsure.
+
+config RCU_FANOUT_EXACT
+ bool "Disable tree-based hierarchical RCU auto-balancing"
+ depends on TREE_RCU || TREE_PREEMPT_RCU
+ default n
+ help
+ This option forces use of the exact RCU_FANOUT value specified,
+ regardless of imbalances in the hierarchy. This is useful for
+ testing RCU itself, and might one day be useful on systems with
+ strong NUMA behavior.
+
+ Without RCU_FANOUT_EXACT, the code will balance the hierarchy.
+
+ Say N if unsure.
+
+config RCU_FAST_NO_HZ
+ bool "Accelerate last non-dyntick-idle CPU's grace periods"
+ depends on NO_HZ && SMP
+ default n
+ help
+ This option causes RCU to attempt to accelerate grace periods
+ in order to allow CPUs to enter dynticks-idle state more
+ quickly. On the other hand, this option increases the overhead
+ of the dynticks-idle checking, particularly on systems with
+ large numbers of CPUs.
+
+ Say Y if energy efficiency is critically important, particularly
+ if you have relatively few CPUs.
+
+ Say N if you are unsure.
+
+config TREE_RCU_TRACE
+ def_bool RCU_TRACE && ( TREE_RCU || TREE_PREEMPT_RCU )
+ select DEBUG_FS
+ help
+ This option provides tracing for the TREE_RCU and
+ TREE_PREEMPT_RCU implementations, permitting Makefile to
+ trivially select kernel/rcutree_trace.c.
+
+config RCU_BOOST
+ bool "Enable RCU priority boosting"
+ depends on RT_MUTEXES && PREEMPT_RCU
+ default n
+ help
+ This option boosts the priority of preempted RCU readers that
+ block the current preemptible RCU grace period for too long.
+ This option also prevents heavy loads from blocking RCU
+ callback invocation for all flavors of RCU.
+
+ Say Y here if you are working with real-time apps or heavy loads
+ Say N here if you are unsure.
+
+config RCU_BOOST_PRIO
+ int "Real-time priority to boost RCU readers to"
+ range 1 99
+ depends on RCU_BOOST
+ default 1
+ help
+ This option specifies the real-time priority to which preempted
+ RCU readers are to be boosted. If you are working with CPU-bound
+ real-time applications, you should specify a priority higher then
+ the highest-priority CPU-bound application.
+
+ Specify the real-time priority, or take the default if unsure.
+
+config RCU_BOOST_DELAY
+ int "Milliseconds to delay boosting after RCU grace-period start"
+ range 0 3000
+ depends on RCU_BOOST
+ default 500
+ help
+ This option specifies the time to wait after the beginning of
+ a given grace period before priority-boosting preempted RCU
+ readers blocking that grace period. Note that any RCU reader
+ blocking an expedited RCU grace period is boosted immediately.
+
+ Accept the default if unsure.
+
+endmenu # "RCU Subsystem"
+
+config IKCONFIG
+ tristate "Kernel .config support"
+ ---help---
+ This option enables the complete Linux kernel ".config" file
+ contents to be saved in the kernel. It provides documentation
+ of which kernel options are used in a running kernel or in an
+ on-disk kernel. This information can be extracted from the kernel
+ image file with the script scripts/extract-ikconfig and used as
+ input to rebuild the current kernel or to build another kernel.
+ It can also be extracted from a running kernel by reading
+ /proc/config.gz if enabled (below).
+
+config IKCONFIG_PROC
+ bool "Enable access to .config through /proc/config.gz"
+ depends on IKCONFIG && PROC_FS
+ ---help---
+ This option enables access to the kernel configuration file
+ through /proc/config.gz.
+
+config LOG_BUF_SHIFT
+ int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
+ range 12 21
+ default 17
+ help
+ Select kernel log buffer size as a power of 2.
+ Examples:
+ 17 => 128 KB
+ 16 => 64 KB
+ 15 => 32 KB
+ 14 => 16 KB
+ 13 => 8 KB
+ 12 => 4 KB
+
+#
+# Architectures with an unreliable sched_clock() should select this:
+#
+config HAVE_UNSTABLE_SCHED_CLOCK
+ bool
+
+menuconfig CGROUPS
+ boolean "Control Group support"
+ depends on EVENTFD
+ help
+ This option adds support for grouping sets of processes together, for
+ use with process control subsystems such as Cpusets, CFS, memory
+ controls or device isolation.
+ See
+ - Documentation/scheduler/sched-design-CFS.txt (CFS)
+ - Documentation/cgroups/ (features for grouping, isolation
+ and resource control)
+
+ Say N if unsure.
+
+if CGROUPS
+
+config CGROUP_DEBUG
+ bool "Example debug cgroup subsystem"
+ default n
+ help
+ This option enables a simple cgroup subsystem that
+ exports useful debugging information about the cgroups
+ framework.
+
+ Say N if unsure.
+
+config CGROUP_FREEZER
+ bool "Freezer cgroup subsystem"
+ help
+ Provides a way to freeze and unfreeze all tasks in a
+ cgroup.
+
+config CGROUP_DEVICE
+ bool "Device controller for cgroups"
+ help
+ Provides a cgroup implementing whitelists for devices which
+ a process in the cgroup can mknod or open.
+
+config CPUSETS
+ bool "Cpuset support"
+ help
+ This option will let you create and manage CPUSETs which
+ allow dynamically partitioning a system into sets of CPUs and
+ Memory Nodes and assigning tasks to run only within those sets.
+ This is primarily useful on large SMP or NUMA systems.
+
+ Say N if unsure.
+
+config PROC_PID_CPUSET
+ bool "Include legacy /proc/<pid>/cpuset file"
+ depends on CPUSETS
+ default y
+
+config CGROUP_CPUACCT
+ bool "Simple CPU accounting cgroup subsystem"
+ help
+ Provides a simple Resource Controller for monitoring the
+ total CPU consumed by the tasks in a cgroup.
+
+config RESOURCE_COUNTERS
+ bool "Resource counters"
+ help
+ This option enables controller independent resource accounting
+ infrastructure that works with cgroups.
+
+config CGROUP_MEM_RES_CTLR
+ bool "Memory Resource Controller for Control Groups"
+ depends on RESOURCE_COUNTERS
+ select MM_OWNER
+ help
+ Provides a memory resource controller that manages both anonymous
+ memory and page cache. (See Documentation/cgroups/memory.txt)
+
+ Note that setting this option increases fixed memory overhead
+ associated with each page of memory in the system. By this,
+ 20(40)bytes/PAGE_SIZE on 32(64)bit system will be occupied by memory
+ usage tracking struct at boot. Total amount of this is printed out
+ at boot.
+
+ Only enable when you're ok with these trade offs and really
+ sure you need the memory resource controller. Even when you enable
+ this, you can set "cgroup_disable=memory" at your boot option to
+ disable memory resource controller and you can avoid overheads.
+ (and lose benefits of memory resource controller)
+
+ This config option also selects MM_OWNER config option, which
+ could in turn add some fork/exit overhead.
+
+config CGROUP_MEM_RES_CTLR_SWAP
+ bool "Memory Resource Controller Swap Extension"
+ depends on CGROUP_MEM_RES_CTLR && SWAP
+ help
+ Add swap management feature to memory resource controller. When you
+ enable this, you can limit mem+swap usage per cgroup. In other words,
+ when you disable this, memory resource controller has no cares to
+ usage of swap...a process can exhaust all of the swap. This extension
+ is useful when you want to avoid exhaustion swap but this itself
+ adds more overheads and consumes memory for remembering information.
+ Especially if you use 32bit system or small memory system, please
+ be careful about enabling this. When memory resource controller
+ is disabled by boot option, this will be automatically disabled and
+ there will be no overhead from this. Even when you set this config=y,
+ if boot option "swapaccount=0" is set, swap will not be accounted.
+ Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page
+ size is 4096bytes, 512k per 1Gbytes of swap.
+config CGROUP_MEM_RES_CTLR_SWAP_ENABLED
+ bool "Memory Resource Controller Swap Extension enabled by default"
+ depends on CGROUP_MEM_RES_CTLR_SWAP
+ default y
+ help
+ Memory Resource Controller Swap Extension comes with its price in
+ a bigger memory consumption. General purpose distribution kernels
+ which want to enable the feature but keep it disabled by default
+ and let the user enable it by swapaccount boot command line
+ parameter should have this option unselected.
+ For those who want to have the feature enabled by default should
+ select this option (if, for some reason, they need to disable it
+ then swapaccount=0 does the trick).
+config CGROUP_MEM_RES_CTLR_KMEM
+ bool "Memory Resource Controller Kernel Memory accounting (EXPERIMENTAL)"
+ depends on CGROUP_MEM_RES_CTLR && EXPERIMENTAL
+ default n
+ help
+ The Kernel Memory extension for Memory Resource Controller can limit
+ the amount of memory used by kernel objects in the system. Those are
+ fundamentally different from the entities handled by the standard
+ Memory Controller, which are page-based, and can be swapped. Users of
+ the kmem extension can use it to guarantee that no group of processes
+ will ever exhaust kernel resources alone.
+
+config CGROUP_PERF
+ bool "Enable perf_event per-cpu per-container group (cgroup) monitoring"
+ depends on PERF_EVENTS && CGROUPS
+ help
+ This option extends the per-cpu mode to restrict monitoring to
+ threads which belong to the cgroup specified and run on the
+ designated cpu.
+
+ Say N if unsure.
+
+menuconfig CGROUP_SCHED
+ bool "Group CPU scheduler"
+ default n
+ help
+ This feature lets CPU scheduler recognize task groups and control CPU
+ bandwidth allocation to such task groups. It uses cgroups to group
+ tasks.
+
+if CGROUP_SCHED
+config FAIR_GROUP_SCHED
+ bool "Group scheduling for SCHED_OTHER"
+ depends on CGROUP_SCHED
+ default CGROUP_SCHED
+
+config CFS_BANDWIDTH
+ bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED"
+ depends on EXPERIMENTAL
+ depends on FAIR_GROUP_SCHED
+ default n
+ help
+ This option allows users to define CPU bandwidth rates (limits) for
+ tasks running within the fair group scheduler. Groups with no limit
+ set are considered to be unconstrained and will run with no
+ restriction.
+ See tip/Documentation/scheduler/sched-bwc.txt for more information.
+
+config RT_GROUP_SCHED
+ bool "Group scheduling for SCHED_RR/FIFO"
+ depends on EXPERIMENTAL
+ depends on CGROUP_SCHED
+ default n
+ help
+ This feature lets you explicitly allocate real CPU bandwidth
+ to task groups. If enabled, it will also make it impossible to
+ schedule realtime tasks for non-root users until you allocate
+ realtime bandwidth for them.
+ See Documentation/scheduler/sched-rt-group.txt for more information.
+
+endif #CGROUP_SCHED
+
+config BLK_CGROUP
+ tristate "Block IO controller"
+ depends on BLOCK
+ default n
+ ---help---
+ Generic block IO controller cgroup interface. This is the common
+ cgroup interface which should be used by various IO controlling
+ policies.
+
+ Currently, CFQ IO scheduler uses it to recognize task groups and
+ control disk bandwidth allocation (proportional time slice allocation)
+ to such task groups. It is also used by bio throttling logic in
+ block layer to implement upper limit in IO rates on a device.
+
+ This option only enables generic Block IO controller infrastructure.
+ One needs to also enable actual IO controlling logic/policy. For
+ enabling proportional weight division of disk bandwidth in CFQ, set
+ CONFIG_CFQ_GROUP_IOSCHED=y; for enabling throttling policy, set
+ CONFIG_BLK_DEV_THROTTLING=y.
+
+ See Documentation/cgroups/blkio-controller.txt for more information.
+
+config DEBUG_BLK_CGROUP
+ bool "Enable Block IO controller debugging"
+ depends on BLK_CGROUP
+ default n
+ ---help---
+ Enable some debugging help. Currently it exports additional stat
+ files in a cgroup which can be useful for debugging.
+
+endif # CGROUPS
+
+config CHECKPOINT_RESTORE
+ bool "Checkpoint/restore support" if EXPERT
+ default n
+ help
+ Enables additional kernel features in a sake of checkpoint/restore.
+ In particular it adds auxiliary prctl codes to setup process text,
+ data and heap segment sizes, and a few additional /proc filesystem
+ entries.
+
+ If unsure, say N here.
+
+menuconfig NAMESPACES
+ bool "Namespaces support" if EXPERT
+ default !EXPERT
+ help
+ Provides the way to make tasks work with different objects using
+ the same id. For example same IPC id may refer to different objects
+ or same user id or pid may refer to different tasks when used in
+ different namespaces.
+
+if NAMESPACES
+
+config UTS_NS
+ bool "UTS namespace"
+ default y
+ help
+ In this namespace tasks see different info provided with the
+ uname() system call
+
+config IPC_NS
+ bool "IPC namespace"
+ depends on (SYSVIPC || POSIX_MQUEUE)
+ default y
+ help
+ In this namespace tasks work with IPC ids which correspond to
+ different IPC objects in different namespaces.
+
+config USER_NS
+ bool "User namespace (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ default y
+ help
+ This allows containers, i.e. vservers, to use user namespaces
+ to provide different user info for different servers.
+ If unsure, say N.
+
+config PID_NS
+ bool "PID Namespaces"
+ default y
+ help
+ Support process id namespaces. This allows having multiple
+ processes with the same pid as long as they are in different
+ pid namespaces. This is a building block of containers.
+
+config NET_NS
+ bool "Network namespace"
+ depends on NET
+ default y
+ help
+ Allow user space to create what appear to be multiple instances
+ of the network stack.
+
+endif # NAMESPACES
+
+config SCHED_AUTOGROUP
+ bool "Automatic process group scheduling"
+ select EVENTFD
+ select CGROUPS
+ select CGROUP_SCHED
+ select FAIR_GROUP_SCHED
+ help
+ This option optimizes the scheduler for common desktop workloads by
+ automatically creating and populating task groups. This separation
+ of workloads isolates aggressive CPU burners (like build jobs) from
+ desktop applications. Task group autogeneration is currently based
+ upon task session.
+
+config MM_OWNER
+ bool
+
+config SYSFS_DEPRECATED
+ bool "Enable deprecated sysfs features to support old userspace tools"
+ depends on SYSFS
+ default n
+ help
+ This option adds code that switches the layout of the "block" class
+ devices, to not show up in /sys/class/block/, but only in
+ /sys/block/.
+
+ This switch is only active when the sysfs.deprecated=1 boot option is
+ passed or the SYSFS_DEPRECATED_V2 option is set.
+
+ This option allows new kernels to run on old distributions and tools,
+ which might get confused by /sys/class/block/. Since 2007/2008 all
+ major distributions and tools handle this just fine.
+
+ Recent distributions and userspace tools after 2009/2010 depend on
+ the existence of /sys/class/block/, and will not work with this
+ option enabled.
+
+ Only if you are using a new kernel on an old distribution, you might
+ need to say Y here.
+
+config SYSFS_DEPRECATED_V2
+ bool "Enable deprecated sysfs features by default"
+ default n
+ depends on SYSFS
+ depends on SYSFS_DEPRECATED
+ help
+ Enable deprecated sysfs by default.
+
+ See the CONFIG_SYSFS_DEPRECATED option for more details about this
+ option.
+
+ Only if you are using a new kernel on an old distribution, you might
+ need to say Y here. Even then, odds are you would not need it
+ enabled, you can always pass the boot option if absolutely necessary.
+
+config RELAY
+ bool "Kernel->user space relay support (formerly relayfs)"
+ help
+ This option enables support for relay interface support in
+ certain file systems (such as debugfs).
+ It is designed to provide an efficient mechanism for tools and
+ facilities to relay large amounts of data from kernel space to
+ user space.
+
+ If unsure, say N.
+
+config BLK_DEV_INITRD
+ bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support"
+ depends on BROKEN || !FRV
+ help
+ The initial RAM filesystem is a ramfs which is loaded by the
+ boot loader (loadlin or lilo) and that is mounted as root
+ before the normal boot procedure. It is typically used to
+ load modules needed to mount the "real" root file system,
+ etc. See <file:Documentation/initrd.txt> for details.
+
+ If RAM disk support (BLK_DEV_RAM) is also included, this
+ also enables initial RAM disk (initrd) support and adds
+ 15 Kbytes (more on some other architectures) to the kernel size.
+
+ If unsure say Y.
+
+if BLK_DEV_INITRD
+
+source "usr/Kconfig"
+
+endif
+
+config CC_OPTIMIZE_FOR_SIZE
+ bool "Optimize for size"
+ help
+ Enabling this option will pass "-Os" instead of "-O2" to gcc
+ resulting in a smaller kernel.
+
+ If unsure, say Y.
+
+config SYSCTL
+ bool
+
+config ANON_INODES
+ bool
+
+config PANIC_TIMEOUT
+ int "Default panic timeout"
+ default 0
+ help
+ Set default panic timeout.
+
+menuconfig EXPERT
+ bool "Configure standard kernel features (expert users)"
+ # Unhide debug options, to make the on-by-default options visible
+ select DEBUG_KERNEL
+ help
+ This option allows certain base kernel options and settings
+ to be disabled or tweaked. This is for specialized
+ environments which can tolerate a "non-standard" kernel.
+ Only use this if you really know what you are doing.
+
+config UID16
+ bool "Enable 16-bit UID system calls" if EXPERT
+ depends on ARM || BLACKFIN || CRIS || FRV || H8300 || X86_32 || M68K || (S390 && !64BIT) || SUPERH || SPARC32 || (SPARC64 && COMPAT) || UML || (X86_64 && IA32_EMULATION)
+ default y
+ help
+ This enables the legacy 16-bit UID syscall wrappers.
+
+config SYSCTL_SYSCALL
+ bool "Sysctl syscall support" if EXPERT
+ depends on PROC_SYSCTL
+ default n
+ select SYSCTL
+ ---help---
+ sys_sysctl uses binary paths that have been found challenging
+ to properly maintain and use. The interface in /proc/sys
+ using paths with ascii names is now the primary path to this
+ information.
+
+ Almost nothing using the binary sysctl interface so if you are
+ trying to save some space it is probably safe to disable this,
+ making your kernel marginally smaller.
+
+ If unsure say N here.
+
+config KALLSYMS
+ bool "Load all symbols for debugging/ksymoops" if EXPERT
+ default y
+ help
+ Say Y here to let the kernel print out symbolic crash information and
+ symbolic stack backtraces. This increases the size of the kernel
+ somewhat, as all symbols have to be loaded into the kernel image.
+
+config KALLSYMS_ALL
+ bool "Include all symbols in kallsyms"
+ depends on DEBUG_KERNEL && KALLSYMS
+ help
+ Normally kallsyms only contains the symbols of functions for nicer
+ OOPS messages and backtraces (i.e., symbols from the text and inittext
+ sections). This is sufficient for most cases. And only in very rare
+ cases (e.g., when a debugger is used) all symbols are required (e.g.,
+ names of variables from the data sections, etc).
+
+ This option makes sure that all symbols are loaded into the kernel
+ image (i.e., symbols from all sections) in cost of increased kernel
+ size (depending on the kernel configuration, it may be 300KiB or
+ something like this).
+
+ Say N unless you really need all symbols.
+
+config HOTPLUG
+ bool "Support for hot-pluggable devices" if EXPERT
+ default y
+ help
+ This option is provided for the case where no hotplug or uevent
+ capabilities is wanted by the kernel. You should only consider
+ disabling this option for embedded systems that do not use modules, a
+ dynamic /dev tree, or dynamic device discovery. Just say Y.
+
+config PRINTK
+ default y
+ bool "Enable support for printk" if EXPERT
+ help
+ This option enables normal printk support. Removing it
+ eliminates most of the message strings from the kernel image
+ and makes the kernel more or less silent. As this makes it
+ very difficult to diagnose system problems, saying N here is
+ strongly discouraged.
+
+config BUG
+ bool "BUG() support" if EXPERT
+ default y
+ help
+ Disabling this option eliminates support for BUG and WARN, reducing
+ the size of your kernel image and potentially quietly ignoring
+ numerous fatal conditions. You should only consider disabling this
+ option for embedded systems with no facilities for reporting errors.
+ Just say Y.
+
+config ELF_CORE
+ default y
+ bool "Enable ELF core dumps" if EXPERT
+ help
+ Enable support for generating core dumps. Disabling saves about 4k.
+
+
+config PCSPKR_PLATFORM
+ bool "Enable PC-Speaker support" if EXPERT
+ depends on HAVE_PCSPKR_PLATFORM
+ select I8253_LOCK
+ default y
+ help
+ This option allows to disable the internal PC-Speaker
+ support, saving some memory.
+
+config HAVE_PCSPKR_PLATFORM
+ bool
+
+config BASE_FULL
+ default y
+ bool "Enable full-sized data structures for core" if EXPERT
+ help
+ Disabling this option reduces the size of miscellaneous core
+ kernel data structures. This saves memory on small machines,
+ but may reduce performance.
+
+config FUTEX
+ bool "Enable futex support" if EXPERT
+ default y
+ select RT_MUTEXES
+ help
+ Disabling this option will cause the kernel to be built without
+ support for "fast userspace mutexes". The resulting kernel may not
+ run glibc-based applications correctly.
+
+config EPOLL
+ bool "Enable eventpoll support" if EXPERT
+ default y
+ select ANON_INODES
+ help
+ Disabling this option will cause the kernel to be built without
+ support for epoll family of system calls.
+
+config SIGNALFD
+ bool "Enable signalfd() system call" if EXPERT
+ select ANON_INODES
+ default y
+ help
+ Enable the signalfd() system call that allows to receive signals
+ on a file descriptor.
+
+ If unsure, say Y.
+
+config TIMERFD
+ bool "Enable timerfd() system call" if EXPERT
+ select ANON_INODES
+ default y
+ help
+ Enable the timerfd() system call that allows to receive timer
+ events on a file descriptor.
+
+ If unsure, say Y.
+
+config EVENTFD
+ bool "Enable eventfd() system call" if EXPERT
+ select ANON_INODES
+ default y
+ help
+ Enable the eventfd() system call that allows to receive both
+ kernel notification (ie. KAIO) or userspace notifications.
+
+ If unsure, say Y.
+
+config SHMEM
+ bool "Use full shmem filesystem" if EXPERT
+ default y
+ depends on MMU
+ help
+ The shmem is an internal filesystem used to manage shared memory.
+ It is backed by swap and manages resource limits. It is also exported
+ to userspace as tmpfs if TMPFS is enabled. Disabling this
+ option replaces shmem and tmpfs with the much simpler ramfs code,
+ which may be appropriate on small systems without swap.
+
+config AIO
+ bool "Enable AIO support" if EXPERT
+ default y
+ help
+ This option enables POSIX asynchronous I/O which may by used
+ by some high performance threaded applications. Disabling
+ this option saves about 7k.
+
+config EMBEDDED
+ bool "Embedded system"
+ select EXPERT
+ help
+ This option should be enabled if compiling the kernel for
+ an embedded system so certain expert options are available
+ for configuration.
+
+config HAVE_PERF_EVENTS
+ bool
+ help
+ See tools/perf/design.txt for details.
+
+config PERF_USE_VMALLOC
+ bool
+ help
+ See tools/perf/design.txt for details
+
+menu "Kernel Performance Events And Counters"
+
+config PERF_EVENTS
+ bool "Kernel performance events and counters"
+ default y if (PROFILING || PERF_COUNTERS)
+ depends on HAVE_PERF_EVENTS
+ select ANON_INODES
+ select IRQ_WORK
+ help
+ Enable kernel support for various performance events provided
+ by software and hardware.
+
+ Software events are supported either built-in or via the
+ use of generic tracepoints.
+
+ Most modern CPUs support performance events via performance
+ counter registers. These registers count the number of certain
+ types of hw events: such as instructions executed, cachemisses
+ suffered, or branches mis-predicted - without slowing down the
+ kernel or applications. These registers can also trigger interrupts
+ when a threshold number of events have passed - and can thus be
+ used to profile the code that runs on that CPU.
+
+ The Linux Performance Event subsystem provides an abstraction of
+ these software and hardware event capabilities, available via a
+ system call and used by the "perf" utility in tools/perf/. It
+ provides per task and per CPU counters, and it provides event
+ capabilities on top of those.
+
+ Say Y if unsure.
+
+config PERF_COUNTERS
+ bool "Kernel performance counters (old config option)"
+ depends on HAVE_PERF_EVENTS
+ help
+ This config has been obsoleted by the PERF_EVENTS
+ config option - please see that one for details.
+
+ It has no effect on the kernel whether you enable
+ it or not, it is a compatibility placeholder.
+
+ Say N if unsure.
+
+config DEBUG_PERF_USE_VMALLOC
+ default n
+ bool "Debug: use vmalloc to back perf mmap() buffers"
+ depends on PERF_EVENTS && DEBUG_KERNEL
+ select PERF_USE_VMALLOC
+ help
+ Use vmalloc memory to back perf mmap() buffers.
+
+ Mostly useful for debugging the vmalloc code on platforms
+ that don't require it.
+
+ Say N if unsure.
+
+endmenu
+
+config VM_EVENT_COUNTERS
+ default y
+ bool "Enable VM event counters for /proc/vmstat" if EXPERT
+ help
+ VM event counters are needed for event counts to be shown.
+ This option allows the disabling of the VM event counters
+ on EXPERT systems. /proc/vmstat will only show page counts
+ if VM event counters are disabled.
+
+config PCI_QUIRKS
+ default y
+ bool "Enable PCI quirk workarounds" if EXPERT
+ depends on PCI
+ help
+ This enables workarounds for various PCI chipset
+ bugs/quirks. Disable this only if your target machine is
+ unaffected by PCI quirks.
+
+config SLUB_DEBUG
+ default y
+ bool "Enable SLUB debugging support" if EXPERT
+ depends on SLUB && SYSFS
+ help
+ SLUB has extensive debug support features. Disabling these can
+ result in significant savings in code size. This also disables
+ SLUB sysfs support. /sys/slab will not exist and there will be
+ no support for cache validation etc.
+
+config COMPAT_BRK
+ bool "Disable heap randomization"
+ default y
+ help
+ Randomizing heap placement makes heap exploits harder, but it
+ also breaks ancient binaries (including anything libc5 based).
+ This option changes the bootup default to heap randomization
+ disabled, and can be overridden at runtime by setting
+ /proc/sys/kernel/randomize_va_space to 2.
+
+ On non-ancient distros (post-2000 ones) N is usually a safe choice.
+
+choice
+ prompt "Choose SLAB allocator"
+ default SLUB
+ help
+ This option allows to select a slab allocator.
+
+config SLAB
+ bool "SLAB"
+ help
+ The regular slab allocator that is established and known to work
+ well in all environments. It organizes cache hot objects in
+ per cpu and per node queues.
+
+config SLUB
+ bool "SLUB (Unqueued Allocator)"
+ help
+ SLUB is a slab allocator that minimizes cache line usage
+ instead of managing queues of cached objects (SLAB approach).
+ Per cpu caching is realized using slabs of objects instead
+ of queues of objects. SLUB can use memory efficiently
+ and has enhanced diagnostics. SLUB is the default choice for
+ a slab allocator.
+
+config SLOB
+ depends on EXPERT
+ bool "SLOB (Simple Allocator)"
+ help
+ SLOB replaces the stock allocator with a drastically simpler
+ allocator. SLOB is generally more space efficient but
+ does not perform as well on large systems.
+
+endchoice
+
+config MMAP_ALLOW_UNINITIALIZED
+ bool "Allow mmapped anonymous memory to be uninitialized"
+ depends on EXPERT && !MMU
+ default n
+ help
+ Normally, and according to the Linux spec, anonymous memory obtained
+ from mmap() has it's contents cleared before it is passed to
+ userspace. Enabling this config option allows you to request that
+ mmap() skip that if it is given an MAP_UNINITIALIZED flag, thus
+ providing a huge performance boost. If this option is not enabled,
+ then the flag will be ignored.
+
+ This is taken advantage of by uClibc's malloc(), and also by
+ ELF-FDPIC binfmt's brk and stack allocator.
+
+ Because of the obvious security issues, this option should only be
+ enabled on embedded devices where you control what is run in
+ userspace. Since that isn't generally a problem on no-MMU systems,
+ it is normally safe to say Y here.
+
+ See Documentation/nommu-mmap.txt for more information.
+
+config PROFILING
+ bool "Profiling support"
+ help
+ Say Y here to enable the extended profiling support mechanisms used
+ by profilers such as OProfile.
+
+#
+# Place an empty function call at each tracepoint site. Can be
+# dynamically changed for a probe function.
+#
+config TRACEPOINTS
+ bool
+
+source "arch/Kconfig"
+
+endmenu # General setup
+
+config HAVE_GENERIC_DMA_COHERENT
+ bool
+ default n
+
+config SLABINFO
+ bool
+ depends on PROC_FS
+ depends on SLAB || SLUB_DEBUG
+ default y
+
+config RT_MUTEXES
+ boolean
+
+config BASE_SMALL
+ int
+ default 0 if BASE_FULL
+ default 1 if !BASE_FULL
+
+menuconfig MODULES
+ bool "Enable loadable module support"
+ help
+ Kernel modules are small pieces of compiled code which can
+ be inserted in the running kernel, rather than being
+ permanently built into the kernel. You use the "modprobe"
+ tool to add (and sometimes remove) them. If you say Y here,
+ many parts of the kernel can be built as modules (by
+ answering M instead of Y where indicated): this is most
+ useful for infrequently used options which are not required
+ for booting. For more information, see the man pages for
+ modprobe, lsmod, modinfo, insmod and rmmod.
+
+ If you say Y here, you will need to run "make
+ modules_install" to put the modules under /lib/modules/
+ where modprobe can find them (you may need to be root to do
+ this).
+
+ If unsure, say Y.
+
+if MODULES
+
+config MODULE_FORCE_LOAD
+ bool "Forced module loading"
+ default n
+ help
+ Allow loading of modules without version information (ie. modprobe
+ --force). Forced module loading sets the 'F' (forced) taint flag and
+ is usually a really bad idea.
+
+config MODULE_UNLOAD
+ bool "Module unloading"
+ help
+ Without this option you will not be able to unload any
+ modules (note that some modules may not be unloadable
+ anyway), which makes your kernel smaller, faster
+ and simpler. If unsure, say Y.
+
+config MODULE_FORCE_UNLOAD
+ bool "Forced module unloading"
+ depends on MODULE_UNLOAD && EXPERIMENTAL
+ help
+ This option allows you to force a module to unload, even if the
+ kernel believes it is unsafe: the kernel will remove the module
+ without waiting for anyone to stop using it (using the -f option to
+ rmmod). This is mainly for kernel developers and desperate users.
+ If unsure, say N.
+
+config MODVERSIONS
+ bool "Module versioning support"
+ help
+ Usually, you have to use modules compiled with your kernel.
+ Saying Y here makes it sometimes possible to use modules
+ compiled for different kernels, by adding enough information
+ to the modules to (hopefully) spot any changes which would
+ make them incompatible with the kernel you are running. If
+ unsure, say N.
+
+config MODULE_SRCVERSION_ALL
+ bool "Source checksum for all modules"
+ help
+ Modules which contain a MODULE_VERSION get an extra "srcversion"
+ field inserted into their modinfo section, which contains a
+ sum of the source files which made it. This helps maintainers
+ see exactly which source was used to build a module (since
+ others sometimes change the module source without updating
+ the version). With this option, such a "srcversion" field
+ will be created for all modules. If unsure, say N.
+
+endif # MODULES
+
+config INIT_ALL_POSSIBLE
+ bool
+ help
+ Back when each arch used to define their own cpu_online_mask and
+ cpu_possible_mask, some of them chose to initialize cpu_possible_mask
+ with all 1s, and others with all 0s. When they were centralised,
+ it was better to provide this option than to break all the archs
+ and have several arch maintainers pursuing me down dark alleys.
+
+config STOP_MACHINE
+ bool
+ default y
+ depends on (SMP && MODULE_UNLOAD) || HOTPLUG_CPU
+ help
+ Need stop_machine() primitive.
+
+source "block/Kconfig"
+
+config PREEMPT_NOTIFIERS
+ bool
+
+config PADATA
+ depends on SMP
+ bool
+
+source "kernel/Kconfig.locks"
diff --git a/init/Makefile b/init/Makefile
new file mode 100644
index 00000000..0bf677aa
--- /dev/null
+++ b/init/Makefile
@@ -0,0 +1,32 @@
+#
+# Makefile for the linux kernel.
+#
+
+obj-y := main.o version.o mounts.o
+ifneq ($(CONFIG_BLK_DEV_INITRD),y)
+obj-y += noinitramfs.o
+else
+obj-$(CONFIG_BLK_DEV_INITRD) += initramfs.o
+endif
+obj-$(CONFIG_GENERIC_CALIBRATE_DELAY) += calibrate.o
+
+mounts-y := do_mounts.o
+mounts-$(CONFIG_BLK_DEV_RAM) += do_mounts_rd.o
+mounts-$(CONFIG_BLK_DEV_INITRD) += do_mounts_initrd.o
+mounts-$(CONFIG_BLK_DEV_MD) += do_mounts_md.o
+
+# dependencies on generated files need to be listed explicitly
+$(obj)/version.o: include/generated/compile.h
+
+# compile.h changes depending on hostname, generation number, etc,
+# so we regenerate it always.
+# mkcompile_h will make sure to only update the
+# actual file if its content has changed.
+
+ chk_compile.h = :
+ quiet_chk_compile.h = echo ' CHK $@'
+silent_chk_compile.h = :
+include/generated/compile.h: FORCE
+ @$($(quiet)chk_compile.h)
+ $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
+ "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)"
diff --git a/init/calibrate.c b/init/calibrate.c
new file mode 100644
index 00000000..fda0a7b0
--- /dev/null
+++ b/init/calibrate.c
@@ -0,0 +1,301 @@
+/* calibrate.c: default delay calibration
+ *
+ * Excised from init/main.c
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/jiffies.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/timex.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+
+unsigned long lpj_fine;
+unsigned long preset_lpj;
+static int __init lpj_setup(char *str)
+{
+ preset_lpj = simple_strtoul(str,NULL,0);
+ return 1;
+}
+
+__setup("lpj=", lpj_setup);
+
+#ifdef ARCH_HAS_READ_CURRENT_TIMER
+
+/* This routine uses the read_current_timer() routine and gets the
+ * loops per jiffy directly, instead of guessing it using delay().
+ * Also, this code tries to handle non-maskable asynchronous events
+ * (like SMIs)
+ */
+#define DELAY_CALIBRATION_TICKS ((HZ < 100) ? 1 : (HZ/100))
+#define MAX_DIRECT_CALIBRATION_RETRIES 5
+
+static unsigned long __cpuinit calibrate_delay_direct(void)
+{
+ unsigned long pre_start, start, post_start;
+ unsigned long pre_end, end, post_end;
+ unsigned long start_jiffies;
+ unsigned long timer_rate_min, timer_rate_max;
+ unsigned long good_timer_sum = 0;
+ unsigned long good_timer_count = 0;
+ unsigned long measured_times[MAX_DIRECT_CALIBRATION_RETRIES];
+ int max = -1; /* index of measured_times with max/min values or not set */
+ int min = -1;
+ int i;
+
+ if (read_current_timer(&pre_start) < 0 )
+ return 0;
+
+ /*
+ * A simple loop like
+ * while ( jiffies < start_jiffies+1)
+ * start = read_current_timer();
+ * will not do. As we don't really know whether jiffy switch
+ * happened first or timer_value was read first. And some asynchronous
+ * event can happen between these two events introducing errors in lpj.
+ *
+ * So, we do
+ * 1. pre_start <- When we are sure that jiffy switch hasn't happened
+ * 2. check jiffy switch
+ * 3. start <- timer value before or after jiffy switch
+ * 4. post_start <- When we are sure that jiffy switch has happened
+ *
+ * Note, we don't know anything about order of 2 and 3.
+ * Now, by looking at post_start and pre_start difference, we can
+ * check whether any asynchronous event happened or not
+ */
+
+ for (i = 0; i < MAX_DIRECT_CALIBRATION_RETRIES; i++) {
+ pre_start = 0;
+ read_current_timer(&start);
+ start_jiffies = jiffies;
+ while (time_before_eq(jiffies, start_jiffies + 1)) {
+ pre_start = start;
+ read_current_timer(&start);
+ }
+ read_current_timer(&post_start);
+
+ pre_end = 0;
+ end = post_start;
+ while (time_before_eq(jiffies, start_jiffies + 1 +
+ DELAY_CALIBRATION_TICKS)) {
+ pre_end = end;
+ read_current_timer(&end);
+ }
+ read_current_timer(&post_end);
+
+ timer_rate_max = (post_end - pre_start) /
+ DELAY_CALIBRATION_TICKS;
+ timer_rate_min = (pre_end - post_start) /
+ DELAY_CALIBRATION_TICKS;
+
+ /*
+ * If the upper limit and lower limit of the timer_rate is
+ * >= 12.5% apart, redo calibration.
+ */
+ if (start >= post_end)
+ printk(KERN_NOTICE "calibrate_delay_direct() ignoring "
+ "timer_rate as we had a TSC wrap around"
+ " start=%lu >=post_end=%lu\n",
+ start, post_end);
+ if (start < post_end && pre_start != 0 && pre_end != 0 &&
+ (timer_rate_max - timer_rate_min) < (timer_rate_max >> 3)) {
+ good_timer_count++;
+ good_timer_sum += timer_rate_max;
+ measured_times[i] = timer_rate_max;
+ if (max < 0 || timer_rate_max > measured_times[max])
+ max = i;
+ if (min < 0 || timer_rate_max < measured_times[min])
+ min = i;
+ } else
+ measured_times[i] = 0;
+
+ }
+
+ /*
+ * Find the maximum & minimum - if they differ too much throw out the
+ * one with the largest difference from the mean and try again...
+ */
+ while (good_timer_count > 1) {
+ unsigned long estimate;
+ unsigned long maxdiff;
+
+ /* compute the estimate */
+ estimate = (good_timer_sum/good_timer_count);
+ maxdiff = estimate >> 3;
+
+ /* if range is within 12% let's take it */
+ if ((measured_times[max] - measured_times[min]) < maxdiff)
+ return estimate;
+
+ /* ok - drop the worse value and try again... */
+ good_timer_sum = 0;
+ good_timer_count = 0;
+ if ((measured_times[max] - estimate) <
+ (estimate - measured_times[min])) {
+ printk(KERN_NOTICE "calibrate_delay_direct() dropping "
+ "min bogoMips estimate %d = %lu\n",
+ min, measured_times[min]);
+ measured_times[min] = 0;
+ min = max;
+ } else {
+ printk(KERN_NOTICE "calibrate_delay_direct() dropping "
+ "max bogoMips estimate %d = %lu\n",
+ max, measured_times[max]);
+ measured_times[max] = 0;
+ max = min;
+ }
+
+ for (i = 0; i < MAX_DIRECT_CALIBRATION_RETRIES; i++) {
+ if (measured_times[i] == 0)
+ continue;
+ good_timer_count++;
+ good_timer_sum += measured_times[i];
+ if (measured_times[i] < measured_times[min])
+ min = i;
+ if (measured_times[i] > measured_times[max])
+ max = i;
+ }
+
+ }
+
+ printk(KERN_NOTICE "calibrate_delay_direct() failed to get a good "
+ "estimate for loops_per_jiffy.\nProbably due to long platform "
+ "interrupts. Consider using \"lpj=\" boot option.\n");
+ return 0;
+}
+#else
+static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;}
+#endif
+
+/*
+ * This is the number of bits of precision for the loops_per_jiffy. Each
+ * time we refine our estimate after the first takes 1.5/HZ seconds, so try
+ * to start with a good estimate.
+ * For the boot cpu we can skip the delay calibration and assign it a value
+ * calculated based on the timer frequency.
+ * For the rest of the CPUs we cannot assume that the timer frequency is same as
+ * the cpu frequency, hence do the calibration for those.
+ */
+#define LPS_PREC 8
+
+static unsigned long __cpuinit calibrate_delay_converge(void)
+{
+ /* First stage - slowly accelerate to find initial bounds */
+ unsigned long lpj, lpj_base, ticks, loopadd, loopadd_base, chop_limit;
+ int trials = 0, band = 0, trial_in_band = 0;
+
+ lpj = (1<<12);
+
+ /* wait for "start of" clock tick */
+ ticks = jiffies;
+ while (ticks == jiffies)
+ ; /* nothing */
+ /* Go .. */
+ ticks = jiffies;
+ do {
+ if (++trial_in_band == (1<<band)) {
+ ++band;
+ trial_in_band = 0;
+ }
+ __delay(lpj * band);
+ trials += band;
+ } while (ticks == jiffies);
+ /*
+ * We overshot, so retreat to a clear underestimate. Then estimate
+ * the largest likely undershoot. This defines our chop bounds.
+ */
+ trials -= band;
+ loopadd_base = lpj * band;
+ lpj_base = lpj * trials;
+
+recalibrate:
+ lpj = lpj_base;
+ loopadd = loopadd_base;
+
+ /*
+ * Do a binary approximation to get lpj set to
+ * equal one clock (up to LPS_PREC bits)
+ */
+ chop_limit = lpj >> LPS_PREC;
+ while (loopadd > chop_limit) {
+ lpj += loopadd;
+ ticks = jiffies;
+ while (ticks == jiffies)
+ ; /* nothing */
+ ticks = jiffies;
+ __delay(lpj);
+ if (jiffies != ticks) /* longer than 1 tick */
+ lpj -= loopadd;
+ loopadd >>= 1;
+ }
+ /*
+ * If we incremented every single time possible, presume we've
+ * massively underestimated initially, and retry with a higher
+ * start, and larger range. (Only seen on x86_64, due to SMIs)
+ */
+ if (lpj + loopadd * 2 == lpj_base + loopadd_base * 2) {
+ lpj_base = lpj;
+ loopadd_base <<= 2;
+ goto recalibrate;
+ }
+
+ return lpj;
+}
+
+static DEFINE_PER_CPU(unsigned long, cpu_loops_per_jiffy) = { 0 };
+
+/*
+ * Check if cpu calibration delay is already known. For example,
+ * some processors with multi-core sockets may have all cores
+ * with the same calibration delay.
+ *
+ * Architectures should override this function if a faster calibration
+ * method is available.
+ */
+unsigned long __attribute__((weak)) __cpuinit calibrate_delay_is_known(void)
+{
+ return 0;
+}
+
+void __cpuinit calibrate_delay(void)
+{
+ unsigned long lpj;
+ static bool printed;
+ int this_cpu = smp_processor_id();
+
+ if (per_cpu(cpu_loops_per_jiffy, this_cpu)) {
+ lpj = per_cpu(cpu_loops_per_jiffy, this_cpu);
+ if (!printed)
+ pr_info("Calibrating delay loop (skipped) "
+ "already calibrated this CPU");
+ } else if (preset_lpj) {
+ lpj = preset_lpj;
+ if (!printed)
+ pr_info("Calibrating delay loop (skipped) "
+ "preset value.. ");
+ } else if ((!printed) && lpj_fine) {
+ lpj = lpj_fine;
+ pr_info("Calibrating delay loop (skipped), "
+ "value calculated using timer frequency.. ");
+ } else if ((lpj = calibrate_delay_is_known())) {
+ ;
+ } else if ((lpj = calibrate_delay_direct()) != 0) {
+ if (!printed)
+ pr_info("Calibrating delay using timer "
+ "specific routine.. ");
+ } else {
+ if (!printed)
+ pr_info("Calibrating delay loop... ");
+ lpj = calibrate_delay_converge();
+ }
+ per_cpu(cpu_loops_per_jiffy, this_cpu) = lpj;
+ if (!printed)
+ pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
+ lpj/(500000/HZ),
+ (lpj/(5000/HZ)) % 100, lpj);
+
+ loops_per_jiffy = lpj;
+ printed = true;
+}
diff --git a/init/do_mounts.c b/init/do_mounts.c
new file mode 100644
index 00000000..42b0707c
--- /dev/null
+++ b/init/do_mounts.c
@@ -0,0 +1,560 @@
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/ctype.h>
+#include <linux/fd.h>
+#include <linux/tty.h>
+#include <linux/suspend.h>
+#include <linux/root_dev.h>
+#include <linux/security.h>
+#include <linux/delay.h>
+#include <linux/genhd.h>
+#include <linux/mount.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/initrd.h>
+#include <linux/async.h>
+#include <linux/fs_struct.h>
+#include <linux/slab.h>
+
+#include <linux/nfs_fs.h>
+#include <linux/nfs_fs_sb.h>
+#include <linux/nfs_mount.h>
+
+#include "do_mounts.h"
+
+int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */
+
+int root_mountflags = MS_RDONLY | MS_SILENT;
+static char * __initdata root_device_name;
+static char __initdata saved_root_name[64];
+static int root_wait;
+
+dev_t ROOT_DEV;
+
+static int __init load_ramdisk(char *str)
+{
+ rd_doload = simple_strtol(str,NULL,0) & 3;
+ return 1;
+}
+__setup("load_ramdisk=", load_ramdisk);
+
+static int __init readonly(char *str)
+{
+ if (*str)
+ return 0;
+ root_mountflags |= MS_RDONLY;
+ return 1;
+}
+
+static int __init readwrite(char *str)
+{
+ if (*str)
+ return 0;
+ root_mountflags &= ~MS_RDONLY;
+ return 1;
+}
+
+__setup("ro", readonly);
+__setup("rw", readwrite);
+
+#ifdef CONFIG_BLOCK
+/**
+ * match_dev_by_uuid - callback for finding a partition using its uuid
+ * @dev: device passed in by the caller
+ * @data: opaque pointer to a 36 byte char array with a UUID
+ *
+ * Returns 1 if the device matches, and 0 otherwise.
+ */
+static int match_dev_by_uuid(struct device *dev, void *data)
+{
+ u8 *uuid = data;
+ struct hd_struct *part = dev_to_part(dev);
+
+ if (!part->info)
+ goto no_match;
+
+ if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid)))
+ goto no_match;
+
+ return 1;
+no_match:
+ return 0;
+}
+
+
+/**
+ * devt_from_partuuid - looks up the dev_t of a partition by its UUID
+ * @uuid: min 36 byte char array containing a hex ascii UUID
+ *
+ * The function will return the first partition which contains a matching
+ * UUID value in its partition_meta_info struct. This does not search
+ * by filesystem UUIDs.
+ *
+ * If @uuid is followed by a "/PARTNROFF=%d", then the number will be
+ * extracted and used as an offset from the partition identified by the UUID.
+ *
+ * Returns the matching dev_t on success or 0 on failure.
+ */
+static dev_t devt_from_partuuid(char *uuid_str)
+{
+ dev_t res = 0;
+ struct device *dev = NULL;
+ u8 uuid[16];
+ struct gendisk *disk;
+ struct hd_struct *part;
+ int offset = 0;
+
+ if (strlen(uuid_str) < 36)
+ goto done;
+
+ /* Check for optional partition number offset attributes. */
+ if (uuid_str[36]) {
+ char c = 0;
+ /* Explicitly fail on poor PARTUUID syntax. */
+ if (sscanf(&uuid_str[36],
+ "/PARTNROFF=%d%c", &offset, &c) != 1) {
+ printk(KERN_ERR "VFS: PARTUUID= is invalid.\n"
+ "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
+ if (root_wait)
+ printk(KERN_ERR
+ "Disabling rootwait; root= is invalid.\n");
+ root_wait = 0;
+ goto done;
+ }
+ }
+
+ /* Pack the requested UUID in the expected format. */
+ part_pack_uuid(uuid_str, uuid);
+
+ dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid);
+ if (!dev)
+ goto done;
+
+ res = dev->devt;
+
+ /* Attempt to find the partition by offset. */
+ if (!offset)
+ goto no_offset;
+
+ res = 0;
+ disk = part_to_disk(dev_to_part(dev));
+ part = disk_get_part(disk, dev_to_part(dev)->partno + offset);
+ if (part) {
+ res = part_devt(part);
+ put_device(part_to_dev(part));
+ }
+
+no_offset:
+ put_device(dev);
+done:
+ return res;
+}
+#endif
+
+/*
+ * Convert a name into device number. We accept the following variants:
+ *
+ * 1) device number in hexadecimal represents itself
+ * 2) /dev/nfs represents Root_NFS (0xff)
+ * 3) /dev/<disk_name> represents the device number of disk
+ * 4) /dev/<disk_name><decimal> represents the device number
+ * of partition - device number of disk plus the partition number
+ * 5) /dev/<disk_name>p<decimal> - same as the above, that form is
+ * used when disk name of partitioned disk ends on a digit.
+ * 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
+ * unique id of a partition if the partition table provides it.
+ * 7) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
+ * a partition with a known unique id.
+ *
+ * If name doesn't have fall into the categories above, we return (0,0).
+ * block_class is used to check if something is a disk name. If the disk
+ * name contains slashes, the device name has them replaced with
+ * bangs.
+ */
+
+dev_t name_to_dev_t(char *name)
+{
+ char s[32];
+ char *p;
+ dev_t res = 0;
+ int part;
+
+#ifdef CONFIG_BLOCK
+ if (strncmp(name, "PARTUUID=", 9) == 0) {
+ name += 9;
+ res = devt_from_partuuid(name);
+ if (!res)
+ goto fail;
+ goto done;
+ }
+#endif
+
+ if (strncmp(name, "/dev/", 5) != 0) {
+ unsigned maj, min;
+
+ if (sscanf(name, "%u:%u", &maj, &min) == 2) {
+ res = MKDEV(maj, min);
+ if (maj != MAJOR(res) || min != MINOR(res))
+ goto fail;
+ } else {
+ res = new_decode_dev(simple_strtoul(name, &p, 16));
+ if (*p)
+ goto fail;
+ }
+ goto done;
+ }
+
+ name += 5;
+ res = Root_NFS;
+ if (strcmp(name, "nfs") == 0)
+ goto done;
+ res = Root_RAM0;
+ if (strcmp(name, "ram") == 0)
+ goto done;
+
+ if (strlen(name) > 31)
+ goto fail;
+ strcpy(s, name);
+ for (p = s; *p; p++)
+ if (*p == '/')
+ *p = '!';
+ res = blk_lookup_devt(s, 0);
+ if (res)
+ goto done;
+
+ /*
+ * try non-existent, but valid partition, which may only exist
+ * after revalidating the disk, like partitioned md devices
+ */
+ while (p > s && isdigit(p[-1]))
+ p--;
+ if (p == s || !*p || *p == '0')
+ goto fail;
+
+ /* try disk name without <part number> */
+ part = simple_strtoul(p, NULL, 10);
+ *p = '\0';
+ res = blk_lookup_devt(s, part);
+ if (res)
+ goto done;
+
+ /* try disk name without p<part number> */
+ if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p')
+ goto fail;
+ p[-1] = '\0';
+ res = blk_lookup_devt(s, part);
+ if (res)
+ goto done;
+
+fail:
+ return 0;
+done:
+ return res;
+}
+
+static int __init root_dev_setup(char *line)
+{
+ strlcpy(saved_root_name, line, sizeof(saved_root_name));
+ return 1;
+}
+
+__setup("root=", root_dev_setup);
+
+static int __init rootwait_setup(char *str)
+{
+ if (*str)
+ return 0;
+ root_wait = 1;
+ return 1;
+}
+
+__setup("rootwait", rootwait_setup);
+
+static char * __initdata root_mount_data;
+static int __init root_data_setup(char *str)
+{
+ root_mount_data = str;
+ return 1;
+}
+
+static char * __initdata root_fs_names;
+static int __init fs_names_setup(char *str)
+{
+ root_fs_names = str;
+ return 1;
+}
+
+static unsigned int __initdata root_delay;
+static int __init root_delay_setup(char *str)
+{
+ root_delay = simple_strtoul(str, NULL, 0);
+ return 1;
+}
+
+__setup("rootflags=", root_data_setup);
+__setup("rootfstype=", fs_names_setup);
+__setup("rootdelay=", root_delay_setup);
+
+static void __init get_fs_names(char *page)
+{
+ char *s = page;
+
+ if (root_fs_names) {
+ strcpy(page, root_fs_names);
+ while (*s++) {
+ if (s[-1] == ',')
+ s[-1] = '\0';
+ }
+ } else {
+ int len = get_filesystem_list(page);
+ char *p, *next;
+
+ page[len] = '\0';
+ for (p = page-1; p; p = next) {
+ next = strchr(++p, '\n');
+ if (*p++ != '\t')
+ continue;
+ while ((*s++ = *p++) != '\n')
+ ;
+ s[-1] = '\0';
+ }
+ }
+ *s = '\0';
+}
+
+static int __init do_mount_root(char *name, char *fs, int flags, void *data)
+{
+ struct super_block *s;
+ int err = sys_mount(name, "/root", fs, flags, data);
+ if (err)
+ return err;
+
+ sys_chdir((const char __user __force *)"/root");
+ s = current->fs->pwd.dentry->d_sb;
+ ROOT_DEV = s->s_dev;
+ printk(KERN_INFO
+ "VFS: Mounted root (%s filesystem)%s on device %u:%u.\n",
+ s->s_type->name,
+ s->s_flags & MS_RDONLY ? " readonly" : "",
+ MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
+ return 0;
+}
+
+void __init mount_block_root(char *name, int flags)
+{
+ char *fs_names = __getname_gfp(GFP_KERNEL
+ | __GFP_NOTRACK_FALSE_POSITIVE);
+ char *p;
+#ifdef CONFIG_BLOCK
+ char b[BDEVNAME_SIZE];
+#else
+ const char *b = name;
+#endif
+
+ get_fs_names(fs_names);
+retry:
+ for (p = fs_names; *p; p += strlen(p)+1) {
+ int err = do_mount_root(name, p, flags, root_mount_data);
+ switch (err) {
+ case 0:
+ goto out;
+ case -EACCES:
+ flags |= MS_RDONLY;
+ goto retry;
+ case -EINVAL:
+ continue;
+ }
+ /*
+ * Allow the user to distinguish between failed sys_open
+ * and bad superblock on root device.
+ * and give them a list of the available devices
+ */
+#ifdef CONFIG_BLOCK
+ __bdevname(ROOT_DEV, b);
+#endif
+ printk("VFS: Cannot open root device \"%s\" or %s: error %d\n",
+ root_device_name, b, err);
+ printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");
+
+ printk_all_partitions();
+#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
+ printk("DEBUG_BLOCK_EXT_DEVT is enabled, you need to specify "
+ "explicit textual name for \"root=\" boot option.\n");
+#endif
+ panic("VFS: Unable to mount root fs on %s", b);
+ }
+
+ printk("List of all partitions:\n");
+ printk_all_partitions();
+ printk("No filesystem could mount root, tried: ");
+ for (p = fs_names; *p; p += strlen(p)+1)
+ printk(" %s", p);
+ printk("\n");
+#ifdef CONFIG_BLOCK
+ __bdevname(ROOT_DEV, b);
+#endif
+ panic("VFS: Unable to mount root fs on %s", b);
+out:
+ putname(fs_names);
+}
+
+#ifdef CONFIG_ROOT_NFS
+
+#define NFSROOT_TIMEOUT_MIN 5
+#define NFSROOT_TIMEOUT_MAX 30
+#define NFSROOT_RETRY_MAX 5
+
+static int __init mount_nfs_root(void)
+{
+ char *root_dev, *root_data;
+ unsigned int timeout;
+ int try, err;
+
+ err = nfs_root_data(&root_dev, &root_data);
+ if (err != 0)
+ return 0;
+
+ /*
+ * The server or network may not be ready, so try several
+ * times. Stop after a few tries in case the client wants
+ * to fall back to other boot methods.
+ */
+ timeout = NFSROOT_TIMEOUT_MIN;
+ for (try = 1; ; try++) {
+ err = do_mount_root(root_dev, "nfs",
+ root_mountflags, root_data);
+ if (err == 0)
+ return 1;
+ if (try > NFSROOT_RETRY_MAX)
+ break;
+
+ /* Wait, in case the server refused us immediately */
+ ssleep(timeout);
+ timeout <<= 1;
+ if (timeout > NFSROOT_TIMEOUT_MAX)
+ timeout = NFSROOT_TIMEOUT_MAX;
+ }
+ return 0;
+}
+#endif
+
+#if defined(CONFIG_BLK_DEV_RAM) || defined(CONFIG_BLK_DEV_FD)
+void __init change_floppy(char *fmt, ...)
+{
+ struct termios termios;
+ char buf[80];
+ char c;
+ int fd;
+ va_list args;
+ va_start(args, fmt);
+ vsprintf(buf, fmt, args);
+ va_end(args);
+ fd = sys_open("/dev/root", O_RDWR | O_NDELAY, 0);
+ if (fd >= 0) {
+ sys_ioctl(fd, FDEJECT, 0);
+ sys_close(fd);
+ }
+ printk(KERN_NOTICE "VFS: Insert %s and press ENTER\n", buf);
+ fd = sys_open("/dev/console", O_RDWR, 0);
+ if (fd >= 0) {
+ sys_ioctl(fd, TCGETS, (long)&termios);
+ termios.c_lflag &= ~ICANON;
+ sys_ioctl(fd, TCSETSF, (long)&termios);
+ sys_read(fd, &c, 1);
+ termios.c_lflag |= ICANON;
+ sys_ioctl(fd, TCSETSF, (long)&termios);
+ sys_close(fd);
+ }
+}
+#endif
+
+void __init mount_root(void)
+{
+#ifdef CONFIG_ROOT_NFS
+ if (ROOT_DEV == Root_NFS) {
+ if (mount_nfs_root())
+ return;
+
+ printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
+ ROOT_DEV = Root_FD0;
+ }
+#endif
+#ifdef CONFIG_BLK_DEV_FD
+ if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
+ /* rd_doload is 2 for a dual initrd/ramload setup */
+ if (rd_doload==2) {
+ if (rd_load_disk(1)) {
+ ROOT_DEV = Root_RAM1;
+ root_device_name = NULL;
+ }
+ } else
+ change_floppy("root floppy");
+ }
+#endif
+#ifdef CONFIG_BLOCK
+ create_dev("/dev/root", ROOT_DEV);
+ mount_block_root("/dev/root", root_mountflags);
+#endif
+}
+
+/*
+ * Prepare the namespace - decide what/where to mount, load ramdisks, etc.
+ */
+void __init prepare_namespace(void)
+{
+ int is_floppy;
+
+ if (root_delay) {
+ printk(KERN_INFO "Waiting %dsec before mounting root device...\n",
+ root_delay);
+ ssleep(root_delay);
+ }
+
+ /*
+ * wait for the known devices to complete their probing
+ *
+ * Note: this is a potential source of long boot delays.
+ * For example, it is not atypical to wait 5 seconds here
+ * for the touchpad of a laptop to initialize.
+ */
+ wait_for_device_probe();
+
+ md_run_setup();
+
+ if (saved_root_name[0]) {
+ root_device_name = saved_root_name;
+ if (!strncmp(root_device_name, "mtd", 3) ||
+ !strncmp(root_device_name, "ubi", 3)) {
+ mount_block_root(root_device_name, root_mountflags);
+ goto out;
+ }
+ ROOT_DEV = name_to_dev_t(root_device_name);
+ if (strncmp(root_device_name, "/dev/", 5) == 0)
+ root_device_name += 5;
+ }
+
+ if (initrd_load())
+ goto out;
+
+ /* wait for any asynchronous scanning to complete */
+ if ((ROOT_DEV == 0) && root_wait) {
+ printk(KERN_INFO "Waiting for root device %s...\n",
+ saved_root_name);
+ while (driver_probe_done() != 0 ||
+ (ROOT_DEV = name_to_dev_t(saved_root_name)) == 0)
+ msleep(100);
+ async_synchronize_full();
+ }
+
+ is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR;
+
+ if (is_floppy && rd_doload && rd_load_disk(0))
+ ROOT_DEV = Root_RAM0;
+
+ mount_root();
+out:
+ devtmpfs_mount("dev");
+ sys_mount(".", "/", NULL, MS_MOVE, NULL);
+ sys_chroot((const char __user __force *)".");
+}
diff --git a/init/do_mounts.h b/init/do_mounts.h
new file mode 100644
index 00000000..f5b978a9
--- /dev/null
+++ b/init/do_mounts.h
@@ -0,0 +1,76 @@
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/syscalls.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/mount.h>
+#include <linux/major.h>
+#include <linux/root_dev.h>
+
+void change_floppy(char *fmt, ...);
+void mount_block_root(char *name, int flags);
+void mount_root(void);
+extern int root_mountflags;
+
+static inline int create_dev(char *name, dev_t dev)
+{
+ sys_unlink(name);
+ return sys_mknod(name, S_IFBLK|0600, new_encode_dev(dev));
+}
+
+#if BITS_PER_LONG == 32
+static inline u32 bstat(char *name)
+{
+ struct stat64 stat;
+ if (sys_stat64(name, &stat) != 0)
+ return 0;
+ if (!S_ISBLK(stat.st_mode))
+ return 0;
+ if (stat.st_rdev != (u32)stat.st_rdev)
+ return 0;
+ return stat.st_rdev;
+}
+#else
+static inline u32 bstat(char *name)
+{
+ struct stat stat;
+ if (sys_newstat(name, &stat) != 0)
+ return 0;
+ if (!S_ISBLK(stat.st_mode))
+ return 0;
+ return stat.st_rdev;
+}
+#endif
+
+#ifdef CONFIG_BLK_DEV_RAM
+
+int __init rd_load_disk(int n);
+int __init rd_load_image(char *from);
+
+#else
+
+static inline int rd_load_disk(int n) { return 0; }
+static inline int rd_load_image(char *from) { return 0; }
+
+#endif
+
+#ifdef CONFIG_BLK_DEV_INITRD
+
+int __init initrd_load(void);
+
+#else
+
+static inline int initrd_load(void) { return 0; }
+
+#endif
+
+#ifdef CONFIG_BLK_DEV_MD
+
+void md_run_setup(void);
+
+#else
+
+static inline void md_run_setup(void) {}
+
+#endif
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
new file mode 100644
index 00000000..9047330c
--- /dev/null
+++ b/init/do_mounts_initrd.c
@@ -0,0 +1,125 @@
+#include <linux/unistd.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/minix_fs.h>
+#include <linux/romfs_fs.h>
+#include <linux/initrd.h>
+#include <linux/sched.h>
+#include <linux/freezer.h>
+
+#include "do_mounts.h"
+
+unsigned long initrd_start, initrd_end;
+int initrd_below_start_ok;
+unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */
+static int __initdata old_fd, root_fd;
+static int __initdata mount_initrd = 1;
+
+static int __init no_initrd(char *str)
+{
+ mount_initrd = 0;
+ return 1;
+}
+
+__setup("noinitrd", no_initrd);
+
+static int __init do_linuxrc(void *_shell)
+{
+ static const char *argv[] = { "linuxrc", NULL, };
+ extern const char *envp_init[];
+ const char *shell = _shell;
+
+ sys_close(old_fd);sys_close(root_fd);
+ sys_setsid();
+ return kernel_execve(shell, argv, envp_init);
+}
+
+static void __init handle_initrd(void)
+{
+ int error;
+ int pid;
+
+ real_root_dev = new_encode_dev(ROOT_DEV);
+ create_dev("/dev/root.old", Root_RAM0);
+ /* mount initrd on rootfs' /root */
+ mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY);
+ sys_mkdir("/old", 0700);
+ root_fd = sys_open("/", 0, 0);
+ old_fd = sys_open("/old", 0, 0);
+ /* move initrd over / and chdir/chroot in initrd root */
+ sys_chdir("/root");
+ sys_mount(".", "/", NULL, MS_MOVE, NULL);
+ sys_chroot(".");
+
+ /*
+ * In case that a resume from disk is carried out by linuxrc or one of
+ * its children, we need to tell the freezer not to wait for us.
+ */
+ current->flags |= PF_FREEZER_SKIP;
+
+ pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD);
+ if (pid > 0)
+ while (pid != sys_wait4(-1, NULL, 0, NULL))
+ yield();
+
+ current->flags &= ~PF_FREEZER_SKIP;
+
+ /* move initrd to rootfs' /old */
+ sys_fchdir(old_fd);
+ sys_mount("/", ".", NULL, MS_MOVE, NULL);
+ /* switch root and cwd back to / of rootfs */
+ sys_fchdir(root_fd);
+ sys_chroot(".");
+ sys_close(old_fd);
+ sys_close(root_fd);
+
+ if (new_decode_dev(real_root_dev) == Root_RAM0) {
+ sys_chdir("/old");
+ return;
+ }
+
+ ROOT_DEV = new_decode_dev(real_root_dev);
+ mount_root();
+
+ printk(KERN_NOTICE "Trying to move old root to /initrd ... ");
+ error = sys_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL);
+ if (!error)
+ printk("okay\n");
+ else {
+ int fd = sys_open("/dev/root.old", O_RDWR, 0);
+ if (error == -ENOENT)
+ printk("/initrd does not exist. Ignored.\n");
+ else
+ printk("failed\n");
+ printk(KERN_NOTICE "Unmounting old root\n");
+ sys_umount("/old", MNT_DETACH);
+ printk(KERN_NOTICE "Trying to free ramdisk memory ... ");
+ if (fd < 0) {
+ error = fd;
+ } else {
+ error = sys_ioctl(fd, BLKFLSBUF, 0);
+ sys_close(fd);
+ }
+ printk(!error ? "okay\n" : "failed\n");
+ }
+}
+
+int __init initrd_load(void)
+{
+ if (mount_initrd) {
+ create_dev("/dev/ram", Root_RAM0);
+ /*
+ * Load the initrd data into /dev/ram0. Execute it as initrd
+ * unless /dev/ram0 is supposed to be our actual root device,
+ * in that case the ram disk is just set up here, and gets
+ * mounted in the normal path.
+ */
+ if (rd_load_image("/initrd.image") && ROOT_DEV != Root_RAM0) {
+ sys_unlink("/initrd.image");
+ handle_initrd();
+ return 1;
+ }
+ }
+ sys_unlink("/initrd.image");
+ return 0;
+}
diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c
new file mode 100644
index 00000000..32c4799b
--- /dev/null
+++ b/init/do_mounts_md.c
@@ -0,0 +1,302 @@
+#include <linux/delay.h>
+#include <linux/raid/md_u.h>
+#include <linux/raid/md_p.h>
+
+#include "do_mounts.h"
+
+/*
+ * When md (and any require personalities) are compiled into the kernel
+ * (not a module), arrays can be assembles are boot time using with AUTODETECT
+ * where specially marked partitions are registered with md_autodetect_dev(),
+ * and with MD_BOOT where devices to be collected are given on the boot line
+ * with md=.....
+ * The code for that is here.
+ */
+
+#ifdef CONFIG_MD_AUTODETECT
+static int __initdata raid_noautodetect;
+#else
+static int __initdata raid_noautodetect=1;
+#endif
+static int __initdata raid_autopart;
+
+static struct {
+ int minor;
+ int partitioned;
+ int level;
+ int chunk;
+ char *device_names;
+} md_setup_args[256] __initdata;
+
+static int md_setup_ents __initdata;
+
+/*
+ * Parse the command-line parameters given our kernel, but do not
+ * actually try to invoke the MD device now; that is handled by
+ * md_setup_drive after the low-level disk drivers have initialised.
+ *
+ * 27/11/1999: Fixed to work correctly with the 2.3 kernel (which
+ * assigns the task of parsing integer arguments to the
+ * invoked program now). Added ability to initialise all
+ * the MD devices (by specifying multiple "md=" lines)
+ * instead of just one. -- KTK
+ * 18May2000: Added support for persistent-superblock arrays:
+ * md=n,0,factor,fault,device-list uses RAID0 for device n
+ * md=n,-1,factor,fault,device-list uses LINEAR for device n
+ * md=n,device-list reads a RAID superblock from the devices
+ * elements in device-list are read by name_to_kdev_t so can be
+ * a hex number or something like /dev/hda1 /dev/sdb
+ * 2001-06-03: Dave Cinege <dcinege@psychosis.com>
+ * Shifted name_to_kdev_t() and related operations to md_set_drive()
+ * for later execution. Rewrote section to make devfs compatible.
+ */
+static int __init md_setup(char *str)
+{
+ int minor, level, factor, fault, partitioned = 0;
+ char *pername = "";
+ char *str1;
+ int ent;
+
+ if (*str == 'd') {
+ partitioned = 1;
+ str++;
+ }
+ if (get_option(&str, &minor) != 2) { /* MD Number */
+ printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
+ return 0;
+ }
+ str1 = str;
+ for (ent=0 ; ent< md_setup_ents ; ent++)
+ if (md_setup_args[ent].minor == minor &&
+ md_setup_args[ent].partitioned == partitioned) {
+ printk(KERN_WARNING "md: md=%s%d, Specified more than once. "
+ "Replacing previous definition.\n", partitioned?"d":"", minor);
+ break;
+ }
+ if (ent >= ARRAY_SIZE(md_setup_args)) {
+ printk(KERN_WARNING "md: md=%s%d - too many md initialisations\n", partitioned?"d":"", minor);
+ return 0;
+ }
+ if (ent >= md_setup_ents)
+ md_setup_ents++;
+ switch (get_option(&str, &level)) { /* RAID level */
+ case 2: /* could be 0 or -1.. */
+ if (level == 0 || level == LEVEL_LINEAR) {
+ if (get_option(&str, &factor) != 2 || /* Chunk Size */
+ get_option(&str, &fault) != 2) {
+ printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
+ return 0;
+ }
+ md_setup_args[ent].level = level;
+ md_setup_args[ent].chunk = 1 << (factor+12);
+ if (level == LEVEL_LINEAR)
+ pername = "linear";
+ else
+ pername = "raid0";
+ break;
+ }
+ /* FALL THROUGH */
+ case 1: /* the first device is numeric */
+ str = str1;
+ /* FALL THROUGH */
+ case 0:
+ md_setup_args[ent].level = LEVEL_NONE;
+ pername="super-block";
+ }
+
+ printk(KERN_INFO "md: Will configure md%d (%s) from %s, below.\n",
+ minor, pername, str);
+ md_setup_args[ent].device_names = str;
+ md_setup_args[ent].partitioned = partitioned;
+ md_setup_args[ent].minor = minor;
+
+ return 1;
+}
+
+static void __init md_setup_drive(void)
+{
+ int minor, i, ent, partitioned;
+ dev_t dev;
+ dev_t devices[MD_SB_DISKS+1];
+
+ for (ent = 0; ent < md_setup_ents ; ent++) {
+ int fd;
+ int err = 0;
+ char *devname;
+ mdu_disk_info_t dinfo;
+ char name[16];
+
+ minor = md_setup_args[ent].minor;
+ partitioned = md_setup_args[ent].partitioned;
+ devname = md_setup_args[ent].device_names;
+
+ sprintf(name, "/dev/md%s%d", partitioned?"_d":"", minor);
+ if (partitioned)
+ dev = MKDEV(mdp_major, minor << MdpMinorShift);
+ else
+ dev = MKDEV(MD_MAJOR, minor);
+ create_dev(name, dev);
+ for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) {
+ char *p;
+ char comp_name[64];
+ u32 rdev;
+
+ p = strchr(devname, ',');
+ if (p)
+ *p++ = 0;
+
+ dev = name_to_dev_t(devname);
+ if (strncmp(devname, "/dev/", 5) == 0)
+ devname += 5;
+ snprintf(comp_name, 63, "/dev/%s", devname);
+ rdev = bstat(comp_name);
+ if (rdev)
+ dev = new_decode_dev(rdev);
+ if (!dev) {
+ printk(KERN_WARNING "md: Unknown device name: %s\n", devname);
+ break;
+ }
+
+ devices[i] = dev;
+
+ devname = p;
+ }
+ devices[i] = 0;
+
+ if (!i)
+ continue;
+
+ printk(KERN_INFO "md: Loading md%s%d: %s\n",
+ partitioned ? "_d" : "", minor,
+ md_setup_args[ent].device_names);
+
+ fd = sys_open(name, 0, 0);
+ if (fd < 0) {
+ printk(KERN_ERR "md: open failed - cannot start "
+ "array %s\n", name);
+ continue;
+ }
+ if (sys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) {
+ printk(KERN_WARNING
+ "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n",
+ minor);
+ sys_close(fd);
+ continue;
+ }
+
+ if (md_setup_args[ent].level != LEVEL_NONE) {
+ /* non-persistent */
+ mdu_array_info_t ainfo;
+ ainfo.level = md_setup_args[ent].level;
+ ainfo.size = 0;
+ ainfo.nr_disks =0;
+ ainfo.raid_disks =0;
+ while (devices[ainfo.raid_disks])
+ ainfo.raid_disks++;
+ ainfo.md_minor =minor;
+ ainfo.not_persistent = 1;
+
+ ainfo.state = (1 << MD_SB_CLEAN);
+ ainfo.layout = 0;
+ ainfo.chunk_size = md_setup_args[ent].chunk;
+ err = sys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo);
+ for (i = 0; !err && i <= MD_SB_DISKS; i++) {
+ dev = devices[i];
+ if (!dev)
+ break;
+ dinfo.number = i;
+ dinfo.raid_disk = i;
+ dinfo.state = (1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC);
+ dinfo.major = MAJOR(dev);
+ dinfo.minor = MINOR(dev);
+ err = sys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo);
+ }
+ } else {
+ /* persistent */
+ for (i = 0; i <= MD_SB_DISKS; i++) {
+ dev = devices[i];
+ if (!dev)
+ break;
+ dinfo.major = MAJOR(dev);
+ dinfo.minor = MINOR(dev);
+ sys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo);
+ }
+ }
+ if (!err)
+ err = sys_ioctl(fd, RUN_ARRAY, 0);
+ if (err)
+ printk(KERN_WARNING "md: starting md%d failed\n", minor);
+ else {
+ /* reread the partition table.
+ * I (neilb) and not sure why this is needed, but I cannot
+ * boot a kernel with devfs compiled in from partitioned md
+ * array without it
+ */
+ sys_close(fd);
+ fd = sys_open(name, 0, 0);
+ sys_ioctl(fd, BLKRRPART, 0);
+ }
+ sys_close(fd);
+ }
+}
+
+static int __init raid_setup(char *str)
+{
+ int len, pos;
+
+ len = strlen(str) + 1;
+ pos = 0;
+
+ while (pos < len) {
+ char *comma = strchr(str+pos, ',');
+ int wlen;
+ if (comma)
+ wlen = (comma-str)-pos;
+ else wlen = (len-1)-pos;
+
+ if (!strncmp(str, "noautodetect", wlen))
+ raid_noautodetect = 1;
+ if (!strncmp(str, "autodetect", wlen))
+ raid_noautodetect = 0;
+ if (strncmp(str, "partitionable", wlen)==0)
+ raid_autopart = 1;
+ if (strncmp(str, "part", wlen)==0)
+ raid_autopart = 1;
+ pos += wlen+1;
+ }
+ return 1;
+}
+
+__setup("raid=", raid_setup);
+__setup("md=", md_setup);
+
+static void __init autodetect_raid(void)
+{
+ int fd;
+
+ /*
+ * Since we don't want to detect and use half a raid array, we need to
+ * wait for the known devices to complete their probing
+ */
+ printk(KERN_INFO "md: Waiting for all devices to be available before autodetect\n");
+ printk(KERN_INFO "md: If you don't use raid, use raid=noautodetect\n");
+
+ wait_for_device_probe();
+
+ fd = sys_open((const char __user __force *) "/dev/md0", 0, 0);
+ if (fd >= 0) {
+ sys_ioctl(fd, RAID_AUTORUN, raid_autopart);
+ sys_close(fd);
+ }
+}
+
+void __init md_run_setup(void)
+{
+ create_dev("/dev/md0", MKDEV(MD_MAJOR, 0));
+
+ if (raid_noautodetect)
+ printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=autodetect will force)\n");
+ else
+ autodetect_raid();
+ md_setup_drive();
+}
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
new file mode 100644
index 00000000..6212586d
--- /dev/null
+++ b/init/do_mounts_rd.c
@@ -0,0 +1,340 @@
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/minix_fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/romfs_fs.h>
+#include <linux/cramfs_fs.h>
+#include <linux/initrd.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+
+#include "do_mounts.h"
+#include "../fs/squashfs/squashfs_fs.h"
+
+#include <linux/decompress/generic.h>
+
+
+int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */
+
+static int __init prompt_ramdisk(char *str)
+{
+ rd_prompt = simple_strtol(str,NULL,0) & 1;
+ return 1;
+}
+__setup("prompt_ramdisk=", prompt_ramdisk);
+
+int __initdata rd_image_start; /* starting block # of image */
+
+static int __init ramdisk_start_setup(char *str)
+{
+ rd_image_start = simple_strtol(str,NULL,0);
+ return 1;
+}
+__setup("ramdisk_start=", ramdisk_start_setup);
+
+static int __init crd_load(int in_fd, int out_fd, decompress_fn deco);
+
+/*
+ * This routine tries to find a RAM disk image to load, and returns the
+ * number of blocks to read for a non-compressed image, 0 if the image
+ * is a compressed image, and -1 if an image with the right magic
+ * numbers could not be found.
+ *
+ * We currently check for the following magic numbers:
+ * minix
+ * ext2
+ * romfs
+ * cramfs
+ * squashfs
+ * gzip
+ */
+static int __init
+identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor)
+{
+ const int size = 512;
+ struct minix_super_block *minixsb;
+ struct romfs_super_block *romfsb;
+ struct cramfs_super *cramfsb;
+ struct squashfs_super_block *squashfsb;
+ int nblocks = -1;
+ unsigned char *buf;
+ const char *compress_name;
+ unsigned long n;
+
+ buf = kmalloc(size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ minixsb = (struct minix_super_block *) buf;
+ romfsb = (struct romfs_super_block *) buf;
+ cramfsb = (struct cramfs_super *) buf;
+ squashfsb = (struct squashfs_super_block *) buf;
+ memset(buf, 0xe5, size);
+
+ /*
+ * Read block 0 to test for compressed kernel
+ */
+ sys_lseek(fd, start_block * BLOCK_SIZE, 0);
+ sys_read(fd, buf, size);
+
+ *decompressor = decompress_method(buf, size, &compress_name);
+ if (compress_name) {
+ printk(KERN_NOTICE "RAMDISK: %s image found at block %d\n",
+ compress_name, start_block);
+ if (!*decompressor)
+ printk(KERN_EMERG
+ "RAMDISK: %s decompressor not configured!\n",
+ compress_name);
+ nblocks = 0;
+ goto done;
+ }
+
+ /* romfs is at block zero too */
+ if (romfsb->word0 == ROMSB_WORD0 &&
+ romfsb->word1 == ROMSB_WORD1) {
+ printk(KERN_NOTICE
+ "RAMDISK: romfs filesystem found at block %d\n",
+ start_block);
+ nblocks = (ntohl(romfsb->size)+BLOCK_SIZE-1)>>BLOCK_SIZE_BITS;
+ goto done;
+ }
+
+ if (cramfsb->magic == CRAMFS_MAGIC) {
+ printk(KERN_NOTICE
+ "RAMDISK: cramfs filesystem found at block %d\n",
+ start_block);
+ nblocks = (cramfsb->size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS;
+ goto done;
+ }
+
+ /* squashfs is at block zero too */
+ if (le32_to_cpu(squashfsb->s_magic) == SQUASHFS_MAGIC) {
+ printk(KERN_NOTICE
+ "RAMDISK: squashfs filesystem found at block %d\n",
+ start_block);
+ nblocks = (le64_to_cpu(squashfsb->bytes_used) + BLOCK_SIZE - 1)
+ >> BLOCK_SIZE_BITS;
+ goto done;
+ }
+
+ /*
+ * Read 512 bytes further to check if cramfs is padded
+ */
+ sys_lseek(fd, start_block * BLOCK_SIZE + 0x200, 0);
+ sys_read(fd, buf, size);
+
+ if (cramfsb->magic == CRAMFS_MAGIC) {
+ printk(KERN_NOTICE
+ "RAMDISK: cramfs filesystem found at block %d\n",
+ start_block);
+ nblocks = (cramfsb->size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS;
+ goto done;
+ }
+
+ /*
+ * Read block 1 to test for minix and ext2 superblock
+ */
+ sys_lseek(fd, (start_block+1) * BLOCK_SIZE, 0);
+ sys_read(fd, buf, size);
+
+ /* Try minix */
+ if (minixsb->s_magic == MINIX_SUPER_MAGIC ||
+ minixsb->s_magic == MINIX_SUPER_MAGIC2) {
+ printk(KERN_NOTICE
+ "RAMDISK: Minix filesystem found at block %d\n",
+ start_block);
+ nblocks = minixsb->s_nzones << minixsb->s_log_zone_size;
+ goto done;
+ }
+
+ /* Try ext2 */
+ n = ext2_image_size(buf);
+ if (n) {
+ printk(KERN_NOTICE
+ "RAMDISK: ext2 filesystem found at block %d\n",
+ start_block);
+ nblocks = n;
+ goto done;
+ }
+
+ printk(KERN_NOTICE
+ "RAMDISK: Couldn't find valid RAM disk image starting at %d.\n",
+ start_block);
+
+done:
+ sys_lseek(fd, start_block * BLOCK_SIZE, 0);
+ kfree(buf);
+ return nblocks;
+}
+
+int __init rd_load_image(char *from)
+{
+ int res = 0;
+ int in_fd, out_fd;
+ unsigned long rd_blocks, devblocks;
+ int nblocks, i, disk;
+ char *buf = NULL;
+ unsigned short rotate = 0;
+ decompress_fn decompressor = NULL;
+#if !defined(CONFIG_S390)
+ char rotator[4] = { '|' , '/' , '-' , '\\' };
+#endif
+
+ out_fd = sys_open((const char __user __force *) "/dev/ram", O_RDWR, 0);
+ if (out_fd < 0)
+ goto out;
+
+ in_fd = sys_open(from, O_RDONLY, 0);
+ if (in_fd < 0)
+ goto noclose_input;
+
+ nblocks = identify_ramdisk_image(in_fd, rd_image_start, &decompressor);
+ if (nblocks < 0)
+ goto done;
+
+ if (nblocks == 0) {
+ if (crd_load(in_fd, out_fd, decompressor) == 0)
+ goto successful_load;
+ goto done;
+ }
+
+ /*
+ * NOTE NOTE: nblocks is not actually blocks but
+ * the number of kibibytes of data to load into a ramdisk.
+ * So any ramdisk block size that is a multiple of 1KiB should
+ * work when the appropriate ramdisk_blocksize is specified
+ * on the command line.
+ *
+ * The default ramdisk_blocksize is 1KiB and it is generally
+ * silly to use anything else, so make sure to use 1KiB
+ * blocksize while generating ext2fs ramdisk-images.
+ */
+ if (sys_ioctl(out_fd, BLKGETSIZE, (unsigned long)&rd_blocks) < 0)
+ rd_blocks = 0;
+ else
+ rd_blocks >>= 1;
+
+ if (nblocks > rd_blocks) {
+ printk("RAMDISK: image too big! (%dKiB/%ldKiB)\n",
+ nblocks, rd_blocks);
+ goto done;
+ }
+
+ /*
+ * OK, time to copy in the data
+ */
+ if (sys_ioctl(in_fd, BLKGETSIZE, (unsigned long)&devblocks) < 0)
+ devblocks = 0;
+ else
+ devblocks >>= 1;
+
+ if (strcmp(from, "/initrd.image") == 0)
+ devblocks = nblocks;
+
+ if (devblocks == 0) {
+ printk(KERN_ERR "RAMDISK: could not determine device size\n");
+ goto done;
+ }
+
+ buf = kmalloc(BLOCK_SIZE, GFP_KERNEL);
+ if (!buf) {
+ printk(KERN_ERR "RAMDISK: could not allocate buffer\n");
+ goto done;
+ }
+
+ printk(KERN_NOTICE "RAMDISK: Loading %dKiB [%ld disk%s] into ram disk... ",
+ nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : "");
+ for (i = 0, disk = 1; i < nblocks; i++) {
+ if (i && (i % devblocks == 0)) {
+ printk("done disk #%d.\n", disk++);
+ rotate = 0;
+ if (sys_close(in_fd)) {
+ printk("Error closing the disk.\n");
+ goto noclose_input;
+ }
+ change_floppy("disk #%d", disk);
+ in_fd = sys_open(from, O_RDONLY, 0);
+ if (in_fd < 0) {
+ printk("Error opening disk.\n");
+ goto noclose_input;
+ }
+ printk("Loading disk #%d... ", disk);
+ }
+ sys_read(in_fd, buf, BLOCK_SIZE);
+ sys_write(out_fd, buf, BLOCK_SIZE);
+#if !defined(CONFIG_S390)
+ if (!(i % 16)) {
+ printk("%c\b", rotator[rotate & 0x3]);
+ rotate++;
+ }
+#endif
+ }
+ printk("done.\n");
+
+successful_load:
+ res = 1;
+done:
+ sys_close(in_fd);
+noclose_input:
+ sys_close(out_fd);
+out:
+ kfree(buf);
+ sys_unlink((const char __user __force *) "/dev/ram");
+ return res;
+}
+
+int __init rd_load_disk(int n)
+{
+ if (rd_prompt)
+ change_floppy("root floppy disk to be loaded into RAM disk");
+ create_dev("/dev/root", ROOT_DEV);
+ create_dev("/dev/ram", MKDEV(RAMDISK_MAJOR, n));
+ return rd_load_image("/dev/root");
+}
+
+static int exit_code;
+static int decompress_error;
+static int crd_infd, crd_outfd;
+
+static int __init compr_fill(void *buf, unsigned int len)
+{
+ int r = sys_read(crd_infd, buf, len);
+ if (r < 0)
+ printk(KERN_ERR "RAMDISK: error while reading compressed data");
+ else if (r == 0)
+ printk(KERN_ERR "RAMDISK: EOF while reading compressed data");
+ return r;
+}
+
+static int __init compr_flush(void *window, unsigned int outcnt)
+{
+ int written = sys_write(crd_outfd, window, outcnt);
+ if (written != outcnt) {
+ if (decompress_error == 0)
+ printk(KERN_ERR
+ "RAMDISK: incomplete write (%d != %d)\n",
+ written, outcnt);
+ decompress_error = 1;
+ return -1;
+ }
+ return outcnt;
+}
+
+static void __init error(char *x)
+{
+ printk(KERN_ERR "%s\n", x);
+ exit_code = 1;
+ decompress_error = 1;
+}
+
+static int __init crd_load(int in_fd, int out_fd, decompress_fn deco)
+{
+ int result;
+ crd_infd = in_fd;
+ crd_outfd = out_fd;
+ result = deco(NULL, 0, compr_fill, compr_flush, NULL, NULL, error);
+ if (decompress_error)
+ result = 1;
+ return result;
+}
diff --git a/init/initramfs.c b/init/initramfs.c
new file mode 100644
index 00000000..8216c303
--- /dev/null
+++ b/init/initramfs.c
@@ -0,0 +1,611 @@
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/dirent.h>
+#include <linux/syscalls.h>
+#include <linux/utime.h>
+
+static __initdata char *message;
+static void __init error(char *x)
+{
+ if (!message)
+ message = x;
+}
+
+/* link hash */
+
+#define N_ALIGN(len) ((((len) + 1) & ~3) + 2)
+
+static __initdata struct hash {
+ int ino, minor, major;
+ umode_t mode;
+ struct hash *next;
+ char name[N_ALIGN(PATH_MAX)];
+} *head[32];
+
+static inline int hash(int major, int minor, int ino)
+{
+ unsigned long tmp = ino + minor + (major << 3);
+ tmp += tmp >> 5;
+ return tmp & 31;
+}
+
+static char __init *find_link(int major, int minor, int ino,
+ umode_t mode, char *name)
+{
+ struct hash **p, *q;
+ for (p = head + hash(major, minor, ino); *p; p = &(*p)->next) {
+ if ((*p)->ino != ino)
+ continue;
+ if ((*p)->minor != minor)
+ continue;
+ if ((*p)->major != major)
+ continue;
+ if (((*p)->mode ^ mode) & S_IFMT)
+ continue;
+ return (*p)->name;
+ }
+ q = kmalloc(sizeof(struct hash), GFP_KERNEL);
+ if (!q)
+ panic("can't allocate link hash entry");
+ q->major = major;
+ q->minor = minor;
+ q->ino = ino;
+ q->mode = mode;
+ strcpy(q->name, name);
+ q->next = NULL;
+ *p = q;
+ return NULL;
+}
+
+static void __init free_hash(void)
+{
+ struct hash **p, *q;
+ for (p = head; p < head + 32; p++) {
+ while (*p) {
+ q = *p;
+ *p = q->next;
+ kfree(q);
+ }
+ }
+}
+
+static long __init do_utime(char __user *filename, time_t mtime)
+{
+ struct timespec t[2];
+
+ t[0].tv_sec = mtime;
+ t[0].tv_nsec = 0;
+ t[1].tv_sec = mtime;
+ t[1].tv_nsec = 0;
+
+ return do_utimes(AT_FDCWD, filename, t, AT_SYMLINK_NOFOLLOW);
+}
+
+static __initdata LIST_HEAD(dir_list);
+struct dir_entry {
+ struct list_head list;
+ char *name;
+ time_t mtime;
+};
+
+static void __init dir_add(const char *name, time_t mtime)
+{
+ struct dir_entry *de = kmalloc(sizeof(struct dir_entry), GFP_KERNEL);
+ if (!de)
+ panic("can't allocate dir_entry buffer");
+ INIT_LIST_HEAD(&de->list);
+ de->name = kstrdup(name, GFP_KERNEL);
+ de->mtime = mtime;
+ list_add(&de->list, &dir_list);
+}
+
+static void __init dir_utime(void)
+{
+ struct dir_entry *de, *tmp;
+ list_for_each_entry_safe(de, tmp, &dir_list, list) {
+ list_del(&de->list);
+ do_utime(de->name, de->mtime);
+ kfree(de->name);
+ kfree(de);
+ }
+}
+
+static __initdata time_t mtime;
+
+/* cpio header parsing */
+
+static __initdata unsigned long ino, major, minor, nlink;
+static __initdata umode_t mode;
+static __initdata unsigned long body_len, name_len;
+static __initdata uid_t uid;
+static __initdata gid_t gid;
+static __initdata unsigned rdev;
+
+static void __init parse_header(char *s)
+{
+ unsigned long parsed[12];
+ char buf[9];
+ int i;
+
+ buf[8] = '\0';
+ for (i = 0, s += 6; i < 12; i++, s += 8) {
+ memcpy(buf, s, 8);
+ parsed[i] = simple_strtoul(buf, NULL, 16);
+ }
+ ino = parsed[0];
+ mode = parsed[1];
+ uid = parsed[2];
+ gid = parsed[3];
+ nlink = parsed[4];
+ mtime = parsed[5];
+ body_len = parsed[6];
+ major = parsed[7];
+ minor = parsed[8];
+ rdev = new_encode_dev(MKDEV(parsed[9], parsed[10]));
+ name_len = parsed[11];
+}
+
+/* FSM */
+
+static __initdata enum state {
+ Start,
+ Collect,
+ GotHeader,
+ SkipIt,
+ GotName,
+ CopyFile,
+ GotSymlink,
+ Reset
+} state, next_state;
+
+static __initdata char *victim;
+static __initdata unsigned count;
+static __initdata loff_t this_header, next_header;
+
+static inline void __init eat(unsigned n)
+{
+ victim += n;
+ this_header += n;
+ count -= n;
+}
+
+static __initdata char *vcollected;
+static __initdata char *collected;
+static __initdata int remains;
+static __initdata char *collect;
+
+static void __init read_into(char *buf, unsigned size, enum state next)
+{
+ if (count >= size) {
+ collected = victim;
+ eat(size);
+ state = next;
+ } else {
+ collect = collected = buf;
+ remains = size;
+ next_state = next;
+ state = Collect;
+ }
+}
+
+static __initdata char *header_buf, *symlink_buf, *name_buf;
+
+static int __init do_start(void)
+{
+ read_into(header_buf, 110, GotHeader);
+ return 0;
+}
+
+static int __init do_collect(void)
+{
+ unsigned n = remains;
+ if (count < n)
+ n = count;
+ memcpy(collect, victim, n);
+ eat(n);
+ collect += n;
+ if ((remains -= n) != 0)
+ return 1;
+ state = next_state;
+ return 0;
+}
+
+static int __init do_header(void)
+{
+ if (memcmp(collected, "070707", 6)==0) {
+ error("incorrect cpio method used: use -H newc option");
+ return 1;
+ }
+ if (memcmp(collected, "070701", 6)) {
+ error("no cpio magic");
+ return 1;
+ }
+ parse_header(collected);
+ next_header = this_header + N_ALIGN(name_len) + body_len;
+ next_header = (next_header + 3) & ~3;
+ state = SkipIt;
+ if (name_len <= 0 || name_len > PATH_MAX)
+ return 0;
+ if (S_ISLNK(mode)) {
+ if (body_len > PATH_MAX)
+ return 0;
+ collect = collected = symlink_buf;
+ remains = N_ALIGN(name_len) + body_len;
+ next_state = GotSymlink;
+ state = Collect;
+ return 0;
+ }
+ if (S_ISREG(mode) || !body_len)
+ read_into(name_buf, N_ALIGN(name_len), GotName);
+ return 0;
+}
+
+static int __init do_skip(void)
+{
+ if (this_header + count < next_header) {
+ eat(count);
+ return 1;
+ } else {
+ eat(next_header - this_header);
+ state = next_state;
+ return 0;
+ }
+}
+
+static int __init do_reset(void)
+{
+ while(count && *victim == '\0')
+ eat(1);
+ if (count && (this_header & 3))
+ error("broken padding");
+ return 1;
+}
+
+static int __init maybe_link(void)
+{
+ if (nlink >= 2) {
+ char *old = find_link(major, minor, ino, mode, collected);
+ if (old)
+ return (sys_link(old, collected) < 0) ? -1 : 1;
+ }
+ return 0;
+}
+
+static void __init clean_path(char *path, umode_t mode)
+{
+ struct stat st;
+
+ if (!sys_newlstat(path, &st) && (st.st_mode^mode) & S_IFMT) {
+ if (S_ISDIR(st.st_mode))
+ sys_rmdir(path);
+ else
+ sys_unlink(path);
+ }
+}
+
+static __initdata int wfd;
+
+static int __init do_name(void)
+{
+ state = SkipIt;
+ next_state = Reset;
+ if (strcmp(collected, "TRAILER!!!") == 0) {
+ free_hash();
+ return 0;
+ }
+ clean_path(collected, mode);
+ if (S_ISREG(mode)) {
+ int ml = maybe_link();
+ if (ml >= 0) {
+ int openflags = O_WRONLY|O_CREAT;
+ if (ml != 1)
+ openflags |= O_TRUNC;
+ wfd = sys_open(collected, openflags, mode);
+
+ if (wfd >= 0) {
+ sys_fchown(wfd, uid, gid);
+ sys_fchmod(wfd, mode);
+ if (body_len)
+ sys_ftruncate(wfd, body_len);
+ vcollected = kstrdup(collected, GFP_KERNEL);
+ state = CopyFile;
+ }
+ }
+ } else if (S_ISDIR(mode)) {
+ sys_mkdir(collected, mode);
+ sys_chown(collected, uid, gid);
+ sys_chmod(collected, mode);
+ dir_add(collected, mtime);
+ } else if (S_ISBLK(mode) || S_ISCHR(mode) ||
+ S_ISFIFO(mode) || S_ISSOCK(mode)) {
+ if (maybe_link() == 0) {
+ sys_mknod(collected, mode, rdev);
+ sys_chown(collected, uid, gid);
+ sys_chmod(collected, mode);
+ do_utime(collected, mtime);
+ }
+ }
+ return 0;
+}
+
+static int __init do_copy(void)
+{
+ if (count >= body_len) {
+ sys_write(wfd, victim, body_len);
+ sys_close(wfd);
+ do_utime(vcollected, mtime);
+ kfree(vcollected);
+ eat(body_len);
+ state = SkipIt;
+ return 0;
+ } else {
+ sys_write(wfd, victim, count);
+ body_len -= count;
+ eat(count);
+ return 1;
+ }
+}
+
+static int __init do_symlink(void)
+{
+ collected[N_ALIGN(name_len) + body_len] = '\0';
+ clean_path(collected, 0);
+ sys_symlink(collected + N_ALIGN(name_len), collected);
+ sys_lchown(collected, uid, gid);
+ do_utime(collected, mtime);
+ state = SkipIt;
+ next_state = Reset;
+ return 0;
+}
+
+static __initdata int (*actions[])(void) = {
+ [Start] = do_start,
+ [Collect] = do_collect,
+ [GotHeader] = do_header,
+ [SkipIt] = do_skip,
+ [GotName] = do_name,
+ [CopyFile] = do_copy,
+ [GotSymlink] = do_symlink,
+ [Reset] = do_reset,
+};
+
+static int __init write_buffer(char *buf, unsigned len)
+{
+ count = len;
+ victim = buf;
+
+ while (!actions[state]())
+ ;
+ return len - count;
+}
+
+static int __init flush_buffer(void *bufv, unsigned len)
+{
+ char *buf = (char *) bufv;
+ int written;
+ int origLen = len;
+ if (message)
+ return -1;
+ while ((written = write_buffer(buf, len)) < len && !message) {
+ char c = buf[written];
+ if (c == '0') {
+ buf += written;
+ len -= written;
+ state = Start;
+ } else if (c == 0) {
+ buf += written;
+ len -= written;
+ state = Reset;
+ } else
+ error("junk in compressed archive");
+ }
+ return origLen;
+}
+
+static unsigned my_inptr; /* index of next byte to be processed in inbuf */
+
+#include <linux/decompress/generic.h>
+
+static char * __init unpack_to_rootfs(char *buf, unsigned len)
+{
+ int written, res;
+ decompress_fn decompress;
+ const char *compress_name;
+ static __initdata char msg_buf[64];
+
+ header_buf = kmalloc(110, GFP_KERNEL);
+ symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL);
+ name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL);
+
+ if (!header_buf || !symlink_buf || !name_buf)
+ panic("can't allocate buffers");
+
+ state = Start;
+ this_header = 0;
+ message = NULL;
+ while (!message && len) {
+ loff_t saved_offset = this_header;
+ if (*buf == '0' && !(this_header & 3)) {
+ state = Start;
+ written = write_buffer(buf, len);
+ buf += written;
+ len -= written;
+ continue;
+ }
+ if (!*buf) {
+ buf++;
+ len--;
+ this_header++;
+ continue;
+ }
+ this_header = 0;
+ decompress = decompress_method(buf, len, &compress_name);
+ if (decompress) {
+ res = decompress(buf, len, NULL, flush_buffer, NULL,
+ &my_inptr, error);
+ if (res)
+ error("decompressor failed");
+ } else if (compress_name) {
+ if (!message) {
+ snprintf(msg_buf, sizeof msg_buf,
+ "compression method %s not configured",
+ compress_name);
+ message = msg_buf;
+ }
+ } else
+ error("junk in compressed archive");
+ if (state != Reset)
+ error("junk in compressed archive");
+ this_header = saved_offset + my_inptr;
+ buf += my_inptr;
+ len -= my_inptr;
+ }
+ dir_utime();
+ kfree(name_buf);
+ kfree(symlink_buf);
+ kfree(header_buf);
+ return message;
+}
+
+static int __initdata do_retain_initrd;
+
+static int __init retain_initrd_param(char *str)
+{
+ if (*str)
+ return 0;
+ do_retain_initrd = 1;
+ return 1;
+}
+__setup("retain_initrd", retain_initrd_param);
+
+extern char __initramfs_start[];
+extern unsigned long __initramfs_size;
+#include <linux/initrd.h>
+#include <linux/kexec.h>
+
+static void __init free_initrd(void)
+{
+#ifdef CONFIG_KEXEC
+ unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
+ unsigned long crashk_end = (unsigned long)__va(crashk_res.end);
+#endif
+ if (do_retain_initrd)
+ goto skip;
+
+#ifdef CONFIG_KEXEC
+ /*
+ * If the initrd region is overlapped with crashkernel reserved region,
+ * free only memory that is not part of crashkernel region.
+ */
+ if (initrd_start < crashk_end && initrd_end > crashk_start) {
+ /*
+ * Initialize initrd memory region since the kexec boot does
+ * not do.
+ */
+ memset((void *)initrd_start, 0, initrd_end - initrd_start);
+ if (initrd_start < crashk_start)
+ free_initrd_mem(initrd_start, crashk_start);
+ if (initrd_end > crashk_end)
+ free_initrd_mem(crashk_end, initrd_end);
+ } else
+#endif
+ free_initrd_mem(initrd_start, initrd_end);
+skip:
+ initrd_start = 0;
+ initrd_end = 0;
+}
+
+#ifdef CONFIG_BLK_DEV_RAM
+#define BUF_SIZE 1024
+static void __init clean_rootfs(void)
+{
+ int fd;
+ void *buf;
+ struct linux_dirent64 *dirp;
+ int num;
+
+ fd = sys_open((const char __user __force *) "/", O_RDONLY, 0);
+ WARN_ON(fd < 0);
+ if (fd < 0)
+ return;
+ buf = kzalloc(BUF_SIZE, GFP_KERNEL);
+ WARN_ON(!buf);
+ if (!buf) {
+ sys_close(fd);
+ return;
+ }
+
+ dirp = buf;
+ num = sys_getdents64(fd, dirp, BUF_SIZE);
+ while (num > 0) {
+ while (num > 0) {
+ struct stat st;
+ int ret;
+
+ ret = sys_newlstat(dirp->d_name, &st);
+ WARN_ON_ONCE(ret);
+ if (!ret) {
+ if (S_ISDIR(st.st_mode))
+ sys_rmdir(dirp->d_name);
+ else
+ sys_unlink(dirp->d_name);
+ }
+
+ num -= dirp->d_reclen;
+ dirp = (void *)dirp + dirp->d_reclen;
+ }
+ dirp = buf;
+ memset(buf, 0, BUF_SIZE);
+ num = sys_getdents64(fd, dirp, BUF_SIZE);
+ }
+
+ sys_close(fd);
+ kfree(buf);
+}
+#endif
+
+static int __init populate_rootfs(void)
+{
+ char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
+ if (err)
+ panic(err); /* Failed to decompress INTERNAL initramfs */
+ if (initrd_start) {
+#ifdef CONFIG_BLK_DEV_RAM
+ int fd;
+ printk(KERN_INFO "Trying to unpack rootfs image as initramfs...\n");
+ err = unpack_to_rootfs((char *)initrd_start,
+ initrd_end - initrd_start);
+ if (!err) {
+ free_initrd();
+ return 0;
+ } else {
+ clean_rootfs();
+ unpack_to_rootfs(__initramfs_start, __initramfs_size);
+ }
+ printk(KERN_INFO "rootfs image is not initramfs (%s)"
+ "; looks like an initrd\n", err);
+ fd = sys_open((const char __user __force *) "/initrd.image",
+ O_WRONLY|O_CREAT, 0700);
+ if (fd >= 0) {
+ sys_write(fd, (char *)initrd_start,
+ initrd_end - initrd_start);
+ sys_close(fd);
+ free_initrd();
+ }
+#else
+ printk(KERN_INFO "Unpacking initramfs...\n");
+ err = unpack_to_rootfs((char *)initrd_start,
+ initrd_end - initrd_start);
+ if (err)
+ printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
+ free_initrd();
+#endif
+ }
+ return 0;
+}
+rootfs_initcall(populate_rootfs);
diff --git a/init/main.c b/init/main.c
new file mode 100644
index 00000000..b08c5f75
--- /dev/null
+++ b/init/main.c
@@ -0,0 +1,894 @@
+/*
+ * linux/init/main.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * GK 2/5/95 - Changed to support mounting root fs via NFS
+ * Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96
+ * Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96
+ * Simplified starting of init: Michael A. Griffith <grif@acm.org>
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/stackprotector.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/bootmem.h>
+#include <linux/acpi.h>
+#include <linux/tty.h>
+#include <linux/percpu.h>
+#include <linux/kmod.h>
+#include <linux/vmalloc.h>
+#include <linux/kernel_stat.h>
+#include <linux/start_kernel.h>
+#include <linux/security.h>
+#include <linux/smp.h>
+#include <linux/profile.h>
+#include <linux/rcupdate.h>
+#include <linux/moduleparam.h>
+#include <linux/kallsyms.h>
+#include <linux/writeback.h>
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/cgroup.h>
+#include <linux/efi.h>
+#include <linux/tick.h>
+#include <linux/interrupt.h>
+#include <linux/taskstats_kern.h>
+#include <linux/delayacct.h>
+#include <linux/unistd.h>
+#include <linux/rmap.h>
+#include <linux/mempolicy.h>
+#include <linux/key.h>
+#include <linux/buffer_head.h>
+#include <linux/page_cgroup.h>
+#include <linux/debug_locks.h>
+#include <linux/debugobjects.h>
+#include <linux/lockdep.h>
+#include <linux/kmemleak.h>
+#include <linux/pid_namespace.h>
+#include <linux/device.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/idr.h>
+#include <linux/kgdb.h>
+#include <linux/ftrace.h>
+#include <linux/async.h>
+#include <linux/kmemcheck.h>
+#include <linux/sfi.h>
+#include <linux/shmem_fs.h>
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+
+#include <asm/io.h>
+#include <asm/bugs.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/cacheflush.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/smp.h>
+#endif
+
+static int kernel_init(void *);
+
+extern void init_IRQ(void);
+extern void fork_init(unsigned long);
+extern void mca_init(void);
+extern void sbus_init(void);
+extern void prio_tree_init(void);
+extern void radix_tree_init(void);
+#ifndef CONFIG_DEBUG_RODATA
+static inline void mark_rodata_ro(void) { }
+#endif
+
+#ifdef CONFIG_TC
+extern void tc_init(void);
+#endif
+
+/*
+ * Debug helper: via this flag we know that we are in 'early bootup code'
+ * where only the boot processor is running with IRQ disabled. This means
+ * two things - IRQ must not be enabled before the flag is cleared and some
+ * operations which are not allowed with IRQ disabled are allowed while the
+ * flag is set.
+ */
+bool early_boot_irqs_disabled __read_mostly;
+
+enum system_states system_state __read_mostly;
+EXPORT_SYMBOL(system_state);
+
+/*
+ * Boot command-line arguments
+ */
+#define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT
+#define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT
+
+extern void time_init(void);
+/* Default late time init is NULL. archs can override this later. */
+void (*__initdata late_time_init)(void);
+extern void softirq_init(void);
+
+/* Untouched command line saved by arch-specific code. */
+char __initdata boot_command_line[COMMAND_LINE_SIZE];
+/* Untouched saved command line (eg. for /proc) */
+char *saved_command_line;
+/* Command line for parameter parsing */
+static char *static_command_line;
+
+static char *execute_command;
+static char *ramdisk_execute_command;
+
+/*
+ * If set, this is an indication to the drivers that reset the underlying
+ * device before going ahead with the initialization otherwise driver might
+ * rely on the BIOS and skip the reset operation.
+ *
+ * This is useful if kernel is booting in an unreliable environment.
+ * For ex. kdump situaiton where previous kernel has crashed, BIOS has been
+ * skipped and devices will be in unknown state.
+ */
+unsigned int reset_devices;
+EXPORT_SYMBOL(reset_devices);
+
+static int __init set_reset_devices(char *str)
+{
+ reset_devices = 1;
+ return 1;
+}
+
+__setup("reset_devices", set_reset_devices);
+
+static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
+const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
+static const char *panic_later, *panic_param;
+
+extern const struct obs_kernel_param __setup_start[], __setup_end[];
+
+static int __init obsolete_checksetup(char *line)
+{
+ const struct obs_kernel_param *p;
+ int had_early_param = 0;
+
+ p = __setup_start;
+ do {
+ int n = strlen(p->str);
+ if (parameqn(line, p->str, n)) {
+ if (p->early) {
+ /* Already done in parse_early_param?
+ * (Needs exact match on param part).
+ * Keep iterating, as we can have early
+ * params and __setups of same names 8( */
+ if (line[n] == '\0' || line[n] == '=')
+ had_early_param = 1;
+ } else if (!p->setup_func) {
+ printk(KERN_WARNING "Parameter %s is obsolete,"
+ " ignored\n", p->str);
+ return 1;
+ } else if (p->setup_func(line + n))
+ return 1;
+ }
+ p++;
+ } while (p < __setup_end);
+
+ return had_early_param;
+}
+
+/*
+ * This should be approx 2 Bo*oMips to start (note initial shift), and will
+ * still work even if initially too large, it will just take slightly longer
+ */
+unsigned long loops_per_jiffy = (1<<12);
+
+EXPORT_SYMBOL(loops_per_jiffy);
+
+static int __init debug_kernel(char *str)
+{
+ console_loglevel = 10;
+ return 0;
+}
+
+static int __init quiet_kernel(char *str)
+{
+ console_loglevel = 4;
+ return 0;
+}
+
+early_param("debug", debug_kernel);
+early_param("quiet", quiet_kernel);
+
+static int __init loglevel(char *str)
+{
+ int newlevel;
+
+ /*
+ * Only update loglevel value when a correct setting was passed,
+ * to prevent blind crashes (when loglevel being set to 0) that
+ * are quite hard to debug
+ */
+ if (get_option(&str, &newlevel)) {
+ console_loglevel = newlevel;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+early_param("loglevel", loglevel);
+
+/* Change NUL term back to "=", to make "param" the whole string. */
+static int __init repair_env_string(char *param, char *val)
+{
+ if (val) {
+ /* param=val or param="val"? */
+ if (val == param+strlen(param)+1)
+ val[-1] = '=';
+ else if (val == param+strlen(param)+2) {
+ val[-2] = '=';
+ memmove(val-1, val, strlen(val)+1);
+ val--;
+ } else
+ BUG();
+ }
+ return 0;
+}
+
+/*
+ * Unknown boot options get handed to init, unless they look like
+ * unused parameters (modprobe will find them in /proc/cmdline).
+ */
+static int __init unknown_bootoption(char *param, char *val)
+{
+ repair_env_string(param, val);
+
+ /* Handle obsolete-style parameters */
+ if (obsolete_checksetup(param))
+ return 0;
+
+ /* Unused module parameter. */
+ if (strchr(param, '.') && (!val || strchr(param, '.') < val))
+ return 0;
+
+ if (panic_later)
+ return 0;
+
+ if (val) {
+ /* Environment option */
+ unsigned int i;
+ for (i = 0; envp_init[i]; i++) {
+ if (i == MAX_INIT_ENVS) {
+ panic_later = "Too many boot env vars at `%s'";
+ panic_param = param;
+ }
+ if (!strncmp(param, envp_init[i], val - param))
+ break;
+ }
+ envp_init[i] = param;
+ } else {
+ /* Command line option */
+ unsigned int i;
+ for (i = 0; argv_init[i]; i++) {
+ if (i == MAX_INIT_ARGS) {
+ panic_later = "Too many boot init vars at `%s'";
+ panic_param = param;
+ }
+ }
+ argv_init[i] = param;
+ }
+ return 0;
+}
+
+static int __init init_setup(char *str)
+{
+ unsigned int i;
+
+ execute_command = str;
+ /*
+ * In case LILO is going to boot us with default command line,
+ * it prepends "auto" before the whole cmdline which makes
+ * the shell think it should execute a script with such name.
+ * So we ignore all arguments entered _before_ init=... [MJ]
+ */
+ for (i = 1; i < MAX_INIT_ARGS; i++)
+ argv_init[i] = NULL;
+ return 1;
+}
+__setup("init=", init_setup);
+
+static int __init rdinit_setup(char *str)
+{
+ unsigned int i;
+
+ ramdisk_execute_command = str;
+ /* See "auto" comment in init_setup */
+ for (i = 1; i < MAX_INIT_ARGS; i++)
+ argv_init[i] = NULL;
+ return 1;
+}
+__setup("rdinit=", rdinit_setup);
+
+#ifndef CONFIG_SMP
+static const unsigned int setup_max_cpus = NR_CPUS;
+#ifdef CONFIG_X86_LOCAL_APIC
+static void __init smp_init(void)
+{
+ APIC_init_uniprocessor();
+}
+#else
+#define smp_init() do { } while (0)
+#endif
+
+static inline void setup_nr_cpu_ids(void) { }
+static inline void smp_prepare_cpus(unsigned int maxcpus) { }
+#endif
+
+/*
+ * We need to store the untouched command line for future reference.
+ * We also need to store the touched command line since the parameter
+ * parsing is performed in place, and we should allow a component to
+ * store reference of name/value for future reference.
+ */
+static void __init setup_command_line(char *command_line)
+{
+ saved_command_line = alloc_bootmem(strlen (boot_command_line)+1);
+ static_command_line = alloc_bootmem(strlen (command_line)+1);
+ strcpy (saved_command_line, boot_command_line);
+ strcpy (static_command_line, command_line);
+}
+
+/*
+ * We need to finalize in a non-__init function or else race conditions
+ * between the root thread and the init thread may cause start_kernel to
+ * be reaped by free_initmem before the root thread has proceeded to
+ * cpu_idle.
+ *
+ * gcc-3.4 accidentally inlines this function, so use noinline.
+ */
+
+static __initdata DECLARE_COMPLETION(kthreadd_done);
+
+static noinline void __init_refok rest_init(void)
+{
+ int pid;
+
+ rcu_scheduler_starting();
+ /*
+ * We need to spawn init first so that it obtains pid 1, however
+ * the init task will end up wanting to create kthreads, which, if
+ * we schedule it before we create kthreadd, will OOPS.
+ */
+ kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
+ numa_default_policy();
+ pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
+ rcu_read_lock();
+ kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
+ rcu_read_unlock();
+ complete(&kthreadd_done);
+
+ /*
+ * The boot idle thread must execute schedule()
+ * at least once to get things moving:
+ */
+ init_idle_bootup_task(current);
+ schedule_preempt_disabled();
+ /* Call into cpu_idle with preempt disabled */
+ cpu_idle();
+}
+
+/* Check for early params. */
+static int __init do_early_param(char *param, char *val)
+{
+ const struct obs_kernel_param *p;
+
+ for (p = __setup_start; p < __setup_end; p++) {
+ if ((p->early && parameq(param, p->str)) ||
+ (strcmp(param, "console") == 0 &&
+ strcmp(p->str, "earlycon") == 0)
+ ) {
+ if (p->setup_func(val) != 0)
+ printk(KERN_WARNING
+ "Malformed early option '%s'\n", param);
+ }
+ }
+ /* We accept everything at this stage. */
+ return 0;
+}
+
+void __init parse_early_options(char *cmdline)
+{
+ parse_args("early options", cmdline, NULL, 0, 0, 0, do_early_param);
+}
+
+/* Arch code calls this early on, or if not, just before other parsing. */
+void __init parse_early_param(void)
+{
+ static __initdata int done = 0;
+ static __initdata char tmp_cmdline[COMMAND_LINE_SIZE];
+
+ if (done)
+ return;
+
+ /* All fall through to do_early_param. */
+ strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+ parse_early_options(tmp_cmdline);
+ done = 1;
+}
+
+/*
+ * Activate the first processor.
+ */
+
+static void __init boot_cpu_init(void)
+{
+ int cpu = smp_processor_id();
+ /* Mark the boot cpu "present", "online" etc for SMP and UP case */
+ set_cpu_online(cpu, true);
+ set_cpu_active(cpu, true);
+ set_cpu_present(cpu, true);
+ set_cpu_possible(cpu, true);
+}
+
+void __init __weak smp_setup_processor_id(void)
+{
+}
+
+void __init __weak thread_info_cache_init(void)
+{
+}
+
+/*
+ * Set up kernel memory allocators
+ */
+static void __init mm_init(void)
+{
+ /*
+ * page_cgroup requires contiguous pages,
+ * bigger than MAX_ORDER unless SPARSEMEM.
+ */
+ page_cgroup_init_flatmem();
+ mem_init();
+ kmem_cache_init();
+ percpu_init_late();
+ pgtable_cache_init();
+ vmalloc_init();
+}
+
+asmlinkage void __init start_kernel(void)
+{
+ char * command_line;
+ extern const struct kernel_param __start___param[], __stop___param[];
+
+ /*
+ * Need to run as early as possible, to initialize the
+ * lockdep hash:
+ */
+ lockdep_init();
+ smp_setup_processor_id();
+ debug_objects_early_init();
+
+ /*
+ * Set up the the initial canary ASAP:
+ */
+ boot_init_stack_canary();
+
+ cgroup_init_early();
+
+ local_irq_disable();
+ early_boot_irqs_disabled = true;
+
+/*
+ * Interrupts are still disabled. Do necessary setups, then
+ * enable them
+ */
+ tick_init();
+ boot_cpu_init();
+ page_address_init();
+ printk(KERN_NOTICE "%s", linux_banner);
+ setup_arch(&command_line);
+ mm_init_owner(&init_mm, &init_task);
+ mm_init_cpumask(&init_mm);
+ setup_command_line(command_line);
+ setup_nr_cpu_ids();
+ setup_per_cpu_areas();
+ smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
+
+ build_all_zonelists(NULL);
+ page_alloc_init();
+
+ printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
+ parse_early_param();
+ parse_args("Booting kernel", static_command_line, __start___param,
+ __stop___param - __start___param,
+ -1, -1, &unknown_bootoption);
+
+ jump_label_init();
+
+ /*
+ * These use large bootmem allocations and must precede
+ * kmem_cache_init()
+ */
+ setup_log_buf(0);
+ pidhash_init();
+ vfs_caches_init_early();
+ sort_main_extable();
+ trap_init();
+ mm_init();
+
+ /*
+ * Set up the scheduler prior starting any interrupts (such as the
+ * timer interrupt). Full topology setup happens at smp_init()
+ * time - but meanwhile we still have a functioning scheduler.
+ */
+ sched_init();
+ /*
+ * Disable preemption - early bootup scheduling is extremely
+ * fragile until we cpu_idle() for the first time.
+ */
+ preempt_disable();
+ if (!irqs_disabled()) {
+ printk(KERN_WARNING "start_kernel(): bug: interrupts were "
+ "enabled *very* early, fixing it\n");
+ local_irq_disable();
+ }
+ idr_init_cache();
+ perf_event_init();
+ rcu_init();
+ radix_tree_init();
+ /* init some links before init_ISA_irqs() */
+ early_irq_init();
+ init_IRQ();
+ prio_tree_init();
+ init_timers();
+ hrtimers_init();
+ softirq_init();
+ timekeeping_init();
+ time_init();
+ profile_init();
+ call_function_init();
+ if (!irqs_disabled())
+ printk(KERN_CRIT "start_kernel(): bug: interrupts were "
+ "enabled early\n");
+ early_boot_irqs_disabled = false;
+ local_irq_enable();
+
+ kmem_cache_init_late();
+
+ /*
+ * HACK ALERT! This is early. We're enabling the console before
+ * we've done PCI setups etc, and console_init() must be aware of
+ * this. But we do want output early, in case something goes wrong.
+ */
+ console_init();
+ if (panic_later)
+ panic(panic_later, panic_param);
+
+ lockdep_info();
+
+ /*
+ * Need to run this when irqs are enabled, because it wants
+ * to self-test [hard/soft]-irqs on/off lock inversion bugs
+ * too:
+ */
+ locking_selftest();
+
+#ifdef CONFIG_BLK_DEV_INITRD
+ if (initrd_start && !initrd_below_start_ok &&
+ page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
+ printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
+ "disabling it.\n",
+ page_to_pfn(virt_to_page((void *)initrd_start)),
+ min_low_pfn);
+ initrd_start = 0;
+ }
+#endif
+ page_cgroup_init();
+ debug_objects_mem_init();
+ kmemleak_init();
+ setup_per_cpu_pageset();
+ numa_policy_init();
+ if (late_time_init)
+ late_time_init();
+ sched_clock_init();
+ calibrate_delay();
+ pidmap_init();
+ anon_vma_init();
+#ifdef CONFIG_X86
+ if (efi_enabled)
+ efi_enter_virtual_mode();
+#endif
+ thread_info_cache_init();
+ cred_init();
+ fork_init(totalram_pages);
+ proc_caches_init();
+ buffer_init();
+ key_init();
+ security_init();
+ dbg_late_init();
+ vfs_caches_init(totalram_pages);
+ signals_init();
+ /* rootfs populating might need page-writeback */
+ page_writeback_init();
+#ifdef CONFIG_PROC_FS
+ proc_root_init();
+#endif
+ cgroup_init();
+ cpuset_init();
+ taskstats_init_early();
+ delayacct_init();
+
+ check_bugs();
+
+ acpi_early_init(); /* before LAPIC and SMP init */
+ sfi_init_late();
+
+ ftrace_init();
+
+ /* Do the rest non-__init'ed, we're now alive */
+ rest_init();
+}
+
+/* Call all constructor functions linked into the kernel. */
+static void __init do_ctors(void)
+{
+#ifdef CONFIG_CONSTRUCTORS
+ ctor_fn_t *fn = (ctor_fn_t *) __ctors_start;
+
+ for (; fn < (ctor_fn_t *) __ctors_end; fn++)
+ (*fn)();
+#endif
+}
+
+bool initcall_debug;
+core_param(initcall_debug, initcall_debug, bool, 0644);
+
+static char msgbuf[64];
+
+static int __init_or_module do_one_initcall_debug(initcall_t fn)
+{
+ ktime_t calltime, delta, rettime;
+ unsigned long long duration;
+ int ret;
+
+ printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current));
+ calltime = ktime_get();
+ ret = fn();
+ rettime = ktime_get();
+ delta = ktime_sub(rettime, calltime);
+ duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+ printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n", fn,
+ ret, duration);
+
+ return ret;
+}
+
+int __init_or_module do_one_initcall(initcall_t fn)
+{
+ int count = preempt_count();
+ int ret;
+
+ if (initcall_debug)
+ ret = do_one_initcall_debug(fn);
+ else
+ ret = fn();
+
+ msgbuf[0] = 0;
+
+ if (ret && ret != -ENODEV && initcall_debug)
+ sprintf(msgbuf, "error code %d ", ret);
+
+ if (preempt_count() != count) {
+ strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
+ preempt_count() = count;
+ }
+ if (irqs_disabled()) {
+ strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf));
+ local_irq_enable();
+ }
+ if (msgbuf[0]) {
+ printk("initcall %pF returned with %s\n", fn, msgbuf);
+ }
+
+ return ret;
+}
+
+
+extern initcall_t __initcall_start[];
+extern initcall_t __initcall0_start[];
+extern initcall_t __initcall1_start[];
+extern initcall_t __initcall2_start[];
+extern initcall_t __initcall3_start[];
+extern initcall_t __initcall4_start[];
+extern initcall_t __initcall5_start[];
+extern initcall_t __initcall6_start[];
+extern initcall_t __initcall7_start[];
+extern initcall_t __initcall_end[];
+
+static initcall_t *initcall_levels[] __initdata = {
+ __initcall0_start,
+ __initcall1_start,
+ __initcall2_start,
+ __initcall3_start,
+ __initcall4_start,
+ __initcall5_start,
+ __initcall6_start,
+ __initcall7_start,
+ __initcall_end,
+};
+
+static char *initcall_level_names[] __initdata = {
+ "early parameters",
+ "core parameters",
+ "postcore parameters",
+ "arch parameters",
+ "subsys parameters",
+ "fs parameters",
+ "device parameters",
+ "late parameters",
+};
+
+static void __init do_initcall_level(int level)
+{
+ extern const struct kernel_param __start___param[], __stop___param[];
+ initcall_t *fn;
+
+ strcpy(static_command_line, saved_command_line);
+ parse_args(initcall_level_names[level],
+ static_command_line, __start___param,
+ __stop___param - __start___param,
+ level, level,
+ repair_env_string);
+
+ for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
+ do_one_initcall(*fn);
+}
+
+static void __init do_initcalls(void)
+{
+ int level;
+
+ for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++)
+ do_initcall_level(level);
+}
+
+/*
+ * Ok, the machine is now initialized. None of the devices
+ * have been touched yet, but the CPU subsystem is up and
+ * running, and memory and process management works.
+ *
+ * Now we can finally start doing some real work..
+ */
+static void __init do_basic_setup(void)
+{
+ cpuset_init_smp();
+ usermodehelper_init();
+ shmem_init();
+ driver_init();
+ init_irq_proc();
+ do_ctors();
+ usermodehelper_enable();
+ do_initcalls();
+}
+
+static void __init do_pre_smp_initcalls(void)
+{
+ initcall_t *fn;
+
+ for (fn = __initcall_start; fn < __initcall0_start; fn++)
+ do_one_initcall(*fn);
+}
+
+static void run_init_process(const char *init_filename)
+{
+ argv_init[0] = init_filename;
+ kernel_execve(init_filename, argv_init, envp_init);
+}
+
+/* This is a non __init function. Force it to be noinline otherwise gcc
+ * makes it inline to init() and it becomes part of init.text section
+ */
+static noinline int init_post(void)
+{
+ /* need to finish all async __init code before freeing the memory */
+ async_synchronize_full();
+ free_initmem();
+ mark_rodata_ro();
+ system_state = SYSTEM_RUNNING;
+ numa_default_policy();
+
+
+ current->signal->flags |= SIGNAL_UNKILLABLE;
+
+ if (ramdisk_execute_command) {
+ run_init_process(ramdisk_execute_command);
+ printk(KERN_WARNING "Failed to execute %s\n",
+ ramdisk_execute_command);
+ }
+
+ /*
+ * We try each of these until one succeeds.
+ *
+ * The Bourne shell can be used instead of init if we are
+ * trying to recover a really broken machine.
+ */
+ if (execute_command) {
+ run_init_process(execute_command);
+ printk(KERN_WARNING "Failed to execute %s. Attempting "
+ "defaults...\n", execute_command);
+ }
+ run_init_process("/sbin/init");
+ run_init_process("/etc/init");
+ run_init_process("/bin/init");
+ run_init_process("/bin/sh");
+
+ panic("No init found. Try passing init= option to kernel. "
+ "See Linux Documentation/init.txt for guidance.");
+}
+
+static int __init kernel_init(void * unused)
+{
+ /*
+ * Wait until kthreadd is all set-up.
+ */
+ wait_for_completion(&kthreadd_done);
+
+ /* Now the scheduler is fully set up and can do blocking allocations */
+ gfp_allowed_mask = __GFP_BITS_MASK;
+
+ /*
+ * init can allocate pages on any node
+ */
+ set_mems_allowed(node_states[N_HIGH_MEMORY]);
+ /*
+ * init can run on any cpu.
+ */
+ set_cpus_allowed_ptr(current, cpu_all_mask);
+
+ cad_pid = task_pid(current);
+
+ smp_prepare_cpus(setup_max_cpus);
+
+ do_pre_smp_initcalls();
+ lockup_detector_init();
+
+ smp_init();
+ sched_init_smp();
+
+ do_basic_setup();
+
+ /* Open the /dev/console on the rootfs, this should never fail */
+ if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
+ printk(KERN_WARNING "Warning: unable to open an initial console.\n");
+
+ (void) sys_dup(0);
+ (void) sys_dup(0);
+ /*
+ * check if there is an early userspace init. If yes, let it do all
+ * the work
+ */
+
+ if (!ramdisk_execute_command)
+ ramdisk_execute_command = "/init";
+
+ if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
+ ramdisk_execute_command = NULL;
+ prepare_namespace();
+ }
+
+ /*
+ * Ok, we have completed the initial bootup, and
+ * we're essentially up and running. Get rid of the
+ * initmem segments and start the user-mode stuff..
+ */
+
+ init_post();
+ return 0;
+}
diff --git a/init/noinitramfs.c b/init/noinitramfs.c
new file mode 100644
index 00000000..267739d8
--- /dev/null
+++ b/init/noinitramfs.c
@@ -0,0 +1,52 @@
+/*
+ * init/noinitramfs.c
+ *
+ * Copyright (C) 2006, NXP Semiconductors, All Rights Reserved
+ * Author: Jean-Paul Saman <jean-paul.saman@nxp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/init.h>
+#include <linux/stat.h>
+#include <linux/kdev_t.h>
+#include <linux/syscalls.h>
+
+/*
+ * Create a simple rootfs that is similar to the default initramfs
+ */
+static int __init default_rootfs(void)
+{
+ int err;
+
+ err = sys_mkdir((const char __user __force *) "/dev", 0755);
+ if (err < 0)
+ goto out;
+
+ err = sys_mknod((const char __user __force *) "/dev/console",
+ S_IFCHR | S_IRUSR | S_IWUSR,
+ new_encode_dev(MKDEV(5, 1)));
+ if (err < 0)
+ goto out;
+
+ err = sys_mkdir((const char __user __force *) "/root", 0700);
+ if (err < 0)
+ goto out;
+
+ return 0;
+
+out:
+ printk(KERN_WARNING "Failed to create a rootfs\n");
+ return err;
+}
+rootfs_initcall(default_rootfs);
diff --git a/init/version.c b/init/version.c
new file mode 100644
index 00000000..86fe0ccb
--- /dev/null
+++ b/init/version.c
@@ -0,0 +1,48 @@
+/*
+ * linux/init/version.c
+ *
+ * Copyright (C) 1992 Theodore Ts'o
+ *
+ * May be freely distributed as part of Linux.
+ */
+
+#include <generated/compile.h>
+#include <linux/module.h>
+#include <linux/uts.h>
+#include <linux/utsname.h>
+#include <generated/utsrelease.h>
+#include <linux/version.h>
+
+#ifndef CONFIG_KALLSYMS
+#define version(a) Version_ ## a
+#define version_string(a) version(a)
+
+extern int version_string(LINUX_VERSION_CODE);
+int version_string(LINUX_VERSION_CODE);
+#endif
+
+struct uts_namespace init_uts_ns = {
+ .kref = {
+ .refcount = ATOMIC_INIT(2),
+ },
+ .name = {
+ .sysname = UTS_SYSNAME,
+ .nodename = UTS_NODENAME,
+ .release = UTS_RELEASE,
+ .version = UTS_VERSION,
+ .machine = UTS_MACHINE,
+ .domainname = UTS_DOMAINNAME,
+ },
+ .user_ns = &init_user_ns,
+};
+EXPORT_SYMBOL_GPL(init_uts_ns);
+
+/* FIXED STRINGS! Don't touch! */
+const char linux_banner[] =
+ "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+ LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n";
+
+const char linux_proc_banner[] =
+ "%s version %s"
+ " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ")"
+ " (" LINUX_COMPILER ") %s\n";