Diffstat (limited to 'ANDROID_3.4.5/drivers/md')
82 files changed, 0 insertions, 63982 deletions
diff --git a/ANDROID_3.4.5/drivers/md/Kconfig b/ANDROID_3.4.5/drivers/md/Kconfig deleted file mode 100644 index 10f122a3..00000000 --- a/ANDROID_3.4.5/drivers/md/Kconfig +++ /dev/null @@ -1,393 +0,0 @@ -# -# Block device driver configuration -# - -menuconfig MD - bool "Multiple devices driver support (RAID and LVM)" - depends on BLOCK - help - Support multiple physical spindles through a single logical device. - Required for RAID and logical volume management. - -if MD - -config BLK_DEV_MD - tristate "RAID support" - ---help--- - This driver lets you combine several hard disk partitions into one - logical block device. This can be used to simply append one - partition to another one or to combine several redundant hard disks - into a RAID1/4/5 device so as to provide protection against hard - disk failures. This is called "Software RAID" since the combining of - the partitions is done by the kernel. "Hardware RAID" means that the - combining is done by a dedicated controller; if you have such a - controller, you do not need to say Y here. - - More information about Software RAID on Linux is contained in the - Software RAID mini-HOWTO, available from - <http://www.tldp.org/docs.html#howto>. There you will also learn - where to get the supporting user space utilities raidtools. - - If unsure, say N. - -config MD_AUTODETECT - bool "Autodetect RAID arrays during kernel boot" - depends on BLK_DEV_MD=y - default y - ---help--- - If you say Y here, then the kernel will try to autodetect raid - arrays as part of its boot process. - - If you don't use raid and say Y, this autodetection can cause - a several-second delay in the boot time due to various - synchronisation steps that are part of this step. - - If unsure, say Y. - -config MD_LINEAR - tristate "Linear (append) mode" - depends on BLK_DEV_MD - ---help--- - If you say Y here, then your multiple devices driver will be able to - use the so-called linear mode, i.e. it will combine the hard disk - partitions by simply appending one to the other. - - To compile this as a module, choose M here: the module - will be called linear. - - If unsure, say Y. - -config MD_RAID0 - tristate "RAID-0 (striping) mode" - depends on BLK_DEV_MD - ---help--- - If you say Y here, then your multiple devices driver will be able to - use the so-called raid0 mode, i.e. it will combine the hard disk - partitions into one logical device in such a fashion as to fill them - up evenly, one chunk here and one chunk there. This will increase - the throughput rate if the partitions reside on distinct disks. - - Information about Software RAID on Linux is contained in the - Software-RAID mini-HOWTO, available from - <http://www.tldp.org/docs.html#howto>. There you will also - learn where to get the supporting user space utilities raidtools. - - To compile this as a module, choose M here: the module - will be called raid0. - - If unsure, say Y. - -config MD_RAID1 - tristate "RAID-1 (mirroring) mode" - depends on BLK_DEV_MD - ---help--- - A RAID-1 set consists of several disk drives which are exact copies - of each other. In the event of a mirror failure, the RAID driver - will continue to use the operational mirrors in the set, providing - an error free MD (multiple device) to the higher levels of the - kernel. In a set with N drives, the available space is the capacity - of a single drive, and the set protects against a failure of (N - 1) - drives. 
- - Information about Software RAID on Linux is contained in the - Software-RAID mini-HOWTO, available from - <http://www.tldp.org/docs.html#howto>. There you will also - learn where to get the supporting user space utilities raidtools. - - If you want to use such a RAID-1 set, say Y. To compile this code - as a module, choose M here: the module will be called raid1. - - If unsure, say Y. - -config MD_RAID10 - tristate "RAID-10 (mirrored striping) mode" - depends on BLK_DEV_MD - ---help--- - RAID-10 provides a combination of striping (RAID-0) and - mirroring (RAID-1) with easier configuration and more flexible - layout. - Unlike RAID-0, but like RAID-1, RAID-10 requires all devices to - be the same size (or at least, only as much as the smallest device - will be used). - RAID-10 provides a variety of layouts that provide different levels - of redundancy and performance. - - RAID-10 requires mdadm-1.7.0 or later, available at: - - ftp://ftp.kernel.org/pub/linux/utils/raid/mdadm/ - - If unsure, say Y. - -config MD_RAID456 - tristate "RAID-4/RAID-5/RAID-6 mode" - depends on BLK_DEV_MD - select RAID6_PQ - select ASYNC_MEMCPY - select ASYNC_XOR - select ASYNC_PQ - select ASYNC_RAID6_RECOV - ---help--- - A RAID-5 set of N drives with a capacity of C MB per drive provides - the capacity of C * (N - 1) MB, and protects against a failure - of a single drive. For a given sector (row) number, (N - 1) drives - contain data sectors, and one drive contains the parity protection. - For a RAID-4 set, the parity blocks are present on a single drive, - while a RAID-5 set distributes the parity across the drives in one - of the available parity distribution methods. - - A RAID-6 set of N drives with a capacity of C MB per drive - provides the capacity of C * (N - 2) MB, and protects - against a failure of any two drives. For a given sector - (row) number, (N - 2) drives contain data sectors, and two - drives contains two independent redundancy syndromes. Like - RAID-5, RAID-6 distributes the syndromes across the drives - in one of the available parity distribution methods. - - Information about Software RAID on Linux is contained in the - Software-RAID mini-HOWTO, available from - <http://www.tldp.org/docs.html#howto>. There you will also - learn where to get the supporting user space utilities raidtools. - - If you want to use such a RAID-4/RAID-5/RAID-6 set, say Y. To - compile this code as a module, choose M here: the module - will be called raid456. - - If unsure, say Y. - -config MULTICORE_RAID456 - bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)" - depends on MD_RAID456 - depends on SMP - depends on EXPERIMENTAL - ---help--- - Enable the raid456 module to dispatch per-stripe raid operations to a - thread pool. - - If unsure, say N. - -config MD_MULTIPATH - tristate "Multipath I/O support" - depends on BLK_DEV_MD - help - MD_MULTIPATH provides a simple multi-path personality for use - the MD framework. It is not under active development. New - projects should consider using DM_MULTIPATH which has more - features and more testing. - - If unsure, say N. - -config MD_FAULTY - tristate "Faulty test module for MD" - depends on BLK_DEV_MD - help - The "faulty" module allows for a block device that occasionally returns - read or write errors. It is useful for testing. - - In unsure, say N. - -config BLK_DEV_DM - tristate "Device mapper support" - ---help--- - Device-mapper is a low level volume manager. It works by allowing - people to specify mappings for ranges of logical sectors. 
Various - mapping types are available, in addition people may write their own - modules containing custom mappings if they wish. - - Higher level volume managers such as LVM2 use this driver. - - To compile this as a module, choose M here: the module will be - called dm-mod. - - If unsure, say N. - -config DM_DEBUG - boolean "Device mapper debugging support" - depends on BLK_DEV_DM - ---help--- - Enable this for messages that may help debug device-mapper problems. - - If unsure, say N. - -config DM_BUFIO - tristate - depends on BLK_DEV_DM && EXPERIMENTAL - ---help--- - This interface allows you to do buffered I/O on a device and acts - as a cache, holding recently-read blocks in memory and performing - delayed writes. - -source "drivers/md/persistent-data/Kconfig" - -config DM_CRYPT - tristate "Crypt target support" - depends on BLK_DEV_DM - select CRYPTO - select CRYPTO_CBC - ---help--- - This device-mapper target allows you to create a device that - transparently encrypts the data on it. You'll need to activate - the ciphers you're going to use in the cryptoapi configuration. - - Information on how to use dm-crypt can be found on - - <http://www.saout.de/misc/dm-crypt/> - - To compile this code as a module, choose M here: the module will - be called dm-crypt. - - If unsure, say N. - -config DM_SNAPSHOT - tristate "Snapshot target" - depends on BLK_DEV_DM - ---help--- - Allow volume managers to take writable snapshots of a device. - -config DM_THIN_PROVISIONING - tristate "Thin provisioning target (EXPERIMENTAL)" - depends on BLK_DEV_DM && EXPERIMENTAL - select DM_PERSISTENT_DATA - ---help--- - Provides thin provisioning and snapshots that share a data store. - -config DM_DEBUG_BLOCK_STACK_TRACING - boolean "Keep stack trace of thin provisioning block lock holders" - depends on STACKTRACE_SUPPORT && DM_THIN_PROVISIONING - select STACKTRACE - ---help--- - Enable this for messages that may help debug problems with the - block manager locking used by thin provisioning. - - If unsure, say N. - -config DM_DEBUG_SPACE_MAPS - boolean "Extra validation for thin provisioning space maps" - depends on DM_THIN_PROVISIONING - ---help--- - Enable this for messages that may help debug problems with the - space maps used by thin provisioning. - - If unsure, say N. - -config DM_MIRROR - tristate "Mirror target" - depends on BLK_DEV_DM - ---help--- - Allow volume managers to mirror logical volumes, also - needed for live data migration tools such as 'pvmove'. - -config DM_RAID - tristate "RAID 1/4/5/6 target" - depends on BLK_DEV_DM - select MD_RAID1 - select MD_RAID456 - select BLK_DEV_MD - ---help--- - A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings - - A RAID-5 set of N drives with a capacity of C MB per drive provides - the capacity of C * (N - 1) MB, and protects against a failure - of a single drive. For a given sector (row) number, (N - 1) drives - contain data sectors, and one drive contains the parity protection. - For a RAID-4 set, the parity blocks are present on a single drive, - while a RAID-5 set distributes the parity across the drives in one - of the available parity distribution methods. - - A RAID-6 set of N drives with a capacity of C MB per drive - provides the capacity of C * (N - 2) MB, and protects - against a failure of any two drives. For a given sector - (row) number, (N - 2) drives contain data sectors, and two - drives contains two independent redundancy syndromes. 
Like - RAID-5, RAID-6 distributes the syndromes across the drives - in one of the available parity distribution methods. - -config DM_LOG_USERSPACE - tristate "Mirror userspace logging (EXPERIMENTAL)" - depends on DM_MIRROR && EXPERIMENTAL && NET - select CONNECTOR - ---help--- - The userspace logging module provides a mechanism for - relaying the dm-dirty-log API to userspace. Log designs - which are more suited to userspace implementation (e.g. - shared storage logs) or experimental logs can be implemented - by leveraging this framework. - -config DM_ZERO - tristate "Zero target" - depends on BLK_DEV_DM - ---help--- - A target that discards writes, and returns all zeroes for - reads. Useful in some recovery situations. - -config DM_MULTIPATH - tristate "Multipath target" - depends on BLK_DEV_DM - # nasty syntax but means make DM_MULTIPATH independent - # of SCSI_DH if the latter isn't defined but if - # it is, DM_MULTIPATH must depend on it. We get a build - # error if SCSI_DH=m and DM_MULTIPATH=y - depends on SCSI_DH || !SCSI_DH - ---help--- - Allow volume managers to support multipath hardware. - -config DM_MULTIPATH_QL - tristate "I/O Path Selector based on the number of in-flight I/Os" - depends on DM_MULTIPATH - ---help--- - This path selector is a dynamic load balancer which selects - the path with the least number of in-flight I/Os. - - If unsure, say N. - -config DM_MULTIPATH_ST - tristate "I/O Path Selector based on the service time" - depends on DM_MULTIPATH - ---help--- - This path selector is a dynamic load balancer which selects - the path expected to complete the incoming I/O in the shortest - time. - - If unsure, say N. - -config DM_DELAY - tristate "I/O delaying target (EXPERIMENTAL)" - depends on BLK_DEV_DM && EXPERIMENTAL - ---help--- - A target that delays reads and/or writes and can send - them to different devices. Useful for testing. - - If unsure, say N. - -config DM_UEVENT - bool "DM uevents" - depends on BLK_DEV_DM - ---help--- - Generate udev events for DM events. - -config DM_FLAKEY - tristate "Flakey target (EXPERIMENTAL)" - depends on BLK_DEV_DM && EXPERIMENTAL - ---help--- - A target that intermittently fails I/O for debugging purposes. - -config DM_VERITY - tristate "Verity target support (EXPERIMENTAL)" - depends on BLK_DEV_DM && EXPERIMENTAL - select CRYPTO - select CRYPTO_HASH - select DM_BUFIO - ---help--- - This device-mapper target creates a read-only device that - transparently validates the data on one underlying device against - a pre-generated tree of cryptographic checksums stored on a second - device. - - You'll need to activate the digests you're going to use in the - cryptoapi configuration. - - To compile this code as a module, choose M here: the module will - be called dm-verity. - - If unsure, say N. - -endif # MD diff --git a/ANDROID_3.4.5/drivers/md/Makefile b/ANDROID_3.4.5/drivers/md/Makefile deleted file mode 100644 index 8b2e0dff..00000000 --- a/ANDROID_3.4.5/drivers/md/Makefile +++ /dev/null @@ -1,49 +0,0 @@ -# -# Makefile for the kernel software RAID and LVM drivers. 
-# - -dm-mod-y += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ - dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o -dm-multipath-y += dm-path-selector.o dm-mpath.o -dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \ - dm-snap-persistent.o -dm-mirror-y += dm-raid1.o -dm-log-userspace-y \ - += dm-log-userspace-base.o dm-log-userspace-transfer.o -dm-thin-pool-y += dm-thin.o dm-thin-metadata.o -md-mod-y += md.o bitmap.o -raid456-y += raid5.o - -# Note: link order is important. All raid personalities -# and must come before md.o, as they each initialise -# themselves, and md.o may use the personalities when it -# auto-initialised. - -obj-$(CONFIG_MD_LINEAR) += linear.o -obj-$(CONFIG_MD_RAID0) += raid0.o -obj-$(CONFIG_MD_RAID1) += raid1.o -obj-$(CONFIG_MD_RAID10) += raid10.o -obj-$(CONFIG_MD_RAID456) += raid456.o -obj-$(CONFIG_MD_MULTIPATH) += multipath.o -obj-$(CONFIG_MD_FAULTY) += faulty.o -obj-$(CONFIG_BLK_DEV_MD) += md-mod.o -obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o -obj-$(CONFIG_DM_BUFIO) += dm-bufio.o -obj-$(CONFIG_DM_CRYPT) += dm-crypt.o -obj-$(CONFIG_DM_DELAY) += dm-delay.o -obj-$(CONFIG_DM_FLAKEY) += dm-flakey.o -obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o -obj-$(CONFIG_DM_MULTIPATH_QL) += dm-queue-length.o -obj-$(CONFIG_DM_MULTIPATH_ST) += dm-service-time.o -obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o -obj-$(CONFIG_DM_PERSISTENT_DATA) += persistent-data/ -obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o -obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o -obj-$(CONFIG_DM_ZERO) += dm-zero.o -obj-$(CONFIG_DM_RAID) += dm-raid.o -obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o -obj-$(CONFIG_DM_VERITY) += dm-verity.o - -ifeq ($(CONFIG_DM_UEVENT),y) -dm-mod-objs += dm-uevent.o -endif diff --git a/ANDROID_3.4.5/drivers/md/bitmap.c b/ANDROID_3.4.5/drivers/md/bitmap.c deleted file mode 100644 index 17e2b472..00000000 --- a/ANDROID_3.4.5/drivers/md/bitmap.c +++ /dev/null @@ -1,2113 +0,0 @@ -/* - * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 - * - * bitmap_create - sets up the bitmap structure - * bitmap_destroy - destroys the bitmap structure - * - * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.: - * - added disk storage for bitmap - * - changes to allow various bitmap chunk sizes - */ - -/* - * Still to do: - * - * flush after percent set rather than just time based. (maybe both). - */ - -#include <linux/blkdev.h> -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/slab.h> -#include <linux/init.h> -#include <linux/timer.h> -#include <linux/sched.h> -#include <linux/list.h> -#include <linux/file.h> -#include <linux/mount.h> -#include <linux/buffer_head.h> -#include <linux/seq_file.h> -#include "md.h" -#include "bitmap.h" - -static inline char *bmname(struct bitmap *bitmap) -{ - return bitmap->mddev ? 
mdname(bitmap->mddev) : "mdX"; -} - -/* - * check a page and, if necessary, allocate it (or hijack it if the alloc fails) - * - * 1) check to see if this page is allocated, if it's not then try to alloc - * 2) if the alloc fails, set the page's hijacked flag so we'll use the - * page pointer directly as a counter - * - * if we find our page, we increment the page's refcount so that it stays - * allocated while we're using it - */ -static int bitmap_checkpage(struct bitmap *bitmap, - unsigned long page, int create) -__releases(bitmap->lock) -__acquires(bitmap->lock) -{ - unsigned char *mappage; - - if (page >= bitmap->pages) { - /* This can happen if bitmap_start_sync goes beyond - * End-of-device while looking for a whole page. - * It is harmless. - */ - return -EINVAL; - } - - if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ - return 0; - - if (bitmap->bp[page].map) /* page is already allocated, just return */ - return 0; - - if (!create) - return -ENOENT; - - /* this page has not been allocated yet */ - - spin_unlock_irq(&bitmap->lock); - mappage = kzalloc(PAGE_SIZE, GFP_NOIO); - spin_lock_irq(&bitmap->lock); - - if (mappage == NULL) { - pr_debug("%s: bitmap map page allocation failed, hijacking\n", - bmname(bitmap)); - /* failed - set the hijacked flag so that we can use the - * pointer as a counter */ - if (!bitmap->bp[page].map) - bitmap->bp[page].hijacked = 1; - } else if (bitmap->bp[page].map || - bitmap->bp[page].hijacked) { - /* somebody beat us to getting the page */ - kfree(mappage); - return 0; - } else { - - /* no page was in place and we have one, so install it */ - - bitmap->bp[page].map = mappage; - bitmap->missing_pages--; - } - return 0; -} - -/* if page is completely empty, put it back on the free list, or dealloc it */ -/* if page was hijacked, unmark the flag so it might get alloced next time */ -/* Note: lock should be held when calling this */ -static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page) -{ - char *ptr; - - if (bitmap->bp[page].count) /* page is still busy */ - return; - - /* page is no longer in use, it can be released */ - - if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ - bitmap->bp[page].hijacked = 0; - bitmap->bp[page].map = NULL; - } else { - /* normal case, free the page */ - ptr = bitmap->bp[page].map; - bitmap->bp[page].map = NULL; - bitmap->missing_pages++; - kfree(ptr); - } -} - -/* - * bitmap file handling - read and write the bitmap file and its superblock - */ - -/* - * basic page I/O operations - */ - -/* IO operations when bitmap is stored near all superblocks */ -static struct page *read_sb_page(struct mddev *mddev, loff_t offset, - struct page *page, - unsigned long index, int size) -{ - /* choose a good rdev and read the page from there */ - - struct md_rdev *rdev; - sector_t target; - int did_alloc = 0; - - if (!page) { - page = alloc_page(GFP_KERNEL); - if (!page) - return ERR_PTR(-ENOMEM); - did_alloc = 1; - } - - rdev_for_each(rdev, mddev) { - if (! 
test_bit(In_sync, &rdev->flags) - || test_bit(Faulty, &rdev->flags)) - continue; - - target = offset + index * (PAGE_SIZE/512); - - if (sync_page_io(rdev, target, - roundup(size, bdev_logical_block_size(rdev->bdev)), - page, READ, true)) { - page->index = index; - attach_page_buffers(page, NULL); /* so that free_buffer will - * quietly no-op */ - return page; - } - } - if (did_alloc) - put_page(page); - return ERR_PTR(-EIO); - -} - -static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev) -{ - /* Iterate the disks of an mddev, using rcu to protect access to the - * linked list, and raising the refcount of devices we return to ensure - * they don't disappear while in use. - * As devices are only added or removed when raid_disk is < 0 and - * nr_pending is 0 and In_sync is clear, the entries we return will - * still be in the same position on the list when we re-enter - * list_for_each_continue_rcu. - */ - struct list_head *pos; - rcu_read_lock(); - if (rdev == NULL) - /* start at the beginning */ - pos = &mddev->disks; - else { - /* release the previous rdev and start from there. */ - rdev_dec_pending(rdev, mddev); - pos = &rdev->same_set; - } - list_for_each_continue_rcu(pos, &mddev->disks) { - rdev = list_entry(pos, struct md_rdev, same_set); - if (rdev->raid_disk >= 0 && - !test_bit(Faulty, &rdev->flags)) { - /* this is a usable devices */ - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - return rdev; - } - } - rcu_read_unlock(); - return NULL; -} - -static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) -{ - struct md_rdev *rdev = NULL; - struct block_device *bdev; - struct mddev *mddev = bitmap->mddev; - - while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { - int size = PAGE_SIZE; - loff_t offset = mddev->bitmap_info.offset; - - bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev; - - if (page->index == bitmap->file_pages-1) - size = roundup(bitmap->last_page_size, - bdev_logical_block_size(bdev)); - /* Just make sure we aren't corrupting data or - * metadata - */ - if (mddev->external) { - /* Bitmap could be anywhere. 
*/ - if (rdev->sb_start + offset + (page->index - * (PAGE_SIZE/512)) - > rdev->data_offset - && - rdev->sb_start + offset - < (rdev->data_offset + mddev->dev_sectors - + (PAGE_SIZE/512))) - goto bad_alignment; - } else if (offset < 0) { - /* DATA BITMAP METADATA */ - if (offset - + (long)(page->index * (PAGE_SIZE/512)) - + size/512 > 0) - /* bitmap runs in to metadata */ - goto bad_alignment; - if (rdev->data_offset + mddev->dev_sectors - > rdev->sb_start + offset) - /* data runs in to bitmap */ - goto bad_alignment; - } else if (rdev->sb_start < rdev->data_offset) { - /* METADATA BITMAP DATA */ - if (rdev->sb_start - + offset - + page->index*(PAGE_SIZE/512) + size/512 - > rdev->data_offset) - /* bitmap runs in to data */ - goto bad_alignment; - } else { - /* DATA METADATA BITMAP - no problems */ - } - md_super_write(mddev, rdev, - rdev->sb_start + offset - + page->index * (PAGE_SIZE/512), - size, - page); - } - - if (wait) - md_super_wait(mddev); - return 0; - - bad_alignment: - return -EINVAL; -} - -static void bitmap_file_kick(struct bitmap *bitmap); -/* - * write out a page to a file - */ -static void write_page(struct bitmap *bitmap, struct page *page, int wait) -{ - struct buffer_head *bh; - - if (bitmap->file == NULL) { - switch (write_sb_page(bitmap, page, wait)) { - case -EINVAL: - bitmap->flags |= BITMAP_WRITE_ERROR; - } - } else { - - bh = page_buffers(page); - - while (bh && bh->b_blocknr) { - atomic_inc(&bitmap->pending_writes); - set_buffer_locked(bh); - set_buffer_mapped(bh); - submit_bh(WRITE | REQ_SYNC, bh); - bh = bh->b_this_page; - } - - if (wait) - wait_event(bitmap->write_wait, - atomic_read(&bitmap->pending_writes)==0); - } - if (bitmap->flags & BITMAP_WRITE_ERROR) - bitmap_file_kick(bitmap); -} - -static void end_bitmap_write(struct buffer_head *bh, int uptodate) -{ - struct bitmap *bitmap = bh->b_private; - unsigned long flags; - - if (!uptodate) { - spin_lock_irqsave(&bitmap->lock, flags); - bitmap->flags |= BITMAP_WRITE_ERROR; - spin_unlock_irqrestore(&bitmap->lock, flags); - } - if (atomic_dec_and_test(&bitmap->pending_writes)) - wake_up(&bitmap->write_wait); -} - -/* copied from buffer.c */ -static void -__clear_page_buffers(struct page *page) -{ - ClearPagePrivate(page); - set_page_private(page, 0); - page_cache_release(page); -} -static void free_buffers(struct page *page) -{ - struct buffer_head *bh = page_buffers(page); - - while (bh) { - struct buffer_head *next = bh->b_this_page; - free_buffer_head(bh); - bh = next; - } - __clear_page_buffers(page); - put_page(page); -} - -/* read a page from a file. - * We both read the page, and attach buffers to the page to record the - * address of each block (using bmap). These addresses will be used - * to write the block later, completely bypassing the filesystem. - * This usage is similar to how swap files are handled, and allows us - * to write to a file with no concerns of memory allocation failing. 
- */ -static struct page *read_page(struct file *file, unsigned long index, - struct bitmap *bitmap, - unsigned long count) -{ - struct page *page = NULL; - struct inode *inode = file->f_path.dentry->d_inode; - struct buffer_head *bh; - sector_t block; - - pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, - (unsigned long long)index << PAGE_SHIFT); - - page = alloc_page(GFP_KERNEL); - if (!page) - page = ERR_PTR(-ENOMEM); - if (IS_ERR(page)) - goto out; - - bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0); - if (!bh) { - put_page(page); - page = ERR_PTR(-ENOMEM); - goto out; - } - attach_page_buffers(page, bh); - block = index << (PAGE_SHIFT - inode->i_blkbits); - while (bh) { - if (count == 0) - bh->b_blocknr = 0; - else { - bh->b_blocknr = bmap(inode, block); - if (bh->b_blocknr == 0) { - /* Cannot use this file! */ - free_buffers(page); - page = ERR_PTR(-EINVAL); - goto out; - } - bh->b_bdev = inode->i_sb->s_bdev; - if (count < (1<<inode->i_blkbits)) - count = 0; - else - count -= (1<<inode->i_blkbits); - - bh->b_end_io = end_bitmap_write; - bh->b_private = bitmap; - atomic_inc(&bitmap->pending_writes); - set_buffer_locked(bh); - set_buffer_mapped(bh); - submit_bh(READ, bh); - } - block++; - bh = bh->b_this_page; - } - page->index = index; - - wait_event(bitmap->write_wait, - atomic_read(&bitmap->pending_writes)==0); - if (bitmap->flags & BITMAP_WRITE_ERROR) { - free_buffers(page); - page = ERR_PTR(-EIO); - } -out: - if (IS_ERR(page)) - printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %ld\n", - (int)PAGE_SIZE, - (unsigned long long)index << PAGE_SHIFT, - PTR_ERR(page)); - return page; -} - -/* - * bitmap file superblock operations - */ - -/* update the event counter and sync the superblock to disk */ -void bitmap_update_sb(struct bitmap *bitmap) -{ - bitmap_super_t *sb; - - if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ - return; - if (bitmap->mddev->bitmap_info.external) - return; - if (!bitmap->sb_page) /* no superblock */ - return; - sb = kmap_atomic(bitmap->sb_page); - sb->events = cpu_to_le64(bitmap->mddev->events); - if (bitmap->mddev->events < bitmap->events_cleared) - /* rocking back to read-only */ - bitmap->events_cleared = bitmap->mddev->events; - sb->events_cleared = cpu_to_le64(bitmap->events_cleared); - sb->state = cpu_to_le32(bitmap->flags); - /* Just in case these have been changed via sysfs: */ - sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ); - sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind); - kunmap_atomic(sb); - write_page(bitmap, bitmap->sb_page, 1); -} - -/* print out the bitmap file superblock */ -void bitmap_print_sb(struct bitmap *bitmap) -{ - bitmap_super_t *sb; - - if (!bitmap || !bitmap->sb_page) - return; - sb = kmap_atomic(bitmap->sb_page); - printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap)); - printk(KERN_DEBUG " magic: %08x\n", le32_to_cpu(sb->magic)); - printk(KERN_DEBUG " version: %d\n", le32_to_cpu(sb->version)); - printk(KERN_DEBUG " uuid: %08x.%08x.%08x.%08x\n", - *(__u32 *)(sb->uuid+0), - *(__u32 *)(sb->uuid+4), - *(__u32 *)(sb->uuid+8), - *(__u32 *)(sb->uuid+12)); - printk(KERN_DEBUG " events: %llu\n", - (unsigned long long) le64_to_cpu(sb->events)); - printk(KERN_DEBUG "events cleared: %llu\n", - (unsigned long long) le64_to_cpu(sb->events_cleared)); - printk(KERN_DEBUG " state: %08x\n", le32_to_cpu(sb->state)); - printk(KERN_DEBUG " chunksize: %d B\n", le32_to_cpu(sb->chunksize)); - printk(KERN_DEBUG " daemon sleep: %ds\n", 
le32_to_cpu(sb->daemon_sleep)); - printk(KERN_DEBUG " sync size: %llu KB\n", - (unsigned long long)le64_to_cpu(sb->sync_size)/2); - printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind)); - kunmap_atomic(sb); -} - -/* - * bitmap_new_disk_sb - * @bitmap - * - * This function is somewhat the reverse of bitmap_read_sb. bitmap_read_sb - * reads and verifies the on-disk bitmap superblock and populates bitmap_info. - * This function verifies 'bitmap_info' and populates the on-disk bitmap - * structure, which is to be written to disk. - * - * Returns: 0 on success, -Exxx on error - */ -static int bitmap_new_disk_sb(struct bitmap *bitmap) -{ - bitmap_super_t *sb; - unsigned long chunksize, daemon_sleep, write_behind; - int err = -EINVAL; - - bitmap->sb_page = alloc_page(GFP_KERNEL); - if (IS_ERR(bitmap->sb_page)) { - err = PTR_ERR(bitmap->sb_page); - bitmap->sb_page = NULL; - return err; - } - bitmap->sb_page->index = 0; - - sb = kmap_atomic(bitmap->sb_page); - - sb->magic = cpu_to_le32(BITMAP_MAGIC); - sb->version = cpu_to_le32(BITMAP_MAJOR_HI); - - chunksize = bitmap->mddev->bitmap_info.chunksize; - BUG_ON(!chunksize); - if (!is_power_of_2(chunksize)) { - kunmap_atomic(sb); - printk(KERN_ERR "bitmap chunksize not a power of 2\n"); - return -EINVAL; - } - sb->chunksize = cpu_to_le32(chunksize); - - daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep; - if (!daemon_sleep || - (daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) { - printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n"); - daemon_sleep = 5 * HZ; - } - sb->daemon_sleep = cpu_to_le32(daemon_sleep); - bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; - - /* - * FIXME: write_behind for RAID1. If not specified, what - * is a good choice? We choose COUNTER_MAX / 2 arbitrarily. - */ - write_behind = bitmap->mddev->bitmap_info.max_write_behind; - if (write_behind > COUNTER_MAX) - write_behind = COUNTER_MAX / 2; - sb->write_behind = cpu_to_le32(write_behind); - bitmap->mddev->bitmap_info.max_write_behind = write_behind; - - /* keep the array size field of the bitmap superblock up to date */ - sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); - - memcpy(sb->uuid, bitmap->mddev->uuid, 16); - - bitmap->flags |= BITMAP_STALE; - sb->state |= cpu_to_le32(BITMAP_STALE); - bitmap->events_cleared = bitmap->mddev->events; - sb->events_cleared = cpu_to_le64(bitmap->mddev->events); - - kunmap_atomic(sb); - - return 0; -} - -/* read the superblock from the bitmap file and initialize some bitmap fields */ -static int bitmap_read_sb(struct bitmap *bitmap) -{ - char *reason = NULL; - bitmap_super_t *sb; - unsigned long chunksize, daemon_sleep, write_behind; - unsigned long long events; - int err = -EINVAL; - - /* page 0 is the superblock, read it... */ - if (bitmap->file) { - loff_t isize = i_size_read(bitmap->file->f_mapping->host); - int bytes = isize > PAGE_SIZE ? 
PAGE_SIZE : isize; - - bitmap->sb_page = read_page(bitmap->file, 0, bitmap, bytes); - } else { - bitmap->sb_page = read_sb_page(bitmap->mddev, - bitmap->mddev->bitmap_info.offset, - NULL, - 0, sizeof(bitmap_super_t)); - } - if (IS_ERR(bitmap->sb_page)) { - err = PTR_ERR(bitmap->sb_page); - bitmap->sb_page = NULL; - return err; - } - - sb = kmap_atomic(bitmap->sb_page); - - chunksize = le32_to_cpu(sb->chunksize); - daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; - write_behind = le32_to_cpu(sb->write_behind); - - /* verify that the bitmap-specific fields are valid */ - if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) - reason = "bad magic"; - else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO || - le32_to_cpu(sb->version) > BITMAP_MAJOR_HI) - reason = "unrecognized superblock version"; - else if (chunksize < 512) - reason = "bitmap chunksize too small"; - else if (!is_power_of_2(chunksize)) - reason = "bitmap chunksize not a power of 2"; - else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT) - reason = "daemon sleep period out of range"; - else if (write_behind > COUNTER_MAX) - reason = "write-behind limit out of range (0 - 16383)"; - if (reason) { - printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n", - bmname(bitmap), reason); - goto out; - } - - /* keep the array size field of the bitmap superblock up to date */ - sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); - - if (bitmap->mddev->persistent) { - /* - * We have a persistent array superblock, so compare the - * bitmap's UUID and event counter to the mddev's - */ - if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) { - printk(KERN_INFO - "%s: bitmap superblock UUID mismatch\n", - bmname(bitmap)); - goto out; - } - events = le64_to_cpu(sb->events); - if (events < bitmap->mddev->events) { - printk(KERN_INFO - "%s: bitmap file is out of date (%llu < %llu) " - "-- forcing full recovery\n", - bmname(bitmap), events, - (unsigned long long) bitmap->mddev->events); - sb->state |= cpu_to_le32(BITMAP_STALE); - } - } - - /* assign fields using values from superblock */ - bitmap->mddev->bitmap_info.chunksize = chunksize; - bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; - bitmap->mddev->bitmap_info.max_write_behind = write_behind; - bitmap->flags |= le32_to_cpu(sb->state); - if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) - bitmap->flags |= BITMAP_HOSTENDIAN; - bitmap->events_cleared = le64_to_cpu(sb->events_cleared); - if (bitmap->flags & BITMAP_STALE) - bitmap->events_cleared = bitmap->mddev->events; - err = 0; -out: - kunmap_atomic(sb); - if (err) - bitmap_print_sb(bitmap); - return err; -} - -enum bitmap_mask_op { - MASK_SET, - MASK_UNSET -}; - -/* record the state of the bitmap in the superblock. Return the old value */ -static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, - enum bitmap_mask_op op) -{ - bitmap_super_t *sb; - int old; - - if (!bitmap->sb_page) /* can't set the state */ - return 0; - sb = kmap_atomic(bitmap->sb_page); - old = le32_to_cpu(sb->state) & bits; - switch (op) { - case MASK_SET: - sb->state |= cpu_to_le32(bits); - bitmap->flags |= bits; - break; - case MASK_UNSET: - sb->state &= cpu_to_le32(~bits); - bitmap->flags &= ~bits; - break; - default: - BUG(); - } - kunmap_atomic(sb); - return old; -} - -/* - * general bitmap file operations - */ - -/* - * on-disk bitmap: - * - * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap - * file a page at a time. There's a superblock at the start of the file. 
- */ -/* calculate the index of the page that contains this bit */ -static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk) -{ - if (!bitmap->mddev->bitmap_info.external) - chunk += sizeof(bitmap_super_t) << 3; - return chunk >> PAGE_BIT_SHIFT; -} - -/* calculate the (bit) offset of this bit within a page */ -static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk) -{ - if (!bitmap->mddev->bitmap_info.external) - chunk += sizeof(bitmap_super_t) << 3; - return chunk & (PAGE_BITS - 1); -} - -/* - * return a pointer to the page in the filemap that contains the given bit - * - * this lookup is complicated by the fact that the bitmap sb might be exactly - * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page - * 0 or page 1 - */ -static inline struct page *filemap_get_page(struct bitmap *bitmap, - unsigned long chunk) -{ - if (file_page_index(bitmap, chunk) >= bitmap->file_pages) - return NULL; - return bitmap->filemap[file_page_index(bitmap, chunk) - - file_page_index(bitmap, 0)]; -} - -static void bitmap_file_unmap(struct bitmap *bitmap) -{ - struct page **map, *sb_page; - unsigned long *attr; - int pages; - unsigned long flags; - - spin_lock_irqsave(&bitmap->lock, flags); - map = bitmap->filemap; - bitmap->filemap = NULL; - attr = bitmap->filemap_attr; - bitmap->filemap_attr = NULL; - pages = bitmap->file_pages; - bitmap->file_pages = 0; - sb_page = bitmap->sb_page; - bitmap->sb_page = NULL; - spin_unlock_irqrestore(&bitmap->lock, flags); - - while (pages--) - if (map[pages] != sb_page) /* 0 is sb_page, release it below */ - free_buffers(map[pages]); - kfree(map); - kfree(attr); - - if (sb_page) - free_buffers(sb_page); -} - -static void bitmap_file_put(struct bitmap *bitmap) -{ - struct file *file; - unsigned long flags; - - spin_lock_irqsave(&bitmap->lock, flags); - file = bitmap->file; - bitmap->file = NULL; - spin_unlock_irqrestore(&bitmap->lock, flags); - - if (file) - wait_event(bitmap->write_wait, - atomic_read(&bitmap->pending_writes)==0); - bitmap_file_unmap(bitmap); - - if (file) { - struct inode *inode = file->f_path.dentry->d_inode; - invalidate_mapping_pages(inode->i_mapping, 0, -1); - fput(file); - } -} - -/* - * bitmap_file_kick - if an error occurs while manipulating the bitmap file - * then it is no longer reliable, so we stop using it and we mark the file - * as failed in the superblock - */ -static void bitmap_file_kick(struct bitmap *bitmap) -{ - char *path, *ptr = NULL; - - if (bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET) == 0) { - bitmap_update_sb(bitmap); - - if (bitmap->file) { - path = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (path) - ptr = d_path(&bitmap->file->f_path, path, - PAGE_SIZE); - - printk(KERN_ALERT - "%s: kicking failed bitmap file %s from array!\n", - bmname(bitmap), IS_ERR(ptr) ? "" : ptr); - - kfree(path); - } else - printk(KERN_ALERT - "%s: disabling internal bitmap due to errors\n", - bmname(bitmap)); - } - - bitmap_file_put(bitmap); - - return; -} - -enum bitmap_page_attr { - BITMAP_PAGE_DIRTY = 0, /* there are set bits that need to be synced */ - BITMAP_PAGE_PENDING = 1, /* there are bits that are being cleaned. - * i.e. counter is 1 or 2. 
*/ - BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */ -}; - -static inline void set_page_attr(struct bitmap *bitmap, struct page *page, - enum bitmap_page_attr attr) -{ - __set_bit((page->index<<2) + attr, bitmap->filemap_attr); -} - -static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, - enum bitmap_page_attr attr) -{ - __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); -} - -static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page, - enum bitmap_page_attr attr) -{ - return test_bit((page->index<<2) + attr, bitmap->filemap_attr); -} - -/* - * bitmap_file_set_bit -- called before performing a write to the md device - * to set (and eventually sync) a particular bit in the bitmap file - * - * we set the bit immediately, then we record the page number so that - * when an unplug occurs, we can flush the dirty pages out to disk - */ -static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) -{ - unsigned long bit; - struct page *page; - void *kaddr; - unsigned long chunk = block >> bitmap->chunkshift; - - if (!bitmap->filemap) - return; - - page = filemap_get_page(bitmap, chunk); - if (!page) - return; - bit = file_page_offset(bitmap, chunk); - - /* set the bit */ - kaddr = kmap_atomic(page); - if (bitmap->flags & BITMAP_HOSTENDIAN) - set_bit(bit, kaddr); - else - __set_bit_le(bit, kaddr); - kunmap_atomic(kaddr); - pr_debug("set file bit %lu page %lu\n", bit, page->index); - /* record page number so it gets flushed to disk when unplug occurs */ - set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); -} - -/* this gets called when the md device is ready to unplug its underlying - * (slave) device queues -- before we let any writes go down, we need to - * sync the dirty pages of the bitmap file to disk */ -void bitmap_unplug(struct bitmap *bitmap) -{ - unsigned long i, flags; - int dirty, need_write; - struct page *page; - int wait = 0; - - if (!bitmap) - return; - - /* look at each page to see if there are any set bits that need to be - * flushed out to disk */ - for (i = 0; i < bitmap->file_pages; i++) { - spin_lock_irqsave(&bitmap->lock, flags); - if (!bitmap->filemap) { - spin_unlock_irqrestore(&bitmap->lock, flags); - return; - } - page = bitmap->filemap[i]; - dirty = test_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); - need_write = test_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); - clear_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); - clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); - if (dirty) - wait = 1; - spin_unlock_irqrestore(&bitmap->lock, flags); - - if (dirty || need_write) - write_page(bitmap, page, 0); - } - if (wait) { /* if any writes were performed, we need to wait on them */ - if (bitmap->file) - wait_event(bitmap->write_wait, - atomic_read(&bitmap->pending_writes)==0); - else - md_super_wait(bitmap->mddev); - } - if (bitmap->flags & BITMAP_WRITE_ERROR) - bitmap_file_kick(bitmap); -} -EXPORT_SYMBOL(bitmap_unplug); - -static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); -/* * bitmap_init_from_disk -- called at bitmap_create time to initialize - * the in-memory bitmap from the on-disk bitmap -- also, sets up the - * memory mapping of the bitmap file - * Special cases: - * if there's no bitmap file, or if the bitmap file had been - * previously kicked from the array, we mark all the bits as - * 1's in order to cause a full resync. - * - * We ignore all bits for sectors that end earlier than 'start'. 
- * This is used when reading an out-of-date bitmap... - */ -static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) -{ - unsigned long i, chunks, index, oldindex, bit; - struct page *page = NULL, *oldpage = NULL; - unsigned long num_pages, bit_cnt = 0; - struct file *file; - unsigned long bytes, offset; - int outofdate; - int ret = -ENOSPC; - void *paddr; - - chunks = bitmap->chunks; - file = bitmap->file; - - BUG_ON(!file && !bitmap->mddev->bitmap_info.offset); - - outofdate = bitmap->flags & BITMAP_STALE; - if (outofdate) - printk(KERN_INFO "%s: bitmap file is out of date, doing full " - "recovery\n", bmname(bitmap)); - - bytes = DIV_ROUND_UP(bitmap->chunks, 8); - if (!bitmap->mddev->bitmap_info.external) - bytes += sizeof(bitmap_super_t); - - num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); - - if (file && i_size_read(file->f_mapping->host) < bytes) { - printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", - bmname(bitmap), - (unsigned long) i_size_read(file->f_mapping->host), - bytes); - goto err; - } - - ret = -ENOMEM; - - bitmap->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); - if (!bitmap->filemap) - goto err; - - /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */ - bitmap->filemap_attr = kzalloc( - roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), - GFP_KERNEL); - if (!bitmap->filemap_attr) - goto err; - - oldindex = ~0L; - - for (i = 0; i < chunks; i++) { - int b; - index = file_page_index(bitmap, i); - bit = file_page_offset(bitmap, i); - if (index != oldindex) { /* this is a new page, read it in */ - int count; - /* unmap the old page, we're done with it */ - if (index == num_pages-1) - count = bytes - index * PAGE_SIZE; - else - count = PAGE_SIZE; - if (index == 0 && bitmap->sb_page) { - /* - * if we're here then the superblock page - * contains some bits (PAGE_SIZE != sizeof sb) - * we've already read it in, so just use it - */ - page = bitmap->sb_page; - offset = sizeof(bitmap_super_t); - if (!file) - page = read_sb_page( - bitmap->mddev, - bitmap->mddev->bitmap_info.offset, - page, - index, count); - } else if (file) { - page = read_page(file, index, bitmap, count); - offset = 0; - } else { - page = read_sb_page(bitmap->mddev, - bitmap->mddev->bitmap_info.offset, - NULL, - index, count); - offset = 0; - } - if (IS_ERR(page)) { /* read error */ - ret = PTR_ERR(page); - goto err; - } - - oldindex = index; - oldpage = page; - - bitmap->filemap[bitmap->file_pages++] = page; - bitmap->last_page_size = count; - - if (outofdate) { - /* - * if bitmap is out of date, dirty the - * whole page and write it out - */ - paddr = kmap_atomic(page); - memset(paddr + offset, 0xff, - PAGE_SIZE - offset); - kunmap_atomic(paddr); - write_page(bitmap, page, 1); - - ret = -EIO; - if (bitmap->flags & BITMAP_WRITE_ERROR) - goto err; - } - } - paddr = kmap_atomic(page); - if (bitmap->flags & BITMAP_HOSTENDIAN) - b = test_bit(bit, paddr); - else - b = test_bit_le(bit, paddr); - kunmap_atomic(paddr); - if (b) { - /* if the disk bit is set, set the memory bit */ - int needed = ((sector_t)(i+1) << bitmap->chunkshift - >= start); - bitmap_set_memory_bits(bitmap, - (sector_t)i << bitmap->chunkshift, - needed); - bit_cnt++; - } - } - - /* everything went OK */ - ret = 0; - bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); - - if (bit_cnt) { /* Kick recovery if any bits were set */ - set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery); - md_wakeup_thread(bitmap->mddev->thread); - } - - printk(KERN_INFO "%s: bitmap initialized 
from disk: " - "read %lu/%lu pages, set %lu of %lu bits\n", - bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt, chunks); - - return 0; - - err: - printk(KERN_INFO "%s: bitmap initialisation failed: %d\n", - bmname(bitmap), ret); - return ret; -} - -void bitmap_write_all(struct bitmap *bitmap) -{ - /* We don't actually write all bitmap blocks here, - * just flag them as needing to be written - */ - int i; - - spin_lock_irq(&bitmap->lock); - for (i = 0; i < bitmap->file_pages; i++) - set_page_attr(bitmap, bitmap->filemap[i], - BITMAP_PAGE_NEEDWRITE); - bitmap->allclean = 0; - spin_unlock_irq(&bitmap->lock); -} - -static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) -{ - sector_t chunk = offset >> bitmap->chunkshift; - unsigned long page = chunk >> PAGE_COUNTER_SHIFT; - bitmap->bp[page].count += inc; - bitmap_checkfree(bitmap, page); -} -static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, - sector_t offset, sector_t *blocks, - int create); - -/* - * bitmap daemon -- periodically wakes up to clean bits and flush pages - * out to disk - */ - -void bitmap_daemon_work(struct mddev *mddev) -{ - struct bitmap *bitmap; - unsigned long j; - unsigned long flags; - struct page *page = NULL, *lastpage = NULL; - sector_t blocks; - void *paddr; - - /* Use a mutex to guard daemon_work against - * bitmap_destroy. - */ - mutex_lock(&mddev->bitmap_info.mutex); - bitmap = mddev->bitmap; - if (bitmap == NULL) { - mutex_unlock(&mddev->bitmap_info.mutex); - return; - } - if (time_before(jiffies, bitmap->daemon_lastrun - + mddev->bitmap_info.daemon_sleep)) - goto done; - - bitmap->daemon_lastrun = jiffies; - if (bitmap->allclean) { - mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; - goto done; - } - bitmap->allclean = 1; - - spin_lock_irqsave(&bitmap->lock, flags); - for (j = 0; j < bitmap->chunks; j++) { - bitmap_counter_t *bmc; - if (!bitmap->filemap) - /* error or shutdown */ - break; - - page = filemap_get_page(bitmap, j); - - if (page != lastpage) { - /* skip this page unless it's marked as needing cleaning */ - if (!test_page_attr(bitmap, page, BITMAP_PAGE_PENDING)) { - int need_write = test_page_attr(bitmap, page, - BITMAP_PAGE_NEEDWRITE); - if (need_write) - clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); - - spin_unlock_irqrestore(&bitmap->lock, flags); - if (need_write) - write_page(bitmap, page, 0); - spin_lock_irqsave(&bitmap->lock, flags); - j |= (PAGE_BITS - 1); - continue; - } - - /* grab the new page, sync and release the old */ - if (lastpage != NULL) { - if (test_page_attr(bitmap, lastpage, - BITMAP_PAGE_NEEDWRITE)) { - clear_page_attr(bitmap, lastpage, - BITMAP_PAGE_NEEDWRITE); - spin_unlock_irqrestore(&bitmap->lock, flags); - write_page(bitmap, lastpage, 0); - } else { - set_page_attr(bitmap, lastpage, - BITMAP_PAGE_NEEDWRITE); - bitmap->allclean = 0; - spin_unlock_irqrestore(&bitmap->lock, flags); - } - } else - spin_unlock_irqrestore(&bitmap->lock, flags); - lastpage = page; - - /* We are possibly going to clear some bits, so make - * sure that events_cleared is up-to-date. 
- */ - if (bitmap->need_sync && - mddev->bitmap_info.external == 0) { - bitmap_super_t *sb; - bitmap->need_sync = 0; - sb = kmap_atomic(bitmap->sb_page); - sb->events_cleared = - cpu_to_le64(bitmap->events_cleared); - kunmap_atomic(sb); - write_page(bitmap, bitmap->sb_page, 1); - } - spin_lock_irqsave(&bitmap->lock, flags); - if (!bitmap->need_sync) - clear_page_attr(bitmap, page, BITMAP_PAGE_PENDING); - else - bitmap->allclean = 0; - } - bmc = bitmap_get_counter(bitmap, - (sector_t)j << bitmap->chunkshift, - &blocks, 0); - if (!bmc) - j |= PAGE_COUNTER_MASK; - else if (*bmc) { - if (*bmc == 1 && !bitmap->need_sync) { - /* we can clear the bit */ - *bmc = 0; - bitmap_count_page(bitmap, - (sector_t)j << bitmap->chunkshift, - -1); - - /* clear the bit */ - paddr = kmap_atomic(page); - if (bitmap->flags & BITMAP_HOSTENDIAN) - clear_bit(file_page_offset(bitmap, j), - paddr); - else - __clear_bit_le( - file_page_offset(bitmap, - j), - paddr); - kunmap_atomic(paddr); - } else if (*bmc <= 2) { - *bmc = 1; /* maybe clear the bit next time */ - set_page_attr(bitmap, page, BITMAP_PAGE_PENDING); - bitmap->allclean = 0; - } - } - } - spin_unlock_irqrestore(&bitmap->lock, flags); - - /* now sync the final page */ - if (lastpage != NULL) { - spin_lock_irqsave(&bitmap->lock, flags); - if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { - clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); - spin_unlock_irqrestore(&bitmap->lock, flags); - write_page(bitmap, lastpage, 0); - } else { - set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); - bitmap->allclean = 0; - spin_unlock_irqrestore(&bitmap->lock, flags); - } - } - - done: - if (bitmap->allclean == 0) - mddev->thread->timeout = - mddev->bitmap_info.daemon_sleep; - mutex_unlock(&mddev->bitmap_info.mutex); -} - -static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, - sector_t offset, sector_t *blocks, - int create) -__releases(bitmap->lock) -__acquires(bitmap->lock) -{ - /* If 'create', we might release the lock and reclaim it. - * The lock must have been taken with interrupts enabled. - * If !create, we don't release the lock. - */ - sector_t chunk = offset >> bitmap->chunkshift; - unsigned long page = chunk >> PAGE_COUNTER_SHIFT; - unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; - sector_t csize; - int err; - - err = bitmap_checkpage(bitmap, page, create); - - if (bitmap->bp[page].hijacked || - bitmap->bp[page].map == NULL) - csize = ((sector_t)1) << (bitmap->chunkshift + - PAGE_COUNTER_SHIFT - 1); - else - csize = ((sector_t)1) << bitmap->chunkshift; - *blocks = csize - (offset & (csize - 1)); - - if (err < 0) - return NULL; - - /* now locked ... */ - - if (bitmap->bp[page].hijacked) { /* hijacked pointer */ - /* should we use the first or second counter field - * of the hijacked pointer? 
*/ - int hi = (pageoff > PAGE_COUNTER_MASK); - return &((bitmap_counter_t *) - &bitmap->bp[page].map)[hi]; - } else /* page is allocated */ - return (bitmap_counter_t *) - &(bitmap->bp[page].map[pageoff]); -} - -int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) -{ - if (!bitmap) - return 0; - - if (behind) { - int bw; - atomic_inc(&bitmap->behind_writes); - bw = atomic_read(&bitmap->behind_writes); - if (bw > bitmap->behind_writes_used) - bitmap->behind_writes_used = bw; - - pr_debug("inc write-behind count %d/%lu\n", - bw, bitmap->mddev->bitmap_info.max_write_behind); - } - - while (sectors) { - sector_t blocks; - bitmap_counter_t *bmc; - - spin_lock_irq(&bitmap->lock); - bmc = bitmap_get_counter(bitmap, offset, &blocks, 1); - if (!bmc) { - spin_unlock_irq(&bitmap->lock); - return 0; - } - - if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) { - DEFINE_WAIT(__wait); - /* note that it is safe to do the prepare_to_wait - * after the test as long as we do it before dropping - * the spinlock. - */ - prepare_to_wait(&bitmap->overflow_wait, &__wait, - TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&bitmap->lock); - io_schedule(); - finish_wait(&bitmap->overflow_wait, &__wait); - continue; - } - - switch (*bmc) { - case 0: - bitmap_file_set_bit(bitmap, offset); - bitmap_count_page(bitmap, offset, 1); - /* fall through */ - case 1: - *bmc = 2; - } - - (*bmc)++; - - spin_unlock_irq(&bitmap->lock); - - offset += blocks; - if (sectors > blocks) - sectors -= blocks; - else - sectors = 0; - } - return 0; -} -EXPORT_SYMBOL(bitmap_startwrite); - -void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, - int success, int behind) -{ - if (!bitmap) - return; - if (behind) { - if (atomic_dec_and_test(&bitmap->behind_writes)) - wake_up(&bitmap->behind_wait); - pr_debug("dec write-behind count %d/%lu\n", - atomic_read(&bitmap->behind_writes), - bitmap->mddev->bitmap_info.max_write_behind); - } - - while (sectors) { - sector_t blocks; - unsigned long flags; - bitmap_counter_t *bmc; - - spin_lock_irqsave(&bitmap->lock, flags); - bmc = bitmap_get_counter(bitmap, offset, &blocks, 0); - if (!bmc) { - spin_unlock_irqrestore(&bitmap->lock, flags); - return; - } - - if (success && !bitmap->mddev->degraded && - bitmap->events_cleared < bitmap->mddev->events) { - bitmap->events_cleared = bitmap->mddev->events; - bitmap->need_sync = 1; - sysfs_notify_dirent_safe(bitmap->sysfs_can_clear); - } - - if (!success && !NEEDED(*bmc)) - *bmc |= NEEDED_MASK; - - if (COUNTER(*bmc) == COUNTER_MAX) - wake_up(&bitmap->overflow_wait); - - (*bmc)--; - if (*bmc <= 2) { - set_page_attr(bitmap, - filemap_get_page( - bitmap, - offset >> bitmap->chunkshift), - BITMAP_PAGE_PENDING); - bitmap->allclean = 0; - } - spin_unlock_irqrestore(&bitmap->lock, flags); - offset += blocks; - if (sectors > blocks) - sectors -= blocks; - else - sectors = 0; - } -} -EXPORT_SYMBOL(bitmap_endwrite); - -static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, - int degraded) -{ - bitmap_counter_t *bmc; - int rv; - if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */ - *blocks = 1024; - return 1; /* always resync if no bitmap */ - } - spin_lock_irq(&bitmap->lock); - bmc = bitmap_get_counter(bitmap, offset, blocks, 0); - rv = 0; - if (bmc) { - /* locked */ - if (RESYNC(*bmc)) - rv = 1; - else if (NEEDED(*bmc)) { - rv = 1; - if (!degraded) { /* don't set/clear bits if degraded */ - *bmc |= RESYNC_MASK; - *bmc &= ~NEEDED_MASK; - } - } - } - 
spin_unlock_irq(&bitmap->lock); - return rv; -} - -int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, - int degraded) -{ - /* bitmap_start_sync must always report on multiples of whole - * pages, otherwise resync (which is very PAGE_SIZE based) will - * get confused. - * So call __bitmap_start_sync repeatedly (if needed) until - * At least PAGE_SIZE>>9 blocks are covered. - * Return the 'or' of the result. - */ - int rv = 0; - sector_t blocks1; - - *blocks = 0; - while (*blocks < (PAGE_SIZE>>9)) { - rv |= __bitmap_start_sync(bitmap, offset, - &blocks1, degraded); - offset += blocks1; - *blocks += blocks1; - } - return rv; -} -EXPORT_SYMBOL(bitmap_start_sync); - -void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted) -{ - bitmap_counter_t *bmc; - unsigned long flags; - - if (bitmap == NULL) { - *blocks = 1024; - return; - } - spin_lock_irqsave(&bitmap->lock, flags); - bmc = bitmap_get_counter(bitmap, offset, blocks, 0); - if (bmc == NULL) - goto unlock; - /* locked */ - if (RESYNC(*bmc)) { - *bmc &= ~RESYNC_MASK; - - if (!NEEDED(*bmc) && aborted) - *bmc |= NEEDED_MASK; - else { - if (*bmc <= 2) { - set_page_attr(bitmap, - filemap_get_page(bitmap, offset >> bitmap->chunkshift), - BITMAP_PAGE_PENDING); - bitmap->allclean = 0; - } - } - } - unlock: - spin_unlock_irqrestore(&bitmap->lock, flags); -} -EXPORT_SYMBOL(bitmap_end_sync); - -void bitmap_close_sync(struct bitmap *bitmap) -{ - /* Sync has finished, and any bitmap chunks that weren't synced - * properly have been aborted. It remains to us to clear the - * RESYNC bit wherever it is still on - */ - sector_t sector = 0; - sector_t blocks; - if (!bitmap) - return; - while (sector < bitmap->mddev->resync_max_sectors) { - bitmap_end_sync(bitmap, sector, &blocks, 0); - sector += blocks; - } -} -EXPORT_SYMBOL(bitmap_close_sync); - -void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) -{ - sector_t s = 0; - sector_t blocks; - - if (!bitmap) - return; - if (sector == 0) { - bitmap->last_end_sync = jiffies; - return; - } - if (time_before(jiffies, (bitmap->last_end_sync - + bitmap->mddev->bitmap_info.daemon_sleep))) - return; - wait_event(bitmap->mddev->recovery_wait, - atomic_read(&bitmap->mddev->recovery_active) == 0); - - bitmap->mddev->curr_resync_completed = sector; - set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); - sector &= ~((1ULL << bitmap->chunkshift) - 1); - s = 0; - while (s < sector && s < bitmap->mddev->resync_max_sectors) { - bitmap_end_sync(bitmap, s, &blocks, 0); - s += blocks; - } - bitmap->last_end_sync = jiffies; - sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed"); -} -EXPORT_SYMBOL(bitmap_cond_end_sync); - -static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) -{ - /* For each chunk covered by any of these sectors, set the - * counter to 1 and set resync_needed. They should all - * be 0 at this point - */ - - sector_t secs; - bitmap_counter_t *bmc; - spin_lock_irq(&bitmap->lock); - bmc = bitmap_get_counter(bitmap, offset, &secs, 1); - if (!bmc) { - spin_unlock_irq(&bitmap->lock); - return; - } - if (!*bmc) { - struct page *page; - *bmc = 2 | (needed ? 
NEEDED_MASK : 0); - bitmap_count_page(bitmap, offset, 1); - page = filemap_get_page(bitmap, offset >> bitmap->chunkshift); - set_page_attr(bitmap, page, BITMAP_PAGE_PENDING); - bitmap->allclean = 0; - } - spin_unlock_irq(&bitmap->lock); -} - -/* dirty the memory and file bits for bitmap chunks "s" to "e" */ -void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) -{ - unsigned long chunk; - - for (chunk = s; chunk <= e; chunk++) { - sector_t sec = (sector_t)chunk << bitmap->chunkshift; - bitmap_set_memory_bits(bitmap, sec, 1); - spin_lock_irq(&bitmap->lock); - bitmap_file_set_bit(bitmap, sec); - spin_unlock_irq(&bitmap->lock); - if (sec < bitmap->mddev->recovery_cp) - /* We are asserting that the array is dirty, - * so move the recovery_cp address back so - * that it is obvious that it is dirty - */ - bitmap->mddev->recovery_cp = sec; - } -} - -/* - * flush out any pending updates - */ -void bitmap_flush(struct mddev *mddev) -{ - struct bitmap *bitmap = mddev->bitmap; - long sleep; - - if (!bitmap) /* there was no bitmap */ - return; - - /* run the daemon_work three time to ensure everything is flushed - * that can be - */ - sleep = mddev->bitmap_info.daemon_sleep * 2; - bitmap->daemon_lastrun -= sleep; - bitmap_daemon_work(mddev); - bitmap->daemon_lastrun -= sleep; - bitmap_daemon_work(mddev); - bitmap->daemon_lastrun -= sleep; - bitmap_daemon_work(mddev); - bitmap_update_sb(bitmap); -} - -/* - * free memory that was allocated - */ -static void bitmap_free(struct bitmap *bitmap) -{ - unsigned long k, pages; - struct bitmap_page *bp; - - if (!bitmap) /* there was no bitmap */ - return; - - /* release the bitmap file and kill the daemon */ - bitmap_file_put(bitmap); - - bp = bitmap->bp; - pages = bitmap->pages; - - /* free all allocated memory */ - - if (bp) /* deallocate the page memory */ - for (k = 0; k < pages; k++) - if (bp[k].map && !bp[k].hijacked) - kfree(bp[k].map); - kfree(bp); - kfree(bitmap); -} - -void bitmap_destroy(struct mddev *mddev) -{ - struct bitmap *bitmap = mddev->bitmap; - - if (!bitmap) /* there was no bitmap */ - return; - - mutex_lock(&mddev->bitmap_info.mutex); - mddev->bitmap = NULL; /* disconnect from the md device */ - mutex_unlock(&mddev->bitmap_info.mutex); - if (mddev->thread) - mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; - - if (bitmap->sysfs_can_clear) - sysfs_put(bitmap->sysfs_can_clear); - - bitmap_free(bitmap); -} - -/* - * initialize the bitmap structure - * if this returns an error, bitmap_destroy must be called to do clean up - */ -int bitmap_create(struct mddev *mddev) -{ - struct bitmap *bitmap; - sector_t blocks = mddev->resync_max_sectors; - unsigned long chunks; - unsigned long pages; - struct file *file = mddev->bitmap_info.file; - int err; - struct sysfs_dirent *bm = NULL; - - BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); - - if (!file - && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */ - return 0; - - BUG_ON(file && mddev->bitmap_info.offset); - - bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); - if (!bitmap) - return -ENOMEM; - - spin_lock_init(&bitmap->lock); - atomic_set(&bitmap->pending_writes, 0); - init_waitqueue_head(&bitmap->write_wait); - init_waitqueue_head(&bitmap->overflow_wait); - init_waitqueue_head(&bitmap->behind_wait); - - bitmap->mddev = mddev; - - if (mddev->kobj.sd) - bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap"); - if (bm) { - bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear"); - sysfs_put(bm); - } else - bitmap->sysfs_can_clear = NULL; - - bitmap->file = 
file; - if (file) { - get_file(file); - /* As future accesses to this file will use bmap, - * and bypass the page cache, we must sync the file - * first. - */ - vfs_fsync(file, 1); - } - /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */ - if (!mddev->bitmap_info.external) { - /* - * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is - * instructing us to create a new on-disk bitmap instance. - */ - if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags)) - err = bitmap_new_disk_sb(bitmap); - else - err = bitmap_read_sb(bitmap); - } else { - err = 0; - if (mddev->bitmap_info.chunksize == 0 || - mddev->bitmap_info.daemon_sleep == 0) - /* chunksize and time_base need to be - * set first. */ - err = -EINVAL; - } - if (err) - goto error; - - bitmap->daemon_lastrun = jiffies; - bitmap->chunkshift = (ffz(~mddev->bitmap_info.chunksize) - - BITMAP_BLOCK_SHIFT); - - chunks = (blocks + (1 << bitmap->chunkshift) - 1) >> - bitmap->chunkshift; - pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; - - BUG_ON(!pages); - - bitmap->chunks = chunks; - bitmap->pages = pages; - bitmap->missing_pages = pages; - - bitmap->bp = kzalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL); - - err = -ENOMEM; - if (!bitmap->bp) - goto error; - - printk(KERN_INFO "created bitmap (%lu pages) for device %s\n", - pages, bmname(bitmap)); - - mddev->bitmap = bitmap; - - - return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0; - - error: - bitmap_free(bitmap); - return err; -} - -int bitmap_load(struct mddev *mddev) -{ - int err = 0; - sector_t start = 0; - sector_t sector = 0; - struct bitmap *bitmap = mddev->bitmap; - - if (!bitmap) - goto out; - - /* Clear out old bitmap info first: Either there is none, or we - * are resuming after someone else has possibly changed things, - * so we should forget old cached info. - * All chunks should be clean, but some might need_sync. - */ - while (sector < mddev->resync_max_sectors) { - sector_t blocks; - bitmap_start_sync(bitmap, sector, &blocks, 0); - sector += blocks; - } - bitmap_close_sync(bitmap); - - if (mddev->degraded == 0 - || bitmap->events_cleared == mddev->events) - /* no need to keep dirty bits to optimise a - * re-add of a missing device */ - start = mddev->recovery_cp; - - mutex_lock(&mddev->bitmap_info.mutex); - err = bitmap_init_from_disk(bitmap, start); - mutex_unlock(&mddev->bitmap_info.mutex); - - if (err) - goto out; - - mddev->thread->timeout = mddev->bitmap_info.daemon_sleep; - md_wakeup_thread(mddev->thread); - - bitmap_update_sb(bitmap); - - if (bitmap->flags & BITMAP_WRITE_ERROR) - err = -EIO; -out: - return err; -} -EXPORT_SYMBOL_GPL(bitmap_load); - -void bitmap_status(struct seq_file *seq, struct bitmap *bitmap) -{ - unsigned long chunk_kb; - unsigned long flags; - - if (!bitmap) - return; - - spin_lock_irqsave(&bitmap->lock, flags); - chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10; - seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], " - "%lu%s chunk", - bitmap->pages - bitmap->missing_pages, - bitmap->pages, - (bitmap->pages - bitmap->missing_pages) - << (PAGE_SHIFT - 10), - chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize, - chunk_kb ? 
"KB" : "B"); - if (bitmap->file) { - seq_printf(seq, ", file: "); - seq_path(seq, &bitmap->file->f_path, " \t\n"); - } - - seq_printf(seq, "\n"); - spin_unlock_irqrestore(&bitmap->lock, flags); -} - -static ssize_t -location_show(struct mddev *mddev, char *page) -{ - ssize_t len; - if (mddev->bitmap_info.file) - len = sprintf(page, "file"); - else if (mddev->bitmap_info.offset) - len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset); - else - len = sprintf(page, "none"); - len += sprintf(page+len, "\n"); - return len; -} - -static ssize_t -location_store(struct mddev *mddev, const char *buf, size_t len) -{ - - if (mddev->pers) { - if (!mddev->pers->quiesce) - return -EBUSY; - if (mddev->recovery || mddev->sync_thread) - return -EBUSY; - } - - if (mddev->bitmap || mddev->bitmap_info.file || - mddev->bitmap_info.offset) { - /* bitmap already configured. Only option is to clear it */ - if (strncmp(buf, "none", 4) != 0) - return -EBUSY; - if (mddev->pers) { - mddev->pers->quiesce(mddev, 1); - bitmap_destroy(mddev); - mddev->pers->quiesce(mddev, 0); - } - mddev->bitmap_info.offset = 0; - if (mddev->bitmap_info.file) { - struct file *f = mddev->bitmap_info.file; - mddev->bitmap_info.file = NULL; - restore_bitmap_write_access(f); - fput(f); - } - } else { - /* No bitmap, OK to set a location */ - long long offset; - if (strncmp(buf, "none", 4) == 0) - /* nothing to be done */; - else if (strncmp(buf, "file:", 5) == 0) { - /* Not supported yet */ - return -EINVAL; - } else { - int rv; - if (buf[0] == '+') - rv = strict_strtoll(buf+1, 10, &offset); - else - rv = strict_strtoll(buf, 10, &offset); - if (rv) - return rv; - if (offset == 0) - return -EINVAL; - if (mddev->bitmap_info.external == 0 && - mddev->major_version == 0 && - offset != mddev->bitmap_info.default_offset) - return -EINVAL; - mddev->bitmap_info.offset = offset; - if (mddev->pers) { - mddev->pers->quiesce(mddev, 1); - rv = bitmap_create(mddev); - if (!rv) - rv = bitmap_load(mddev); - if (rv) { - bitmap_destroy(mddev); - mddev->bitmap_info.offset = 0; - } - mddev->pers->quiesce(mddev, 0); - if (rv) - return rv; - } - } - } - if (!mddev->external) { - /* Ensure new bitmap info is stored in - * metadata promptly. - */ - set_bit(MD_CHANGE_DEVS, &mddev->flags); - md_wakeup_thread(mddev->thread); - } - return len; -} - -static struct md_sysfs_entry bitmap_location = -__ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store); - -static ssize_t -timeout_show(struct mddev *mddev, char *page) -{ - ssize_t len; - unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; - unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ; - - len = sprintf(page, "%lu", secs); - if (jifs) - len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs)); - len += sprintf(page+len, "\n"); - return len; -} - -static ssize_t -timeout_store(struct mddev *mddev, const char *buf, size_t len) -{ - /* timeout can be set at any time */ - unsigned long timeout; - int rv = strict_strtoul_scaled(buf, &timeout, 4); - if (rv) - return rv; - - /* just to make sure we don't overflow... 
*/ - if (timeout >= LONG_MAX / HZ) - return -EINVAL; - - timeout = timeout * HZ / 10000; - - if (timeout >= MAX_SCHEDULE_TIMEOUT) - timeout = MAX_SCHEDULE_TIMEOUT-1; - if (timeout < 1) - timeout = 1; - mddev->bitmap_info.daemon_sleep = timeout; - if (mddev->thread) { - /* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then - * the bitmap is all clean and we don't need to - * adjust the timeout right now - */ - if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) { - mddev->thread->timeout = timeout; - md_wakeup_thread(mddev->thread); - } - } - return len; -} - -static struct md_sysfs_entry bitmap_timeout = -__ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store); - -static ssize_t -backlog_show(struct mddev *mddev, char *page) -{ - return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind); -} - -static ssize_t -backlog_store(struct mddev *mddev, const char *buf, size_t len) -{ - unsigned long backlog; - int rv = strict_strtoul(buf, 10, &backlog); - if (rv) - return rv; - if (backlog > COUNTER_MAX) - return -EINVAL; - mddev->bitmap_info.max_write_behind = backlog; - return len; -} - -static struct md_sysfs_entry bitmap_backlog = -__ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store); - -static ssize_t -chunksize_show(struct mddev *mddev, char *page) -{ - return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize); -} - -static ssize_t -chunksize_store(struct mddev *mddev, const char *buf, size_t len) -{ - /* Can only be changed when no bitmap is active */ - int rv; - unsigned long csize; - if (mddev->bitmap) - return -EBUSY; - rv = strict_strtoul(buf, 10, &csize); - if (rv) - return rv; - if (csize < 512 || - !is_power_of_2(csize)) - return -EINVAL; - mddev->bitmap_info.chunksize = csize; - return len; -} - -static struct md_sysfs_entry bitmap_chunksize = -__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store); - -static ssize_t metadata_show(struct mddev *mddev, char *page) -{ - return sprintf(page, "%s\n", (mddev->bitmap_info.external - ? "external" : "internal")); -} - -static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len) -{ - if (mddev->bitmap || - mddev->bitmap_info.file || - mddev->bitmap_info.offset) - return -EBUSY; - if (strncmp(buf, "external", 8) == 0) - mddev->bitmap_info.external = 1; - else if (strncmp(buf, "internal", 8) == 0) - mddev->bitmap_info.external = 0; - else - return -EINVAL; - return len; -} - -static struct md_sysfs_entry bitmap_metadata = -__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store); - -static ssize_t can_clear_show(struct mddev *mddev, char *page) -{ - int len; - if (mddev->bitmap) - len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ? 
- "false" : "true")); - else - len = sprintf(page, "\n"); - return len; -} - -static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len) -{ - if (mddev->bitmap == NULL) - return -ENOENT; - if (strncmp(buf, "false", 5) == 0) - mddev->bitmap->need_sync = 1; - else if (strncmp(buf, "true", 4) == 0) { - if (mddev->degraded) - return -EBUSY; - mddev->bitmap->need_sync = 0; - } else - return -EINVAL; - return len; -} - -static struct md_sysfs_entry bitmap_can_clear = -__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store); - -static ssize_t -behind_writes_used_show(struct mddev *mddev, char *page) -{ - if (mddev->bitmap == NULL) - return sprintf(page, "0\n"); - return sprintf(page, "%lu\n", - mddev->bitmap->behind_writes_used); -} - -static ssize_t -behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len) -{ - if (mddev->bitmap) - mddev->bitmap->behind_writes_used = 0; - return len; -} - -static struct md_sysfs_entry max_backlog_used = -__ATTR(max_backlog_used, S_IRUGO | S_IWUSR, - behind_writes_used_show, behind_writes_used_reset); - -static struct attribute *md_bitmap_attrs[] = { - &bitmap_location.attr, - &bitmap_timeout.attr, - &bitmap_backlog.attr, - &bitmap_chunksize.attr, - &bitmap_metadata.attr, - &bitmap_can_clear.attr, - &max_backlog_used.attr, - NULL -}; -struct attribute_group md_bitmap_group = { - .name = "bitmap", - .attrs = md_bitmap_attrs, -}; - diff --git a/ANDROID_3.4.5/drivers/md/bitmap.h b/ANDROID_3.4.5/drivers/md/bitmap.h deleted file mode 100644 index b44b0aba..00000000 --- a/ANDROID_3.4.5/drivers/md/bitmap.h +++ /dev/null @@ -1,247 +0,0 @@ -/* - * bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 - * - * additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc. - */ -#ifndef BITMAP_H -#define BITMAP_H 1 - -#define BITMAP_MAJOR_LO 3 -/* version 4 insists the bitmap is in little-endian order - * with version 3, it is host-endian which is non-portable - */ -#define BITMAP_MAJOR_HI 4 -#define BITMAP_MAJOR_HOSTENDIAN 3 - -/* - * in-memory bitmap: - * - * Use 16 bit block counters to track pending writes to each "chunk". - * The 2 high order bits are special-purpose, the first is a flag indicating - * whether a resync is needed. The second is a flag indicating whether a - * resync is active. - * This means that the counter is actually 14 bits: - * - * +--------+--------+------------------------------------------------+ - * | resync | resync | counter | - * | needed | active | | - * | (0-1) | (0-1) | (0-16383) | - * +--------+--------+------------------------------------------------+ - * - * The "resync needed" bit is set when: - * a '1' bit is read from storage at startup. - * a write request fails on some drives - * a resync is aborted on a chunk with 'resync active' set - * It is cleared (and resync-active set) when a resync starts across all drives - * of the chunk. - * - * - * The "resync active" bit is set when: - * a resync is started on all drives, and resync_needed is set. - * resync_needed will be cleared (as long as resync_active wasn't already set). - * It is cleared when a resync completes. - * - * The counter counts pending write requests, plus the on-disk bit. - * When the counter is '1' and the resync bits are clear, the on-disk - * bit can be cleared as well, thus setting the counter to 0. - * When we set a bit, or in the counter (to start a write), if the fields is - * 0, we first set the disk bit and set the counter to 1. 
- * - * If the counter is 0, the on-disk bit is clear and the stripe is clean - * Anything that dirties the stripe pushes the counter to 2 (at least) - * and sets the on-disk bit (lazily). - * If a periodic sweep finds the counter at 2, it is decremented to 1. - * If the sweep finds the counter at 1, the on-disk bit is cleared and the - * counter goes to zero. - * - * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block - * counters as a fallback when "page" memory cannot be allocated: - * - * Normal case (page memory allocated): - * - * page pointer (32-bit) - * - * [ ] ------+ - * | - * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) - * c1 c2 c2048 - * - * Hijacked case (page memory allocation failed): - * - * hijacked page pointer (32-bit) - * - * [ ][ ] (no page memory allocated) - * counter #1 (16-bit) counter #2 (16-bit) - * - */ - -#ifdef __KERNEL__ - -#define PAGE_BITS (PAGE_SIZE << 3) -#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) - -typedef __u16 bitmap_counter_t; -#define COUNTER_BITS 16 -#define COUNTER_BIT_SHIFT 4 -#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3) - -#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1))) -#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2))) -#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1) -#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK) -#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK) -#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX) - -/* how many counters per page? */ -#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS) -/* same, except a shift value for more efficient bitops */ -#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT) -/* same, except a mask value for more efficient bitops */ -#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) - -#define BITMAP_BLOCK_SHIFT 9 - -#endif - -/* - * bitmap structures: - */ - -#define BITMAP_MAGIC 0x6d746962 - -/* use these for bitmap->flags and bitmap->sb->state bit-fields */ -enum bitmap_state { - BITMAP_STALE = 0x002, /* the bitmap file is out of date or had -EIO */ - BITMAP_WRITE_ERROR = 0x004, /* A write error has occurred */ - BITMAP_HOSTENDIAN = 0x8000, -}; - -/* the superblock at the front of the bitmap file -- little endian */ -typedef struct bitmap_super_s { - __le32 magic; /* 0 BITMAP_MAGIC */ - __le32 version; /* 4 the bitmap major for now, could change... */ - __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */ - __le64 events; /* 24 event counter for the bitmap (1)*/ - __le64 events_cleared;/*32 event counter when last bit cleared (2) */ - __le64 sync_size; /* 40 the size of the md device's sync range(3) */ - __le32 state; /* 48 bitmap state information */ - __le32 chunksize; /* 52 the bitmap chunk size in bytes */ - __le32 daemon_sleep; /* 56 seconds between disk flushes */ - __le32 write_behind; /* 60 number of outstanding write-behind writes */ - - __u8 pad[256 - 64]; /* set to zero */ -} bitmap_super_t; - -/* notes: - * (1) This event counter is updated before the event counter in the md superblock - * When a bitmap is loaded, it is only accepted if this event counter is equal - * to, or one greater than, the event counter in the superblock. - * (2) This event counter is updated when the other one is *if*and*only*if* the - * array is not degraded. As bits are not cleared when the array is degraded, - * this represents the last time that any bits were cleared.
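Editorial aside (not part of the original bitmap.h; the superblock notes continue below): the 16-bit counter layout described above is easy to misread, so here is a minimal stand-alone sketch of how one counter word splits into the "resync needed" bit, the "resync active" bit and the 14-bit pending-write count. The mask definitions are repeated from the header; the user-space printf() harness is purely illustrative.

/* Illustrative only; mirrors the NEEDED/RESYNC/COUNTER macros above. */
#include <stdio.h>

typedef unsigned short bitmap_counter_t;	/* __u16 in the kernel */
#define COUNTER_BITS 16
#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1)))
#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2)))
#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1)
#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK)
#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK)
#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX)

int main(void)
{
	/* "resync needed" set, "resync active" clear, count of 3 */
	bitmap_counter_t c = NEEDED_MASK | 3;

	printf("needed=%d resync=%d count=%u of max %u\n",
	       NEEDED(c) ? 1 : 0, RESYNC(c) ? 1 : 0,
	       (unsigned)COUNTER(c), (unsigned)COUNTER_MAX);
	/* prints: needed=1 resync=0 count=3 of max 16383 */
	return 0;
}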
- * If a device is being added that has an event count with this value or - * higher, it is accepted as conforming to the bitmap. - * (3)This is the number of sectors represented by the bitmap, and is the range that - * resync happens across. For raid1 and raid5/6 it is the size of individual - * devices. For raid10 it is the size of the array. - */ - -#ifdef __KERNEL__ - -/* the in-memory bitmap is represented by bitmap_pages */ -struct bitmap_page { - /* - * map points to the actual memory page - */ - char *map; - /* - * in emergencies (when map cannot be alloced), hijack the map - * pointer and use it as two counters itself - */ - unsigned int hijacked:1; - /* - * count of dirty bits on the page - */ - unsigned int count:31; -}; - -/* the main bitmap structure - one per mddev */ -struct bitmap { - struct bitmap_page *bp; - unsigned long pages; /* total number of pages in the bitmap */ - unsigned long missing_pages; /* number of pages not yet allocated */ - - struct mddev *mddev; /* the md device that the bitmap is for */ - - /* bitmap chunksize -- how much data does each bit represent? */ - unsigned long chunkshift; /* chunksize = 2^(chunkshift+9) (for bitops) */ - unsigned long chunks; /* total number of data chunks for the array */ - - __u64 events_cleared; - int need_sync; - - /* bitmap spinlock */ - spinlock_t lock; - - struct file *file; /* backing disk file */ - struct page *sb_page; /* cached copy of the bitmap file superblock */ - struct page **filemap; /* list of cache pages for the file */ - unsigned long *filemap_attr; /* attributes associated w/ filemap pages */ - unsigned long file_pages; /* number of pages in the file */ - int last_page_size; /* bytes in the last page */ - - unsigned long flags; - - int allclean; - - atomic_t behind_writes; - unsigned long behind_writes_used; /* highest actual value at runtime */ - - /* - * the bitmap daemon - periodically wakes up and sweeps the bitmap - * file, cleaning up bits and flushing out pages to disk as necessary - */ - unsigned long daemon_lastrun; /* jiffies of last run */ - unsigned long last_end_sync; /* when we lasted called end_sync to - * update bitmap with resync progress */ - - atomic_t pending_writes; /* pending writes to the bitmap file */ - wait_queue_head_t write_wait; - wait_queue_head_t overflow_wait; - wait_queue_head_t behind_wait; - - struct sysfs_dirent *sysfs_can_clear; -}; - -/* the bitmap API */ - -/* these are used only by md/bitmap */ -int bitmap_create(struct mddev *mddev); -int bitmap_load(struct mddev *mddev); -void bitmap_flush(struct mddev *mddev); -void bitmap_destroy(struct mddev *mddev); - -void bitmap_print_sb(struct bitmap *bitmap); -void bitmap_update_sb(struct bitmap *bitmap); -void bitmap_status(struct seq_file *seq, struct bitmap *bitmap); - -int bitmap_setallbits(struct bitmap *bitmap); -void bitmap_write_all(struct bitmap *bitmap); - -void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e); - -/* these are exported */ -int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, - unsigned long sectors, int behind); -void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, - unsigned long sectors, int success, int behind); -int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded); -void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted); -void bitmap_close_sync(struct bitmap *bitmap); -void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector); - -void bitmap_unplug(struct bitmap 
*bitmap); -void bitmap_daemon_work(struct mddev *mddev); -#endif - -#endif diff --git a/ANDROID_3.4.5/drivers/md/dm-bio-record.h b/ANDROID_3.4.5/drivers/md/dm-bio-record.h deleted file mode 100644 index 3a8cfa26..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-bio-record.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. - */ - -#ifndef DM_BIO_RECORD_H -#define DM_BIO_RECORD_H - -#include <linux/bio.h> - -/* - * There are lots of mutable fields in the bio struct that get - * changed by the lower levels of the block layer. Some targets, - * such as multipath, may wish to resubmit a bio on error. The - * functions in this file help the target record and restore the - * original bio state. - */ - -struct dm_bio_vec_details { -#if PAGE_SIZE < 65536 - __u16 bv_len; - __u16 bv_offset; -#else - unsigned bv_len; - unsigned bv_offset; -#endif -}; - -struct dm_bio_details { - sector_t bi_sector; - struct block_device *bi_bdev; - unsigned int bi_size; - unsigned short bi_idx; - unsigned long bi_flags; - struct dm_bio_vec_details bi_io_vec[BIO_MAX_PAGES]; -}; - -static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio) -{ - unsigned i; - - bd->bi_sector = bio->bi_sector; - bd->bi_bdev = bio->bi_bdev; - bd->bi_size = bio->bi_size; - bd->bi_idx = bio->bi_idx; - bd->bi_flags = bio->bi_flags; - - for (i = 0; i < bio->bi_vcnt; i++) { - bd->bi_io_vec[i].bv_len = bio->bi_io_vec[i].bv_len; - bd->bi_io_vec[i].bv_offset = bio->bi_io_vec[i].bv_offset; - } -} - -static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio) -{ - unsigned i; - - bio->bi_sector = bd->bi_sector; - bio->bi_bdev = bd->bi_bdev; - bio->bi_size = bd->bi_size; - bio->bi_idx = bd->bi_idx; - bio->bi_flags = bd->bi_flags; - - for (i = 0; i < bio->bi_vcnt; i++) { - bio->bi_io_vec[i].bv_len = bd->bi_io_vec[i].bv_len; - bio->bi_io_vec[i].bv_offset = bd->bi_io_vec[i].bv_offset; - } -} - -#endif diff --git a/ANDROID_3.4.5/drivers/md/dm-bufio.c b/ANDROID_3.4.5/drivers/md/dm-bufio.c deleted file mode 100644 index cc06a1e5..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-bufio.c +++ /dev/null @@ -1,1755 +0,0 @@ -/* - * Copyright (C) 2009-2011 Red Hat, Inc. - * - * Author: Mikulas Patocka <mpatocka@redhat.com> - * - * This file is released under the GPL. - */ - -#include "dm-bufio.h" - -#include <linux/device-mapper.h> -#include <linux/dm-io.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/shrinker.h> -#include <linux/module.h> - -#define DM_MSG_PREFIX "bufio" - -/* - * Memory management policy: - * Limit the number of buffers to DM_BUFIO_MEMORY_PERCENT of main memory - * or DM_BUFIO_VMALLOC_PERCENT of vmalloc memory (whichever is lower). - * Always allocate at least DM_BUFIO_MIN_BUFFERS buffers. - * Start background writeback when there are DM_BUFIO_WRITEBACK_PERCENT - * dirty buffers. - */ -#define DM_BUFIO_MIN_BUFFERS 8 - -#define DM_BUFIO_MEMORY_PERCENT 2 -#define DM_BUFIO_VMALLOC_PERCENT 25 -#define DM_BUFIO_WRITEBACK_PERCENT 75 - -/* - * Check buffer ages in this interval (seconds) - */ -#define DM_BUFIO_WORK_TIMER_SECS 10 - -/* - * Free buffers when they are older than this (seconds) - */ -#define DM_BUFIO_DEFAULT_AGE_SECS 60 - -/* - * The number of bvec entries that are embedded directly in the buffer. - * If the chunk size is larger, dm-io is used to do the io. 
- */ -#define DM_BUFIO_INLINE_VECS 16 - -/* - * Buffer hash - */ -#define DM_BUFIO_HASH_BITS 20 -#define DM_BUFIO_HASH(block) \ - ((((block) >> DM_BUFIO_HASH_BITS) ^ (block)) & \ - ((1 << DM_BUFIO_HASH_BITS) - 1)) - -/* - * Don't try to use kmem_cache_alloc for blocks larger than this. - * For explanation, see alloc_buffer_data below. - */ -#define DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT (PAGE_SIZE >> 1) -#define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT (PAGE_SIZE << (MAX_ORDER - 1)) - -/* - * dm_buffer->list_mode - */ -#define LIST_CLEAN 0 -#define LIST_DIRTY 1 -#define LIST_SIZE 2 - -/* - * Linking of buffers: - * All buffers are linked to cache_hash with their hash_list field. - * - * Clean buffers that are not being written (B_WRITING not set) - * are linked to lru[LIST_CLEAN] with their lru_list field. - * - * Dirty and clean buffers that are being written are linked to - * lru[LIST_DIRTY] with their lru_list field. When the write - * finishes, the buffer cannot be relinked immediately (because we - * are in an interrupt context and relinking requires process - * context), so some clean-not-writing buffers can be held on - * dirty_lru too. They are later added to lru in the process - * context. - */ -struct dm_bufio_client { - struct mutex lock; - - struct list_head lru[LIST_SIZE]; - unsigned long n_buffers[LIST_SIZE]; - - struct block_device *bdev; - unsigned block_size; - unsigned char sectors_per_block_bits; - unsigned char pages_per_block_bits; - unsigned char blocks_per_page_bits; - unsigned aux_size; - void (*alloc_callback)(struct dm_buffer *); - void (*write_callback)(struct dm_buffer *); - - struct dm_io_client *dm_io; - - struct list_head reserved_buffers; - unsigned need_reserved_buffers; - - struct hlist_head *cache_hash; - wait_queue_head_t free_buffer_wait; - - int async_write_error; - - struct list_head client_list; - struct shrinker shrinker; -}; - -/* - * Buffer state bits. - */ -#define B_READING 0 -#define B_WRITING 1 -#define B_DIRTY 2 - -/* - * Describes how the block was allocated: - * kmem_cache_alloc(), __get_free_pages() or vmalloc(). - * See the comment at alloc_buffer_data. 
- */ -enum data_mode { - DATA_MODE_SLAB = 0, - DATA_MODE_GET_FREE_PAGES = 1, - DATA_MODE_VMALLOC = 2, - DATA_MODE_LIMIT = 3 -}; - -struct dm_buffer { - struct hlist_node hash_list; - struct list_head lru_list; - sector_t block; - void *data; - enum data_mode data_mode; - unsigned char list_mode; /* LIST_* */ - unsigned hold_count; - int read_error; - int write_error; - unsigned long state; - unsigned long last_accessed; - struct dm_bufio_client *c; - struct bio bio; - struct bio_vec bio_vec[DM_BUFIO_INLINE_VECS]; -}; - -/*----------------------------------------------------------------*/ - -static struct kmem_cache *dm_bufio_caches[PAGE_SHIFT - SECTOR_SHIFT]; -static char *dm_bufio_cache_names[PAGE_SHIFT - SECTOR_SHIFT]; - -static inline int dm_bufio_cache_index(struct dm_bufio_client *c) -{ - unsigned ret = c->blocks_per_page_bits - 1; - - BUG_ON(ret >= ARRAY_SIZE(dm_bufio_caches)); - - return ret; -} - -#define DM_BUFIO_CACHE(c) (dm_bufio_caches[dm_bufio_cache_index(c)]) -#define DM_BUFIO_CACHE_NAME(c) (dm_bufio_cache_names[dm_bufio_cache_index(c)]) - -#define dm_bufio_in_request() (!!current->bio_list) - -static void dm_bufio_lock(struct dm_bufio_client *c) -{ - mutex_lock_nested(&c->lock, dm_bufio_in_request()); -} - -static int dm_bufio_trylock(struct dm_bufio_client *c) -{ - return mutex_trylock(&c->lock); -} - -static void dm_bufio_unlock(struct dm_bufio_client *c) -{ - mutex_unlock(&c->lock); -} - -/* - * FIXME Move to sched.h? - */ -#ifdef CONFIG_PREEMPT_VOLUNTARY -# define dm_bufio_cond_resched() \ -do { \ - if (unlikely(need_resched())) \ - _cond_resched(); \ -} while (0) -#else -# define dm_bufio_cond_resched() do { } while (0) -#endif - -/*----------------------------------------------------------------*/ - -/* - * Default cache size: available memory divided by the ratio. - */ -static unsigned long dm_bufio_default_cache_size; - -/* - * Total cache size set by the user. - */ -static unsigned long dm_bufio_cache_size; - -/* - * A copy of dm_bufio_cache_size because dm_bufio_cache_size can change - * at any time. If it disagrees, the user has changed cache size. - */ -static unsigned long dm_bufio_cache_size_latch; - -static DEFINE_SPINLOCK(param_spinlock); - -/* - * Buffers are freed after this timeout - */ -static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS; - -static unsigned long dm_bufio_peak_allocated; -static unsigned long dm_bufio_allocated_kmem_cache; -static unsigned long dm_bufio_allocated_get_free_pages; -static unsigned long dm_bufio_allocated_vmalloc; -static unsigned long dm_bufio_current_allocated; - -/*----------------------------------------------------------------*/ - -/* - * Per-client cache: dm_bufio_cache_size / dm_bufio_client_count - */ -static unsigned long dm_bufio_cache_size_per_client; - -/* - * The current number of clients. - */ -static int dm_bufio_client_count; - -/* - * The list of all clients. 
- */ -static LIST_HEAD(dm_bufio_all_clients); - -/* - * This mutex protects dm_bufio_cache_size_latch, - * dm_bufio_cache_size_per_client and dm_bufio_client_count - */ -static DEFINE_MUTEX(dm_bufio_clients_lock); - -/*----------------------------------------------------------------*/ - -static void adjust_total_allocated(enum data_mode data_mode, long diff) -{ - static unsigned long * const class_ptr[DATA_MODE_LIMIT] = { - &dm_bufio_allocated_kmem_cache, - &dm_bufio_allocated_get_free_pages, - &dm_bufio_allocated_vmalloc, - }; - - spin_lock(¶m_spinlock); - - *class_ptr[data_mode] += diff; - - dm_bufio_current_allocated += diff; - - if (dm_bufio_current_allocated > dm_bufio_peak_allocated) - dm_bufio_peak_allocated = dm_bufio_current_allocated; - - spin_unlock(¶m_spinlock); -} - -/* - * Change the number of clients and recalculate per-client limit. - */ -static void __cache_size_refresh(void) -{ - BUG_ON(!mutex_is_locked(&dm_bufio_clients_lock)); - BUG_ON(dm_bufio_client_count < 0); - - dm_bufio_cache_size_latch = dm_bufio_cache_size; - - barrier(); - - /* - * Use default if set to 0 and report the actual cache size used. - */ - if (!dm_bufio_cache_size_latch) { - (void)cmpxchg(&dm_bufio_cache_size, 0, - dm_bufio_default_cache_size); - dm_bufio_cache_size_latch = dm_bufio_default_cache_size; - } - - dm_bufio_cache_size_per_client = dm_bufio_cache_size_latch / - (dm_bufio_client_count ? : 1); -} - -/* - * Allocating buffer data. - * - * Small buffers are allocated with kmem_cache, to use space optimally. - * - * For large buffers, we choose between get_free_pages and vmalloc. - * Each has advantages and disadvantages. - * - * __get_free_pages can randomly fail if the memory is fragmented. - * __vmalloc won't randomly fail, but vmalloc space is limited (it may be - * as low as 128M) so using it for caching is not appropriate. - * - * If the allocation may fail we use __get_free_pages. Memory fragmentation - * won't have a fatal effect here, but it just causes flushes of some other - * buffers and more I/O will be performed. Don't use __get_free_pages if it - * always fails (i.e. order >= MAX_ORDER). - * - * If the allocation shouldn't fail we use __vmalloc. This is only for the - * initial reserve allocation, so there's no risk of wasting all vmalloc - * space. - */ -static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, - enum data_mode *data_mode) -{ - if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) { - *data_mode = DATA_MODE_SLAB; - return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask); - } - - if (c->block_size <= DM_BUFIO_BLOCK_SIZE_GFP_LIMIT && - gfp_mask & __GFP_NORETRY) { - *data_mode = DATA_MODE_GET_FREE_PAGES; - return (void *)__get_free_pages(gfp_mask, - c->pages_per_block_bits); - } - - *data_mode = DATA_MODE_VMALLOC; - return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL); -} - -/* - * Free buffer's data. - */ -static void free_buffer_data(struct dm_bufio_client *c, - void *data, enum data_mode data_mode) -{ - switch (data_mode) { - case DATA_MODE_SLAB: - kmem_cache_free(DM_BUFIO_CACHE(c), data); - break; - - case DATA_MODE_GET_FREE_PAGES: - free_pages((unsigned long)data, c->pages_per_block_bits); - break; - - case DATA_MODE_VMALLOC: - vfree(data); - break; - - default: - DMCRIT("dm_bufio_free_buffer_data: bad data mode: %d", - data_mode); - BUG(); - } -} - -/* - * Allocate buffer and its data. 
- */ -static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask) -{ - struct dm_buffer *b = kmalloc(sizeof(struct dm_buffer) + c->aux_size, - gfp_mask); - - if (!b) - return NULL; - - b->c = c; - - b->data = alloc_buffer_data(c, gfp_mask, &b->data_mode); - if (!b->data) { - kfree(b); - return NULL; - } - - adjust_total_allocated(b->data_mode, (long)c->block_size); - - return b; -} - -/* - * Free buffer and its data. - */ -static void free_buffer(struct dm_buffer *b) -{ - struct dm_bufio_client *c = b->c; - - adjust_total_allocated(b->data_mode, -(long)c->block_size); - - free_buffer_data(c, b->data, b->data_mode); - kfree(b); -} - -/* - * Link buffer to the hash list and clean or dirty queue. - */ -static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty) -{ - struct dm_bufio_client *c = b->c; - - c->n_buffers[dirty]++; - b->block = block; - b->list_mode = dirty; - list_add(&b->lru_list, &c->lru[dirty]); - hlist_add_head(&b->hash_list, &c->cache_hash[DM_BUFIO_HASH(block)]); - b->last_accessed = jiffies; -} - -/* - * Unlink buffer from the hash list and dirty or clean queue. - */ -static void __unlink_buffer(struct dm_buffer *b) -{ - struct dm_bufio_client *c = b->c; - - BUG_ON(!c->n_buffers[b->list_mode]); - - c->n_buffers[b->list_mode]--; - hlist_del(&b->hash_list); - list_del(&b->lru_list); -} - -/* - * Place the buffer to the head of dirty or clean LRU queue. - */ -static void __relink_lru(struct dm_buffer *b, int dirty) -{ - struct dm_bufio_client *c = b->c; - - BUG_ON(!c->n_buffers[b->list_mode]); - - c->n_buffers[b->list_mode]--; - c->n_buffers[dirty]++; - b->list_mode = dirty; - list_del(&b->lru_list); - list_add(&b->lru_list, &c->lru[dirty]); -} - -/*---------------------------------------------------------------- - * Submit I/O on the buffer. - * - * Bio interface is faster but it has some problems: - * the vector list is limited (increasing this limit increases - * memory-consumption per buffer, so it is not viable); - * - * the memory must be direct-mapped, not vmalloced; - * - * the I/O driver can reject requests spuriously if it thinks that - * the requests are too big for the device or if they cross a - * controller-defined memory boundary. - * - * If the buffer is small enough (up to DM_BUFIO_INLINE_VECS pages) and - * it is not vmalloced, try using the bio interface. - * - * If the buffer is big, if it is vmalloced or if the underlying device - * rejects the bio because it is too large, use dm-io layer to do the I/O. - * The dm-io layer splits the I/O into multiple requests, avoiding the above - * shortcomings. - *--------------------------------------------------------------*/ - -/* - * dm-io completion routine. It just calls b->bio.bi_end_io, pretending - * that the request was handled directly with bio interface. - */ -static void dmio_complete(unsigned long error, void *context) -{ - struct dm_buffer *b = context; - - b->bio.bi_end_io(&b->bio, error ? 
-EIO : 0); -} - -static void use_dmio(struct dm_buffer *b, int rw, sector_t block, - bio_end_io_t *end_io) -{ - int r; - struct dm_io_request io_req = { - .bi_rw = rw, - .notify.fn = dmio_complete, - .notify.context = b, - .client = b->c->dm_io, - }; - struct dm_io_region region = { - .bdev = b->c->bdev, - .sector = block << b->c->sectors_per_block_bits, - .count = b->c->block_size >> SECTOR_SHIFT, - }; - - if (b->data_mode != DATA_MODE_VMALLOC) { - io_req.mem.type = DM_IO_KMEM; - io_req.mem.ptr.addr = b->data; - } else { - io_req.mem.type = DM_IO_VMA; - io_req.mem.ptr.vma = b->data; - } - - b->bio.bi_end_io = end_io; - - r = dm_io(&io_req, 1, ®ion, NULL); - if (r) - end_io(&b->bio, r); -} - -static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, - bio_end_io_t *end_io) -{ - char *ptr; - int len; - - bio_init(&b->bio); - b->bio.bi_io_vec = b->bio_vec; - b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS; - b->bio.bi_sector = block << b->c->sectors_per_block_bits; - b->bio.bi_bdev = b->c->bdev; - b->bio.bi_end_io = end_io; - - /* - * We assume that if len >= PAGE_SIZE ptr is page-aligned. - * If len < PAGE_SIZE the buffer doesn't cross page boundary. - */ - ptr = b->data; - len = b->c->block_size; - - if (len >= PAGE_SIZE) - BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1)); - else - BUG_ON((unsigned long)ptr & (len - 1)); - - do { - if (!bio_add_page(&b->bio, virt_to_page(ptr), - len < PAGE_SIZE ? len : PAGE_SIZE, - virt_to_phys(ptr) & (PAGE_SIZE - 1))) { - BUG_ON(b->c->block_size <= PAGE_SIZE); - use_dmio(b, rw, block, end_io); - return; - } - - len -= PAGE_SIZE; - ptr += PAGE_SIZE; - } while (len > 0); - - submit_bio(rw, &b->bio); -} - -static void submit_io(struct dm_buffer *b, int rw, sector_t block, - bio_end_io_t *end_io) -{ - if (rw == WRITE && b->c->write_callback) - b->c->write_callback(b); - - if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE && - b->data_mode != DATA_MODE_VMALLOC) - use_inline_bio(b, rw, block, end_io); - else - use_dmio(b, rw, block, end_io); -} - -/*---------------------------------------------------------------- - * Writing dirty buffers - *--------------------------------------------------------------*/ - -/* - * The endio routine for write. - * - * Set the error, clear B_WRITING bit and wake anyone who was waiting on - * it. - */ -static void write_endio(struct bio *bio, int error) -{ - struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); - - b->write_error = error; - if (unlikely(error)) { - struct dm_bufio_client *c = b->c; - (void)cmpxchg(&c->async_write_error, 0, error); - } - - BUG_ON(!test_bit(B_WRITING, &b->state)); - - smp_mb__before_clear_bit(); - clear_bit(B_WRITING, &b->state); - smp_mb__after_clear_bit(); - - wake_up_bit(&b->state, B_WRITING); -} - -/* - * This function is called when wait_on_bit is actually waiting. - */ -static int do_io_schedule(void *word) -{ - io_schedule(); - - return 0; -} - -/* - * Initiate a write on a dirty buffer, but don't wait for it. - * - * - If the buffer is not dirty, exit. - * - If there some previous write going on, wait for it to finish (we can't - * have two writes on the same buffer simultaneously). - * - Submit our write and don't wait on it. We set B_WRITING indicating - * that there is a write in progress. 
- */ -static void __write_dirty_buffer(struct dm_buffer *b) -{ - if (!test_bit(B_DIRTY, &b->state)) - return; - - clear_bit(B_DIRTY, &b->state); - wait_on_bit_lock(&b->state, B_WRITING, - do_io_schedule, TASK_UNINTERRUPTIBLE); - - submit_io(b, WRITE, b->block, write_endio); -} - -/* - * Wait until any activity on the buffer finishes. Possibly write the - * buffer if it is dirty. When this function finishes, there is no I/O - * running on the buffer and the buffer is not dirty. - */ -static void __make_buffer_clean(struct dm_buffer *b) -{ - BUG_ON(b->hold_count); - - if (!b->state) /* fast case */ - return; - - wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE); - __write_dirty_buffer(b); - wait_on_bit(&b->state, B_WRITING, do_io_schedule, TASK_UNINTERRUPTIBLE); -} - -/* - * Find some buffer that is not held by anybody, clean it, unlink it and - * return it. - */ -static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c) -{ - struct dm_buffer *b; - - list_for_each_entry_reverse(b, &c->lru[LIST_CLEAN], lru_list) { - BUG_ON(test_bit(B_WRITING, &b->state)); - BUG_ON(test_bit(B_DIRTY, &b->state)); - - if (!b->hold_count) { - __make_buffer_clean(b); - __unlink_buffer(b); - return b; - } - dm_bufio_cond_resched(); - } - - list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) { - BUG_ON(test_bit(B_READING, &b->state)); - - if (!b->hold_count) { - __make_buffer_clean(b); - __unlink_buffer(b); - return b; - } - dm_bufio_cond_resched(); - } - - return NULL; -} - -/* - * Wait until some other threads free some buffer or release hold count on - * some buffer. - * - * This function is entered with c->lock held, drops it and regains it - * before exiting. - */ -static void __wait_for_free_buffer(struct dm_bufio_client *c) -{ - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(&c->free_buffer_wait, &wait); - set_task_state(current, TASK_UNINTERRUPTIBLE); - dm_bufio_unlock(c); - - io_schedule(); - - set_task_state(current, TASK_RUNNING); - remove_wait_queue(&c->free_buffer_wait, &wait); - - dm_bufio_lock(c); -} - -enum new_flag { - NF_FRESH = 0, - NF_READ = 1, - NF_GET = 2, - NF_PREFETCH = 3 -}; - -/* - * Allocate a new buffer. If the allocation is not possible, wait until - * some other thread frees a buffer. - * - * May drop the lock and regain it. - */ -static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c, enum new_flag nf) -{ - struct dm_buffer *b; - - /* - * dm-bufio is resistant to allocation failures (it just keeps - * one buffer reserved in cases all the allocations fail). - * So set flags to not try too hard: - * GFP_NOIO: don't recurse into the I/O layer - * __GFP_NORETRY: don't retry and rather return failure - * __GFP_NOMEMALLOC: don't use emergency reserves - * __GFP_NOWARN: don't print a warning in case of failure - * - * For debugging, if we set the cache size to 1, no new buffers will - * be allocated. 
- */ - while (1) { - if (dm_bufio_cache_size_latch != 1) { - b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); - if (b) - return b; - } - - if (nf == NF_PREFETCH) - return NULL; - - if (!list_empty(&c->reserved_buffers)) { - b = list_entry(c->reserved_buffers.next, - struct dm_buffer, lru_list); - list_del(&b->lru_list); - c->need_reserved_buffers++; - - return b; - } - - b = __get_unclaimed_buffer(c); - if (b) - return b; - - __wait_for_free_buffer(c); - } -} - -static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c, enum new_flag nf) -{ - struct dm_buffer *b = __alloc_buffer_wait_no_callback(c, nf); - - if (!b) - return NULL; - - if (c->alloc_callback) - c->alloc_callback(b); - - return b; -} - -/* - * Free a buffer and wake other threads waiting for free buffers. - */ -static void __free_buffer_wake(struct dm_buffer *b) -{ - struct dm_bufio_client *c = b->c; - - if (!c->need_reserved_buffers) - free_buffer(b); - else { - list_add(&b->lru_list, &c->reserved_buffers); - c->need_reserved_buffers--; - } - - wake_up(&c->free_buffer_wait); -} - -static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait) -{ - struct dm_buffer *b, *tmp; - - list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) { - BUG_ON(test_bit(B_READING, &b->state)); - - if (!test_bit(B_DIRTY, &b->state) && - !test_bit(B_WRITING, &b->state)) { - __relink_lru(b, LIST_CLEAN); - continue; - } - - if (no_wait && test_bit(B_WRITING, &b->state)) - return; - - __write_dirty_buffer(b); - dm_bufio_cond_resched(); - } -} - -/* - * Get writeback threshold and buffer limit for a given client. - */ -static void __get_memory_limit(struct dm_bufio_client *c, - unsigned long *threshold_buffers, - unsigned long *limit_buffers) -{ - unsigned long buffers; - - if (dm_bufio_cache_size != dm_bufio_cache_size_latch) { - mutex_lock(&dm_bufio_clients_lock); - __cache_size_refresh(); - mutex_unlock(&dm_bufio_clients_lock); - } - - buffers = dm_bufio_cache_size_per_client >> - (c->sectors_per_block_bits + SECTOR_SHIFT); - - if (buffers < DM_BUFIO_MIN_BUFFERS) - buffers = DM_BUFIO_MIN_BUFFERS; - - *limit_buffers = buffers; - *threshold_buffers = buffers * DM_BUFIO_WRITEBACK_PERCENT / 100; -} - -/* - * Check if we're over watermark. - * If we are over threshold_buffers, start freeing buffers. - * If we're over "limit_buffers", block until we get under the limit. - */ -static void __check_watermark(struct dm_bufio_client *c) -{ - unsigned long threshold_buffers, limit_buffers; - - __get_memory_limit(c, &threshold_buffers, &limit_buffers); - - while (c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY] > - limit_buffers) { - - struct dm_buffer *b = __get_unclaimed_buffer(c); - - if (!b) - return; - - __free_buffer_wake(b); - dm_bufio_cond_resched(); - } - - if (c->n_buffers[LIST_DIRTY] > threshold_buffers) - __write_dirty_buffers_async(c, 1); -} - -/* - * Find a buffer in the hash. 
- */ -static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block) -{ - struct dm_buffer *b; - struct hlist_node *hn; - - hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)], - hash_list) { - dm_bufio_cond_resched(); - if (b->block == block) - return b; - } - - return NULL; -} - -/*---------------------------------------------------------------- - * Getting a buffer - *--------------------------------------------------------------*/ - -static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, - enum new_flag nf, int *need_submit) -{ - struct dm_buffer *b, *new_b = NULL; - - *need_submit = 0; - - b = __find(c, block); - if (b) - goto found_buffer; - - if (nf == NF_GET) - return NULL; - - new_b = __alloc_buffer_wait(c, nf); - if (!new_b) - return NULL; - - /* - * We've had a period where the mutex was unlocked, so need to - * recheck the hash table. - */ - b = __find(c, block); - if (b) { - __free_buffer_wake(new_b); - goto found_buffer; - } - - __check_watermark(c); - - b = new_b; - b->hold_count = 1; - b->read_error = 0; - b->write_error = 0; - __link_buffer(b, block, LIST_CLEAN); - - if (nf == NF_FRESH) { - b->state = 0; - return b; - } - - b->state = 1 << B_READING; - *need_submit = 1; - - return b; - -found_buffer: - if (nf == NF_PREFETCH) - return NULL; - /* - * Note: it is essential that we don't wait for the buffer to be - * read if dm_bufio_get function is used. Both dm_bufio_get and - * dm_bufio_prefetch can be used in the driver request routine. - * If the user called both dm_bufio_prefetch and dm_bufio_get on - * the same buffer, it would deadlock if we waited. - */ - if (nf == NF_GET && unlikely(test_bit(B_READING, &b->state))) - return NULL; - - b->hold_count++; - __relink_lru(b, test_bit(B_DIRTY, &b->state) || - test_bit(B_WRITING, &b->state)); - return b; -} - -/* - * The endio routine for reading: set the error, clear the bit and wake up - * anyone waiting on the buffer. - */ -static void read_endio(struct bio *bio, int error) -{ - struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); - - b->read_error = error; - - BUG_ON(!test_bit(B_READING, &b->state)); - - smp_mb__before_clear_bit(); - clear_bit(B_READING, &b->state); - smp_mb__after_clear_bit(); - - wake_up_bit(&b->state, B_READING); -} - -/* - * A common routine for dm_bufio_new and dm_bufio_read. Operation of these - * functions is similar except that dm_bufio_new doesn't read the - * buffer from the disk (assuming that the caller overwrites all the data - * and uses dm_bufio_mark_buffer_dirty to write new data back). 
- */ -static void *new_read(struct dm_bufio_client *c, sector_t block, - enum new_flag nf, struct dm_buffer **bp) -{ - int need_submit; - struct dm_buffer *b; - - dm_bufio_lock(c); - b = __bufio_new(c, block, nf, &need_submit); - dm_bufio_unlock(c); - - if (!b) - return b; - - if (need_submit) - submit_io(b, READ, b->block, read_endio); - - wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE); - - if (b->read_error) { - int error = b->read_error; - - dm_bufio_release(b); - - return ERR_PTR(error); - } - - *bp = b; - - return b->data; -} - -void *dm_bufio_get(struct dm_bufio_client *c, sector_t block, - struct dm_buffer **bp) -{ - return new_read(c, block, NF_GET, bp); -} -EXPORT_SYMBOL_GPL(dm_bufio_get); - -void *dm_bufio_read(struct dm_bufio_client *c, sector_t block, - struct dm_buffer **bp) -{ - BUG_ON(dm_bufio_in_request()); - - return new_read(c, block, NF_READ, bp); -} -EXPORT_SYMBOL_GPL(dm_bufio_read); - -void *dm_bufio_new(struct dm_bufio_client *c, sector_t block, - struct dm_buffer **bp) -{ - BUG_ON(dm_bufio_in_request()); - - return new_read(c, block, NF_FRESH, bp); -} -EXPORT_SYMBOL_GPL(dm_bufio_new); - -void dm_bufio_prefetch(struct dm_bufio_client *c, - sector_t block, unsigned n_blocks) -{ - struct blk_plug plug; - - blk_start_plug(&plug); - dm_bufio_lock(c); - - for (; n_blocks--; block++) { - int need_submit; - struct dm_buffer *b; - b = __bufio_new(c, block, NF_PREFETCH, &need_submit); - if (unlikely(b != NULL)) { - dm_bufio_unlock(c); - - if (need_submit) - submit_io(b, READ, b->block, read_endio); - dm_bufio_release(b); - - dm_bufio_cond_resched(); - - if (!n_blocks) - goto flush_plug; - dm_bufio_lock(c); - } - - } - - dm_bufio_unlock(c); - -flush_plug: - blk_finish_plug(&plug); -} -EXPORT_SYMBOL_GPL(dm_bufio_prefetch); - -void dm_bufio_release(struct dm_buffer *b) -{ - struct dm_bufio_client *c = b->c; - - dm_bufio_lock(c); - - BUG_ON(!b->hold_count); - - b->hold_count--; - if (!b->hold_count) { - wake_up(&c->free_buffer_wait); - - /* - * If there were errors on the buffer, and the buffer is not - * to be written, free the buffer. There is no point in caching - * invalid buffer. - */ - if ((b->read_error || b->write_error) && - !test_bit(B_READING, &b->state) && - !test_bit(B_WRITING, &b->state) && - !test_bit(B_DIRTY, &b->state)) { - __unlink_buffer(b); - __free_buffer_wake(b); - } - } - - dm_bufio_unlock(c); -} -EXPORT_SYMBOL_GPL(dm_bufio_release); - -void dm_bufio_mark_buffer_dirty(struct dm_buffer *b) -{ - struct dm_bufio_client *c = b->c; - - dm_bufio_lock(c); - - BUG_ON(test_bit(B_READING, &b->state)); - - if (!test_and_set_bit(B_DIRTY, &b->state)) - __relink_lru(b, LIST_DIRTY); - - dm_bufio_unlock(c); -} -EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty); - -void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c) -{ - BUG_ON(dm_bufio_in_request()); - - dm_bufio_lock(c); - __write_dirty_buffers_async(c, 0); - dm_bufio_unlock(c); -} -EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async); - -/* - * For performance, it is essential that the buffers are written asynchronously - * and simultaneously (so that the block layer can merge the writes) and then - * waited upon. - * - * Finally, we flush hardware disk cache. 
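Editorial aside, not part of the original dm-bufio.c: a sketch of how a hypothetical caller would use the buffer API defined above, dirtying a batch of blocks and then issuing a single dm_bufio_write_dirty_buffers() call so the block layer can merge the writes, as the preceding comment describes. The helper name update_blocks() and its parameters are invented for illustration; the dm_bufio_* calls and the IS_ERR() convention follow the definitions shown in this file.

#include <linux/err.h>
#include <linux/string.h>
#include <linux/types.h>
#include "dm-bufio.h"	/* declarations of the dm_bufio_* calls used below */

static int update_blocks(struct dm_bufio_client *c, sector_t first,
			 unsigned count, u8 fill)
{
	unsigned i;

	for (i = 0; i < count; i++) {
		struct dm_buffer *b;
		void *data = dm_bufio_read(c, first + i, &b);

		if (IS_ERR(data))
			return PTR_ERR(data);

		memset(data, fill, dm_bufio_get_block_size(c));
		dm_bufio_mark_buffer_dirty(b);	/* moves it to the dirty LRU */
		dm_bufio_release(b);		/* drop the hold; data stays cached */
	}

	/* async submission of the whole batch, wait, then flush the disk cache */
	return dm_bufio_write_dirty_buffers(c);
}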
- */ -int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c) -{ - int a, f; - unsigned long buffers_processed = 0; - struct dm_buffer *b, *tmp; - - dm_bufio_lock(c); - __write_dirty_buffers_async(c, 0); - -again: - list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) { - int dropped_lock = 0; - - if (buffers_processed < c->n_buffers[LIST_DIRTY]) - buffers_processed++; - - BUG_ON(test_bit(B_READING, &b->state)); - - if (test_bit(B_WRITING, &b->state)) { - if (buffers_processed < c->n_buffers[LIST_DIRTY]) { - dropped_lock = 1; - b->hold_count++; - dm_bufio_unlock(c); - wait_on_bit(&b->state, B_WRITING, - do_io_schedule, - TASK_UNINTERRUPTIBLE); - dm_bufio_lock(c); - b->hold_count--; - } else - wait_on_bit(&b->state, B_WRITING, - do_io_schedule, - TASK_UNINTERRUPTIBLE); - } - - if (!test_bit(B_DIRTY, &b->state) && - !test_bit(B_WRITING, &b->state)) - __relink_lru(b, LIST_CLEAN); - - dm_bufio_cond_resched(); - - /* - * If we dropped the lock, the list is no longer consistent, - * so we must restart the search. - * - * In the most common case, the buffer just processed is - * relinked to the clean list, so we won't loop scanning the - * same buffer again and again. - * - * This may livelock if there is another thread simultaneously - * dirtying buffers, so we count the number of buffers walked - * and if it exceeds the total number of buffers, it means that - * someone is doing some writes simultaneously with us. In - * this case, stop, dropping the lock. - */ - if (dropped_lock) - goto again; - } - wake_up(&c->free_buffer_wait); - dm_bufio_unlock(c); - - a = xchg(&c->async_write_error, 0); - f = dm_bufio_issue_flush(c); - if (a) - return a; - - return f; -} -EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers); - -/* - * Use dm-io to send and empty barrier flush the device. - */ -int dm_bufio_issue_flush(struct dm_bufio_client *c) -{ - struct dm_io_request io_req = { - .bi_rw = REQ_FLUSH, - .mem.type = DM_IO_KMEM, - .mem.ptr.addr = NULL, - .client = c->dm_io, - }; - struct dm_io_region io_reg = { - .bdev = c->bdev, - .sector = 0, - .count = 0, - }; - - BUG_ON(dm_bufio_in_request()); - - return dm_io(&io_req, 1, &io_reg, NULL); -} -EXPORT_SYMBOL_GPL(dm_bufio_issue_flush); - -/* - * We first delete any other buffer that may be at that new location. - * - * Then, we write the buffer to the original location if it was dirty. - * - * Then, if we are the only one who is holding the buffer, relink the buffer - * in the hash queue for the new location. - * - * If there was someone else holding the buffer, we write it to the new - * location but not relink it, because that other user needs to have the buffer - * at the same place. - */ -void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block) -{ - struct dm_bufio_client *c = b->c; - struct dm_buffer *new; - - BUG_ON(dm_bufio_in_request()); - - dm_bufio_lock(c); - -retry: - new = __find(c, new_block); - if (new) { - if (new->hold_count) { - __wait_for_free_buffer(c); - goto retry; - } - - /* - * FIXME: Is there any point waiting for a write that's going - * to be overwritten in a bit? 
- */ - __make_buffer_clean(new); - __unlink_buffer(new); - __free_buffer_wake(new); - } - - BUG_ON(!b->hold_count); - BUG_ON(test_bit(B_READING, &b->state)); - - __write_dirty_buffer(b); - if (b->hold_count == 1) { - wait_on_bit(&b->state, B_WRITING, - do_io_schedule, TASK_UNINTERRUPTIBLE); - set_bit(B_DIRTY, &b->state); - __unlink_buffer(b); - __link_buffer(b, new_block, LIST_DIRTY); - } else { - sector_t old_block; - wait_on_bit_lock(&b->state, B_WRITING, - do_io_schedule, TASK_UNINTERRUPTIBLE); - /* - * Relink buffer to "new_block" so that write_callback - * sees "new_block" as a block number. - * After the write, link the buffer back to old_block. - * All this must be done in bufio lock, so that block number - * change isn't visible to other threads. - */ - old_block = b->block; - __unlink_buffer(b); - __link_buffer(b, new_block, b->list_mode); - submit_io(b, WRITE, new_block, write_endio); - wait_on_bit(&b->state, B_WRITING, - do_io_schedule, TASK_UNINTERRUPTIBLE); - __unlink_buffer(b); - __link_buffer(b, old_block, b->list_mode); - } - - dm_bufio_unlock(c); - dm_bufio_release(b); -} -EXPORT_SYMBOL_GPL(dm_bufio_release_move); - -unsigned dm_bufio_get_block_size(struct dm_bufio_client *c) -{ - return c->block_size; -} -EXPORT_SYMBOL_GPL(dm_bufio_get_block_size); - -sector_t dm_bufio_get_device_size(struct dm_bufio_client *c) -{ - return i_size_read(c->bdev->bd_inode) >> - (SECTOR_SHIFT + c->sectors_per_block_bits); -} -EXPORT_SYMBOL_GPL(dm_bufio_get_device_size); - -sector_t dm_bufio_get_block_number(struct dm_buffer *b) -{ - return b->block; -} -EXPORT_SYMBOL_GPL(dm_bufio_get_block_number); - -void *dm_bufio_get_block_data(struct dm_buffer *b) -{ - return b->data; -} -EXPORT_SYMBOL_GPL(dm_bufio_get_block_data); - -void *dm_bufio_get_aux_data(struct dm_buffer *b) -{ - return b + 1; -} -EXPORT_SYMBOL_GPL(dm_bufio_get_aux_data); - -struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b) -{ - return b->c; -} -EXPORT_SYMBOL_GPL(dm_bufio_get_client); - -static void drop_buffers(struct dm_bufio_client *c) -{ - struct dm_buffer *b; - int i; - - BUG_ON(dm_bufio_in_request()); - - /* - * An optimization so that the buffers are not written one-by-one. - */ - dm_bufio_write_dirty_buffers_async(c); - - dm_bufio_lock(c); - - while ((b = __get_unclaimed_buffer(c))) - __free_buffer_wake(b); - - for (i = 0; i < LIST_SIZE; i++) - list_for_each_entry(b, &c->lru[i], lru_list) - DMERR("leaked buffer %llx, hold count %u, list %d", - (unsigned long long)b->block, b->hold_count, i); - - for (i = 0; i < LIST_SIZE; i++) - BUG_ON(!list_empty(&c->lru[i])); - - dm_bufio_unlock(c); -} - -/* - * Test if the buffer is unused and too old, and commit it. - * At if noio is set, we must not do any I/O because we hold - * dm_bufio_clients_lock and we would risk deadlock if the I/O gets rerouted to - * different bufio client. 
- */ -static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp, - unsigned long max_jiffies) -{ - if (jiffies - b->last_accessed < max_jiffies) - return 1; - - if (!(gfp & __GFP_IO)) { - if (test_bit(B_READING, &b->state) || - test_bit(B_WRITING, &b->state) || - test_bit(B_DIRTY, &b->state)) - return 1; - } - - if (b->hold_count) - return 1; - - __make_buffer_clean(b); - __unlink_buffer(b); - __free_buffer_wake(b); - - return 0; -} - -static void __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, - struct shrink_control *sc) -{ - int l; - struct dm_buffer *b, *tmp; - - for (l = 0; l < LIST_SIZE; l++) { - list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) - if (!__cleanup_old_buffer(b, sc->gfp_mask, 0) && - !--nr_to_scan) - return; - dm_bufio_cond_resched(); - } -} - -static int shrink(struct shrinker *shrinker, struct shrink_control *sc) -{ - struct dm_bufio_client *c = - container_of(shrinker, struct dm_bufio_client, shrinker); - unsigned long r; - unsigned long nr_to_scan = sc->nr_to_scan; - - if (sc->gfp_mask & __GFP_IO) - dm_bufio_lock(c); - else if (!dm_bufio_trylock(c)) - return !nr_to_scan ? 0 : -1; - - if (nr_to_scan) - __scan(c, nr_to_scan, sc); - - r = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY]; - if (r > INT_MAX) - r = INT_MAX; - - dm_bufio_unlock(c); - - return r; -} - -/* - * Create the buffering interface - */ -struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsigned block_size, - unsigned reserved_buffers, unsigned aux_size, - void (*alloc_callback)(struct dm_buffer *), - void (*write_callback)(struct dm_buffer *)) -{ - int r; - struct dm_bufio_client *c; - unsigned i; - - BUG_ON(block_size < 1 << SECTOR_SHIFT || - (block_size & (block_size - 1))); - - c = kmalloc(sizeof(*c), GFP_KERNEL); - if (!c) { - r = -ENOMEM; - goto bad_client; - } - c->cache_hash = vmalloc(sizeof(struct hlist_head) << DM_BUFIO_HASH_BITS); - if (!c->cache_hash) { - r = -ENOMEM; - goto bad_hash; - } - - c->bdev = bdev; - c->block_size = block_size; - c->sectors_per_block_bits = ffs(block_size) - 1 - SECTOR_SHIFT; - c->pages_per_block_bits = (ffs(block_size) - 1 >= PAGE_SHIFT) ? - ffs(block_size) - 1 - PAGE_SHIFT : 0; - c->blocks_per_page_bits = (ffs(block_size) - 1 < PAGE_SHIFT ? 
- PAGE_SHIFT - (ffs(block_size) - 1) : 0); - - c->aux_size = aux_size; - c->alloc_callback = alloc_callback; - c->write_callback = write_callback; - - for (i = 0; i < LIST_SIZE; i++) { - INIT_LIST_HEAD(&c->lru[i]); - c->n_buffers[i] = 0; - } - - for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++) - INIT_HLIST_HEAD(&c->cache_hash[i]); - - mutex_init(&c->lock); - INIT_LIST_HEAD(&c->reserved_buffers); - c->need_reserved_buffers = reserved_buffers; - - init_waitqueue_head(&c->free_buffer_wait); - c->async_write_error = 0; - - c->dm_io = dm_io_client_create(); - if (IS_ERR(c->dm_io)) { - r = PTR_ERR(c->dm_io); - goto bad_dm_io; - } - - mutex_lock(&dm_bufio_clients_lock); - if (c->blocks_per_page_bits) { - if (!DM_BUFIO_CACHE_NAME(c)) { - DM_BUFIO_CACHE_NAME(c) = kasprintf(GFP_KERNEL, "dm_bufio_cache-%u", c->block_size); - if (!DM_BUFIO_CACHE_NAME(c)) { - r = -ENOMEM; - mutex_unlock(&dm_bufio_clients_lock); - goto bad_cache; - } - } - - if (!DM_BUFIO_CACHE(c)) { - DM_BUFIO_CACHE(c) = kmem_cache_create(DM_BUFIO_CACHE_NAME(c), - c->block_size, - c->block_size, 0, NULL); - if (!DM_BUFIO_CACHE(c)) { - r = -ENOMEM; - mutex_unlock(&dm_bufio_clients_lock); - goto bad_cache; - } - } - } - mutex_unlock(&dm_bufio_clients_lock); - - while (c->need_reserved_buffers) { - struct dm_buffer *b = alloc_buffer(c, GFP_KERNEL); - - if (!b) { - r = -ENOMEM; - goto bad_buffer; - } - __free_buffer_wake(b); - } - - mutex_lock(&dm_bufio_clients_lock); - dm_bufio_client_count++; - list_add(&c->client_list, &dm_bufio_all_clients); - __cache_size_refresh(); - mutex_unlock(&dm_bufio_clients_lock); - - c->shrinker.shrink = shrink; - c->shrinker.seeks = 1; - c->shrinker.batch = 0; - register_shrinker(&c->shrinker); - - return c; - -bad_buffer: -bad_cache: - while (!list_empty(&c->reserved_buffers)) { - struct dm_buffer *b = list_entry(c->reserved_buffers.next, - struct dm_buffer, lru_list); - list_del(&b->lru_list); - free_buffer(b); - } - dm_io_client_destroy(c->dm_io); -bad_dm_io: - vfree(c->cache_hash); -bad_hash: - kfree(c); -bad_client: - return ERR_PTR(r); -} -EXPORT_SYMBOL_GPL(dm_bufio_client_create); - -/* - * Free the buffering interface. - * It is required that there are no references on any buffers. 
- */ -void dm_bufio_client_destroy(struct dm_bufio_client *c) -{ - unsigned i; - - drop_buffers(c); - - unregister_shrinker(&c->shrinker); - - mutex_lock(&dm_bufio_clients_lock); - - list_del(&c->client_list); - dm_bufio_client_count--; - __cache_size_refresh(); - - mutex_unlock(&dm_bufio_clients_lock); - - for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++) - BUG_ON(!hlist_empty(&c->cache_hash[i])); - - BUG_ON(c->need_reserved_buffers); - - while (!list_empty(&c->reserved_buffers)) { - struct dm_buffer *b = list_entry(c->reserved_buffers.next, - struct dm_buffer, lru_list); - list_del(&b->lru_list); - free_buffer(b); - } - - for (i = 0; i < LIST_SIZE; i++) - if (c->n_buffers[i]) - DMERR("leaked buffer count %d: %ld", i, c->n_buffers[i]); - - for (i = 0; i < LIST_SIZE; i++) - BUG_ON(c->n_buffers[i]); - - dm_io_client_destroy(c->dm_io); - vfree(c->cache_hash); - kfree(c); -} -EXPORT_SYMBOL_GPL(dm_bufio_client_destroy); - -static void cleanup_old_buffers(void) -{ - unsigned long max_age = dm_bufio_max_age; - struct dm_bufio_client *c; - - barrier(); - - if (max_age > ULONG_MAX / HZ) - max_age = ULONG_MAX / HZ; - - mutex_lock(&dm_bufio_clients_lock); - list_for_each_entry(c, &dm_bufio_all_clients, client_list) { - if (!dm_bufio_trylock(c)) - continue; - - while (!list_empty(&c->lru[LIST_CLEAN])) { - struct dm_buffer *b; - b = list_entry(c->lru[LIST_CLEAN].prev, - struct dm_buffer, lru_list); - if (__cleanup_old_buffer(b, 0, max_age * HZ)) - break; - dm_bufio_cond_resched(); - } - - dm_bufio_unlock(c); - dm_bufio_cond_resched(); - } - mutex_unlock(&dm_bufio_clients_lock); -} - -static struct workqueue_struct *dm_bufio_wq; -static struct delayed_work dm_bufio_work; - -static void work_fn(struct work_struct *w) -{ - cleanup_old_buffers(); - - queue_delayed_work(dm_bufio_wq, &dm_bufio_work, - DM_BUFIO_WORK_TIMER_SECS * HZ); -} - -/*---------------------------------------------------------------- - * Module setup - *--------------------------------------------------------------*/ - -/* - * This is called only once for the whole dm_bufio module. - * It initializes memory limit. - */ -static int __init dm_bufio_init(void) -{ - __u64 mem; - - memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches); - memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names); - - mem = (__u64)((totalram_pages - totalhigh_pages) * - DM_BUFIO_MEMORY_PERCENT / 100) << PAGE_SHIFT; - - if (mem > ULONG_MAX) - mem = ULONG_MAX; - -#ifdef CONFIG_MMU - /* - * Get the size of vmalloc space the same way as VMALLOC_TOTAL - * in fs/proc/internal.h - */ - if (mem > (VMALLOC_END - VMALLOC_START) * DM_BUFIO_VMALLOC_PERCENT / 100) - mem = (VMALLOC_END - VMALLOC_START) * DM_BUFIO_VMALLOC_PERCENT / 100; -#endif - - dm_bufio_default_cache_size = mem; - - mutex_lock(&dm_bufio_clients_lock); - __cache_size_refresh(); - mutex_unlock(&dm_bufio_clients_lock); - - dm_bufio_wq = create_singlethread_workqueue("dm_bufio_cache"); - if (!dm_bufio_wq) - return -ENOMEM; - - INIT_DELAYED_WORK(&dm_bufio_work, work_fn); - queue_delayed_work(dm_bufio_wq, &dm_bufio_work, - DM_BUFIO_WORK_TIMER_SECS * HZ); - - return 0; -} - -/* - * This is called once when unloading the dm_bufio module. 
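 * (Both the default cache size chosen at load time and the age limit used by
 *  the periodic cleanup above can be overridden at runtime through the
 *  writable module parameters declared below: max_cache_size_bytes and
 *  max_age_seconds.)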
- */ -static void __exit dm_bufio_exit(void) -{ - int bug = 0; - int i; - - cancel_delayed_work_sync(&dm_bufio_work); - destroy_workqueue(dm_bufio_wq); - - for (i = 0; i < ARRAY_SIZE(dm_bufio_caches); i++) { - struct kmem_cache *kc = dm_bufio_caches[i]; - - if (kc) - kmem_cache_destroy(kc); - } - - for (i = 0; i < ARRAY_SIZE(dm_bufio_cache_names); i++) - kfree(dm_bufio_cache_names[i]); - - if (dm_bufio_client_count) { - DMCRIT("%s: dm_bufio_client_count leaked: %d", - __func__, dm_bufio_client_count); - bug = 1; - } - - if (dm_bufio_current_allocated) { - DMCRIT("%s: dm_bufio_current_allocated leaked: %lu", - __func__, dm_bufio_current_allocated); - bug = 1; - } - - if (dm_bufio_allocated_get_free_pages) { - DMCRIT("%s: dm_bufio_allocated_get_free_pages leaked: %lu", - __func__, dm_bufio_allocated_get_free_pages); - bug = 1; - } - - if (dm_bufio_allocated_vmalloc) { - DMCRIT("%s: dm_bufio_vmalloc leaked: %lu", - __func__, dm_bufio_allocated_vmalloc); - bug = 1; - } - - if (bug) - BUG(); -} - -module_init(dm_bufio_init) -module_exit(dm_bufio_exit) - -module_param_named(max_cache_size_bytes, dm_bufio_cache_size, ulong, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache"); - -module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds"); - -module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory"); - -module_param_named(allocated_kmem_cache_bytes, dm_bufio_allocated_kmem_cache, ulong, S_IRUGO); -MODULE_PARM_DESC(allocated_kmem_cache_bytes, "Memory allocated with kmem_cache_alloc"); - -module_param_named(allocated_get_free_pages_bytes, dm_bufio_allocated_get_free_pages, ulong, S_IRUGO); -MODULE_PARM_DESC(allocated_get_free_pages_bytes, "Memory allocated with get_free_pages"); - -module_param_named(allocated_vmalloc_bytes, dm_bufio_allocated_vmalloc, ulong, S_IRUGO); -MODULE_PARM_DESC(allocated_vmalloc_bytes, "Memory allocated with vmalloc"); - -module_param_named(current_allocated_bytes, dm_bufio_current_allocated, ulong, S_IRUGO); -MODULE_PARM_DESC(current_allocated_bytes, "Memory currently used by the cache"); - -MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>"); -MODULE_DESCRIPTION(DM_NAME " buffered I/O library"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm-bufio.h b/ANDROID_3.4.5/drivers/md/dm-bufio.h deleted file mode 100644 index b142946a..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-bufio.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (C) 2009-2011 Red Hat, Inc. - * - * Author: Mikulas Patocka <mpatocka@redhat.com> - * - * This file is released under the GPL. - */ - -#ifndef DM_BUFIO_H -#define DM_BUFIO_H - -#include <linux/blkdev.h> -#include <linux/types.h> - -/*----------------------------------------------------------------*/ - -struct dm_bufio_client; -struct dm_buffer; - -/* - * Create a buffered IO cache on a given device - */ -struct dm_bufio_client * -dm_bufio_client_create(struct block_device *bdev, unsigned block_size, - unsigned reserved_buffers, unsigned aux_size, - void (*alloc_callback)(struct dm_buffer *), - void (*write_callback)(struct dm_buffer *)); - -/* - * Release a buffered IO cache. - */ -void dm_bufio_client_destroy(struct dm_bufio_client *c); - -/* - * WARNING: to avoid deadlocks, these conditions are observed: - * - * - At most one thread can hold at most "reserved_buffers" simultaneously. 
- * - Each other threads can hold at most one buffer. - * - Threads which call only dm_bufio_get can hold unlimited number of - * buffers. - */ - -/* - * Read a given block from disk. Returns pointer to data. Returns a - * pointer to dm_buffer that can be used to release the buffer or to make - * it dirty. - */ -void *dm_bufio_read(struct dm_bufio_client *c, sector_t block, - struct dm_buffer **bp); - -/* - * Like dm_bufio_read, but return buffer from cache, don't read - * it. If the buffer is not in the cache, return NULL. - */ -void *dm_bufio_get(struct dm_bufio_client *c, sector_t block, - struct dm_buffer **bp); - -/* - * Like dm_bufio_read, but don't read anything from the disk. It is - * expected that the caller initializes the buffer and marks it dirty. - */ -void *dm_bufio_new(struct dm_bufio_client *c, sector_t block, - struct dm_buffer **bp); - -/* - * Prefetch the specified blocks to the cache. - * The function starts to read the blocks and returns without waiting for - * I/O to finish. - */ -void dm_bufio_prefetch(struct dm_bufio_client *c, - sector_t block, unsigned n_blocks); - -/* - * Release a reference obtained with dm_bufio_{read,get,new}. The data - * pointer and dm_buffer pointer is no longer valid after this call. - */ -void dm_bufio_release(struct dm_buffer *b); - -/* - * Mark a buffer dirty. It should be called after the buffer is modified. - * - * In case of memory pressure, the buffer may be written after - * dm_bufio_mark_buffer_dirty, but before dm_bufio_write_dirty_buffers. So - * dm_bufio_write_dirty_buffers guarantees that the buffer is on-disk but - * the actual writing may occur earlier. - */ -void dm_bufio_mark_buffer_dirty(struct dm_buffer *b); - -/* - * Initiate writing of dirty buffers, without waiting for completion. - */ -void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c); - -/* - * Write all dirty buffers. Guarantees that all dirty buffers created prior - * to this call are on disk when this call exits. - */ -int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c); - -/* - * Send an empty write barrier to the device to flush hardware disk cache. - */ -int dm_bufio_issue_flush(struct dm_bufio_client *c); - -/* - * Like dm_bufio_release but also move the buffer to the new - * block. dm_bufio_write_dirty_buffers is needed to commit the new block. - */ -void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block); - -unsigned dm_bufio_get_block_size(struct dm_bufio_client *c); -sector_t dm_bufio_get_device_size(struct dm_bufio_client *c); -sector_t dm_bufio_get_block_number(struct dm_buffer *b); -void *dm_bufio_get_block_data(struct dm_buffer *b); -void *dm_bufio_get_aux_data(struct dm_buffer *b); -struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b); - -/*----------------------------------------------------------------*/ - -#endif diff --git a/ANDROID_3.4.5/drivers/md/dm-crypt.c b/ANDROID_3.4.5/drivers/md/dm-crypt.c deleted file mode 100644 index 3f06df59..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-crypt.c +++ /dev/null @@ -1,1914 +0,0 @@ -/* - * Copyright (C) 2003 Christophe Saout <christophe@saout.de> - * Copyright (C) 2004 Clemens Fruhwirth <clemens@endorphin.org> - * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. 
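A minimal usage sketch of the dm-bufio interface declared above, for illustration only: it assumes a caller that already holds an opened block_device and wants to update a single 4 KiB metadata block. The names example_update_block, my_bdev and my_block are placeholders, and the error handling simply follows the ERR_PTR convention used by the implementation above.

	#include <linux/err.h>
	#include "dm-bufio.h"

	/* Illustrative: read one 4 KiB block, modify it, and commit it to disk. */
	static int example_update_block(struct block_device *my_bdev, sector_t my_block)
	{
		struct dm_bufio_client *c;
		struct dm_buffer *b;
		u8 *data;
		int r;

		/* 4 KiB blocks, one reserved buffer, no aux data, no callbacks */
		c = dm_bufio_client_create(my_bdev, 4096, 1, 0, NULL, NULL);
		if (IS_ERR(c))
			return PTR_ERR(c);

		data = dm_bufio_read(c, my_block, &b);	/* read errors come back as ERR_PTR */
		if (IS_ERR(data)) {
			r = PTR_ERR(data);
			goto out;
		}

		data[0] ^= 0xff;			/* modify the cached block */
		dm_bufio_mark_buffer_dirty(b);		/* schedule it for writeback */
		dm_bufio_release(b);			/* drop the reference */

		r = dm_bufio_write_dirty_buffers(c);	/* wait until it is on disk */
	out:
		dm_bufio_client_destroy(c);		/* all references must be gone */
		return r;
	}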
- */ - -#include <linux/completion.h> -#include <linux/err.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/bio.h> -#include <linux/blkdev.h> -#include <linux/mempool.h> -#include <linux/slab.h> -#include <linux/crypto.h> -#include <linux/workqueue.h> -#include <linux/backing-dev.h> -#include <linux/percpu.h> -#include <linux/atomic.h> -#include <linux/scatterlist.h> -#include <asm/page.h> -#include <asm/unaligned.h> -#include <crypto/hash.h> -#include <crypto/md5.h> -#include <crypto/algapi.h> - -#include <linux/device-mapper.h> - -#define DM_MSG_PREFIX "crypt" - -/* - * context holding the current state of a multi-part conversion - */ -struct convert_context { - struct completion restart; - struct bio *bio_in; - struct bio *bio_out; - unsigned int offset_in; - unsigned int offset_out; - unsigned int idx_in; - unsigned int idx_out; - sector_t sector; - atomic_t pending; -}; - -/* - * per bio private data - */ -struct dm_crypt_io { - struct dm_target *target; - struct bio *base_bio; - struct work_struct work; - - struct convert_context ctx; - - atomic_t pending; - int error; - sector_t sector; - struct dm_crypt_io *base_io; -}; - -struct dm_crypt_request { - struct convert_context *ctx; - struct scatterlist sg_in; - struct scatterlist sg_out; - sector_t iv_sector; -}; - -struct crypt_config; - -struct crypt_iv_operations { - int (*ctr)(struct crypt_config *cc, struct dm_target *ti, - const char *opts); - void (*dtr)(struct crypt_config *cc); - int (*init)(struct crypt_config *cc); - int (*wipe)(struct crypt_config *cc); - int (*generator)(struct crypt_config *cc, u8 *iv, - struct dm_crypt_request *dmreq); - int (*post)(struct crypt_config *cc, u8 *iv, - struct dm_crypt_request *dmreq); -}; - -struct iv_essiv_private { - struct crypto_hash *hash_tfm; - u8 *salt; -}; - -struct iv_benbi_private { - int shift; -}; - -#define LMK_SEED_SIZE 64 /* hash + 0 */ -struct iv_lmk_private { - struct crypto_shash *hash_tfm; - u8 *seed; -}; - -/* - * Crypt: maps a linear range of a block device - * and encrypts / decrypts at the same time. - */ -enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID }; - -/* - * Duplicated per-CPU state for cipher. - */ -struct crypt_cpu { - struct ablkcipher_request *req; - /* ESSIV: struct crypto_cipher *essiv_tfm */ - void *iv_private; - struct crypto_ablkcipher *tfms[0]; -}; - -/* - * The fields in here must be read only after initialization, - * changing state should be in crypt_cpu. - */ -struct crypt_config { - struct dm_dev *dev; - sector_t start; - - /* - * pool for per bio private data, crypto requests and - * encryption requeusts/buffer pages - */ - mempool_t *io_pool; - mempool_t *req_pool; - mempool_t *page_pool; - struct bio_set *bs; - - struct workqueue_struct *io_queue; - struct workqueue_struct *crypt_queue; - - char *cipher; - char *cipher_string; - - struct crypt_iv_operations *iv_gen_ops; - union { - struct iv_essiv_private essiv; - struct iv_benbi_private benbi; - struct iv_lmk_private lmk; - } iv_gen_private; - sector_t iv_offset; - unsigned int iv_size; - - /* - * Duplicated per cpu state. Access through - * per_cpu_ptr() only. - */ - struct crypt_cpu __percpu *cpu; - unsigned tfms_count; - - /* - * Layout of each crypto request: - * - * struct ablkcipher_request - * context - * padding - * struct dm_crypt_request - * padding - * IV - * - * The padding is added so that dm_crypt_request and the IV are - * correctly aligned. 
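 * The helpers dmreq_of_req() and iv_of_dmreq() further down implement this
 * layout: the dm_crypt_request lives dmreq_start bytes past the
 * ablkcipher_request, and the IV begins at the first address after it that
 * satisfies the cipher's alignment mask.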
- */ - unsigned int dmreq_start; - - unsigned long flags; - unsigned int key_size; - unsigned int key_parts; - u8 key[0]; -}; - -#define MIN_IOS 16 -#define MIN_POOL_PAGES 32 - -static struct kmem_cache *_crypt_io_pool; - -static void clone_init(struct dm_crypt_io *, struct bio *); -static void kcryptd_queue_crypt(struct dm_crypt_io *io); -static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq); - -static struct crypt_cpu *this_crypt_config(struct crypt_config *cc) -{ - return this_cpu_ptr(cc->cpu); -} - -/* - * Use this to access cipher attributes that are the same for each CPU. - */ -static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc) -{ - return __this_cpu_ptr(cc->cpu)->tfms[0]; -} - -/* - * Different IV generation algorithms: - * - * plain: the initial vector is the 32-bit little-endian version of the sector - * number, padded with zeros if necessary. - * - * plain64: the initial vector is the 64-bit little-endian version of the sector - * number, padded with zeros if necessary. - * - * essiv: "encrypted sector|salt initial vector", the sector number is - * encrypted with the bulk cipher using a salt as key. The salt - * should be derived from the bulk cipher's key via hashing. - * - * benbi: the 64-bit "big-endian 'narrow block'-count", starting at 1 - * (needed for LRW-32-AES and possible other narrow block modes) - * - * null: the initial vector is always zero. Provides compatibility with - * obsolete loop_fish2 devices. Do not use for new devices. - * - * lmk: Compatible implementation of the block chaining mode used - * by the Loop-AES block device encryption system - * designed by Jari Ruusu. See http://loop-aes.sourceforge.net/ - * It operates on full 512 byte sectors and uses CBC - * with an IV derived from the sector number, the data and - * optionally extra IV seed. - * This means that after decryption the first block - * of sector must be tweaked according to decrypted data. 
- * Loop-AES can use three encryption schemes: - * version 1: is plain aes-cbc mode - * version 2: uses 64 multikey scheme with lmk IV generator - * version 3: the same as version 2 with additional IV seed - * (it uses 65 keys, last key is used as IV seed) - * - * plumb: unimplemented, see: - * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454 - */ - -static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, - struct dm_crypt_request *dmreq) -{ - memset(iv, 0, cc->iv_size); - *(__le32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff); - - return 0; -} - -static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv, - struct dm_crypt_request *dmreq) -{ - memset(iv, 0, cc->iv_size); - *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); - - return 0; -} - -/* Initialise ESSIV - compute salt but no local memory allocations */ -static int crypt_iv_essiv_init(struct crypt_config *cc) -{ - struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; - struct hash_desc desc; - struct scatterlist sg; - struct crypto_cipher *essiv_tfm; - int err, cpu; - - sg_init_one(&sg, cc->key, cc->key_size); - desc.tfm = essiv->hash_tfm; - desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; - - err = crypto_hash_digest(&desc, &sg, cc->key_size, essiv->salt); - if (err) - return err; - - for_each_possible_cpu(cpu) { - essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private, - - err = crypto_cipher_setkey(essiv_tfm, essiv->salt, - crypto_hash_digestsize(essiv->hash_tfm)); - if (err) - return err; - } - - return 0; -} - -/* Wipe salt and reset key derived from volume key */ -static int crypt_iv_essiv_wipe(struct crypt_config *cc) -{ - struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; - unsigned salt_size = crypto_hash_digestsize(essiv->hash_tfm); - struct crypto_cipher *essiv_tfm; - int cpu, r, err = 0; - - memset(essiv->salt, 0, salt_size); - - for_each_possible_cpu(cpu) { - essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private; - r = crypto_cipher_setkey(essiv_tfm, essiv->salt, salt_size); - if (r) - err = r; - } - - return err; -} - -/* Set up per cpu cipher state */ -static struct crypto_cipher *setup_essiv_cpu(struct crypt_config *cc, - struct dm_target *ti, - u8 *salt, unsigned saltsize) -{ - struct crypto_cipher *essiv_tfm; - int err; - - /* Setup the essiv_tfm with the given salt */ - essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(essiv_tfm)) { - ti->error = "Error allocating crypto tfm for ESSIV"; - return essiv_tfm; - } - - if (crypto_cipher_blocksize(essiv_tfm) != - crypto_ablkcipher_ivsize(any_tfm(cc))) { - ti->error = "Block size of ESSIV cipher does " - "not match IV size of block cipher"; - crypto_free_cipher(essiv_tfm); - return ERR_PTR(-EINVAL); - } - - err = crypto_cipher_setkey(essiv_tfm, salt, saltsize); - if (err) { - ti->error = "Failed to set key for ESSIV cipher"; - crypto_free_cipher(essiv_tfm); - return ERR_PTR(err); - } - - return essiv_tfm; -} - -static void crypt_iv_essiv_dtr(struct crypt_config *cc) -{ - int cpu; - struct crypt_cpu *cpu_cc; - struct crypto_cipher *essiv_tfm; - struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; - - crypto_free_hash(essiv->hash_tfm); - essiv->hash_tfm = NULL; - - kzfree(essiv->salt); - essiv->salt = NULL; - - for_each_possible_cpu(cpu) { - cpu_cc = per_cpu_ptr(cc->cpu, cpu); - essiv_tfm = cpu_cc->iv_private; - - if (essiv_tfm) - crypto_free_cipher(essiv_tfm); - - cpu_cc->iv_private = NULL; - } -} - -static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, - const char *opts) -{ - 
struct crypto_cipher *essiv_tfm = NULL; - struct crypto_hash *hash_tfm = NULL; - u8 *salt = NULL; - int err, cpu; - - if (!opts) { - ti->error = "Digest algorithm missing for ESSIV mode"; - return -EINVAL; - } - - /* Allocate hash algorithm */ - hash_tfm = crypto_alloc_hash(opts, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(hash_tfm)) { - ti->error = "Error initializing ESSIV hash"; - err = PTR_ERR(hash_tfm); - goto bad; - } - - salt = kzalloc(crypto_hash_digestsize(hash_tfm), GFP_KERNEL); - if (!salt) { - ti->error = "Error kmallocing salt storage in ESSIV"; - err = -ENOMEM; - goto bad; - } - - cc->iv_gen_private.essiv.salt = salt; - cc->iv_gen_private.essiv.hash_tfm = hash_tfm; - - for_each_possible_cpu(cpu) { - essiv_tfm = setup_essiv_cpu(cc, ti, salt, - crypto_hash_digestsize(hash_tfm)); - if (IS_ERR(essiv_tfm)) { - crypt_iv_essiv_dtr(cc); - return PTR_ERR(essiv_tfm); - } - per_cpu_ptr(cc->cpu, cpu)->iv_private = essiv_tfm; - } - - return 0; - -bad: - if (hash_tfm && !IS_ERR(hash_tfm)) - crypto_free_hash(hash_tfm); - kfree(salt); - return err; -} - -static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, - struct dm_crypt_request *dmreq) -{ - struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private; - - memset(iv, 0, cc->iv_size); - *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); - crypto_cipher_encrypt_one(essiv_tfm, iv, iv); - - return 0; -} - -static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti, - const char *opts) -{ - unsigned bs = crypto_ablkcipher_blocksize(any_tfm(cc)); - int log = ilog2(bs); - - /* we need to calculate how far we must shift the sector count - * to get the cipher block count, we use this shift in _gen */ - - if (1 << log != bs) { - ti->error = "cypher blocksize is not a power of 2"; - return -EINVAL; - } - - if (log > 9) { - ti->error = "cypher blocksize is > 512"; - return -EINVAL; - } - - cc->iv_gen_private.benbi.shift = 9 - log; - - return 0; -} - -static void crypt_iv_benbi_dtr(struct crypt_config *cc) -{ -} - -static int crypt_iv_benbi_gen(struct crypt_config *cc, u8 *iv, - struct dm_crypt_request *dmreq) -{ - __be64 val; - - memset(iv, 0, cc->iv_size - sizeof(u64)); /* rest is cleared below */ - - val = cpu_to_be64(((u64)dmreq->iv_sector << cc->iv_gen_private.benbi.shift) + 1); - put_unaligned(val, (__be64 *)(iv + cc->iv_size - sizeof(u64))); - - return 0; -} - -static int crypt_iv_null_gen(struct crypt_config *cc, u8 *iv, - struct dm_crypt_request *dmreq) -{ - memset(iv, 0, cc->iv_size); - - return 0; -} - -static void crypt_iv_lmk_dtr(struct crypt_config *cc) -{ - struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk; - - if (lmk->hash_tfm && !IS_ERR(lmk->hash_tfm)) - crypto_free_shash(lmk->hash_tfm); - lmk->hash_tfm = NULL; - - kzfree(lmk->seed); - lmk->seed = NULL; -} - -static int crypt_iv_lmk_ctr(struct crypt_config *cc, struct dm_target *ti, - const char *opts) -{ - struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk; - - lmk->hash_tfm = crypto_alloc_shash("md5", 0, 0); - if (IS_ERR(lmk->hash_tfm)) { - ti->error = "Error initializing LMK hash"; - return PTR_ERR(lmk->hash_tfm); - } - - /* No seed in LMK version 2 */ - if (cc->key_parts == cc->tfms_count) { - lmk->seed = NULL; - return 0; - } - - lmk->seed = kzalloc(LMK_SEED_SIZE, GFP_KERNEL); - if (!lmk->seed) { - crypt_iv_lmk_dtr(cc); - ti->error = "Error kmallocing seed storage in LMK"; - return -ENOMEM; - } - - return 0; -} - -static int crypt_iv_lmk_init(struct crypt_config *cc) -{ - struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk; - int subkey_size = 
cc->key_size / cc->key_parts; - - /* LMK seed is on the position of LMK_KEYS + 1 key */ - if (lmk->seed) - memcpy(lmk->seed, cc->key + (cc->tfms_count * subkey_size), - crypto_shash_digestsize(lmk->hash_tfm)); - - return 0; -} - -static int crypt_iv_lmk_wipe(struct crypt_config *cc) -{ - struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk; - - if (lmk->seed) - memset(lmk->seed, 0, LMK_SEED_SIZE); - - return 0; -} - -static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv, - struct dm_crypt_request *dmreq, - u8 *data) -{ - struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk; - struct { - struct shash_desc desc; - char ctx[crypto_shash_descsize(lmk->hash_tfm)]; - } sdesc; - struct md5_state md5state; - u32 buf[4]; - int i, r; - - sdesc.desc.tfm = lmk->hash_tfm; - sdesc.desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; - - r = crypto_shash_init(&sdesc.desc); - if (r) - return r; - - if (lmk->seed) { - r = crypto_shash_update(&sdesc.desc, lmk->seed, LMK_SEED_SIZE); - if (r) - return r; - } - - /* Sector is always 512B, block size 16, add data of blocks 1-31 */ - r = crypto_shash_update(&sdesc.desc, data + 16, 16 * 31); - if (r) - return r; - - /* Sector is cropped to 56 bits here */ - buf[0] = cpu_to_le32(dmreq->iv_sector & 0xFFFFFFFF); - buf[1] = cpu_to_le32((((u64)dmreq->iv_sector >> 32) & 0x00FFFFFF) | 0x80000000); - buf[2] = cpu_to_le32(4024); - buf[3] = 0; - r = crypto_shash_update(&sdesc.desc, (u8 *)buf, sizeof(buf)); - if (r) - return r; - - /* No MD5 padding here */ - r = crypto_shash_export(&sdesc.desc, &md5state); - if (r) - return r; - - for (i = 0; i < MD5_HASH_WORDS; i++) - __cpu_to_le32s(&md5state.hash[i]); - memcpy(iv, &md5state.hash, cc->iv_size); - - return 0; -} - -static int crypt_iv_lmk_gen(struct crypt_config *cc, u8 *iv, - struct dm_crypt_request *dmreq) -{ - u8 *src; - int r = 0; - - if (bio_data_dir(dmreq->ctx->bio_in) == WRITE) { - src = kmap_atomic(sg_page(&dmreq->sg_in)); - r = crypt_iv_lmk_one(cc, iv, dmreq, src + dmreq->sg_in.offset); - kunmap_atomic(src); - } else - memset(iv, 0, cc->iv_size); - - return r; -} - -static int crypt_iv_lmk_post(struct crypt_config *cc, u8 *iv, - struct dm_crypt_request *dmreq) -{ - u8 *dst; - int r; - - if (bio_data_dir(dmreq->ctx->bio_in) == WRITE) - return 0; - - dst = kmap_atomic(sg_page(&dmreq->sg_out)); - r = crypt_iv_lmk_one(cc, iv, dmreq, dst + dmreq->sg_out.offset); - - /* Tweak the first block of plaintext sector */ - if (!r) - crypto_xor(dst + dmreq->sg_out.offset, iv, cc->iv_size); - - kunmap_atomic(dst); - return r; -} - -static struct crypt_iv_operations crypt_iv_plain_ops = { - .generator = crypt_iv_plain_gen -}; - -static struct crypt_iv_operations crypt_iv_plain64_ops = { - .generator = crypt_iv_plain64_gen -}; - -static struct crypt_iv_operations crypt_iv_essiv_ops = { - .ctr = crypt_iv_essiv_ctr, - .dtr = crypt_iv_essiv_dtr, - .init = crypt_iv_essiv_init, - .wipe = crypt_iv_essiv_wipe, - .generator = crypt_iv_essiv_gen -}; - -static struct crypt_iv_operations crypt_iv_benbi_ops = { - .ctr = crypt_iv_benbi_ctr, - .dtr = crypt_iv_benbi_dtr, - .generator = crypt_iv_benbi_gen -}; - -static struct crypt_iv_operations crypt_iv_null_ops = { - .generator = crypt_iv_null_gen -}; - -static struct crypt_iv_operations crypt_iv_lmk_ops = { - .ctr = crypt_iv_lmk_ctr, - .dtr = crypt_iv_lmk_dtr, - .init = crypt_iv_lmk_init, - .wipe = crypt_iv_lmk_wipe, - .generator = crypt_iv_lmk_gen, - .post = crypt_iv_lmk_post -}; - -static void crypt_convert_init(struct crypt_config *cc, - struct convert_context *ctx, - struct bio *bio_out, struct 
bio *bio_in, - sector_t sector) -{ - ctx->bio_in = bio_in; - ctx->bio_out = bio_out; - ctx->offset_in = 0; - ctx->offset_out = 0; - ctx->idx_in = bio_in ? bio_in->bi_idx : 0; - ctx->idx_out = bio_out ? bio_out->bi_idx : 0; - ctx->sector = sector + cc->iv_offset; - init_completion(&ctx->restart); -} - -static struct dm_crypt_request *dmreq_of_req(struct crypt_config *cc, - struct ablkcipher_request *req) -{ - return (struct dm_crypt_request *)((char *)req + cc->dmreq_start); -} - -static struct ablkcipher_request *req_of_dmreq(struct crypt_config *cc, - struct dm_crypt_request *dmreq) -{ - return (struct ablkcipher_request *)((char *)dmreq - cc->dmreq_start); -} - -static u8 *iv_of_dmreq(struct crypt_config *cc, - struct dm_crypt_request *dmreq) -{ - return (u8 *)ALIGN((unsigned long)(dmreq + 1), - crypto_ablkcipher_alignmask(any_tfm(cc)) + 1); -} - -static int crypt_convert_block(struct crypt_config *cc, - struct convert_context *ctx, - struct ablkcipher_request *req) -{ - struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in); - struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out); - struct dm_crypt_request *dmreq; - u8 *iv; - int r = 0; - - dmreq = dmreq_of_req(cc, req); - iv = iv_of_dmreq(cc, dmreq); - - dmreq->iv_sector = ctx->sector; - dmreq->ctx = ctx; - sg_init_table(&dmreq->sg_in, 1); - sg_set_page(&dmreq->sg_in, bv_in->bv_page, 1 << SECTOR_SHIFT, - bv_in->bv_offset + ctx->offset_in); - - sg_init_table(&dmreq->sg_out, 1); - sg_set_page(&dmreq->sg_out, bv_out->bv_page, 1 << SECTOR_SHIFT, - bv_out->bv_offset + ctx->offset_out); - - ctx->offset_in += 1 << SECTOR_SHIFT; - if (ctx->offset_in >= bv_in->bv_len) { - ctx->offset_in = 0; - ctx->idx_in++; - } - - ctx->offset_out += 1 << SECTOR_SHIFT; - if (ctx->offset_out >= bv_out->bv_len) { - ctx->offset_out = 0; - ctx->idx_out++; - } - - if (cc->iv_gen_ops) { - r = cc->iv_gen_ops->generator(cc, iv, dmreq); - if (r < 0) - return r; - } - - ablkcipher_request_set_crypt(req, &dmreq->sg_in, &dmreq->sg_out, - 1 << SECTOR_SHIFT, iv); - - if (bio_data_dir(ctx->bio_in) == WRITE) - r = crypto_ablkcipher_encrypt(req); - else - r = crypto_ablkcipher_decrypt(req); - - if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post) - r = cc->iv_gen_ops->post(cc, iv, dmreq); - - return r; -} - -static void kcryptd_async_done(struct crypto_async_request *async_req, - int error); - -static void crypt_alloc_req(struct crypt_config *cc, - struct convert_context *ctx) -{ - struct crypt_cpu *this_cc = this_crypt_config(cc); - unsigned key_index = ctx->sector & (cc->tfms_count - 1); - - if (!this_cc->req) - this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO); - - ablkcipher_request_set_tfm(this_cc->req, this_cc->tfms[key_index]); - ablkcipher_request_set_callback(this_cc->req, - CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - kcryptd_async_done, dmreq_of_req(cc, this_cc->req)); -} - -/* - * Encrypt / decrypt data from one bio to another one (can be the same one) - */ -static int crypt_convert(struct crypt_config *cc, - struct convert_context *ctx) -{ - struct crypt_cpu *this_cc = this_crypt_config(cc); - int r; - - atomic_set(&ctx->pending, 1); - - while(ctx->idx_in < ctx->bio_in->bi_vcnt && - ctx->idx_out < ctx->bio_out->bi_vcnt) { - - crypt_alloc_req(cc, ctx); - - atomic_inc(&ctx->pending); - - r = crypt_convert_block(cc, ctx, this_cc->req); - - switch (r) { - /* async */ - case -EBUSY: - wait_for_completion(&ctx->restart); - INIT_COMPLETION(ctx->restart); - /* fall through*/ - case -EINPROGRESS: - this_cc->req = NULL; - ctx->sector++; - 
continue; - - /* sync */ - case 0: - atomic_dec(&ctx->pending); - ctx->sector++; - cond_resched(); - continue; - - /* error */ - default: - atomic_dec(&ctx->pending); - return r; - } - } - - return 0; -} - -static void dm_crypt_bio_destructor(struct bio *bio) -{ - struct dm_crypt_io *io = bio->bi_private; - struct crypt_config *cc = io->target->private; - - bio_free(bio, cc->bs); -} - -/* - * Generate a new unfragmented bio with the given size - * This should never violate the device limitations - * May return a smaller bio when running out of pages, indicated by - * *out_of_pages set to 1. - */ -static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size, - unsigned *out_of_pages) -{ - struct crypt_config *cc = io->target->private; - struct bio *clone; - unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; - gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM; - unsigned i, len; - struct page *page; - - clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); - if (!clone) - return NULL; - - clone_init(io, clone); - *out_of_pages = 0; - - for (i = 0; i < nr_iovecs; i++) { - page = mempool_alloc(cc->page_pool, gfp_mask); - if (!page) { - *out_of_pages = 1; - break; - } - - /* - * If additional pages cannot be allocated without waiting, - * return a partially-allocated bio. The caller will then try - * to allocate more bios while submitting this partial bio. - */ - gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT; - - len = (size > PAGE_SIZE) ? PAGE_SIZE : size; - - if (!bio_add_page(clone, page, len, 0)) { - mempool_free(page, cc->page_pool); - break; - } - - size -= len; - } - - if (!clone->bi_size) { - bio_put(clone); - return NULL; - } - - return clone; -} - -static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) -{ - unsigned int i; - struct bio_vec *bv; - - for (i = 0; i < clone->bi_vcnt; i++) { - bv = bio_iovec_idx(clone, i); - BUG_ON(!bv->bv_page); - mempool_free(bv->bv_page, cc->page_pool); - bv->bv_page = NULL; - } -} - -static struct dm_crypt_io *crypt_io_alloc(struct dm_target *ti, - struct bio *bio, sector_t sector) -{ - struct crypt_config *cc = ti->private; - struct dm_crypt_io *io; - - io = mempool_alloc(cc->io_pool, GFP_NOIO); - io->target = ti; - io->base_bio = bio; - io->sector = sector; - io->error = 0; - io->base_io = NULL; - atomic_set(&io->pending, 0); - - return io; -} - -static void crypt_inc_pending(struct dm_crypt_io *io) -{ - atomic_inc(&io->pending); -} - -/* - * One of the bios was finished. Check for completion of - * the whole request and correctly clean up the buffer. - * If base_io is set, wait for the last fragment to complete. - */ -static void crypt_dec_pending(struct dm_crypt_io *io) -{ - struct crypt_config *cc = io->target->private; - struct bio *base_bio = io->base_bio; - struct dm_crypt_io *base_io = io->base_io; - int error = io->error; - - if (!atomic_dec_and_test(&io->pending)) - return; - - mempool_free(io, cc->io_pool); - - if (likely(!base_io)) - bio_endio(base_bio, error); - else { - if (error && !base_io->error) - base_io->error = error; - crypt_dec_pending(base_io); - } -} - -/* - * kcryptd/kcryptd_io: - * - * Needed because it would be very unwise to do decryption in an - * interrupt context. - * - * kcryptd performs the actual encryption or decryption. - * - * kcryptd_io performs the IO submission. - * - * They must be separated as otherwise the final stages could be - * starved by new requests which can block in the first stages due - * to memory allocation. 
- * - * The work is done per CPU global for all dm-crypt instances. - * They should not depend on each other and do not block. - */ -static void crypt_endio(struct bio *clone, int error) -{ - struct dm_crypt_io *io = clone->bi_private; - struct crypt_config *cc = io->target->private; - unsigned rw = bio_data_dir(clone); - - if (unlikely(!bio_flagged(clone, BIO_UPTODATE) && !error)) - error = -EIO; - - /* - * free the processed pages - */ - if (rw == WRITE) - crypt_free_buffer_pages(cc, clone); - - bio_put(clone); - - if (rw == READ && !error) { - kcryptd_queue_crypt(io); - return; - } - - if (unlikely(error)) - io->error = error; - - crypt_dec_pending(io); -} - -static void clone_init(struct dm_crypt_io *io, struct bio *clone) -{ - struct crypt_config *cc = io->target->private; - - clone->bi_private = io; - clone->bi_end_io = crypt_endio; - clone->bi_bdev = cc->dev->bdev; - clone->bi_rw = io->base_bio->bi_rw; - clone->bi_destructor = dm_crypt_bio_destructor; -} - -static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) -{ - struct crypt_config *cc = io->target->private; - struct bio *base_bio = io->base_bio; - struct bio *clone; - - /* - * The block layer might modify the bvec array, so always - * copy the required bvecs because we need the original - * one in order to decrypt the whole bio data *afterwards*. - */ - clone = bio_alloc_bioset(gfp, bio_segments(base_bio), cc->bs); - if (!clone) - return 1; - - crypt_inc_pending(io); - - clone_init(io, clone); - clone->bi_idx = 0; - clone->bi_vcnt = bio_segments(base_bio); - clone->bi_size = base_bio->bi_size; - clone->bi_sector = cc->start + io->sector; - memcpy(clone->bi_io_vec, bio_iovec(base_bio), - sizeof(struct bio_vec) * clone->bi_vcnt); - - generic_make_request(clone); - return 0; -} - -static void kcryptd_io_write(struct dm_crypt_io *io) -{ - struct bio *clone = io->ctx.bio_out; - generic_make_request(clone); -} - -static void kcryptd_io(struct work_struct *work) -{ - struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); - - if (bio_data_dir(io->base_bio) == READ) { - crypt_inc_pending(io); - if (kcryptd_io_read(io, GFP_NOIO)) - io->error = -ENOMEM; - crypt_dec_pending(io); - } else - kcryptd_io_write(io); -} - -static void kcryptd_queue_io(struct dm_crypt_io *io) -{ - struct crypt_config *cc = io->target->private; - - INIT_WORK(&io->work, kcryptd_io); - queue_work(cc->io_queue, &io->work); -} - -static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) -{ - struct bio *clone = io->ctx.bio_out; - struct crypt_config *cc = io->target->private; - - if (unlikely(io->error < 0)) { - crypt_free_buffer_pages(cc, clone); - bio_put(clone); - crypt_dec_pending(io); - return; - } - - /* crypt_convert should have filled the clone bio */ - BUG_ON(io->ctx.idx_out < clone->bi_vcnt); - - clone->bi_sector = cc->start + io->sector; - - if (async) - kcryptd_queue_io(io); - else - generic_make_request(clone); -} - -static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) -{ - struct crypt_config *cc = io->target->private; - struct bio *clone; - struct dm_crypt_io *new_io; - int crypt_finished; - unsigned out_of_pages = 0; - unsigned remaining = io->base_bio->bi_size; - sector_t sector = io->sector; - int r; - - /* - * Prevent io from disappearing until this function completes. - */ - crypt_inc_pending(io); - crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector); - - /* - * The allocated buffers can be smaller than the whole bio, - * so repeat the whole process until all the data can be handled. 
- */ - while (remaining) { - clone = crypt_alloc_buffer(io, remaining, &out_of_pages); - if (unlikely(!clone)) { - io->error = -ENOMEM; - break; - } - - io->ctx.bio_out = clone; - io->ctx.idx_out = 0; - - remaining -= clone->bi_size; - sector += bio_sectors(clone); - - crypt_inc_pending(io); - - r = crypt_convert(cc, &io->ctx); - if (r < 0) - io->error = -EIO; - - crypt_finished = atomic_dec_and_test(&io->ctx.pending); - - /* Encryption was already finished, submit io now */ - if (crypt_finished) { - kcryptd_crypt_write_io_submit(io, 0); - - /* - * If there was an error, do not try next fragments. - * For async, error is processed in async handler. - */ - if (unlikely(r < 0)) - break; - - io->sector = sector; - } - - /* - * Out of memory -> run queues - * But don't wait if split was due to the io size restriction - */ - if (unlikely(out_of_pages)) - congestion_wait(BLK_RW_ASYNC, HZ/100); - - /* - * With async crypto it is unsafe to share the crypto context - * between fragments, so switch to a new dm_crypt_io structure. - */ - if (unlikely(!crypt_finished && remaining)) { - new_io = crypt_io_alloc(io->target, io->base_bio, - sector); - crypt_inc_pending(new_io); - crypt_convert_init(cc, &new_io->ctx, NULL, - io->base_bio, sector); - new_io->ctx.idx_in = io->ctx.idx_in; - new_io->ctx.offset_in = io->ctx.offset_in; - - /* - * Fragments after the first use the base_io - * pending count. - */ - if (!io->base_io) - new_io->base_io = io; - else { - new_io->base_io = io->base_io; - crypt_inc_pending(io->base_io); - crypt_dec_pending(io); - } - - io = new_io; - } - } - - crypt_dec_pending(io); -} - -static void kcryptd_crypt_read_done(struct dm_crypt_io *io) -{ - crypt_dec_pending(io); -} - -static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) -{ - struct crypt_config *cc = io->target->private; - int r = 0; - - crypt_inc_pending(io); - - crypt_convert_init(cc, &io->ctx, io->base_bio, io->base_bio, - io->sector); - - r = crypt_convert(cc, &io->ctx); - if (r < 0) - io->error = -EIO; - - if (atomic_dec_and_test(&io->ctx.pending)) - kcryptd_crypt_read_done(io); - - crypt_dec_pending(io); -} - -static void kcryptd_async_done(struct crypto_async_request *async_req, - int error) -{ - struct dm_crypt_request *dmreq = async_req->data; - struct convert_context *ctx = dmreq->ctx; - struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx); - struct crypt_config *cc = io->target->private; - - if (error == -EINPROGRESS) { - complete(&ctx->restart); - return; - } - - if (!error && cc->iv_gen_ops && cc->iv_gen_ops->post) - error = cc->iv_gen_ops->post(cc, iv_of_dmreq(cc, dmreq), dmreq); - - if (error < 0) - io->error = -EIO; - - mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool); - - if (!atomic_dec_and_test(&ctx->pending)) - return; - - if (bio_data_dir(io->base_bio) == READ) - kcryptd_crypt_read_done(io); - else - kcryptd_crypt_write_io_submit(io, 1); -} - -static void kcryptd_crypt(struct work_struct *work) -{ - struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); - - if (bio_data_dir(io->base_bio) == READ) - kcryptd_crypt_read_convert(io); - else - kcryptd_crypt_write_convert(io); -} - -static void kcryptd_queue_crypt(struct dm_crypt_io *io) -{ - struct crypt_config *cc = io->target->private; - - INIT_WORK(&io->work, kcryptd_crypt); - queue_work(cc->crypt_queue, &io->work); -} - -/* - * Decode key from its hex representation - */ -static int crypt_decode_key(u8 *key, char *hex, unsigned int size) -{ - char buffer[3]; - char *endp; - unsigned int i; - - buffer[2] = '\0'; 
- - for (i = 0; i < size; i++) { - buffer[0] = *hex++; - buffer[1] = *hex++; - - key[i] = (u8)simple_strtoul(buffer, &endp, 16); - - if (endp != &buffer[2]) - return -EINVAL; - } - - if (*hex != '\0') - return -EINVAL; - - return 0; -} - -/* - * Encode key into its hex representation - */ -static void crypt_encode_key(char *hex, u8 *key, unsigned int size) -{ - unsigned int i; - - for (i = 0; i < size; i++) { - sprintf(hex, "%02x", *key); - hex += 2; - key++; - } -} - -static void crypt_free_tfms(struct crypt_config *cc, int cpu) -{ - struct crypt_cpu *cpu_cc = per_cpu_ptr(cc->cpu, cpu); - unsigned i; - - for (i = 0; i < cc->tfms_count; i++) - if (cpu_cc->tfms[i] && !IS_ERR(cpu_cc->tfms[i])) { - crypto_free_ablkcipher(cpu_cc->tfms[i]); - cpu_cc->tfms[i] = NULL; - } -} - -static int crypt_alloc_tfms(struct crypt_config *cc, int cpu, char *ciphermode) -{ - struct crypt_cpu *cpu_cc = per_cpu_ptr(cc->cpu, cpu); - unsigned i; - int err; - - for (i = 0; i < cc->tfms_count; i++) { - cpu_cc->tfms[i] = crypto_alloc_ablkcipher(ciphermode, 0, 0); - if (IS_ERR(cpu_cc->tfms[i])) { - err = PTR_ERR(cpu_cc->tfms[i]); - crypt_free_tfms(cc, cpu); - return err; - } - } - - return 0; -} - -static int crypt_setkey_allcpus(struct crypt_config *cc) -{ - unsigned subkey_size = cc->key_size >> ilog2(cc->tfms_count); - int cpu, err = 0, i, r; - - for_each_possible_cpu(cpu) { - for (i = 0; i < cc->tfms_count; i++) { - r = crypto_ablkcipher_setkey(per_cpu_ptr(cc->cpu, cpu)->tfms[i], - cc->key + (i * subkey_size), subkey_size); - if (r) - err = r; - } - } - - return err; -} - -static int crypt_set_key(struct crypt_config *cc, char *key) -{ - int r = -EINVAL; - int key_string_len = strlen(key); - - /* The key size may not be changed. */ - if (cc->key_size != (key_string_len >> 1)) - goto out; - - /* Hyphen (which gives a key_size of zero) means there is no key. */ - if (!cc->key_size && strcmp(key, "-")) - goto out; - - if (cc->key_size && crypt_decode_key(cc->key, key, cc->key_size) < 0) - goto out; - - set_bit(DM_CRYPT_KEY_VALID, &cc->flags); - - r = crypt_setkey_allcpus(cc); - -out: - /* Hex key string not needed after here, so wipe it. 
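 * (The key string itself must be exactly 2 * key_size hexadecimal digits, or
 *  a single '-' for an empty key, as checked above; a 256-bit key is
 *  therefore supplied as 64 hex characters in the mapping table.)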
*/ - memset(key, '0', key_string_len); - - return r; -} - -static int crypt_wipe_key(struct crypt_config *cc) -{ - clear_bit(DM_CRYPT_KEY_VALID, &cc->flags); - memset(&cc->key, 0, cc->key_size * sizeof(u8)); - - return crypt_setkey_allcpus(cc); -} - -static void crypt_dtr(struct dm_target *ti) -{ - struct crypt_config *cc = ti->private; - struct crypt_cpu *cpu_cc; - int cpu; - - ti->private = NULL; - - if (!cc) - return; - - if (cc->io_queue) - destroy_workqueue(cc->io_queue); - if (cc->crypt_queue) - destroy_workqueue(cc->crypt_queue); - - if (cc->cpu) - for_each_possible_cpu(cpu) { - cpu_cc = per_cpu_ptr(cc->cpu, cpu); - if (cpu_cc->req) - mempool_free(cpu_cc->req, cc->req_pool); - crypt_free_tfms(cc, cpu); - } - - if (cc->bs) - bioset_free(cc->bs); - - if (cc->page_pool) - mempool_destroy(cc->page_pool); - if (cc->req_pool) - mempool_destroy(cc->req_pool); - if (cc->io_pool) - mempool_destroy(cc->io_pool); - - if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) - cc->iv_gen_ops->dtr(cc); - - if (cc->dev) - dm_put_device(ti, cc->dev); - - if (cc->cpu) - free_percpu(cc->cpu); - - kzfree(cc->cipher); - kzfree(cc->cipher_string); - - /* Must zero key material before freeing */ - kzfree(cc); -} - -static int crypt_ctr_cipher(struct dm_target *ti, - char *cipher_in, char *key) -{ - struct crypt_config *cc = ti->private; - char *tmp, *cipher, *chainmode, *ivmode, *ivopts, *keycount; - char *cipher_api = NULL; - int cpu, ret = -EINVAL; - char dummy; - - /* Convert to crypto api definition? */ - if (strchr(cipher_in, '(')) { - ti->error = "Bad cipher specification"; - return -EINVAL; - } - - cc->cipher_string = kstrdup(cipher_in, GFP_KERNEL); - if (!cc->cipher_string) - goto bad_mem; - - /* - * Legacy dm-crypt cipher specification - * cipher[:keycount]-mode-iv:ivopts - */ - tmp = cipher_in; - keycount = strsep(&tmp, "-"); - cipher = strsep(&keycount, ":"); - - if (!keycount) - cc->tfms_count = 1; - else if (sscanf(keycount, "%u%c", &cc->tfms_count, &dummy) != 1 || - !is_power_of_2(cc->tfms_count)) { - ti->error = "Bad cipher key count specification"; - return -EINVAL; - } - cc->key_parts = cc->tfms_count; - - cc->cipher = kstrdup(cipher, GFP_KERNEL); - if (!cc->cipher) - goto bad_mem; - - chainmode = strsep(&tmp, "-"); - ivopts = strsep(&tmp, "-"); - ivmode = strsep(&ivopts, ":"); - - if (tmp) - DMWARN("Ignoring unexpected additional cipher options"); - - cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)) + - cc->tfms_count * sizeof(*(cc->cpu->tfms)), - __alignof__(struct crypt_cpu)); - if (!cc->cpu) { - ti->error = "Cannot allocate per cpu state"; - goto bad_mem; - } - - /* - * For compatibility with the original dm-crypt mapping format, if - * only the cipher name is supplied, use cbc-plain. 
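 * For example, a bare "aes" is treated as "aes-cbc-plain": chainmode becomes
 * "cbc", ivmode becomes "plain", and the cipher requested from the crypto API
 * below is "cbc(aes)".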
- */ - if (!chainmode || (!strcmp(chainmode, "plain") && !ivmode)) { - chainmode = "cbc"; - ivmode = "plain"; - } - - if (strcmp(chainmode, "ecb") && !ivmode) { - ti->error = "IV mechanism required"; - return -EINVAL; - } - - cipher_api = kmalloc(CRYPTO_MAX_ALG_NAME, GFP_KERNEL); - if (!cipher_api) - goto bad_mem; - - ret = snprintf(cipher_api, CRYPTO_MAX_ALG_NAME, - "%s(%s)", chainmode, cipher); - if (ret < 0) { - kfree(cipher_api); - goto bad_mem; - } - - /* Allocate cipher */ - for_each_possible_cpu(cpu) { - ret = crypt_alloc_tfms(cc, cpu, cipher_api); - if (ret < 0) { - ti->error = "Error allocating crypto tfm"; - goto bad; - } - } - - /* Initialize and set key */ - ret = crypt_set_key(cc, key); - if (ret < 0) { - ti->error = "Error decoding and setting key"; - goto bad; - } - - /* Initialize IV */ - cc->iv_size = crypto_ablkcipher_ivsize(any_tfm(cc)); - if (cc->iv_size) - /* at least a 64 bit sector number should fit in our buffer */ - cc->iv_size = max(cc->iv_size, - (unsigned int)(sizeof(u64) / sizeof(u8))); - else if (ivmode) { - DMWARN("Selected cipher does not support IVs"); - ivmode = NULL; - } - - /* Choose ivmode, see comments at iv code. */ - if (ivmode == NULL) - cc->iv_gen_ops = NULL; - else if (strcmp(ivmode, "plain") == 0) - cc->iv_gen_ops = &crypt_iv_plain_ops; - else if (strcmp(ivmode, "plain64") == 0) - cc->iv_gen_ops = &crypt_iv_plain64_ops; - else if (strcmp(ivmode, "essiv") == 0) - cc->iv_gen_ops = &crypt_iv_essiv_ops; - else if (strcmp(ivmode, "benbi") == 0) - cc->iv_gen_ops = &crypt_iv_benbi_ops; - else if (strcmp(ivmode, "null") == 0) - cc->iv_gen_ops = &crypt_iv_null_ops; - else if (strcmp(ivmode, "lmk") == 0) { - cc->iv_gen_ops = &crypt_iv_lmk_ops; - /* Version 2 and 3 is recognised according - * to length of provided multi-key string. - * If present (version 3), last key is used as IV seed. 
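 * For instance, "aes:64-cbc-lmk" with 65 concatenated AES keys in the key
 * string yields key_parts = 65 (one more than tfms_count), which
 * crypt_iv_lmk_ctr() treats as version 3 and uses the extra key as the IV
 * seed; with exactly 64 keys it behaves as version 2.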
- */ - if (cc->key_size % cc->key_parts) - cc->key_parts++; - } else { - ret = -EINVAL; - ti->error = "Invalid IV mode"; - goto bad; - } - - /* Allocate IV */ - if (cc->iv_gen_ops && cc->iv_gen_ops->ctr) { - ret = cc->iv_gen_ops->ctr(cc, ti, ivopts); - if (ret < 0) { - ti->error = "Error creating IV"; - goto bad; - } - } - - /* Initialize IV (set keys for ESSIV etc) */ - if (cc->iv_gen_ops && cc->iv_gen_ops->init) { - ret = cc->iv_gen_ops->init(cc); - if (ret < 0) { - ti->error = "Error initialising IV"; - goto bad; - } - } - - ret = 0; -bad: - kfree(cipher_api); - return ret; - -bad_mem: - ti->error = "Cannot allocate cipher strings"; - return -ENOMEM; -} - -/* - * Construct an encryption mapping: - * <cipher> <key> <iv_offset> <dev_path> <start> - */ -static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) -{ - struct crypt_config *cc; - unsigned int key_size, opt_params; - unsigned long long tmpll; - int ret; - struct dm_arg_set as; - const char *opt_string; - char dummy; - - static struct dm_arg _args[] = { - {0, 1, "Invalid number of feature args"}, - }; - - if (argc < 5) { - ti->error = "Not enough arguments"; - return -EINVAL; - } - - key_size = strlen(argv[1]) >> 1; - - cc = kzalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL); - if (!cc) { - ti->error = "Cannot allocate encryption context"; - return -ENOMEM; - } - cc->key_size = key_size; - - ti->private = cc; - ret = crypt_ctr_cipher(ti, argv[0], argv[1]); - if (ret < 0) - goto bad; - - ret = -ENOMEM; - cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool); - if (!cc->io_pool) { - ti->error = "Cannot allocate crypt io mempool"; - goto bad; - } - - cc->dmreq_start = sizeof(struct ablkcipher_request); - cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc)); - cc->dmreq_start = ALIGN(cc->dmreq_start, crypto_tfm_ctx_alignment()); - cc->dmreq_start += crypto_ablkcipher_alignmask(any_tfm(cc)) & - ~(crypto_tfm_ctx_alignment() - 1); - - cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start + - sizeof(struct dm_crypt_request) + cc->iv_size); - if (!cc->req_pool) { - ti->error = "Cannot allocate crypt request mempool"; - goto bad; - } - - cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); - if (!cc->page_pool) { - ti->error = "Cannot allocate page mempool"; - goto bad; - } - - cc->bs = bioset_create(MIN_IOS, 0); - if (!cc->bs) { - ti->error = "Cannot allocate crypt bioset"; - goto bad; - } - - ret = -EINVAL; - if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) { - ti->error = "Invalid iv_offset sector"; - goto bad; - } - cc->iv_offset = tmpll; - - if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev)) { - ti->error = "Device lookup failed"; - goto bad; - } - - if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) { - ti->error = "Invalid device sector"; - goto bad; - } - cc->start = tmpll; - - argv += 5; - argc -= 5; - - /* Optional parameters */ - if (argc) { - as.argc = argc; - as.argv = argv; - - ret = dm_read_arg_group(_args, &as, &opt_params, &ti->error); - if (ret) - goto bad; - - opt_string = dm_shift_arg(&as); - - if (opt_params == 1 && opt_string && - !strcasecmp(opt_string, "allow_discards")) - ti->num_discard_requests = 1; - else if (opt_params) { - ret = -EINVAL; - ti->error = "Invalid feature arguments"; - goto bad; - } - } - - ret = -ENOMEM; - cc->io_queue = alloc_workqueue("kcryptd_io", - WQ_NON_REENTRANT| - WQ_MEM_RECLAIM, - 1); - if (!cc->io_queue) { - ti->error = "Couldn't create kcryptd io queue"; - goto bad; - } - - cc->crypt_queue = 
alloc_workqueue("kcryptd", - WQ_NON_REENTRANT| - WQ_CPU_INTENSIVE| - WQ_MEM_RECLAIM, - 1); - if (!cc->crypt_queue) { - ti->error = "Couldn't create kcryptd queue"; - goto bad; - } - - ti->num_flush_requests = 1; - ti->discard_zeroes_data_unsupported = 1; - - return 0; - -bad: - crypt_dtr(ti); - return ret; -} - -static int crypt_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) -{ - struct dm_crypt_io *io; - struct crypt_config *cc; - - /* - * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues. - * - for REQ_FLUSH device-mapper core ensures that no IO is in-flight - * - for REQ_DISCARD caller must use flush if IO ordering matters - */ - if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) { - cc = ti->private; - bio->bi_bdev = cc->dev->bdev; - if (bio_sectors(bio)) - bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector); - return DM_MAPIO_REMAPPED; - } - - io = crypt_io_alloc(ti, bio, dm_target_offset(ti, bio->bi_sector)); - - if (bio_data_dir(io->base_bio) == READ) { - if (kcryptd_io_read(io, GFP_NOWAIT)) - kcryptd_queue_io(io); - } else - kcryptd_queue_crypt(io); - - return DM_MAPIO_SUBMITTED; -} - -static int crypt_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) -{ - struct crypt_config *cc = ti->private; - unsigned int sz = 0; - - switch (type) { - case STATUSTYPE_INFO: - result[0] = '\0'; - break; - - case STATUSTYPE_TABLE: - DMEMIT("%s ", cc->cipher_string); - - if (cc->key_size > 0) { - if ((maxlen - sz) < ((cc->key_size << 1) + 1)) - return -ENOMEM; - - crypt_encode_key(result + sz, cc->key, cc->key_size); - sz += cc->key_size << 1; - } else { - if (sz >= maxlen) - return -ENOMEM; - result[sz++] = '-'; - } - - DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, - cc->dev->name, (unsigned long long)cc->start); - - if (ti->num_discard_requests) - DMEMIT(" 1 allow_discards"); - - break; - } - return 0; -} - -static void crypt_postsuspend(struct dm_target *ti) -{ - struct crypt_config *cc = ti->private; - - set_bit(DM_CRYPT_SUSPENDED, &cc->flags); -} - -static int crypt_preresume(struct dm_target *ti) -{ - struct crypt_config *cc = ti->private; - - if (!test_bit(DM_CRYPT_KEY_VALID, &cc->flags)) { - DMERR("aborting resume - crypt key is not set."); - return -EAGAIN; - } - - return 0; -} - -static void crypt_resume(struct dm_target *ti) -{ - struct crypt_config *cc = ti->private; - - clear_bit(DM_CRYPT_SUSPENDED, &cc->flags); -} - -/* Message interface - * key set <key> - * key wipe - */ -static int crypt_message(struct dm_target *ti, unsigned argc, char **argv) -{ - struct crypt_config *cc = ti->private; - int ret = -EINVAL; - - if (argc < 2) - goto error; - - if (!strcasecmp(argv[0], "key")) { - if (!test_bit(DM_CRYPT_SUSPENDED, &cc->flags)) { - DMWARN("not suspended during key manipulation."); - return -EINVAL; - } - if (argc == 3 && !strcasecmp(argv[1], "set")) { - ret = crypt_set_key(cc, argv[2]); - if (ret) - return ret; - if (cc->iv_gen_ops && cc->iv_gen_ops->init) - ret = cc->iv_gen_ops->init(cc); - return ret; - } - if (argc == 2 && !strcasecmp(argv[1], "wipe")) { - if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) { - ret = cc->iv_gen_ops->wipe(cc); - if (ret) - return ret; - } - return crypt_wipe_key(cc); - } - } - -error: - DMWARN("unrecognised message received."); - return -EINVAL; -} - -static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct crypt_config *cc = ti->private; - struct request_queue *q = 
bdev_get_queue(cc->dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = cc->dev->bdev; - bvm->bi_sector = cc->start + dm_target_offset(ti, bvm->bi_sector); - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - -static int crypt_iterate_devices(struct dm_target *ti, - iterate_devices_callout_fn fn, void *data) -{ - struct crypt_config *cc = ti->private; - - return fn(ti, cc->dev, cc->start, ti->len, data); -} - -static struct target_type crypt_target = { - .name = "crypt", - .version = {1, 11, 0}, - .module = THIS_MODULE, - .ctr = crypt_ctr, - .dtr = crypt_dtr, - .map = crypt_map, - .status = crypt_status, - .postsuspend = crypt_postsuspend, - .preresume = crypt_preresume, - .resume = crypt_resume, - .message = crypt_message, - .merge = crypt_merge, - .iterate_devices = crypt_iterate_devices, -}; - -static int __init dm_crypt_init(void) -{ - int r; - - _crypt_io_pool = KMEM_CACHE(dm_crypt_io, 0); - if (!_crypt_io_pool) - return -ENOMEM; - - r = dm_register_target(&crypt_target); - if (r < 0) { - DMERR("register failed %d", r); - kmem_cache_destroy(_crypt_io_pool); - } - - return r; -} - -static void __exit dm_crypt_exit(void) -{ - dm_unregister_target(&crypt_target); - kmem_cache_destroy(_crypt_io_pool); -} - -module_init(dm_crypt_init); -module_exit(dm_crypt_exit); - -MODULE_AUTHOR("Christophe Saout <christophe@saout.de>"); -MODULE_DESCRIPTION(DM_NAME " target for transparent encryption / decryption"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm-delay.c b/ANDROID_3.4.5/drivers/md/dm-delay.c deleted file mode 100644 index 2dc22ddd..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-delay.c +++ /dev/null @@ -1,397 +0,0 @@ -/* - * Copyright (C) 2005-2007 Red Hat GmbH - * - * A target that delays reads and/or writes and can send - * them to different devices. - * - * This file is released under the GPL. 
- */ - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/blkdev.h> -#include <linux/bio.h> -#include <linux/slab.h> - -#include <linux/device-mapper.h> - -#define DM_MSG_PREFIX "delay" - -struct delay_c { - struct timer_list delay_timer; - struct mutex timer_lock; - struct work_struct flush_expired_bios; - struct list_head delayed_bios; - atomic_t may_delay; - mempool_t *delayed_pool; - - struct dm_dev *dev_read; - sector_t start_read; - unsigned read_delay; - unsigned reads; - - struct dm_dev *dev_write; - sector_t start_write; - unsigned write_delay; - unsigned writes; -}; - -struct dm_delay_info { - struct delay_c *context; - struct list_head list; - struct bio *bio; - unsigned long expires; -}; - -static DEFINE_MUTEX(delayed_bios_lock); - -static struct workqueue_struct *kdelayd_wq; -static struct kmem_cache *delayed_cache; - -static void handle_delayed_timer(unsigned long data) -{ - struct delay_c *dc = (struct delay_c *)data; - - queue_work(kdelayd_wq, &dc->flush_expired_bios); -} - -static void queue_timeout(struct delay_c *dc, unsigned long expires) -{ - mutex_lock(&dc->timer_lock); - - if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires) - mod_timer(&dc->delay_timer, expires); - - mutex_unlock(&dc->timer_lock); -} - -static void flush_bios(struct bio *bio) -{ - struct bio *n; - - while (bio) { - n = bio->bi_next; - bio->bi_next = NULL; - generic_make_request(bio); - bio = n; - } -} - -static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all) -{ - struct dm_delay_info *delayed, *next; - unsigned long next_expires = 0; - int start_timer = 0; - struct bio_list flush_bios = { }; - - mutex_lock(&delayed_bios_lock); - list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) { - if (flush_all || time_after_eq(jiffies, delayed->expires)) { - list_del(&delayed->list); - bio_list_add(&flush_bios, delayed->bio); - if ((bio_data_dir(delayed->bio) == WRITE)) - delayed->context->writes--; - else - delayed->context->reads--; - mempool_free(delayed, dc->delayed_pool); - continue; - } - - if (!start_timer) { - start_timer = 1; - next_expires = delayed->expires; - } else - next_expires = min(next_expires, delayed->expires); - } - - mutex_unlock(&delayed_bios_lock); - - if (start_timer) - queue_timeout(dc, next_expires); - - return bio_list_get(&flush_bios); -} - -static void flush_expired_bios(struct work_struct *work) -{ - struct delay_c *dc; - - dc = container_of(work, struct delay_c, flush_expired_bios); - flush_bios(flush_delayed_bios(dc, 0)); -} - -/* - * Mapping parameters: - * <device> <offset> <delay> [<write_device> <write_offset> <write_delay>] - * - * With separate write parameters, the first set is only used for reads. - * Delays are specified in milliseconds. 
- */ -static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) -{ - struct delay_c *dc; - unsigned long long tmpll; - char dummy; - - if (argc != 3 && argc != 6) { - ti->error = "requires exactly 3 or 6 arguments"; - return -EINVAL; - } - - dc = kmalloc(sizeof(*dc), GFP_KERNEL); - if (!dc) { - ti->error = "Cannot allocate context"; - return -ENOMEM; - } - - dc->reads = dc->writes = 0; - - if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) { - ti->error = "Invalid device sector"; - goto bad; - } - dc->start_read = tmpll; - - if (sscanf(argv[2], "%u%c", &dc->read_delay, &dummy) != 1) { - ti->error = "Invalid delay"; - goto bad; - } - - if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), - &dc->dev_read)) { - ti->error = "Device lookup failed"; - goto bad; - } - - dc->dev_write = NULL; - if (argc == 3) - goto out; - - if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) { - ti->error = "Invalid write device sector"; - goto bad_dev_read; - } - dc->start_write = tmpll; - - if (sscanf(argv[5], "%u%c", &dc->write_delay, &dummy) != 1) { - ti->error = "Invalid write delay"; - goto bad_dev_read; - } - - if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), - &dc->dev_write)) { - ti->error = "Write device lookup failed"; - goto bad_dev_read; - } - -out: - dc->delayed_pool = mempool_create_slab_pool(128, delayed_cache); - if (!dc->delayed_pool) { - DMERR("Couldn't create delayed bio pool."); - goto bad_dev_write; - } - - setup_timer(&dc->delay_timer, handle_delayed_timer, (unsigned long)dc); - - INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); - INIT_LIST_HEAD(&dc->delayed_bios); - mutex_init(&dc->timer_lock); - atomic_set(&dc->may_delay, 1); - - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; - ti->private = dc; - return 0; - -bad_dev_write: - if (dc->dev_write) - dm_put_device(ti, dc->dev_write); -bad_dev_read: - dm_put_device(ti, dc->dev_read); -bad: - kfree(dc); - return -EINVAL; -} - -static void delay_dtr(struct dm_target *ti) -{ - struct delay_c *dc = ti->private; - - flush_workqueue(kdelayd_wq); - - dm_put_device(ti, dc->dev_read); - - if (dc->dev_write) - dm_put_device(ti, dc->dev_write); - - mempool_destroy(dc->delayed_pool); - kfree(dc); -} - -static int delay_bio(struct delay_c *dc, int delay, struct bio *bio) -{ - struct dm_delay_info *delayed; - unsigned long expires = 0; - - if (!delay || !atomic_read(&dc->may_delay)) - return 1; - - delayed = mempool_alloc(dc->delayed_pool, GFP_NOIO); - - delayed->context = dc; - delayed->bio = bio; - delayed->expires = expires = jiffies + (delay * HZ / 1000); - - mutex_lock(&delayed_bios_lock); - - if (bio_data_dir(bio) == WRITE) - dc->writes++; - else - dc->reads++; - - list_add_tail(&delayed->list, &dc->delayed_bios); - - mutex_unlock(&delayed_bios_lock); - - queue_timeout(dc, expires); - - return 0; -} - -static void delay_presuspend(struct dm_target *ti) -{ - struct delay_c *dc = ti->private; - - atomic_set(&dc->may_delay, 0); - del_timer_sync(&dc->delay_timer); - flush_bios(flush_delayed_bios(dc, 1)); -} - -static void delay_resume(struct dm_target *ti) -{ - struct delay_c *dc = ti->private; - - atomic_set(&dc->may_delay, 1); -} - -static int delay_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) -{ - struct delay_c *dc = ti->private; - - if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) { - bio->bi_bdev = dc->dev_write->bdev; - if (bio_sectors(bio)) - bio->bi_sector = dc->start_write + - dm_target_offset(ti, bio->bi_sector); - - return delay_bio(dc, dc->write_delay, 
bio); - } - - bio->bi_bdev = dc->dev_read->bdev; - bio->bi_sector = dc->start_read + dm_target_offset(ti, bio->bi_sector); - - return delay_bio(dc, dc->read_delay, bio); -} - -static int delay_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) -{ - struct delay_c *dc = ti->private; - int sz = 0; - - switch (type) { - case STATUSTYPE_INFO: - DMEMIT("%u %u", dc->reads, dc->writes); - break; - - case STATUSTYPE_TABLE: - DMEMIT("%s %llu %u", dc->dev_read->name, - (unsigned long long) dc->start_read, - dc->read_delay); - if (dc->dev_write) - DMEMIT(" %s %llu %u", dc->dev_write->name, - (unsigned long long) dc->start_write, - dc->write_delay); - break; - } - - return 0; -} - -static int delay_iterate_devices(struct dm_target *ti, - iterate_devices_callout_fn fn, void *data) -{ - struct delay_c *dc = ti->private; - int ret = 0; - - ret = fn(ti, dc->dev_read, dc->start_read, ti->len, data); - if (ret) - goto out; - - if (dc->dev_write) - ret = fn(ti, dc->dev_write, dc->start_write, ti->len, data); - -out: - return ret; -} - -static struct target_type delay_target = { - .name = "delay", - .version = {1, 1, 0}, - .module = THIS_MODULE, - .ctr = delay_ctr, - .dtr = delay_dtr, - .map = delay_map, - .presuspend = delay_presuspend, - .resume = delay_resume, - .status = delay_status, - .iterate_devices = delay_iterate_devices, -}; - -static int __init dm_delay_init(void) -{ - int r = -ENOMEM; - - kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0); - if (!kdelayd_wq) { - DMERR("Couldn't start kdelayd"); - goto bad_queue; - } - - delayed_cache = KMEM_CACHE(dm_delay_info, 0); - if (!delayed_cache) { - DMERR("Couldn't create delayed bio cache."); - goto bad_memcache; - } - - r = dm_register_target(&delay_target); - if (r < 0) { - DMERR("register failed %d", r); - goto bad_register; - } - - return 0; - -bad_register: - kmem_cache_destroy(delayed_cache); -bad_memcache: - destroy_workqueue(kdelayd_wq); -bad_queue: - return r; -} - -static void __exit dm_delay_exit(void) -{ - dm_unregister_target(&delay_target); - kmem_cache_destroy(delayed_cache); - destroy_workqueue(kdelayd_wq); -} - -/* Module hooks */ -module_init(dm_delay_init); -module_exit(dm_delay_exit); - -MODULE_DESCRIPTION(DM_NAME " delay target"); -MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm-exception-store.c b/ANDROID_3.4.5/drivers/md/dm-exception-store.c deleted file mode 100644 index aa70f7d4..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-exception-store.c +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Copyright (C) 2001-2002 Sistina Software (UK) Limited. - * Copyright (C) 2006-2008 Red Hat GmbH - * - * This file is released under the GPL. 
- */ - -#include "dm-exception-store.h" - -#include <linux/ctype.h> -#include <linux/mm.h> -#include <linux/pagemap.h> -#include <linux/vmalloc.h> -#include <linux/module.h> -#include <linux/slab.h> - -#define DM_MSG_PREFIX "snapshot exception stores" - -static LIST_HEAD(_exception_store_types); -static DEFINE_SPINLOCK(_lock); - -static struct dm_exception_store_type *__find_exception_store_type(const char *name) -{ - struct dm_exception_store_type *type; - - list_for_each_entry(type, &_exception_store_types, list) - if (!strcmp(name, type->name)) - return type; - - return NULL; -} - -static struct dm_exception_store_type *_get_exception_store_type(const char *name) -{ - struct dm_exception_store_type *type; - - spin_lock(&_lock); - - type = __find_exception_store_type(name); - - if (type && !try_module_get(type->module)) - type = NULL; - - spin_unlock(&_lock); - - return type; -} - -/* - * get_type - * @type_name - * - * Attempt to retrieve the dm_exception_store_type by name. If not already - * available, attempt to load the appropriate module. - * - * Exstore modules are named "dm-exstore-" followed by the 'type_name'. - * Modules may contain multiple types. - * This function will first try the module "dm-exstore-<type_name>", - * then truncate 'type_name' on the last '-' and try again. - * - * For example, if type_name was "clustered-shared", it would search - * 'dm-exstore-clustered-shared' then 'dm-exstore-clustered'. - * - * 'dm-exception-store-<type_name>' is too long of a name in my - * opinion, which is why I've chosen to have the files - * containing exception store implementations be 'dm-exstore-<type_name>'. - * If you want your module to be autoloaded, you will follow this - * naming convention. - * - * Returns: dm_exception_store_type* on success, NULL on failure - */ -static struct dm_exception_store_type *get_type(const char *type_name) -{ - char *p, *type_name_dup; - struct dm_exception_store_type *type; - - type = _get_exception_store_type(type_name); - if (type) - return type; - - type_name_dup = kstrdup(type_name, GFP_KERNEL); - if (!type_name_dup) { - DMERR("No memory left to attempt load for \"%s\"", type_name); - return NULL; - } - - while (request_module("dm-exstore-%s", type_name_dup) || - !(type = _get_exception_store_type(type_name))) { - p = strrchr(type_name_dup, '-'); - if (!p) - break; - p[0] = '\0'; - } - - if (!type) - DMWARN("Module for exstore type \"%s\" not found.", type_name); - - kfree(type_name_dup); - - return type; -} - -static void put_type(struct dm_exception_store_type *type) -{ - spin_lock(&_lock); - module_put(type->module); - spin_unlock(&_lock); -} - -int dm_exception_store_type_register(struct dm_exception_store_type *type) -{ - int r = 0; - - spin_lock(&_lock); - if (!__find_exception_store_type(type->name)) - list_add(&type->list, &_exception_store_types); - else - r = -EEXIST; - spin_unlock(&_lock); - - return r; -} -EXPORT_SYMBOL(dm_exception_store_type_register); - -int dm_exception_store_type_unregister(struct dm_exception_store_type *type) -{ - spin_lock(&_lock); - - if (!__find_exception_store_type(type->name)) { - spin_unlock(&_lock); - return -EINVAL; - } - - list_del(&type->list); - - spin_unlock(&_lock); - - return 0; -} -EXPORT_SYMBOL(dm_exception_store_type_unregister); - -static int set_chunk_size(struct dm_exception_store *store, - const char *chunk_size_arg, char **error) -{ - unsigned long chunk_size_ulong; - char *value; - - chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10); - if (*chunk_size_arg == '\0' || 
*value != '\0' || - chunk_size_ulong > UINT_MAX) { - *error = "Invalid chunk size"; - return -EINVAL; - } - - if (!chunk_size_ulong) { - store->chunk_size = store->chunk_mask = store->chunk_shift = 0; - return 0; - } - - return dm_exception_store_set_chunk_size(store, - (unsigned) chunk_size_ulong, - error); -} - -int dm_exception_store_set_chunk_size(struct dm_exception_store *store, - unsigned chunk_size, - char **error) -{ - /* Check chunk_size is a power of 2 */ - if (!is_power_of_2(chunk_size)) { - *error = "Chunk size is not a power of 2"; - return -EINVAL; - } - - /* Validate the chunk size against the device block size */ - if (chunk_size % - (bdev_logical_block_size(dm_snap_cow(store->snap)->bdev) >> 9) || - chunk_size % - (bdev_logical_block_size(dm_snap_origin(store->snap)->bdev) >> 9)) { - *error = "Chunk size is not a multiple of device blocksize"; - return -EINVAL; - } - - if (chunk_size > INT_MAX >> SECTOR_SHIFT) { - *error = "Chunk size is too high"; - return -EINVAL; - } - - store->chunk_size = chunk_size; - store->chunk_mask = chunk_size - 1; - store->chunk_shift = ffs(chunk_size) - 1; - - return 0; -} - -int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, - struct dm_snapshot *snap, - unsigned *args_used, - struct dm_exception_store **store) -{ - int r = 0; - struct dm_exception_store_type *type = NULL; - struct dm_exception_store *tmp_store; - char persistent; - - if (argc < 2) { - ti->error = "Insufficient exception store arguments"; - return -EINVAL; - } - - tmp_store = kmalloc(sizeof(*tmp_store), GFP_KERNEL); - if (!tmp_store) { - ti->error = "Exception store allocation failed"; - return -ENOMEM; - } - - persistent = toupper(*argv[0]); - if (persistent == 'P') - type = get_type("P"); - else if (persistent == 'N') - type = get_type("N"); - else { - ti->error = "Persistent flag is not P or N"; - r = -EINVAL; - goto bad_type; - } - - if (!type) { - ti->error = "Exception store type not recognised"; - r = -EINVAL; - goto bad_type; - } - - tmp_store->type = type; - tmp_store->snap = snap; - - r = set_chunk_size(tmp_store, argv[1], &ti->error); - if (r) - goto bad; - - r = type->ctr(tmp_store, 0, NULL); - if (r) { - ti->error = "Exception store type constructor failed"; - goto bad; - } - - *args_used = 2; - *store = tmp_store; - return 0; - -bad: - put_type(type); -bad_type: - kfree(tmp_store); - return r; -} -EXPORT_SYMBOL(dm_exception_store_create); - -void dm_exception_store_destroy(struct dm_exception_store *store) -{ - store->type->dtr(store); - put_type(store->type); - kfree(store); -} -EXPORT_SYMBOL(dm_exception_store_destroy); - -int dm_exception_store_init(void) -{ - int r; - - r = dm_transient_snapshot_init(); - if (r) { - DMERR("Unable to register transient exception store type."); - goto transient_fail; - } - - r = dm_persistent_snapshot_init(); - if (r) { - DMERR("Unable to register persistent exception store type"); - goto persistent_fail; - } - - return 0; - -persistent_fail: - dm_transient_snapshot_exit(); -transient_fail: - return r; -} - -void dm_exception_store_exit(void) -{ - dm_persistent_snapshot_exit(); - dm_transient_snapshot_exit(); -} diff --git a/ANDROID_3.4.5/drivers/md/dm-exception-store.h b/ANDROID_3.4.5/drivers/md/dm-exception-store.h deleted file mode 100644 index 0b253624..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-exception-store.h +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Copyright (C) 2001-2002 Sistina Software (UK) Limited. - * Copyright (C) 2008 Red Hat, Inc. All rights reserved. 
- * - * Device-mapper snapshot exception store. - * - * This file is released under the GPL. - */ - -#ifndef _LINUX_DM_EXCEPTION_STORE -#define _LINUX_DM_EXCEPTION_STORE - -#include <linux/blkdev.h> -#include <linux/device-mapper.h> - -/* - * The snapshot code deals with largish chunks of the disk at a - * time. Typically 32k - 512k. - */ -typedef sector_t chunk_t; - -/* - * An exception is used where an old chunk of data has been - * replaced by a new one. - * If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number - * of chunks that follow contiguously. Remaining bits hold the number of the - * chunk within the device. - */ -struct dm_exception { - struct list_head hash_list; - - chunk_t old_chunk; - chunk_t new_chunk; -}; - -/* - * Abstraction to handle the meta/layout of exception stores (the - * COW device). - */ -struct dm_exception_store; -struct dm_exception_store_type { - const char *name; - struct module *module; - - int (*ctr) (struct dm_exception_store *store, - unsigned argc, char **argv); - - /* - * Destroys this object when you've finished with it. - */ - void (*dtr) (struct dm_exception_store *store); - - /* - * The target shouldn't read the COW device until this is - * called. As exceptions are read from the COW, they are - * reported back via the callback. - */ - int (*read_metadata) (struct dm_exception_store *store, - int (*callback)(void *callback_context, - chunk_t old, chunk_t new), - void *callback_context); - - /* - * Find somewhere to store the next exception. - */ - int (*prepare_exception) (struct dm_exception_store *store, - struct dm_exception *e); - - /* - * Update the metadata with this exception. - */ - void (*commit_exception) (struct dm_exception_store *store, - struct dm_exception *e, - void (*callback) (void *, int success), - void *callback_context); - - /* - * Returns 0 if the exception store is empty. - * - * If there are exceptions still to be merged, sets - * *last_old_chunk and *last_new_chunk to the most recent - * still-to-be-merged chunk and returns the number of - * consecutive previous ones. - */ - int (*prepare_merge) (struct dm_exception_store *store, - chunk_t *last_old_chunk, chunk_t *last_new_chunk); - - /* - * Clear the last n exceptions. - * nr_merged must be <= the value returned by prepare_merge. - */ - int (*commit_merge) (struct dm_exception_store *store, int nr_merged); - - /* - * The snapshot is invalid, note this in the metadata. - */ - void (*drop_snapshot) (struct dm_exception_store *store); - - unsigned (*status) (struct dm_exception_store *store, - status_type_t status, char *result, - unsigned maxlen); - - /* - * Return how full the snapshot is. - */ - void (*usage) (struct dm_exception_store *store, - sector_t *total_sectors, sector_t *sectors_allocated, - sector_t *metadata_sectors); - - /* For internal device-mapper use only. */ - struct list_head list; -}; - -struct dm_snapshot; - -struct dm_exception_store { - struct dm_exception_store_type *type; - struct dm_snapshot *snap; - - /* Size of data blocks saved - must be a power of 2 */ - unsigned chunk_size; - unsigned chunk_mask; - unsigned chunk_shift; - - void *context; -}; - -/* - * Obtain the origin or cow device used by a given snapshot. 
- */ -struct dm_dev *dm_snap_origin(struct dm_snapshot *snap); -struct dm_dev *dm_snap_cow(struct dm_snapshot *snap); - -/* - * Funtions to manipulate consecutive chunks - */ -# if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64) -# define DM_CHUNK_CONSECUTIVE_BITS 8 -# define DM_CHUNK_NUMBER_BITS 56 - -static inline chunk_t dm_chunk_number(chunk_t chunk) -{ - return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL); -} - -static inline unsigned dm_consecutive_chunk_count(struct dm_exception *e) -{ - return e->new_chunk >> DM_CHUNK_NUMBER_BITS; -} - -static inline void dm_consecutive_chunk_count_inc(struct dm_exception *e) -{ - e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS); - - BUG_ON(!dm_consecutive_chunk_count(e)); -} - -static inline void dm_consecutive_chunk_count_dec(struct dm_exception *e) -{ - BUG_ON(!dm_consecutive_chunk_count(e)); - - e->new_chunk -= (1ULL << DM_CHUNK_NUMBER_BITS); -} - -# else -# define DM_CHUNK_CONSECUTIVE_BITS 0 - -static inline chunk_t dm_chunk_number(chunk_t chunk) -{ - return chunk; -} - -static inline unsigned dm_consecutive_chunk_count(struct dm_exception *e) -{ - return 0; -} - -static inline void dm_consecutive_chunk_count_inc(struct dm_exception *e) -{ -} - -static inline void dm_consecutive_chunk_count_dec(struct dm_exception *e) -{ -} - -# endif - -/* - * Return the number of sectors in the device. - */ -static inline sector_t get_dev_size(struct block_device *bdev) -{ - return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; -} - -static inline chunk_t sector_to_chunk(struct dm_exception_store *store, - sector_t sector) -{ - return sector >> store->chunk_shift; -} - -int dm_exception_store_type_register(struct dm_exception_store_type *type); -int dm_exception_store_type_unregister(struct dm_exception_store_type *type); - -int dm_exception_store_set_chunk_size(struct dm_exception_store *store, - unsigned chunk_size, - char **error); - -int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, - struct dm_snapshot *snap, - unsigned *args_used, - struct dm_exception_store **store); -void dm_exception_store_destroy(struct dm_exception_store *store); - -int dm_exception_store_init(void); -void dm_exception_store_exit(void); - -/* - * Two exception store implementations. - */ -int dm_persistent_snapshot_init(void); -void dm_persistent_snapshot_exit(void); - -int dm_transient_snapshot_init(void); -void dm_transient_snapshot_exit(void); - -#endif /* _LINUX_DM_EXCEPTION_STORE */ diff --git a/ANDROID_3.4.5/drivers/md/dm-flakey.c b/ANDROID_3.4.5/drivers/md/dm-flakey.c deleted file mode 100644 index ac49c01f..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-flakey.c +++ /dev/null @@ -1,442 +0,0 @@ -/* - * Copyright (C) 2003 Sistina Software (UK) Limited. - * Copyright (C) 2004, 2010-2011 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. - */ - -#include <linux/device-mapper.h> - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/blkdev.h> -#include <linux/bio.h> -#include <linux/slab.h> - -#define DM_MSG_PREFIX "flakey" - -#define all_corrupt_bio_flags_match(bio, fc) \ - (((bio)->bi_rw & (fc)->corrupt_bio_flags) == (fc)->corrupt_bio_flags) - -/* - * Flakey: Used for testing only, simulates intermittent, - * catastrophic device failure. 
- */ -struct flakey_c { - struct dm_dev *dev; - unsigned long start_time; - sector_t start; - unsigned up_interval; - unsigned down_interval; - unsigned long flags; - unsigned corrupt_bio_byte; - unsigned corrupt_bio_rw; - unsigned corrupt_bio_value; - unsigned corrupt_bio_flags; -}; - -enum feature_flag_bits { - DROP_WRITES -}; - -static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, - struct dm_target *ti) -{ - int r; - unsigned argc; - const char *arg_name; - - static struct dm_arg _args[] = { - {0, 6, "Invalid number of feature args"}, - {1, UINT_MAX, "Invalid corrupt bio byte"}, - {0, 255, "Invalid corrupt value to write into bio byte (0-255)"}, - {0, UINT_MAX, "Invalid corrupt bio flags mask"}, - }; - - /* No feature arguments supplied. */ - if (!as->argc) - return 0; - - r = dm_read_arg_group(_args, as, &argc, &ti->error); - if (r) - return r; - - while (argc) { - arg_name = dm_shift_arg(as); - argc--; - - /* - * drop_writes - */ - if (!strcasecmp(arg_name, "drop_writes")) { - if (test_and_set_bit(DROP_WRITES, &fc->flags)) { - ti->error = "Feature drop_writes duplicated"; - return -EINVAL; - } - - continue; - } - - /* - * corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags> - */ - if (!strcasecmp(arg_name, "corrupt_bio_byte")) { - if (!argc) { - ti->error = "Feature corrupt_bio_byte requires parameters"; - return -EINVAL; - } - - r = dm_read_arg(_args + 1, as, &fc->corrupt_bio_byte, &ti->error); - if (r) - return r; - argc--; - - /* - * Direction r or w? - */ - arg_name = dm_shift_arg(as); - if (!strcasecmp(arg_name, "w")) - fc->corrupt_bio_rw = WRITE; - else if (!strcasecmp(arg_name, "r")) - fc->corrupt_bio_rw = READ; - else { - ti->error = "Invalid corrupt bio direction (r or w)"; - return -EINVAL; - } - argc--; - - /* - * Value of byte (0-255) to write in place of correct one. - */ - r = dm_read_arg(_args + 2, as, &fc->corrupt_bio_value, &ti->error); - if (r) - return r; - argc--; - - /* - * Only corrupt bios with these flags set. - */ - r = dm_read_arg(_args + 3, as, &fc->corrupt_bio_flags, &ti->error); - if (r) - return r; - argc--; - - continue; - } - - ti->error = "Unrecognised flakey feature requested"; - return -EINVAL; - } - - if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) { - ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set"; - return -EINVAL; - } - - return 0; -} - -/* - * Construct a flakey mapping: - * <dev_path> <offset> <up interval> <down interval> [<#feature args> [<arg>]*] - * - * Feature args: - * [drop_writes] - * [corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags>] - * - * Nth_byte starts from 1 for the first byte. - * Direction is r for READ or w for WRITE. - * bio_flags is ignored if 0. 
- */ -static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) -{ - static struct dm_arg _args[] = { - {0, UINT_MAX, "Invalid up interval"}, - {0, UINT_MAX, "Invalid down interval"}, - }; - - int r; - struct flakey_c *fc; - unsigned long long tmpll; - struct dm_arg_set as; - const char *devname; - char dummy; - - as.argc = argc; - as.argv = argv; - - if (argc < 4) { - ti->error = "Invalid argument count"; - return -EINVAL; - } - - fc = kzalloc(sizeof(*fc), GFP_KERNEL); - if (!fc) { - ti->error = "Cannot allocate linear context"; - return -ENOMEM; - } - fc->start_time = jiffies; - - devname = dm_shift_arg(&as); - - if (sscanf(dm_shift_arg(&as), "%llu%c", &tmpll, &dummy) != 1) { - ti->error = "Invalid device sector"; - goto bad; - } - fc->start = tmpll; - - r = dm_read_arg(_args, &as, &fc->up_interval, &ti->error); - if (r) - goto bad; - - r = dm_read_arg(_args, &as, &fc->down_interval, &ti->error); - if (r) - goto bad; - - if (!(fc->up_interval + fc->down_interval)) { - ti->error = "Total (up + down) interval is zero"; - goto bad; - } - - if (fc->up_interval + fc->down_interval < fc->up_interval) { - ti->error = "Interval overflow"; - goto bad; - } - - r = parse_features(&as, fc, ti); - if (r) - goto bad; - - if (dm_get_device(ti, devname, dm_table_get_mode(ti->table), &fc->dev)) { - ti->error = "Device lookup failed"; - goto bad; - } - - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; - ti->private = fc; - return 0; - -bad: - kfree(fc); - return -EINVAL; -} - -static void flakey_dtr(struct dm_target *ti) -{ - struct flakey_c *fc = ti->private; - - dm_put_device(ti, fc->dev); - kfree(fc); -} - -static sector_t flakey_map_sector(struct dm_target *ti, sector_t bi_sector) -{ - struct flakey_c *fc = ti->private; - - return fc->start + dm_target_offset(ti, bi_sector); -} - -static void flakey_map_bio(struct dm_target *ti, struct bio *bio) -{ - struct flakey_c *fc = ti->private; - - bio->bi_bdev = fc->dev->bdev; - if (bio_sectors(bio)) - bio->bi_sector = flakey_map_sector(ti, bio->bi_sector); -} - -static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) -{ - unsigned bio_bytes = bio_cur_bytes(bio); - char *data = bio_data(bio); - - /* - * Overwrite the Nth byte of the data returned. - */ - if (data && bio_bytes >= fc->corrupt_bio_byte) { - data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value; - - DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " - "(rw=%c bi_rw=%lu bi_sector=%llu cur_bytes=%u)\n", - bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, - (bio_data_dir(bio) == WRITE) ? 'w' : 'r', - bio->bi_rw, (unsigned long long)bio->bi_sector, bio_bytes); - } -} - -static int flakey_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) -{ - struct flakey_c *fc = ti->private; - unsigned elapsed; - - /* Are we alive ? */ - elapsed = (jiffies - fc->start_time) / HZ; - if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) { - /* - * Flag this bio as submitted while down. - */ - map_context->ll = 1; - - /* - * Map reads as normal. - */ - if (bio_data_dir(bio) == READ) - goto map_bio; - - /* - * Drop writes? - */ - if (test_bit(DROP_WRITES, &fc->flags)) { - bio_endio(bio, 0); - return DM_MAPIO_SUBMITTED; - } - - /* - * Corrupt matching writes. - */ - if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == WRITE)) { - if (all_corrupt_bio_flags_match(bio, fc)) - corrupt_bio_data(bio, fc); - goto map_bio; - } - - /* - * By default, error all I/O. 
- */ - return -EIO; - } - -map_bio: - flakey_map_bio(ti, bio); - - return DM_MAPIO_REMAPPED; -} - -static int flakey_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) -{ - struct flakey_c *fc = ti->private; - unsigned bio_submitted_while_down = map_context->ll; - - /* - * Corrupt successful READs while in down state. - * If flags were specified, only corrupt those that match. - */ - if (fc->corrupt_bio_byte && !error && bio_submitted_while_down && - (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) && - all_corrupt_bio_flags_match(bio, fc)) - corrupt_bio_data(bio, fc); - - return error; -} - -static int flakey_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) -{ - unsigned sz = 0; - struct flakey_c *fc = ti->private; - unsigned drop_writes; - - switch (type) { - case STATUSTYPE_INFO: - result[0] = '\0'; - break; - - case STATUSTYPE_TABLE: - DMEMIT("%s %llu %u %u ", fc->dev->name, - (unsigned long long)fc->start, fc->up_interval, - fc->down_interval); - - drop_writes = test_bit(DROP_WRITES, &fc->flags); - DMEMIT("%u ", drop_writes + (fc->corrupt_bio_byte > 0) * 5); - - if (drop_writes) - DMEMIT("drop_writes "); - - if (fc->corrupt_bio_byte) - DMEMIT("corrupt_bio_byte %u %c %u %u ", - fc->corrupt_bio_byte, - (fc->corrupt_bio_rw == WRITE) ? 'w' : 'r', - fc->corrupt_bio_value, fc->corrupt_bio_flags); - - break; - } - return 0; -} - -static int flakey_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg) -{ - struct flakey_c *fc = ti->private; - struct dm_dev *dev = fc->dev; - int r = 0; - - /* - * Only pass ioctls through if the device sizes match exactly. - */ - if (fc->start || - ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT) - r = scsi_verify_blk_ioctl(NULL, cmd); - - return r ? 
: __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg); -} - -static int flakey_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct flakey_c *fc = ti->private; - struct request_queue *q = bdev_get_queue(fc->dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = fc->dev->bdev; - bvm->bi_sector = flakey_map_sector(ti, bvm->bi_sector); - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - -static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) -{ - struct flakey_c *fc = ti->private; - - return fn(ti, fc->dev, fc->start, ti->len, data); -} - -static struct target_type flakey_target = { - .name = "flakey", - .version = {1, 2, 0}, - .module = THIS_MODULE, - .ctr = flakey_ctr, - .dtr = flakey_dtr, - .map = flakey_map, - .end_io = flakey_end_io, - .status = flakey_status, - .ioctl = flakey_ioctl, - .merge = flakey_merge, - .iterate_devices = flakey_iterate_devices, -}; - -static int __init dm_flakey_init(void) -{ - int r = dm_register_target(&flakey_target); - - if (r < 0) - DMERR("register failed %d", r); - - return r; -} - -static void __exit dm_flakey_exit(void) -{ - dm_unregister_target(&flakey_target); -} - -/* Module hooks */ -module_init(dm_flakey_init); -module_exit(dm_flakey_exit); - -MODULE_DESCRIPTION(DM_NAME " flakey target"); -MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm-io.c b/ANDROID_3.4.5/drivers/md/dm-io.c deleted file mode 100644 index ea5dd289..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-io.c +++ /dev/null @@ -1,523 +0,0 @@ -/* - * Copyright (C) 2003 Sistina Software - * Copyright (C) 2006 Red Hat GmbH - * - * This file is released under the GPL. - */ - -#include "dm.h" - -#include <linux/device-mapper.h> - -#include <linux/bio.h> -#include <linux/mempool.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/dm-io.h> - -#define DM_MSG_PREFIX "io" - -#define DM_IO_MAX_REGIONS BITS_PER_LONG -#define MIN_IOS 16 -#define MIN_BIOS 16 - -struct dm_io_client { - mempool_t *pool; - struct bio_set *bios; -}; - -/* - * Aligning 'struct io' reduces the number of bits required to store - * its address. Refer to store_io_and_region_in_bio() below. - */ -struct io { - unsigned long error_bits; - atomic_t count; - struct task_struct *sleeper; - struct dm_io_client *client; - io_notify_fn callback; - void *context; - void *vma_invalidate_address; - unsigned long vma_invalidate_size; -} __attribute__((aligned(DM_IO_MAX_REGIONS))); - -static struct kmem_cache *_dm_io_cache; - -/* - * Create a client with mempool and bioset. - */ -struct dm_io_client *dm_io_client_create(void) -{ - struct dm_io_client *client; - - client = kmalloc(sizeof(*client), GFP_KERNEL); - if (!client) - return ERR_PTR(-ENOMEM); - - client->pool = mempool_create_slab_pool(MIN_IOS, _dm_io_cache); - if (!client->pool) - goto bad; - - client->bios = bioset_create(MIN_BIOS, 0); - if (!client->bios) - goto bad; - - return client; - - bad: - if (client->pool) - mempool_destroy(client->pool); - kfree(client); - return ERR_PTR(-ENOMEM); -} -EXPORT_SYMBOL(dm_io_client_create); - -void dm_io_client_destroy(struct dm_io_client *client) -{ - mempool_destroy(client->pool); - bioset_free(client->bios); - kfree(client); -} -EXPORT_SYMBOL(dm_io_client_destroy); - -/*----------------------------------------------------------------- - * We need to keep track of which region a bio is doing io for. 
- * To avoid a memory allocation to store just 5 or 6 bits, we - * ensure the 'struct io' pointer is aligned so enough low bits are - * always zero and then combine it with the region number directly in - * bi_private. - *---------------------------------------------------------------*/ -static void store_io_and_region_in_bio(struct bio *bio, struct io *io, - unsigned region) -{ - if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) { - DMCRIT("Unaligned struct io pointer %p", io); - BUG(); - } - - bio->bi_private = (void *)((unsigned long)io | region); -} - -static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io, - unsigned *region) -{ - unsigned long val = (unsigned long)bio->bi_private; - - *io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS); - *region = val & (DM_IO_MAX_REGIONS - 1); -} - -/*----------------------------------------------------------------- - * We need an io object to keep track of the number of bios that - * have been dispatched for a particular io. - *---------------------------------------------------------------*/ -static void dec_count(struct io *io, unsigned int region, int error) -{ - if (error) - set_bit(region, &io->error_bits); - - if (atomic_dec_and_test(&io->count)) { - if (io->vma_invalidate_size) - invalidate_kernel_vmap_range(io->vma_invalidate_address, - io->vma_invalidate_size); - - if (io->sleeper) - wake_up_process(io->sleeper); - - else { - unsigned long r = io->error_bits; - io_notify_fn fn = io->callback; - void *context = io->context; - - mempool_free(io, io->client->pool); - fn(r, context); - } - } -} - -static void endio(struct bio *bio, int error) -{ - struct io *io; - unsigned region; - - if (error && bio_data_dir(bio) == READ) - zero_fill_bio(bio); - - /* - * The bio destructor in bio_put() may use the io object. - */ - retrieve_io_and_region_from_bio(bio, &io, ®ion); - - bio_put(bio); - - dec_count(io, region, error); -} - -/*----------------------------------------------------------------- - * These little objects provide an abstraction for getting a new - * destination page for io. - *---------------------------------------------------------------*/ -struct dpages { - void (*get_page)(struct dpages *dp, - struct page **p, unsigned long *len, unsigned *offset); - void (*next_page)(struct dpages *dp); - - unsigned context_u; - void *context_ptr; - - void *vma_invalidate_address; - unsigned long vma_invalidate_size; -}; - -/* - * Functions for getting the pages from a list. - */ -static void list_get_page(struct dpages *dp, - struct page **p, unsigned long *len, unsigned *offset) -{ - unsigned o = dp->context_u; - struct page_list *pl = (struct page_list *) dp->context_ptr; - - *p = pl->page; - *len = PAGE_SIZE - o; - *offset = o; -} - -static void list_next_page(struct dpages *dp) -{ - struct page_list *pl = (struct page_list *) dp->context_ptr; - dp->context_ptr = pl->next; - dp->context_u = 0; -} - -static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset) -{ - dp->get_page = list_get_page; - dp->next_page = list_next_page; - dp->context_u = offset; - dp->context_ptr = pl; -} - -/* - * Functions for getting the pages from a bvec. 
- */ -static void bvec_get_page(struct dpages *dp, - struct page **p, unsigned long *len, unsigned *offset) -{ - struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr; - *p = bvec->bv_page; - *len = bvec->bv_len; - *offset = bvec->bv_offset; -} - -static void bvec_next_page(struct dpages *dp) -{ - struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr; - dp->context_ptr = bvec + 1; -} - -static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec) -{ - dp->get_page = bvec_get_page; - dp->next_page = bvec_next_page; - dp->context_ptr = bvec; -} - -/* - * Functions for getting the pages from a VMA. - */ -static void vm_get_page(struct dpages *dp, - struct page **p, unsigned long *len, unsigned *offset) -{ - *p = vmalloc_to_page(dp->context_ptr); - *offset = dp->context_u; - *len = PAGE_SIZE - dp->context_u; -} - -static void vm_next_page(struct dpages *dp) -{ - dp->context_ptr += PAGE_SIZE - dp->context_u; - dp->context_u = 0; -} - -static void vm_dp_init(struct dpages *dp, void *data) -{ - dp->get_page = vm_get_page; - dp->next_page = vm_next_page; - dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1); - dp->context_ptr = data; -} - -static void dm_bio_destructor(struct bio *bio) -{ - unsigned region; - struct io *io; - - retrieve_io_and_region_from_bio(bio, &io, ®ion); - - bio_free(bio, io->client->bios); -} - -/* - * Functions for getting the pages from kernel memory. - */ -static void km_get_page(struct dpages *dp, struct page **p, unsigned long *len, - unsigned *offset) -{ - *p = virt_to_page(dp->context_ptr); - *offset = dp->context_u; - *len = PAGE_SIZE - dp->context_u; -} - -static void km_next_page(struct dpages *dp) -{ - dp->context_ptr += PAGE_SIZE - dp->context_u; - dp->context_u = 0; -} - -static void km_dp_init(struct dpages *dp, void *data) -{ - dp->get_page = km_get_page; - dp->next_page = km_next_page; - dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1); - dp->context_ptr = data; -} - -/*----------------------------------------------------------------- - * IO routines that accept a list of pages. - *---------------------------------------------------------------*/ -static void do_region(int rw, unsigned region, struct dm_io_region *where, - struct dpages *dp, struct io *io) -{ - struct bio *bio; - struct page *page; - unsigned long len; - unsigned offset; - unsigned num_bvecs; - sector_t remaining = where->count; - struct request_queue *q = bdev_get_queue(where->bdev); - sector_t discard_sectors; - - /* - * where->count may be zero if rw holds a flush and we need to - * send a zero-sized flush. - */ - do { - /* - * Allocate a suitably sized-bio. - */ - if (rw & REQ_DISCARD) - num_bvecs = 1; - else - num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), - dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT))); - - bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios); - bio->bi_sector = where->sector + (where->count - remaining); - bio->bi_bdev = where->bdev; - bio->bi_end_io = endio; - bio->bi_destructor = dm_bio_destructor; - store_io_and_region_in_bio(bio, io, region); - - if (rw & REQ_DISCARD) { - discard_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); - bio->bi_size = discard_sectors << SECTOR_SHIFT; - remaining -= discard_sectors; - } else while (remaining) { - /* - * Try and add as many pages as possible. 
- */ - dp->get_page(dp, &page, &len, &offset); - len = min(len, to_bytes(remaining)); - if (!bio_add_page(bio, page, len, offset)) - break; - - offset = 0; - remaining -= to_sector(len); - dp->next_page(dp); - } - - atomic_inc(&io->count); - submit_bio(rw, bio); - } while (remaining); -} - -static void dispatch_io(int rw, unsigned int num_regions, - struct dm_io_region *where, struct dpages *dp, - struct io *io, int sync) -{ - int i; - struct dpages old_pages = *dp; - - BUG_ON(num_regions > DM_IO_MAX_REGIONS); - - if (sync) - rw |= REQ_SYNC; - - /* - * For multiple regions we need to be careful to rewind - * the dp object for each call to do_region. - */ - for (i = 0; i < num_regions; i++) { - *dp = old_pages; - if (where[i].count || (rw & REQ_FLUSH)) - do_region(rw, i, where + i, dp, io); - } - - /* - * Drop the extra reference that we were holding to avoid - * the io being completed too early. - */ - dec_count(io, 0, 0); -} - -static int sync_io(struct dm_io_client *client, unsigned int num_regions, - struct dm_io_region *where, int rw, struct dpages *dp, - unsigned long *error_bits) -{ - /* - * gcc <= 4.3 can't do the alignment for stack variables, so we must - * align it on our own. - * volatile prevents the optimizer from removing or reusing - * "io_" field from the stack frame (allowed in ANSI C). - */ - volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1]; - struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io)); - - if (num_regions > 1 && (rw & RW_MASK) != WRITE) { - WARN_ON(1); - return -EIO; - } - - io->error_bits = 0; - atomic_set(&io->count, 1); /* see dispatch_io() */ - io->sleeper = current; - io->client = client; - - io->vma_invalidate_address = dp->vma_invalidate_address; - io->vma_invalidate_size = dp->vma_invalidate_size; - - dispatch_io(rw, num_regions, where, dp, io, 1); - - while (1) { - set_current_state(TASK_UNINTERRUPTIBLE); - - if (!atomic_read(&io->count)) - break; - - io_schedule(); - } - set_current_state(TASK_RUNNING); - - if (error_bits) - *error_bits = io->error_bits; - - return io->error_bits ? 
-EIO : 0; -} - -static int async_io(struct dm_io_client *client, unsigned int num_regions, - struct dm_io_region *where, int rw, struct dpages *dp, - io_notify_fn fn, void *context) -{ - struct io *io; - - if (num_regions > 1 && (rw & RW_MASK) != WRITE) { - WARN_ON(1); - fn(1, context); - return -EIO; - } - - io = mempool_alloc(client->pool, GFP_NOIO); - io->error_bits = 0; - atomic_set(&io->count, 1); /* see dispatch_io() */ - io->sleeper = NULL; - io->client = client; - io->callback = fn; - io->context = context; - - io->vma_invalidate_address = dp->vma_invalidate_address; - io->vma_invalidate_size = dp->vma_invalidate_size; - - dispatch_io(rw, num_regions, where, dp, io, 0); - return 0; -} - -static int dp_init(struct dm_io_request *io_req, struct dpages *dp, - unsigned long size) -{ - /* Set up dpages based on memory type */ - - dp->vma_invalidate_address = NULL; - dp->vma_invalidate_size = 0; - - switch (io_req->mem.type) { - case DM_IO_PAGE_LIST: - list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset); - break; - - case DM_IO_BVEC: - bvec_dp_init(dp, io_req->mem.ptr.bvec); - break; - - case DM_IO_VMA: - flush_kernel_vmap_range(io_req->mem.ptr.vma, size); - if ((io_req->bi_rw & RW_MASK) == READ) { - dp->vma_invalidate_address = io_req->mem.ptr.vma; - dp->vma_invalidate_size = size; - } - vm_dp_init(dp, io_req->mem.ptr.vma); - break; - - case DM_IO_KMEM: - km_dp_init(dp, io_req->mem.ptr.addr); - break; - - default: - return -EINVAL; - } - - return 0; -} - -/* - * New collapsed (a)synchronous interface. - * - * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug - * the queue with blk_unplug() some time later or set REQ_SYNC in -io_req->bi_rw. If you fail to do one of these, the IO will be submitted to - * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c. - */ -int dm_io(struct dm_io_request *io_req, unsigned num_regions, - struct dm_io_region *where, unsigned long *sync_error_bits) -{ - int r; - struct dpages dp; - - r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT); - if (r) - return r; - - if (!io_req->notify.fn) - return sync_io(io_req->client, num_regions, where, - io_req->bi_rw, &dp, sync_error_bits); - - return async_io(io_req->client, num_regions, where, io_req->bi_rw, - &dp, io_req->notify.fn, io_req->notify.context); -} -EXPORT_SYMBOL(dm_io); - -int __init dm_io_init(void) -{ - _dm_io_cache = KMEM_CACHE(io, 0); - if (!_dm_io_cache) - return -ENOMEM; - - return 0; -} - -void dm_io_exit(void) -{ - kmem_cache_destroy(_dm_io_cache); - _dm_io_cache = NULL; -} diff --git a/ANDROID_3.4.5/drivers/md/dm-ioctl.c b/ANDROID_3.4.5/drivers/md/dm-ioctl.c deleted file mode 100644 index a1a3e6df..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-ioctl.c +++ /dev/null @@ -1,1782 +0,0 @@ -/* - * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. - * Copyright (C) 2004 - 2006 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. - */ - -#include "dm.h" - -#include <linux/module.h> -#include <linux/vmalloc.h> -#include <linux/miscdevice.h> -#include <linux/init.h> -#include <linux/wait.h> -#include <linux/slab.h> -#include <linux/dm-ioctl.h> -#include <linux/hdreg.h> -#include <linux/compat.h> - -#include <asm/uaccess.h> - -#define DM_MSG_PREFIX "ioctl" -#define DM_DRIVER_EMAIL "dm-devel@redhat.com" - -/*----------------------------------------------------------------- - * The ioctl interface needs to be able to look up devices by - * name or uuid. 
- *---------------------------------------------------------------*/ -struct hash_cell { - struct list_head name_list; - struct list_head uuid_list; - - char *name; - char *uuid; - struct mapped_device *md; - struct dm_table *new_map; -}; - -struct vers_iter { - size_t param_size; - struct dm_target_versions *vers, *old_vers; - char *end; - uint32_t flags; -}; - - -#define NUM_BUCKETS 64 -#define MASK_BUCKETS (NUM_BUCKETS - 1) -static struct list_head _name_buckets[NUM_BUCKETS]; -static struct list_head _uuid_buckets[NUM_BUCKETS]; - -static void dm_hash_remove_all(int keep_open_devices); - -/* - * Guards access to both hash tables. - */ -static DECLARE_RWSEM(_hash_lock); - -/* - * Protects use of mdptr to obtain hash cell name and uuid from mapped device. - */ -static DEFINE_MUTEX(dm_hash_cells_mutex); - -static void init_buckets(struct list_head *buckets) -{ - unsigned int i; - - for (i = 0; i < NUM_BUCKETS; i++) - INIT_LIST_HEAD(buckets + i); -} - -static int dm_hash_init(void) -{ - init_buckets(_name_buckets); - init_buckets(_uuid_buckets); - return 0; -} - -static void dm_hash_exit(void) -{ - dm_hash_remove_all(0); -} - -/*----------------------------------------------------------------- - * Hash function: - * We're not really concerned with the str hash function being - * fast since it's only used by the ioctl interface. - *---------------------------------------------------------------*/ -static unsigned int hash_str(const char *str) -{ - const unsigned int hash_mult = 2654435387U; - unsigned int h = 0; - - while (*str) - h = (h + (unsigned int) *str++) * hash_mult; - - return h & MASK_BUCKETS; -} - -/*----------------------------------------------------------------- - * Code for looking up a device by name - *---------------------------------------------------------------*/ -static struct hash_cell *__get_name_cell(const char *str) -{ - struct hash_cell *hc; - unsigned int h = hash_str(str); - - list_for_each_entry (hc, _name_buckets + h, name_list) - if (!strcmp(hc->name, str)) { - dm_get(hc->md); - return hc; - } - - return NULL; -} - -static struct hash_cell *__get_uuid_cell(const char *str) -{ - struct hash_cell *hc; - unsigned int h = hash_str(str); - - list_for_each_entry (hc, _uuid_buckets + h, uuid_list) - if (!strcmp(hc->uuid, str)) { - dm_get(hc->md); - return hc; - } - - return NULL; -} - -static struct hash_cell *__get_dev_cell(uint64_t dev) -{ - struct mapped_device *md; - struct hash_cell *hc; - - md = dm_get_md(huge_decode_dev(dev)); - if (!md) - return NULL; - - hc = dm_get_mdptr(md); - if (!hc) { - dm_put(md); - return NULL; - } - - return hc; -} - -/*----------------------------------------------------------------- - * Inserting, removing and renaming a device. 
- *---------------------------------------------------------------*/ -static struct hash_cell *alloc_cell(const char *name, const char *uuid, - struct mapped_device *md) -{ - struct hash_cell *hc; - - hc = kmalloc(sizeof(*hc), GFP_KERNEL); - if (!hc) - return NULL; - - hc->name = kstrdup(name, GFP_KERNEL); - if (!hc->name) { - kfree(hc); - return NULL; - } - - if (!uuid) - hc->uuid = NULL; - - else { - hc->uuid = kstrdup(uuid, GFP_KERNEL); - if (!hc->uuid) { - kfree(hc->name); - kfree(hc); - return NULL; - } - } - - INIT_LIST_HEAD(&hc->name_list); - INIT_LIST_HEAD(&hc->uuid_list); - hc->md = md; - hc->new_map = NULL; - return hc; -} - -static void free_cell(struct hash_cell *hc) -{ - if (hc) { - kfree(hc->name); - kfree(hc->uuid); - kfree(hc); - } -} - -/* - * The kdev_t and uuid of a device can never change once it is - * initially inserted. - */ -static int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md) -{ - struct hash_cell *cell, *hc; - - /* - * Allocate the new cells. - */ - cell = alloc_cell(name, uuid, md); - if (!cell) - return -ENOMEM; - - /* - * Insert the cell into both hash tables. - */ - down_write(&_hash_lock); - hc = __get_name_cell(name); - if (hc) { - dm_put(hc->md); - goto bad; - } - - list_add(&cell->name_list, _name_buckets + hash_str(name)); - - if (uuid) { - hc = __get_uuid_cell(uuid); - if (hc) { - list_del(&cell->name_list); - dm_put(hc->md); - goto bad; - } - list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid)); - } - dm_get(md); - mutex_lock(&dm_hash_cells_mutex); - dm_set_mdptr(md, cell); - mutex_unlock(&dm_hash_cells_mutex); - up_write(&_hash_lock); - - return 0; - - bad: - up_write(&_hash_lock); - free_cell(cell); - return -EBUSY; -} - -static void __hash_remove(struct hash_cell *hc) -{ - struct dm_table *table; - - /* remove from the dev hash */ - list_del(&hc->uuid_list); - list_del(&hc->name_list); - mutex_lock(&dm_hash_cells_mutex); - dm_set_mdptr(hc->md, NULL); - mutex_unlock(&dm_hash_cells_mutex); - - table = dm_get_live_table(hc->md); - if (table) { - dm_table_event(table); - dm_table_put(table); - } - - if (hc->new_map) - dm_table_destroy(hc->new_map); - dm_put(hc->md); - free_cell(hc); -} - -static void dm_hash_remove_all(int keep_open_devices) -{ - int i, dev_skipped; - struct hash_cell *hc; - struct mapped_device *md; - -retry: - dev_skipped = 0; - - down_write(&_hash_lock); - - for (i = 0; i < NUM_BUCKETS; i++) { - list_for_each_entry(hc, _name_buckets + i, name_list) { - md = hc->md; - dm_get(md); - - if (keep_open_devices && dm_lock_for_deletion(md)) { - dm_put(md); - dev_skipped++; - continue; - } - - __hash_remove(hc); - - up_write(&_hash_lock); - - dm_put(md); - if (likely(keep_open_devices)) - dm_destroy(md); - else - dm_destroy_immediate(md); - - /* - * Some mapped devices may be using other mapped - * devices, so repeat until we make no further - * progress. If a new mapped device is created - * here it will also get removed. - */ - goto retry; - } - } - - up_write(&_hash_lock); - - if (dev_skipped) - DMWARN("remove_all left %d open device(s)", dev_skipped); -} - -/* - * Set the uuid of a hash_cell that isn't already set. - */ -static void __set_cell_uuid(struct hash_cell *hc, char *new_uuid) -{ - mutex_lock(&dm_hash_cells_mutex); - hc->uuid = new_uuid; - mutex_unlock(&dm_hash_cells_mutex); - - list_add(&hc->uuid_list, _uuid_buckets + hash_str(new_uuid)); -} - -/* - * Changes the name of a hash_cell and returns the old name for - * the caller to free. 
- */ -static char *__change_cell_name(struct hash_cell *hc, char *new_name) -{ - char *old_name; - - /* - * Rename and move the name cell. - */ - list_del(&hc->name_list); - old_name = hc->name; - - mutex_lock(&dm_hash_cells_mutex); - hc->name = new_name; - mutex_unlock(&dm_hash_cells_mutex); - - list_add(&hc->name_list, _name_buckets + hash_str(new_name)); - - return old_name; -} - -static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, - const char *new) -{ - char *new_data, *old_name = NULL; - struct hash_cell *hc; - struct dm_table *table; - struct mapped_device *md; - unsigned change_uuid = (param->flags & DM_UUID_FLAG) ? 1 : 0; - - /* - * duplicate new. - */ - new_data = kstrdup(new, GFP_KERNEL); - if (!new_data) - return ERR_PTR(-ENOMEM); - - down_write(&_hash_lock); - - /* - * Is new free ? - */ - if (change_uuid) - hc = __get_uuid_cell(new); - else - hc = __get_name_cell(new); - - if (hc) { - DMWARN("Unable to change %s on mapped device %s to one that " - "already exists: %s", - change_uuid ? "uuid" : "name", - param->name, new); - dm_put(hc->md); - up_write(&_hash_lock); - kfree(new_data); - return ERR_PTR(-EBUSY); - } - - /* - * Is there such a device as 'old' ? - */ - hc = __get_name_cell(param->name); - if (!hc) { - DMWARN("Unable to rename non-existent device, %s to %s%s", - param->name, change_uuid ? "uuid " : "", new); - up_write(&_hash_lock); - kfree(new_data); - return ERR_PTR(-ENXIO); - } - - /* - * Does this device already have a uuid? - */ - if (change_uuid && hc->uuid) { - DMWARN("Unable to change uuid of mapped device %s to %s " - "because uuid is already set to %s", - param->name, new, hc->uuid); - dm_put(hc->md); - up_write(&_hash_lock); - kfree(new_data); - return ERR_PTR(-EINVAL); - } - - if (change_uuid) - __set_cell_uuid(hc, new_data); - else - old_name = __change_cell_name(hc, new_data); - - /* - * Wake up any dm event waiters. - */ - table = dm_get_live_table(hc->md); - if (table) { - dm_table_event(table); - dm_table_put(table); - } - - if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, param->event_nr)) - param->flags |= DM_UEVENT_GENERATED_FLAG; - - md = hc->md; - up_write(&_hash_lock); - kfree(old_name); - - return md; -} - -/*----------------------------------------------------------------- - * Implementation of the ioctl commands - *---------------------------------------------------------------*/ -/* - * All the ioctl commands get dispatched to functions with this - * prototype. - */ -typedef int (*ioctl_fn)(struct dm_ioctl *param, size_t param_size); - -static int remove_all(struct dm_ioctl *param, size_t param_size) -{ - dm_hash_remove_all(1); - param->data_size = 0; - return 0; -} - -/* - * Round up the ptr to an 8-byte boundary. - */ -#define ALIGN_MASK 7 -static inline void *align_ptr(void *ptr) -{ - return (void *) (((size_t) (ptr + ALIGN_MASK)) & ~ALIGN_MASK); -} - -/* - * Retrieves the data payload buffer from an already allocated - * struct dm_ioctl. 
- */ -static void *get_result_buffer(struct dm_ioctl *param, size_t param_size, - size_t *len) -{ - param->data_start = align_ptr(param + 1) - (void *) param; - - if (param->data_start < param_size) - *len = param_size - param->data_start; - else - *len = 0; - - return ((void *) param) + param->data_start; -} - -static int list_devices(struct dm_ioctl *param, size_t param_size) -{ - unsigned int i; - struct hash_cell *hc; - size_t len, needed = 0; - struct gendisk *disk; - struct dm_name_list *nl, *old_nl = NULL; - - down_write(&_hash_lock); - - /* - * Loop through all the devices working out how much - * space we need. - */ - for (i = 0; i < NUM_BUCKETS; i++) { - list_for_each_entry (hc, _name_buckets + i, name_list) { - needed += sizeof(struct dm_name_list); - needed += strlen(hc->name) + 1; - needed += ALIGN_MASK; - } - } - - /* - * Grab our output buffer. - */ - nl = get_result_buffer(param, param_size, &len); - if (len < needed) { - param->flags |= DM_BUFFER_FULL_FLAG; - goto out; - } - param->data_size = param->data_start + needed; - - nl->dev = 0; /* Flags no data */ - - /* - * Now loop through filling out the names. - */ - for (i = 0; i < NUM_BUCKETS; i++) { - list_for_each_entry (hc, _name_buckets + i, name_list) { - if (old_nl) - old_nl->next = (uint32_t) ((void *) nl - - (void *) old_nl); - disk = dm_disk(hc->md); - nl->dev = huge_encode_dev(disk_devt(disk)); - nl->next = 0; - strcpy(nl->name, hc->name); - - old_nl = nl; - nl = align_ptr(((void *) ++nl) + strlen(hc->name) + 1); - } - } - - out: - up_write(&_hash_lock); - return 0; -} - -static void list_version_get_needed(struct target_type *tt, void *needed_param) -{ - size_t *needed = needed_param; - - *needed += sizeof(struct dm_target_versions); - *needed += strlen(tt->name); - *needed += ALIGN_MASK; -} - -static void list_version_get_info(struct target_type *tt, void *param) -{ - struct vers_iter *info = param; - - /* Check space - it might have changed since the first iteration */ - if ((char *)info->vers + sizeof(tt->version) + strlen(tt->name) + 1 > - info->end) { - - info->flags = DM_BUFFER_FULL_FLAG; - return; - } - - if (info->old_vers) - info->old_vers->next = (uint32_t) ((void *)info->vers - - (void *)info->old_vers); - info->vers->version[0] = tt->version[0]; - info->vers->version[1] = tt->version[1]; - info->vers->version[2] = tt->version[2]; - info->vers->next = 0; - strcpy(info->vers->name, tt->name); - - info->old_vers = info->vers; - info->vers = align_ptr(((void *) ++info->vers) + strlen(tt->name) + 1); -} - -static int list_versions(struct dm_ioctl *param, size_t param_size) -{ - size_t len, needed = 0; - struct dm_target_versions *vers; - struct vers_iter iter_info; - - /* - * Loop through all the devices working out how much - * space we need. - */ - dm_target_iterate(list_version_get_needed, &needed); - - /* - * Grab our output buffer. - */ - vers = get_result_buffer(param, param_size, &len); - if (len < needed) { - param->flags |= DM_BUFFER_FULL_FLAG; - goto out; - } - param->data_size = param->data_start + needed; - - iter_info.param_size = param_size; - iter_info.old_vers = NULL; - iter_info.vers = vers; - iter_info.flags = 0; - iter_info.end = (char *)vers+len; - - /* - * Now loop through filling out the names & versions. 
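
list_devices() above packs variable-length records back to back in the result buffer: each record carries the encoded device number, a NUL-terminated name, and a next field holding the byte offset from this record to the one after it (0 marks the last record), with every record started on an 8-byte boundary by align_ptr(). The sketch below shows how a consumer might walk such a buffer; struct name_rec is a simplified stand-in for the packed layout, used only to make the offset arithmetic concrete.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-in for the packed records produced by list_devices(). */
struct name_rec {
    uint64_t dev;    /* encoded device number; 0 in the first slot means "no devices" */
    uint32_t next;   /* byte offset from this record to the next one, 0 == last */
    char name[];     /* NUL-terminated device name */
};

static void walk_names(void *buf)
{
    struct name_rec *r = buf;

    if (!r->dev)
        return;      /* list_devices() leaves dev == 0 when there is nothing to report */

    for (;;) {
        printf("%s (dev %llu)\n", r->name, (unsigned long long)r->dev);
        if (!r->next)
            break;
        r = (struct name_rec *)((char *)r + r->next);
    }
}

int main(void)
{
    uint64_t storage[16] = { 0 };            /* 8-byte aligned scratch buffer */
    char *buf = (char *)storage;
    struct name_rec *a = (struct name_rec *)buf;
    struct name_rec *b;
    size_t used;

    a->dev = 0x10300;                        /* arbitrary encoded dev_t */
    strcpy(a->name, "vg0-root");
    used = sizeof(*a) + strlen(a->name) + 1;
    a->next = (uint32_t)((used + 7) & ~(size_t)7);   /* round up, like align_ptr() */

    b = (struct name_rec *)(buf + a->next);
    b->dev = 0x10301;
    strcpy(b->name, "vg0-swap");
    b->next = 0;

    walk_names(buf);
    return 0;
}
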
- */ - dm_target_iterate(list_version_get_info, &iter_info); - param->flags |= iter_info.flags; - - out: - return 0; -} - -static int check_name(const char *name) -{ - if (strchr(name, '/')) { - DMWARN("invalid device name"); - return -EINVAL; - } - - return 0; -} - -/* - * On successful return, the caller must not attempt to acquire - * _hash_lock without first calling dm_table_put, because dm_table_destroy - * waits for this dm_table_put and could be called under this lock. - */ -static struct dm_table *dm_get_inactive_table(struct mapped_device *md) -{ - struct hash_cell *hc; - struct dm_table *table = NULL; - - down_read(&_hash_lock); - hc = dm_get_mdptr(md); - if (!hc || hc->md != md) { - DMWARN("device has been removed from the dev hash table."); - goto out; - } - - table = hc->new_map; - if (table) - dm_table_get(table); - -out: - up_read(&_hash_lock); - - return table; -} - -static struct dm_table *dm_get_live_or_inactive_table(struct mapped_device *md, - struct dm_ioctl *param) -{ - return (param->flags & DM_QUERY_INACTIVE_TABLE_FLAG) ? - dm_get_inactive_table(md) : dm_get_live_table(md); -} - -/* - * Fills in a dm_ioctl structure, ready for sending back to - * userland. - */ -static void __dev_status(struct mapped_device *md, struct dm_ioctl *param) -{ - struct gendisk *disk = dm_disk(md); - struct dm_table *table; - - param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG | - DM_ACTIVE_PRESENT_FLAG); - - if (dm_suspended_md(md)) - param->flags |= DM_SUSPEND_FLAG; - - param->dev = huge_encode_dev(disk_devt(disk)); - - /* - * Yes, this will be out of date by the time it gets back - * to userland, but it is still very useful for - * debugging. - */ - param->open_count = dm_open_count(md); - - param->event_nr = dm_get_event_nr(md); - param->target_count = 0; - - table = dm_get_live_table(md); - if (table) { - if (!(param->flags & DM_QUERY_INACTIVE_TABLE_FLAG)) { - if (get_disk_ro(disk)) - param->flags |= DM_READONLY_FLAG; - param->target_count = dm_table_get_num_targets(table); - } - dm_table_put(table); - - param->flags |= DM_ACTIVE_PRESENT_FLAG; - } - - if (param->flags & DM_QUERY_INACTIVE_TABLE_FLAG) { - table = dm_get_inactive_table(md); - if (table) { - if (!(dm_table_get_mode(table) & FMODE_WRITE)) - param->flags |= DM_READONLY_FLAG; - param->target_count = dm_table_get_num_targets(table); - dm_table_put(table); - } - } -} - -static int dev_create(struct dm_ioctl *param, size_t param_size) -{ - int r, m = DM_ANY_MINOR; - struct mapped_device *md; - - r = check_name(param->name); - if (r) - return r; - - if (param->flags & DM_PERSISTENT_DEV_FLAG) - m = MINOR(huge_decode_dev(param->dev)); - - r = dm_create(m, &md); - if (r) - return r; - - r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md); - if (r) { - dm_put(md); - dm_destroy(md); - return r; - } - - param->flags &= ~DM_INACTIVE_PRESENT_FLAG; - - __dev_status(md, param); - - dm_put(md); - - return 0; -} - -/* - * Always use UUID for lookups if it's present, otherwise use name or dev. 
- */ -static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param) -{ - struct hash_cell *hc = NULL; - - if (*param->uuid) { - if (*param->name || param->dev) - return NULL; - - hc = __get_uuid_cell(param->uuid); - if (!hc) - return NULL; - } else if (*param->name) { - if (param->dev) - return NULL; - - hc = __get_name_cell(param->name); - if (!hc) - return NULL; - } else if (param->dev) { - hc = __get_dev_cell(param->dev); - if (!hc) - return NULL; - } else - return NULL; - - /* - * Sneakily write in both the name and the uuid - * while we have the cell. - */ - strlcpy(param->name, hc->name, sizeof(param->name)); - if (hc->uuid) - strlcpy(param->uuid, hc->uuid, sizeof(param->uuid)); - else - param->uuid[0] = '\0'; - - if (hc->new_map) - param->flags |= DM_INACTIVE_PRESENT_FLAG; - else - param->flags &= ~DM_INACTIVE_PRESENT_FLAG; - - return hc; -} - -static struct mapped_device *find_device(struct dm_ioctl *param) -{ - struct hash_cell *hc; - struct mapped_device *md = NULL; - - down_read(&_hash_lock); - hc = __find_device_hash_cell(param); - if (hc) - md = hc->md; - up_read(&_hash_lock); - - return md; -} - -static int dev_remove(struct dm_ioctl *param, size_t param_size) -{ - struct hash_cell *hc; - struct mapped_device *md; - int r; - - down_write(&_hash_lock); - hc = __find_device_hash_cell(param); - - if (!hc) { - DMDEBUG_LIMIT("device doesn't appear to be in the dev hash table."); - up_write(&_hash_lock); - return -ENXIO; - } - - md = hc->md; - - /* - * Ensure the device is not open and nothing further can open it. - */ - r = dm_lock_for_deletion(md); - if (r) { - DMDEBUG_LIMIT("unable to remove open device %s", hc->name); - up_write(&_hash_lock); - dm_put(md); - return r; - } - - __hash_remove(hc); - up_write(&_hash_lock); - - if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr)) - param->flags |= DM_UEVENT_GENERATED_FLAG; - - dm_put(md); - dm_destroy(md); - return 0; -} - -/* - * Check a string doesn't overrun the chunk of - * memory we copied from userland. - */ -static int invalid_str(char *str, void *end) -{ - while ((void *) str < end) - if (!*str++) - return 0; - - return -EINVAL; -} - -static int dev_rename(struct dm_ioctl *param, size_t param_size) -{ - int r; - char *new_data = (char *) param + param->data_start; - struct mapped_device *md; - unsigned change_uuid = (param->flags & DM_UUID_FLAG) ? 1 : 0; - - if (new_data < param->data || - invalid_str(new_data, (void *) param + param_size) || - strlen(new_data) > (change_uuid ? 
DM_UUID_LEN - 1 : DM_NAME_LEN - 1)) { - DMWARN("Invalid new mapped device name or uuid string supplied."); - return -EINVAL; - } - - if (!change_uuid) { - r = check_name(new_data); - if (r) - return r; - } - - md = dm_hash_rename(param, new_data); - if (IS_ERR(md)) - return PTR_ERR(md); - - __dev_status(md, param); - dm_put(md); - - return 0; -} - -static int dev_set_geometry(struct dm_ioctl *param, size_t param_size) -{ - int r = -EINVAL, x; - struct mapped_device *md; - struct hd_geometry geometry; - unsigned long indata[4]; - char *geostr = (char *) param + param->data_start; - char dummy; - - md = find_device(param); - if (!md) - return -ENXIO; - - if (geostr < param->data || - invalid_str(geostr, (void *) param + param_size)) { - DMWARN("Invalid geometry supplied."); - goto out; - } - - x = sscanf(geostr, "%lu %lu %lu %lu%c", indata, - indata + 1, indata + 2, indata + 3, &dummy); - - if (x != 4) { - DMWARN("Unable to interpret geometry settings."); - goto out; - } - - if (indata[0] > 65535 || indata[1] > 255 || - indata[2] > 255 || indata[3] > ULONG_MAX) { - DMWARN("Geometry exceeds range limits."); - goto out; - } - - geometry.cylinders = indata[0]; - geometry.heads = indata[1]; - geometry.sectors = indata[2]; - geometry.start = indata[3]; - - r = dm_set_geometry(md, &geometry); - - param->data_size = 0; - -out: - dm_put(md); - return r; -} - -static int do_suspend(struct dm_ioctl *param) -{ - int r = 0; - unsigned suspend_flags = DM_SUSPEND_LOCKFS_FLAG; - struct mapped_device *md; - - md = find_device(param); - if (!md) - return -ENXIO; - - if (param->flags & DM_SKIP_LOCKFS_FLAG) - suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG; - if (param->flags & DM_NOFLUSH_FLAG) - suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG; - - if (!dm_suspended_md(md)) { - r = dm_suspend(md, suspend_flags); - if (r) - goto out; - } - - __dev_status(md, param); - -out: - dm_put(md); - - return r; -} - -static int do_resume(struct dm_ioctl *param) -{ - int r = 0; - unsigned suspend_flags = DM_SUSPEND_LOCKFS_FLAG; - struct hash_cell *hc; - struct mapped_device *md; - struct dm_table *new_map, *old_map = NULL; - - down_write(&_hash_lock); - - hc = __find_device_hash_cell(param); - if (!hc) { - DMDEBUG_LIMIT("device doesn't appear to be in the dev hash table."); - up_write(&_hash_lock); - return -ENXIO; - } - - md = hc->md; - - new_map = hc->new_map; - hc->new_map = NULL; - param->flags &= ~DM_INACTIVE_PRESENT_FLAG; - - up_write(&_hash_lock); - - /* Do we need to load a new map ? */ - if (new_map) { - /* Suspend if it isn't already suspended */ - if (param->flags & DM_SKIP_LOCKFS_FLAG) - suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG; - if (param->flags & DM_NOFLUSH_FLAG) - suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG; - if (!dm_suspended_md(md)) - dm_suspend(md, suspend_flags); - - old_map = dm_swap_table(md, new_map); - if (IS_ERR(old_map)) { - dm_table_destroy(new_map); - dm_put(md); - return PTR_ERR(old_map); - } - - if (dm_table_get_mode(new_map) & FMODE_WRITE) - set_disk_ro(dm_disk(md), 0); - else - set_disk_ro(dm_disk(md), 1); - } - - if (dm_suspended_md(md)) { - r = dm_resume(md); - if (!r && !dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr)) - param->flags |= DM_UEVENT_GENERATED_FLAG; - } - - if (old_map) - dm_table_destroy(old_map); - - if (!r) - __dev_status(md, param); - - dm_put(md); - return r; -} - -/* - * Set or unset the suspension state of a device. - * If the device already is in the requested state we just return its status. 
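
dev_set_geometry() above relies on a small sscanf() idiom: the format string asks for four numbers plus one extra character, and the result is accepted only when exactly four conversions succeed, so any trailing text after the fourth number makes the count five and the string is rejected. The standalone sketch below demonstrates just that parsing rule; the function name is illustrative and the range checks mirror the first three limits enforced by the ioctl.

#include <stdio.h>

/* Parse "cylinders heads sectors start"; reject anything extra on the line. */
static int parse_geometry(const char *geostr, unsigned long out[4])
{
    char dummy;
    int n;

    n = sscanf(geostr, "%lu %lu %lu %lu%c",
               &out[0], &out[1], &out[2], &out[3], &dummy);
    if (n != 4)
        return -1;    /* too few fields, or %c matched trailing junk */

    if (out[0] > 65535 || out[1] > 255 || out[2] > 255)
        return -1;    /* same limits dev_set_geometry() enforces */

    return 0;
}

int main(void)
{
    unsigned long g[4];

    printf("%d\n", parse_geometry("1024 255 63 0", g));        /* 0: accepted */
    printf("%d\n", parse_geometry("1024 255 63 0 junk", g));   /* -1: rejected */
    return 0;
}
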
- */ -static int dev_suspend(struct dm_ioctl *param, size_t param_size) -{ - if (param->flags & DM_SUSPEND_FLAG) - return do_suspend(param); - - return do_resume(param); -} - -/* - * Copies device info back to user space, used by - * the create and info ioctls. - */ -static int dev_status(struct dm_ioctl *param, size_t param_size) -{ - struct mapped_device *md; - - md = find_device(param); - if (!md) - return -ENXIO; - - __dev_status(md, param); - dm_put(md); - - return 0; -} - -/* - * Build up the status struct for each target - */ -static void retrieve_status(struct dm_table *table, - struct dm_ioctl *param, size_t param_size) -{ - unsigned int i, num_targets; - struct dm_target_spec *spec; - char *outbuf, *outptr; - status_type_t type; - size_t remaining, len, used = 0; - - outptr = outbuf = get_result_buffer(param, param_size, &len); - - if (param->flags & DM_STATUS_TABLE_FLAG) - type = STATUSTYPE_TABLE; - else - type = STATUSTYPE_INFO; - - /* Get all the target info */ - num_targets = dm_table_get_num_targets(table); - for (i = 0; i < num_targets; i++) { - struct dm_target *ti = dm_table_get_target(table, i); - - remaining = len - (outptr - outbuf); - if (remaining <= sizeof(struct dm_target_spec)) { - param->flags |= DM_BUFFER_FULL_FLAG; - break; - } - - spec = (struct dm_target_spec *) outptr; - - spec->status = 0; - spec->sector_start = ti->begin; - spec->length = ti->len; - strncpy(spec->target_type, ti->type->name, - sizeof(spec->target_type)); - - outptr += sizeof(struct dm_target_spec); - remaining = len - (outptr - outbuf); - if (remaining <= 0) { - param->flags |= DM_BUFFER_FULL_FLAG; - break; - } - - /* Get the status/table string from the target driver */ - if (ti->type->status) { - if (ti->type->status(ti, type, outptr, remaining)) { - param->flags |= DM_BUFFER_FULL_FLAG; - break; - } - } else - outptr[0] = '\0'; - - outptr += strlen(outptr) + 1; - used = param->data_start + (outptr - outbuf); - - outptr = align_ptr(outptr); - spec->next = outptr - outbuf; - } - - if (used) - param->data_size = used; - - param->target_count = num_targets; -} - -/* - * Wait for a device to report an event - */ -static int dev_wait(struct dm_ioctl *param, size_t param_size) -{ - int r = 0; - struct mapped_device *md; - struct dm_table *table; - - md = find_device(param); - if (!md) - return -ENXIO; - - /* - * Wait for a notification event - */ - if (dm_wait_event(md, param->event_nr)) { - r = -ERESTARTSYS; - goto out; - } - - /* - * The userland program is going to want to know what - * changed to trigger the event, so we may as well tell - * him and save an ioctl. 
- */ - __dev_status(md, param); - - table = dm_get_live_or_inactive_table(md, param); - if (table) { - retrieve_status(table, param, param_size); - dm_table_put(table); - } - -out: - dm_put(md); - - return r; -} - -static inline fmode_t get_mode(struct dm_ioctl *param) -{ - fmode_t mode = FMODE_READ | FMODE_WRITE; - - if (param->flags & DM_READONLY_FLAG) - mode = FMODE_READ; - - return mode; -} - -static int next_target(struct dm_target_spec *last, uint32_t next, void *end, - struct dm_target_spec **spec, char **target_params) -{ - *spec = (struct dm_target_spec *) ((unsigned char *) last + next); - *target_params = (char *) (*spec + 1); - - if (*spec < (last + 1)) - return -EINVAL; - - return invalid_str(*target_params, end); -} - -static int populate_table(struct dm_table *table, - struct dm_ioctl *param, size_t param_size) -{ - int r; - unsigned int i = 0; - struct dm_target_spec *spec = (struct dm_target_spec *) param; - uint32_t next = param->data_start; - void *end = (void *) param + param_size; - char *target_params; - - if (!param->target_count) { - DMWARN("populate_table: no targets specified"); - return -EINVAL; - } - - for (i = 0; i < param->target_count; i++) { - - r = next_target(spec, next, end, &spec, &target_params); - if (r) { - DMWARN("unable to find target"); - return r; - } - - r = dm_table_add_target(table, spec->target_type, - (sector_t) spec->sector_start, - (sector_t) spec->length, - target_params); - if (r) { - DMWARN("error adding target to table"); - return r; - } - - next = spec->next; - } - - return dm_table_complete(table); -} - -static int table_load(struct dm_ioctl *param, size_t param_size) -{ - int r; - struct hash_cell *hc; - struct dm_table *t; - struct mapped_device *md; - struct target_type *immutable_target_type; - - md = find_device(param); - if (!md) - return -ENXIO; - - r = dm_table_create(&t, get_mode(param), param->target_count, md); - if (r) - goto out; - - r = populate_table(t, param, param_size); - if (r) { - dm_table_destroy(t); - goto out; - } - - immutable_target_type = dm_get_immutable_target_type(md); - if (immutable_target_type && - (immutable_target_type != dm_table_get_immutable_target_type(t))) { - DMWARN("can't replace immutable target type %s", - immutable_target_type->name); - dm_table_destroy(t); - r = -EINVAL; - goto out; - } - - /* Protect md->type and md->queue against concurrent table loads. */ - dm_lock_md_type(md); - if (dm_get_md_type(md) == DM_TYPE_NONE) - /* Initial table load: acquire type of table. 
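
populate_table() and next_target() above define how the kernel walks the DM_TABLE_LOAD payload: the first dm_target_spec sits data_start bytes from the beginning of the dm_ioctl header, its parameter string follows immediately after the spec, and each spec's next field is the byte offset from that spec to the following one. The fragment below is a hedged sketch of how a caller might lay out one such target using the uapi <linux/dm-ioctl.h> definitions; the 8-byte rounding is a safety choice for the embedded 64-bit fields rather than something next_target() itself demands, and add_target() is an illustrative helper, not a libdevmapper function.

#include <stdint.h>
#include <string.h>
#include <linux/dm-ioctl.h>

#define ALIGN8(x) (((x) + 7) & ~(size_t)7)

/*
 * Append one target line to the data area of a DM_TABLE_LOAD request.
 * 'offset' is the current end of the data area, measured from the start of
 * the dm_ioctl header in 'buf'; the updated end offset is returned.  The
 * layout matches what next_target()/populate_table() expect: the spec comes
 * first, the parameter string follows it, and 'next' is the byte offset from
 * this spec to the next one.
 */
size_t add_target(char *buf, size_t offset, uint64_t start, uint64_t len,
                  const char *type, const char *params)
{
    struct dm_target_spec *spec = (struct dm_target_spec *)(buf + offset);
    char *args = (char *)(spec + 1);          /* params live right after the spec */
    size_t used;

    memset(spec, 0, sizeof(*spec));
    spec->sector_start = start;
    spec->length = len;
    strncpy(spec->target_type, type, sizeof(spec->target_type) - 1);
    strcpy(args, params);

    used = sizeof(*spec) + strlen(params) + 1;
    spec->next = (uint32_t)ALIGN8(used);      /* where the next spec would begin */

    return offset + spec->next;
}

/*
 * Usage (not a complete ioctl sequence): with a struct dm_ioctl 'dmi' at the
 * start of 'buf' and dmi->data_start = sizeof(*dmi):
 *
 *     size_t end = dmi->data_start;
 *     end = add_target(buf, end, 0, 2048, "linear", "/dev/sdb 0");
 *     dmi->target_count = 1;
 *     dmi->data_size = end;
 */
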
*/ - dm_set_md_type(md, dm_table_get_type(t)); - else if (dm_get_md_type(md) != dm_table_get_type(t)) { - DMWARN("can't change device type after initial table load."); - dm_table_destroy(t); - dm_unlock_md_type(md); - r = -EINVAL; - goto out; - } - - /* setup md->queue to reflect md's type (may block) */ - r = dm_setup_md_queue(md); - if (r) { - DMWARN("unable to set up device queue for new table."); - dm_table_destroy(t); - dm_unlock_md_type(md); - goto out; - } - dm_unlock_md_type(md); - - /* stage inactive table */ - down_write(&_hash_lock); - hc = dm_get_mdptr(md); - if (!hc || hc->md != md) { - DMWARN("device has been removed from the dev hash table."); - dm_table_destroy(t); - up_write(&_hash_lock); - r = -ENXIO; - goto out; - } - - if (hc->new_map) - dm_table_destroy(hc->new_map); - hc->new_map = t; - up_write(&_hash_lock); - - param->flags |= DM_INACTIVE_PRESENT_FLAG; - __dev_status(md, param); - -out: - dm_put(md); - - return r; -} - -static int table_clear(struct dm_ioctl *param, size_t param_size) -{ - struct hash_cell *hc; - struct mapped_device *md; - - down_write(&_hash_lock); - - hc = __find_device_hash_cell(param); - if (!hc) { - DMDEBUG_LIMIT("device doesn't appear to be in the dev hash table."); - up_write(&_hash_lock); - return -ENXIO; - } - - if (hc->new_map) { - dm_table_destroy(hc->new_map); - hc->new_map = NULL; - } - - param->flags &= ~DM_INACTIVE_PRESENT_FLAG; - - __dev_status(hc->md, param); - md = hc->md; - up_write(&_hash_lock); - dm_put(md); - - return 0; -} - -/* - * Retrieves a list of devices used by a particular dm device. - */ -static void retrieve_deps(struct dm_table *table, - struct dm_ioctl *param, size_t param_size) -{ - unsigned int count = 0; - struct list_head *tmp; - size_t len, needed; - struct dm_dev_internal *dd; - struct dm_target_deps *deps; - - deps = get_result_buffer(param, param_size, &len); - - /* - * Count the devices. - */ - list_for_each (tmp, dm_table_get_devices(table)) - count++; - - /* - * Check we have enough space. - */ - needed = sizeof(*deps) + (sizeof(*deps->dev) * count); - if (len < needed) { - param->flags |= DM_BUFFER_FULL_FLAG; - return; - } - - /* - * Fill in the devices. - */ - deps->count = count; - count = 0; - list_for_each_entry (dd, dm_table_get_devices(table), list) - deps->dev[count++] = huge_encode_dev(dd->dm_dev.bdev->bd_dev); - - param->data_size = param->data_start + needed; -} - -static int table_deps(struct dm_ioctl *param, size_t param_size) -{ - struct mapped_device *md; - struct dm_table *table; - - md = find_device(param); - if (!md) - return -ENXIO; - - __dev_status(md, param); - - table = dm_get_live_or_inactive_table(md, param); - if (table) { - retrieve_deps(table, param, param_size); - dm_table_put(table); - } - - dm_put(md); - - return 0; -} - -/* - * Return the status of a device as a text string for each - * target. - */ -static int table_status(struct dm_ioctl *param, size_t param_size) -{ - struct mapped_device *md; - struct dm_table *table; - - md = find_device(param); - if (!md) - return -ENXIO; - - __dev_status(md, param); - - table = dm_get_live_or_inactive_table(md, param); - if (table) { - retrieve_status(table, param, param_size); - dm_table_put(table); - } - - dm_put(md); - - return 0; -} - -/* - * Pass a message to the target that's at the supplied device offset. 
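
table_load() above never touches the live table: it only stages the new one as hc->new_map, and do_resume() later takes that staged table, swaps it in, and destroys the old one outside the hash lock. The little userspace model below restates that two-step (stage, then promote) with a pthread mutex standing in for the kernel locking; it is a conceptual sketch only, and the struct and function names are invented for the example.

#include <pthread.h>
#include <stdlib.h>

/* Conceptual model only: 'live' serves I/O, 'inactive' is the staged table. */
struct dev_model {
    pthread_mutex_t lock;
    void *live;
    void *inactive;
};

/* "Load": stage a new table, dropping any previously staged one, the way
 * table_load() destroys an unused hc->new_map before replacing it. */
static void stage_table(struct dev_model *d, void *new_table)
{
    pthread_mutex_lock(&d->lock);
    free(d->inactive);
    d->inactive = new_table;
    pthread_mutex_unlock(&d->lock);
}

/* "Resume": promote the staged table and hand the old live one back so the
 * caller can tear it down outside the lock, as do_resume() does. */
static void *promote_table(struct dev_model *d)
{
    void *old = NULL;

    pthread_mutex_lock(&d->lock);
    if (d->inactive) {          /* nothing staged: keep the current table */
        old = d->live;
        d->live = d->inactive;
        d->inactive = NULL;
    }
    pthread_mutex_unlock(&d->lock);
    return old;
}

int main(void)
{
    struct dev_model d = { PTHREAD_MUTEX_INITIALIZER, NULL, NULL };

    stage_table(&d, malloc(16));
    free(promote_table(&d));    /* old live table (NULL here) */
    free(d.live);
    return 0;
}
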
- */ -static int target_message(struct dm_ioctl *param, size_t param_size) -{ - int r, argc; - char **argv; - struct mapped_device *md; - struct dm_table *table; - struct dm_target *ti; - struct dm_target_msg *tmsg = (void *) param + param->data_start; - - md = find_device(param); - if (!md) - return -ENXIO; - - if (tmsg < (struct dm_target_msg *) param->data || - invalid_str(tmsg->message, (void *) param + param_size)) { - DMWARN("Invalid target message parameters."); - r = -EINVAL; - goto out; - } - - r = dm_split_args(&argc, &argv, tmsg->message); - if (r) { - DMWARN("Failed to split target message parameters"); - goto out; - } - - if (!argc) { - DMWARN("Empty message received."); - goto out_argv; - } - - table = dm_get_live_table(md); - if (!table) - goto out_argv; - - if (dm_deleting_md(md)) { - r = -ENXIO; - goto out_table; - } - - ti = dm_table_find_target(table, tmsg->sector); - if (!dm_target_is_valid(ti)) { - DMWARN("Target message sector outside device."); - r = -EINVAL; - } else if (ti->type->message) - r = ti->type->message(ti, argc, argv); - else { - DMWARN("Target type does not support messages"); - r = -EINVAL; - } - - out_table: - dm_table_put(table); - out_argv: - kfree(argv); - out: - param->data_size = 0; - dm_put(md); - return r; -} - -/*----------------------------------------------------------------- - * Implementation of open/close/ioctl on the special char - * device. - *---------------------------------------------------------------*/ -static ioctl_fn lookup_ioctl(unsigned int cmd) -{ - static struct { - int cmd; - ioctl_fn fn; - } _ioctls[] = { - {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */ - {DM_REMOVE_ALL_CMD, remove_all}, - {DM_LIST_DEVICES_CMD, list_devices}, - - {DM_DEV_CREATE_CMD, dev_create}, - {DM_DEV_REMOVE_CMD, dev_remove}, - {DM_DEV_RENAME_CMD, dev_rename}, - {DM_DEV_SUSPEND_CMD, dev_suspend}, - {DM_DEV_STATUS_CMD, dev_status}, - {DM_DEV_WAIT_CMD, dev_wait}, - - {DM_TABLE_LOAD_CMD, table_load}, - {DM_TABLE_CLEAR_CMD, table_clear}, - {DM_TABLE_DEPS_CMD, table_deps}, - {DM_TABLE_STATUS_CMD, table_status}, - - {DM_LIST_VERSIONS_CMD, list_versions}, - - {DM_TARGET_MSG_CMD, target_message}, - {DM_DEV_SET_GEOMETRY_CMD, dev_set_geometry} - }; - - return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn; -} - -/* - * As well as checking the version compatibility this always - * copies the kernel interface version out. - */ -static int check_version(unsigned int cmd, struct dm_ioctl __user *user) -{ - uint32_t version[3]; - int r = 0; - - if (copy_from_user(version, user->version, sizeof(version))) - return -EFAULT; - - if ((DM_VERSION_MAJOR != version[0]) || - (DM_VERSION_MINOR < version[1])) { - DMWARN("ioctl interface mismatch: " - "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)", - DM_VERSION_MAJOR, DM_VERSION_MINOR, - DM_VERSION_PATCHLEVEL, - version[0], version[1], version[2], cmd); - r = -EINVAL; - } - - /* - * Fill in the kernel version. 
- */ - version[0] = DM_VERSION_MAJOR; - version[1] = DM_VERSION_MINOR; - version[2] = DM_VERSION_PATCHLEVEL; - if (copy_to_user(user->version, version, sizeof(version))) - return -EFAULT; - - return r; -} - -static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) -{ - struct dm_ioctl tmp, *dmi; - int secure_data; - - if (copy_from_user(&tmp, user, sizeof(tmp) - sizeof(tmp.data))) - return -EFAULT; - - if (tmp.data_size < (sizeof(tmp) - sizeof(tmp.data))) - return -EINVAL; - - secure_data = tmp.flags & DM_SECURE_DATA_FLAG; - - dmi = vmalloc(tmp.data_size); - if (!dmi) { - if (secure_data && clear_user(user, tmp.data_size)) - return -EFAULT; - return -ENOMEM; - } - - if (copy_from_user(dmi, user, tmp.data_size)) - goto bad; - - /* Wipe the user buffer so we do not return it to userspace */ - if (secure_data && clear_user(user, tmp.data_size)) - goto bad; - - *param = dmi; - return 0; - -bad: - if (secure_data) - memset(dmi, 0, tmp.data_size); - vfree(dmi); - return -EFAULT; -} - -static int validate_params(uint cmd, struct dm_ioctl *param) -{ - /* Always clear this flag */ - param->flags &= ~DM_BUFFER_FULL_FLAG; - param->flags &= ~DM_UEVENT_GENERATED_FLAG; - param->flags &= ~DM_SECURE_DATA_FLAG; - - /* Ignores parameters */ - if (cmd == DM_REMOVE_ALL_CMD || - cmd == DM_LIST_DEVICES_CMD || - cmd == DM_LIST_VERSIONS_CMD) - return 0; - - if ((cmd == DM_DEV_CREATE_CMD)) { - if (!*param->name) { - DMWARN("name not supplied when creating device"); - return -EINVAL; - } - } else if ((*param->uuid && *param->name)) { - DMWARN("only supply one of name or uuid, cmd(%u)", cmd); - return -EINVAL; - } - - /* Ensure strings are terminated */ - param->name[DM_NAME_LEN - 1] = '\0'; - param->uuid[DM_UUID_LEN - 1] = '\0'; - - return 0; -} - -static int ctl_ioctl(uint command, struct dm_ioctl __user *user) -{ - int r = 0; - int wipe_buffer; - unsigned int cmd; - struct dm_ioctl *uninitialized_var(param); - ioctl_fn fn = NULL; - size_t input_param_size; - - /* only root can play with this */ - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - - if (_IOC_TYPE(command) != DM_IOCTL) - return -ENOTTY; - - cmd = _IOC_NR(command); - - /* - * Check the interface version passed in. This also - * writes out the kernel's interface version. - */ - r = check_version(cmd, user); - if (r) - return r; - - /* - * Nothing more to do for the version command. - */ - if (cmd == DM_VERSION_CMD) - return 0; - - fn = lookup_ioctl(cmd); - if (!fn) { - DMWARN("dm_ctl_ioctl: unknown command 0x%x", command); - return -ENOTTY; - } - - /* - * Trying to avoid low memory issues when a device is - * suspended. - */ - current->flags |= PF_MEMALLOC; - - /* - * Copy the parameters into kernel space. - */ - r = copy_params(user, ¶m); - - current->flags &= ~PF_MEMALLOC; - - if (r) - return r; - - input_param_size = param->data_size; - wipe_buffer = param->flags & DM_SECURE_DATA_FLAG; - - r = validate_params(cmd, param); - if (r) - goto out; - - param->data_size = sizeof(*param); - r = fn(param, input_param_size); - - /* - * Copy the results back to userland. 
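
check_version() above requires the caller's major number to match and its minor to be no newer than the kernel's, and it always writes the kernel's own version back, even for a failed match. The userspace counterpart is the DM_VERSION round trip sketched below, built on the uapi <linux/dm-ioctl.h> definitions; /dev/mapper/control is the misc device that dm_interface_init() registers further down, and the ioctl itself is only honoured for a caller with CAP_SYS_ADMIN.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/dm-ioctl.h>

int main(void)
{
    struct dm_ioctl dmi;
    int fd = open("/dev/mapper/control", O_RDWR);   /* needs CAP_SYS_ADMIN to be useful */

    if (fd < 0) {
        perror("open /dev/mapper/control");
        return 1;
    }

    memset(&dmi, 0, sizeof(dmi));
    dmi.version[0] = DM_VERSION_MAJOR;     /* the interface we were built against */
    dmi.version[1] = DM_VERSION_MINOR;
    dmi.version[2] = DM_VERSION_PATCHLEVEL;
    dmi.data_size = sizeof(dmi);           /* header only, no extra payload */
    dmi.data_start = sizeof(dmi);

    if (ioctl(fd, DM_VERSION, &dmi) < 0) {
        perror("DM_VERSION");
        close(fd);
        return 1;
    }

    /* check_version() filled in the kernel's own numbers on the way out. */
    printf("kernel device-mapper interface %u.%u.%u\n",
           dmi.version[0], dmi.version[1], dmi.version[2]);
    close(fd);
    return 0;
}
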
- */ - if (!r && copy_to_user(user, param, param->data_size)) - r = -EFAULT; - -out: - if (wipe_buffer) - memset(param, 0, input_param_size); - - vfree(param); - return r; -} - -static long dm_ctl_ioctl(struct file *file, uint command, ulong u) -{ - return (long)ctl_ioctl(command, (struct dm_ioctl __user *)u); -} - -#ifdef CONFIG_COMPAT -static long dm_compat_ctl_ioctl(struct file *file, uint command, ulong u) -{ - return (long)dm_ctl_ioctl(file, command, (ulong) compat_ptr(u)); -} -#else -#define dm_compat_ctl_ioctl NULL -#endif - -static const struct file_operations _ctl_fops = { - .open = nonseekable_open, - .unlocked_ioctl = dm_ctl_ioctl, - .compat_ioctl = dm_compat_ctl_ioctl, - .owner = THIS_MODULE, - .llseek = noop_llseek, -}; - -static struct miscdevice _dm_misc = { - .minor = MAPPER_CTRL_MINOR, - .name = DM_NAME, - .nodename = DM_DIR "/" DM_CONTROL_NODE, - .fops = &_ctl_fops -}; - -MODULE_ALIAS_MISCDEV(MAPPER_CTRL_MINOR); -MODULE_ALIAS("devname:" DM_DIR "/" DM_CONTROL_NODE); - -/* - * Create misc character device and link to DM_DIR/control. - */ -int __init dm_interface_init(void) -{ - int r; - - r = dm_hash_init(); - if (r) - return r; - - r = misc_register(&_dm_misc); - if (r) { - DMERR("misc_register failed for control device"); - dm_hash_exit(); - return r; - } - - DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR, - DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA, - DM_DRIVER_EMAIL); - return 0; -} - -void dm_interface_exit(void) -{ - if (misc_deregister(&_dm_misc) < 0) - DMERR("misc_deregister failed for control device"); - - dm_hash_exit(); -} - -/** - * dm_copy_name_and_uuid - Copy mapped device name & uuid into supplied buffers - * @md: Pointer to mapped_device - * @name: Buffer (size DM_NAME_LEN) for name - * @uuid: Buffer (size DM_UUID_LEN) for uuid or empty string if uuid not defined - */ -int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid) -{ - int r = 0; - struct hash_cell *hc; - - if (!md) - return -ENXIO; - - mutex_lock(&dm_hash_cells_mutex); - hc = dm_get_mdptr(md); - if (!hc || hc->md != md) { - r = -ENXIO; - goto out; - } - - if (name) - strcpy(name, hc->name); - if (uuid) - strcpy(uuid, hc->uuid ? : ""); - -out: - mutex_unlock(&dm_hash_cells_mutex); - - return r; -} diff --git a/ANDROID_3.4.5/drivers/md/dm-kcopyd.c b/ANDROID_3.4.5/drivers/md/dm-kcopyd.c deleted file mode 100644 index bed444c9..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-kcopyd.c +++ /dev/null @@ -1,756 +0,0 @@ -/* - * Copyright (C) 2002 Sistina Software (UK) Limited. - * Copyright (C) 2006 Red Hat GmbH - * - * This file is released under the GPL. - * - * Kcopyd provides a simple interface for copying an area of one - * block-device to one or more other block-devices, with an asynchronous - * completion notification. - */ - -#include <linux/types.h> -#include <linux/atomic.h> -#include <linux/blkdev.h> -#include <linux/fs.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/mempool.h> -#include <linux/module.h> -#include <linux/pagemap.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/workqueue.h> -#include <linux/mutex.h> -#include <linux/device-mapper.h> -#include <linux/dm-kcopyd.h> - -#include "dm.h" - -#define SUB_JOB_SIZE 128 -#define SPLIT_COUNT 8 -#define MIN_JOBS 8 -#define RESERVE_PAGES (DIV_ROUND_UP(SUB_JOB_SIZE << SECTOR_SHIFT, PAGE_SIZE)) - -/*----------------------------------------------------------------- - * Each kcopyd client has its own little pool of preallocated - * pages for kcopyd io. 
- *---------------------------------------------------------------*/ -struct dm_kcopyd_client { - struct page_list *pages; - unsigned nr_reserved_pages; - unsigned nr_free_pages; - - struct dm_io_client *io_client; - - wait_queue_head_t destroyq; - atomic_t nr_jobs; - - mempool_t *job_pool; - - struct workqueue_struct *kcopyd_wq; - struct work_struct kcopyd_work; - -/* - * We maintain three lists of jobs: - * - * i) jobs waiting for pages - * ii) jobs that have pages, and are waiting for the io to be issued. - * iii) jobs that have completed. - * - * All three of these are protected by job_lock. - */ - spinlock_t job_lock; - struct list_head complete_jobs; - struct list_head io_jobs; - struct list_head pages_jobs; -}; - -static struct page_list zero_page_list; - -static void wake(struct dm_kcopyd_client *kc) -{ - queue_work(kc->kcopyd_wq, &kc->kcopyd_work); -} - -/* - * Obtain one page for the use of kcopyd. - */ -static struct page_list *alloc_pl(gfp_t gfp) -{ - struct page_list *pl; - - pl = kmalloc(sizeof(*pl), gfp); - if (!pl) - return NULL; - - pl->page = alloc_page(gfp); - if (!pl->page) { - kfree(pl); - return NULL; - } - - return pl; -} - -static void free_pl(struct page_list *pl) -{ - __free_page(pl->page); - kfree(pl); -} - -/* - * Add the provided pages to a client's free page list, releasing - * back to the system any beyond the reserved_pages limit. - */ -static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl) -{ - struct page_list *next; - - do { - next = pl->next; - - if (kc->nr_free_pages >= kc->nr_reserved_pages) - free_pl(pl); - else { - pl->next = kc->pages; - kc->pages = pl; - kc->nr_free_pages++; - } - - pl = next; - } while (pl); -} - -static int kcopyd_get_pages(struct dm_kcopyd_client *kc, - unsigned int nr, struct page_list **pages) -{ - struct page_list *pl; - - *pages = NULL; - - do { - pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY); - if (unlikely(!pl)) { - /* Use reserved pages */ - pl = kc->pages; - if (unlikely(!pl)) - goto out_of_memory; - kc->pages = pl->next; - kc->nr_free_pages--; - } - pl->next = *pages; - *pages = pl; - } while (--nr); - - return 0; - -out_of_memory: - if (*pages) - kcopyd_put_pages(kc, *pages); - return -ENOMEM; -} - -/* - * These three functions resize the page pool. - */ -static void drop_pages(struct page_list *pl) -{ - struct page_list *next; - - while (pl) { - next = pl->next; - free_pl(pl); - pl = next; - } -} - -/* - * Allocate and reserve nr_pages for the use of a specific client. - */ -static int client_reserve_pages(struct dm_kcopyd_client *kc, unsigned nr_pages) -{ - unsigned i; - struct page_list *pl = NULL, *next; - - for (i = 0; i < nr_pages; i++) { - next = alloc_pl(GFP_KERNEL); - if (!next) { - if (pl) - drop_pages(pl); - return -ENOMEM; - } - next->next = pl; - pl = next; - } - - kc->nr_reserved_pages += nr_pages; - kcopyd_put_pages(kc, pl); - - return 0; -} - -static void client_free_pages(struct dm_kcopyd_client *kc) -{ - BUG_ON(kc->nr_free_pages != kc->nr_reserved_pages); - drop_pages(kc->pages); - kc->pages = NULL; - kc->nr_free_pages = kc->nr_reserved_pages = 0; -} - -/*----------------------------------------------------------------- - * kcopyd_jobs need to be allocated by the *clients* of kcopyd, - * for this reason we use a mempool to prevent the client from - * ever having to do io (which could cause a deadlock). 
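
The page helpers above encode kcopyd's allocation policy: try an opportunistic alloc_page() with __GFP_NOWARN | __GFP_NORETRY, fall back to the reserve that client_reserve_pages() filled at client-create time, and let freed pages refill the reserve before anything is handed back to the system. The userspace sketch below models the same bookkeeping with malloc()/free() standing in for page allocation; the names are illustrative, not the kernel's.

#include <stdlib.h>

struct page_node {
    struct page_node *next;
};

struct page_pool {
    struct page_node *reserve;   /* singly linked free list, like kc->pages */
    unsigned nr_free;
    unsigned nr_reserved;        /* size the reserve is allowed to grow to */
};

/* Try a fresh allocation first; only dip into the reserve when that fails. */
static struct page_node *pool_get(struct page_pool *p)
{
    struct page_node *n = malloc(sizeof(*n));   /* the "opportunistic" attempt */

    if (n)
        return n;

    n = p->reserve;
    if (!n)
        return NULL;             /* reserve exhausted as well */
    p->reserve = n->next;
    p->nr_free--;
    return n;
}

/* Refill the reserve first; release to the system only once it is full. */
static void pool_put(struct page_pool *p, struct page_node *n)
{
    if (p->nr_free >= p->nr_reserved) {
        free(n);
    } else {
        n->next = p->reserve;
        p->reserve = n;
        p->nr_free++;
    }
}

int main(void)
{
    struct page_pool pool = { NULL, 0, 4 };
    struct page_node *n = pool_get(&pool);

    pool_put(&pool, n);
    while ((n = pool.reserve)) {     /* drain, like client_free_pages() */
        pool.reserve = n->next;
        free(n);
    }
    return 0;
}
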
- *---------------------------------------------------------------*/ -struct kcopyd_job { - struct dm_kcopyd_client *kc; - struct list_head list; - unsigned long flags; - - /* - * Error state of the job. - */ - int read_err; - unsigned long write_err; - - /* - * Either READ or WRITE - */ - int rw; - struct dm_io_region source; - - /* - * The destinations for the transfer. - */ - unsigned int num_dests; - struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS]; - - struct page_list *pages; - - /* - * Set this to ensure you are notified when the job has - * completed. 'context' is for callback to use. - */ - dm_kcopyd_notify_fn fn; - void *context; - - /* - * These fields are only used if the job has been split - * into more manageable parts. - */ - struct mutex lock; - atomic_t sub_jobs; - sector_t progress; - - struct kcopyd_job *master_job; -}; - -static struct kmem_cache *_job_cache; - -int __init dm_kcopyd_init(void) -{ - _job_cache = kmem_cache_create("kcopyd_job", - sizeof(struct kcopyd_job) * (SPLIT_COUNT + 1), - __alignof__(struct kcopyd_job), 0, NULL); - if (!_job_cache) - return -ENOMEM; - - zero_page_list.next = &zero_page_list; - zero_page_list.page = ZERO_PAGE(0); - - return 0; -} - -void dm_kcopyd_exit(void) -{ - kmem_cache_destroy(_job_cache); - _job_cache = NULL; -} - -/* - * Functions to push and pop a job onto the head of a given job - * list. - */ -static struct kcopyd_job *pop(struct list_head *jobs, - struct dm_kcopyd_client *kc) -{ - struct kcopyd_job *job = NULL; - unsigned long flags; - - spin_lock_irqsave(&kc->job_lock, flags); - - if (!list_empty(jobs)) { - job = list_entry(jobs->next, struct kcopyd_job, list); - list_del(&job->list); - } - spin_unlock_irqrestore(&kc->job_lock, flags); - - return job; -} - -static void push(struct list_head *jobs, struct kcopyd_job *job) -{ - unsigned long flags; - struct dm_kcopyd_client *kc = job->kc; - - spin_lock_irqsave(&kc->job_lock, flags); - list_add_tail(&job->list, jobs); - spin_unlock_irqrestore(&kc->job_lock, flags); -} - - -static void push_head(struct list_head *jobs, struct kcopyd_job *job) -{ - unsigned long flags; - struct dm_kcopyd_client *kc = job->kc; - - spin_lock_irqsave(&kc->job_lock, flags); - list_add(&job->list, jobs); - spin_unlock_irqrestore(&kc->job_lock, flags); -} - -/* - * These three functions process 1 item from the corresponding - * job list. - * - * They return: - * < 0: error - * 0: success - * > 0: can't process yet. - */ -static int run_complete_job(struct kcopyd_job *job) -{ - void *context = job->context; - int read_err = job->read_err; - unsigned long write_err = job->write_err; - dm_kcopyd_notify_fn fn = job->fn; - struct dm_kcopyd_client *kc = job->kc; - - if (job->pages && job->pages != &zero_page_list) - kcopyd_put_pages(kc, job->pages); - /* - * If this is the master job, the sub jobs have already - * completed so we can free everything. 
- */ - if (job->master_job == job) - mempool_free(job, kc->job_pool); - fn(read_err, write_err, context); - - if (atomic_dec_and_test(&kc->nr_jobs)) - wake_up(&kc->destroyq); - - return 0; -} - -static void complete_io(unsigned long error, void *context) -{ - struct kcopyd_job *job = (struct kcopyd_job *) context; - struct dm_kcopyd_client *kc = job->kc; - - if (error) { - if (job->rw == WRITE) - job->write_err |= error; - else - job->read_err = 1; - - if (!test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) { - push(&kc->complete_jobs, job); - wake(kc); - return; - } - } - - if (job->rw == WRITE) - push(&kc->complete_jobs, job); - - else { - job->rw = WRITE; - push(&kc->io_jobs, job); - } - - wake(kc); -} - -/* - * Request io on as many buffer heads as we can currently get for - * a particular job. - */ -static int run_io_job(struct kcopyd_job *job) -{ - int r; - struct dm_io_request io_req = { - .bi_rw = job->rw, - .mem.type = DM_IO_PAGE_LIST, - .mem.ptr.pl = job->pages, - .mem.offset = 0, - .notify.fn = complete_io, - .notify.context = job, - .client = job->kc->io_client, - }; - - if (job->rw == READ) - r = dm_io(&io_req, 1, &job->source, NULL); - else - r = dm_io(&io_req, job->num_dests, job->dests, NULL); - - return r; -} - -static int run_pages_job(struct kcopyd_job *job) -{ - int r; - unsigned nr_pages = dm_div_up(job->dests[0].count, PAGE_SIZE >> 9); - - r = kcopyd_get_pages(job->kc, nr_pages, &job->pages); - if (!r) { - /* this job is ready for io */ - push(&job->kc->io_jobs, job); - return 0; - } - - if (r == -ENOMEM) - /* can't complete now */ - return 1; - - return r; -} - -/* - * Run through a list for as long as possible. Returns the count - * of successful jobs. - */ -static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, - int (*fn) (struct kcopyd_job *)) -{ - struct kcopyd_job *job; - int r, count = 0; - - while ((job = pop(jobs, kc))) { - - r = fn(job); - - if (r < 0) { - /* error this rogue job */ - if (job->rw == WRITE) - job->write_err = (unsigned long) -1L; - else - job->read_err = 1; - push(&kc->complete_jobs, job); - break; - } - - if (r > 0) { - /* - * We couldn't service this job ATM, so - * push this job back onto the list. - */ - push_head(jobs, job); - break; - } - - count++; - } - - return count; -} - -/* - * kcopyd does this every time it's woken up. - */ -static void do_work(struct work_struct *work) -{ - struct dm_kcopyd_client *kc = container_of(work, - struct dm_kcopyd_client, kcopyd_work); - struct blk_plug plug; - - /* - * The order that these are called is *very* important. - * complete jobs can free some pages for pages jobs. - * Pages jobs when successful will jump onto the io jobs - * list. io jobs call wake when they complete and it all - * starts again. - */ - blk_start_plug(&plug); - process_jobs(&kc->complete_jobs, kc, run_complete_job); - process_jobs(&kc->pages_jobs, kc, run_pages_job); - process_jobs(&kc->io_jobs, kc, run_io_job); - blk_finish_plug(&plug); -} - -/* - * If we are copying a small region we just dispatch a single job - * to do the copy, otherwise the io has to be split up into many - * jobs. 
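
process_jobs() and do_work() above rely on a three-way return convention from the per-list handlers: a negative value is a hard error, zero means the job was serviced, and a positive value means "can't make progress yet", in which case the job goes back on the head of its list and the worker stops until the next wake-up. The ordering in do_work() matters too, since completing jobs releases pages that the pages list is waiting for. The self-contained sketch below restates just the list-draining convention; the job structure and helpers are invented for the example.

#include <stddef.h>

struct qjob {
    struct qjob *next;
    int ready;    /* 0: resources missing for now, 1: can be serviced */
    int failed;   /* simulate a hard error */
};

static struct qjob *pop(struct qjob **head)
{
    struct qjob *j = *head;

    if (j)
        *head = j->next;
    return j;
}

static void push_head(struct qjob **head, struct qjob *j)
{
    j->next = *head;
    *head = j;
}

/* Same convention as the kcopyd handlers: <0 error, 0 success, >0 not yet. */
static int run_one(struct qjob *j)
{
    if (j->failed)
        return -1;
    return j->ready ? 0 : 1;
}

static int process_list(struct qjob **head)
{
    struct qjob *j;
    int count = 0;

    while ((j = pop(head))) {
        int r = run_one(j);

        if (r < 0)
            break;                 /* hard error: the job is completed as failed */
        if (r > 0) {
            push_head(head, j);    /* can't service it yet: retry on the next wake-up */
            break;
        }
        count++;                   /* success: keep draining the list */
    }
    return count;
}

int main(void)
{
    struct qjob a = { NULL, 1, 0 }, b = { NULL, 0, 0 };
    struct qjob *head = NULL;

    push_head(&head, &b);          /* b has to wait for resources */
    push_head(&head, &a);          /* a can run straight away */
    return process_list(&head) == 1 ? 0 : 1;
}
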
- */ -static void dispatch_job(struct kcopyd_job *job) -{ - struct dm_kcopyd_client *kc = job->kc; - atomic_inc(&kc->nr_jobs); - if (unlikely(!job->source.count)) - push(&kc->complete_jobs, job); - else if (job->pages == &zero_page_list) - push(&kc->io_jobs, job); - else - push(&kc->pages_jobs, job); - wake(kc); -} - -static void segment_complete(int read_err, unsigned long write_err, - void *context) -{ - /* FIXME: tidy this function */ - sector_t progress = 0; - sector_t count = 0; - struct kcopyd_job *sub_job = (struct kcopyd_job *) context; - struct kcopyd_job *job = sub_job->master_job; - struct dm_kcopyd_client *kc = job->kc; - - mutex_lock(&job->lock); - - /* update the error */ - if (read_err) - job->read_err = 1; - - if (write_err) - job->write_err |= write_err; - - /* - * Only dispatch more work if there hasn't been an error. - */ - if ((!job->read_err && !job->write_err) || - test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) { - /* get the next chunk of work */ - progress = job->progress; - count = job->source.count - progress; - if (count) { - if (count > SUB_JOB_SIZE) - count = SUB_JOB_SIZE; - - job->progress += count; - } - } - mutex_unlock(&job->lock); - - if (count) { - int i; - - *sub_job = *job; - sub_job->source.sector += progress; - sub_job->source.count = count; - - for (i = 0; i < job->num_dests; i++) { - sub_job->dests[i].sector += progress; - sub_job->dests[i].count = count; - } - - sub_job->fn = segment_complete; - sub_job->context = sub_job; - dispatch_job(sub_job); - - } else if (atomic_dec_and_test(&job->sub_jobs)) { - - /* - * Queue the completion callback to the kcopyd thread. - * - * Some callers assume that all the completions are called - * from a single thread and don't race with each other. - * - * We must not call the callback directly here because this - * code may not be executing in the thread. - */ - push(&kc->complete_jobs, job); - wake(kc); - } -} - -/* - * Create some sub jobs to share the work between them. - */ -static void split_job(struct kcopyd_job *master_job) -{ - int i; - - atomic_inc(&master_job->kc->nr_jobs); - - atomic_set(&master_job->sub_jobs, SPLIT_COUNT); - for (i = 0; i < SPLIT_COUNT; i++) { - master_job[i + 1].master_job = master_job; - segment_complete(0, 0u, &master_job[i + 1]); - } -} - -int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, - unsigned int num_dests, struct dm_io_region *dests, - unsigned int flags, dm_kcopyd_notify_fn fn, void *context) -{ - struct kcopyd_job *job; - - /* - * Allocate an array of jobs consisting of one master job - * followed by SPLIT_COUNT sub jobs. - */ - job = mempool_alloc(kc->job_pool, GFP_NOIO); - - /* - * set up for the read. 
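
segment_complete() above hands out the remaining work in SUB_JOB_SIZE chunks: each finishing sub-job claims the next chunk by advancing job->progress (under job->lock, because SPLIT_COUNT sub-jobs race for it) until the whole source range has been dispatched. The claim itself reduces to the small helper below, a userspace restatement with sector counts as plain integers.

#include <stdio.h>

#define SUB_JOB_SIZE 128    /* sectors per sub-job, as in dm-kcopyd.c */

/*
 * Claim the next chunk of a copy that is 'total' sectors long.  '*progress'
 * counts the sectors already handed out; the return value is the size of the
 * chunk just claimed, or 0 once the whole range has been dispatched.
 */
static unsigned next_chunk(unsigned total, unsigned *progress)
{
    unsigned count = total - *progress;

    if (count > SUB_JOB_SIZE)
        count = SUB_JOB_SIZE;
    *progress += count;
    return count;
}

int main(void)
{
    unsigned progress = 0, count;

    while ((count = next_chunk(1000, &progress)))
        printf("claimed %u sectors, progress now %u\n", count, progress);
    return 0;
}
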
- */ - job->kc = kc; - job->flags = flags; - job->read_err = 0; - job->write_err = 0; - - job->num_dests = num_dests; - memcpy(&job->dests, dests, sizeof(*dests) * num_dests); - - if (from) { - job->source = *from; - job->pages = NULL; - job->rw = READ; - } else { - memset(&job->source, 0, sizeof job->source); - job->source.count = job->dests[0].count; - job->pages = &zero_page_list; - job->rw = WRITE; - } - - job->fn = fn; - job->context = context; - job->master_job = job; - - if (job->source.count <= SUB_JOB_SIZE) - dispatch_job(job); - else { - mutex_init(&job->lock); - job->progress = 0; - split_job(job); - } - - return 0; -} -EXPORT_SYMBOL(dm_kcopyd_copy); - -int dm_kcopyd_zero(struct dm_kcopyd_client *kc, - unsigned num_dests, struct dm_io_region *dests, - unsigned flags, dm_kcopyd_notify_fn fn, void *context) -{ - return dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context); -} -EXPORT_SYMBOL(dm_kcopyd_zero); - -void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, - dm_kcopyd_notify_fn fn, void *context) -{ - struct kcopyd_job *job; - - job = mempool_alloc(kc->job_pool, GFP_NOIO); - - memset(job, 0, sizeof(struct kcopyd_job)); - job->kc = kc; - job->fn = fn; - job->context = context; - job->master_job = job; - - atomic_inc(&kc->nr_jobs); - - return job; -} -EXPORT_SYMBOL(dm_kcopyd_prepare_callback); - -void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err) -{ - struct kcopyd_job *job = j; - struct dm_kcopyd_client *kc = job->kc; - - job->read_err = read_err; - job->write_err = write_err; - - push(&kc->complete_jobs, job); - wake(kc); -} -EXPORT_SYMBOL(dm_kcopyd_do_callback); - -/* - * Cancels a kcopyd job, eg. someone might be deactivating a - * mirror. - */ -#if 0 -int kcopyd_cancel(struct kcopyd_job *job, int block) -{ - /* FIXME: finish */ - return -1; -} -#endif /* 0 */ - -/*----------------------------------------------------------------- - * Client setup - *---------------------------------------------------------------*/ -struct dm_kcopyd_client *dm_kcopyd_client_create(void) -{ - int r = -ENOMEM; - struct dm_kcopyd_client *kc; - - kc = kmalloc(sizeof(*kc), GFP_KERNEL); - if (!kc) - return ERR_PTR(-ENOMEM); - - spin_lock_init(&kc->job_lock); - INIT_LIST_HEAD(&kc->complete_jobs); - INIT_LIST_HEAD(&kc->io_jobs); - INIT_LIST_HEAD(&kc->pages_jobs); - - kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); - if (!kc->job_pool) - goto bad_slab; - - INIT_WORK(&kc->kcopyd_work, do_work); - kc->kcopyd_wq = alloc_workqueue("kcopyd", - WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); - if (!kc->kcopyd_wq) - goto bad_workqueue; - - kc->pages = NULL; - kc->nr_reserved_pages = kc->nr_free_pages = 0; - r = client_reserve_pages(kc, RESERVE_PAGES); - if (r) - goto bad_client_pages; - - kc->io_client = dm_io_client_create(); - if (IS_ERR(kc->io_client)) { - r = PTR_ERR(kc->io_client); - goto bad_io_client; - } - - init_waitqueue_head(&kc->destroyq); - atomic_set(&kc->nr_jobs, 0); - - return kc; - -bad_io_client: - client_free_pages(kc); -bad_client_pages: - destroy_workqueue(kc->kcopyd_wq); -bad_workqueue: - mempool_destroy(kc->job_pool); -bad_slab: - kfree(kc); - - return ERR_PTR(r); -} -EXPORT_SYMBOL(dm_kcopyd_client_create); - -void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) -{ - /* Wait for completion of all jobs submitted by this client. 
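
Taken together, the exported entry points above give a target a very small copy API: create a client once, describe a source and up to DM_KCOPYD_MAX_REGIONS destinations as dm_io_region ranges, and receive a callback carrying the accumulated read and write error bits when all the I/O has landed. The fragment below is a sketch of that call sequence as a target built against this tree might use it; it is kernel code, not a standalone program, copy_one_extent() is an invented helper, and the block devices are assumed to be ones the caller already holds references on.

#define DM_MSG_PREFIX "kcopyd-example"

#include <linux/completion.h>
#include <linux/device-mapper.h>
#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>

/* Completion callback: read_err is a flag, write_err is a bitmask of failed
 * destinations, context is whatever was handed to dm_kcopyd_copy(). */
static void copy_done(int read_err, unsigned long write_err, void *context)
{
    if (read_err || write_err)
        DMERR("copy failed (read_err=%d write_err=0x%lx)",
              read_err, write_err);
    complete(context);    /* wake up whoever queued the copy */
}

static int copy_one_extent(struct dm_kcopyd_client *kc,
                           struct block_device *src, struct block_device *dst,
                           sector_t src_sector, sector_t dst_sector,
                           sector_t nr_sectors, struct completion *done)
{
    struct dm_io_region from = {
        .bdev = src,
        .sector = src_sector,
        .count = nr_sectors,
    };
    struct dm_io_region to = {
        .bdev = dst,
        .sector = dst_sector,
        .count = nr_sectors,
    };

    /* One destination, no flags; copy_done() runs when the I/O has landed. */
    return dm_kcopyd_copy(kc, &from, 1, &to, 0, copy_done, done);
}

The client itself would come from dm_kcopyd_client_create() at target construction time and be torn down with dm_kcopyd_client_destroy() in the destructor, which waits for any outstanding jobs first.
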
*/ - wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); - - BUG_ON(!list_empty(&kc->complete_jobs)); - BUG_ON(!list_empty(&kc->io_jobs)); - BUG_ON(!list_empty(&kc->pages_jobs)); - destroy_workqueue(kc->kcopyd_wq); - dm_io_client_destroy(kc->io_client); - client_free_pages(kc); - mempool_destroy(kc->job_pool); - kfree(kc); -} -EXPORT_SYMBOL(dm_kcopyd_client_destroy); diff --git a/ANDROID_3.4.5/drivers/md/dm-linear.c b/ANDROID_3.4.5/drivers/md/dm-linear.c deleted file mode 100644 index 3639eeab..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-linear.c +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Copyright (C) 2001-2003 Sistina Software (UK) Limited. - * - * This file is released under the GPL. - */ - -#include "dm.h" -#include <linux/module.h> -#include <linux/init.h> -#include <linux/blkdev.h> -#include <linux/bio.h> -#include <linux/slab.h> -#include <linux/device-mapper.h> - -#define DM_MSG_PREFIX "linear" - -/* - * Linear: maps a linear range of a device. - */ -struct linear_c { - struct dm_dev *dev; - sector_t start; -}; - -/* - * Construct a linear mapping: <dev_path> <offset> - */ -static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) -{ - struct linear_c *lc; - unsigned long long tmp; - char dummy; - - if (argc != 2) { - ti->error = "Invalid argument count"; - return -EINVAL; - } - - lc = kmalloc(sizeof(*lc), GFP_KERNEL); - if (lc == NULL) { - ti->error = "dm-linear: Cannot allocate linear context"; - return -ENOMEM; - } - - if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1) { - ti->error = "dm-linear: Invalid device sector"; - goto bad; - } - lc->start = tmp; - - if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &lc->dev)) { - ti->error = "dm-linear: Device lookup failed"; - goto bad; - } - - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; - ti->private = lc; - return 0; - - bad: - kfree(lc); - return -EINVAL; -} - -static void linear_dtr(struct dm_target *ti) -{ - struct linear_c *lc = (struct linear_c *) ti->private; - - dm_put_device(ti, lc->dev); - kfree(lc); -} - -static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector) -{ - struct linear_c *lc = ti->private; - - return lc->start + dm_target_offset(ti, bi_sector); -} - -static void linear_map_bio(struct dm_target *ti, struct bio *bio) -{ - struct linear_c *lc = ti->private; - - bio->bi_bdev = lc->dev->bdev; - if (bio_sectors(bio)) - bio->bi_sector = linear_map_sector(ti, bio->bi_sector); -} - -static int linear_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) -{ - linear_map_bio(ti, bio); - - return DM_MAPIO_REMAPPED; -} - -static int linear_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) -{ - struct linear_c *lc = (struct linear_c *) ti->private; - - switch (type) { - case STATUSTYPE_INFO: - result[0] = '\0'; - break; - - case STATUSTYPE_TABLE: - snprintf(result, maxlen, "%s %llu", lc->dev->name, - (unsigned long long)lc->start); - break; - } - return 0; -} - -static int linear_ioctl(struct dm_target *ti, unsigned int cmd, - unsigned long arg) -{ - struct linear_c *lc = (struct linear_c *) ti->private; - struct dm_dev *dev = lc->dev; - int r = 0; - - /* - * Only pass ioctls through if the device sizes match exactly. - */ - if (lc->start || - ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT) - r = scsi_verify_blk_ioctl(NULL, cmd); - - return r ? 
: __blkdev_driver_ioctl(dev->bdev, dev->mode, cmd, arg); -} - -static int linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct linear_c *lc = ti->private; - struct request_queue *q = bdev_get_queue(lc->dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = lc->dev->bdev; - bvm->bi_sector = linear_map_sector(ti, bvm->bi_sector); - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - -static int linear_iterate_devices(struct dm_target *ti, - iterate_devices_callout_fn fn, void *data) -{ - struct linear_c *lc = ti->private; - - return fn(ti, lc->dev, lc->start, ti->len, data); -} - -static struct target_type linear_target = { - .name = "linear", - .version = {1, 1, 0}, - .module = THIS_MODULE, - .ctr = linear_ctr, - .dtr = linear_dtr, - .map = linear_map, - .status = linear_status, - .ioctl = linear_ioctl, - .merge = linear_merge, - .iterate_devices = linear_iterate_devices, -}; - -int __init dm_linear_init(void) -{ - int r = dm_register_target(&linear_target); - - if (r < 0) - DMERR("register failed %d", r); - - return r; -} - -void dm_linear_exit(void) -{ - dm_unregister_target(&linear_target); -} diff --git a/ANDROID_3.4.5/drivers/md/dm-log-userspace-base.c b/ANDROID_3.4.5/drivers/md/dm-log-userspace-base.c deleted file mode 100644 index 9429159d..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-log-userspace-base.c +++ /dev/null @@ -1,818 +0,0 @@ -/* - * Copyright (C) 2006-2009 Red Hat, Inc. - * - * This file is released under the LGPL. - */ - -#include <linux/bio.h> -#include <linux/slab.h> -#include <linux/dm-dirty-log.h> -#include <linux/device-mapper.h> -#include <linux/dm-log-userspace.h> -#include <linux/module.h> - -#include "dm-log-userspace-transfer.h" - -#define DM_LOG_USERSPACE_VSN "1.1.0" - -struct flush_entry { - int type; - region_t region; - struct list_head list; -}; - -/* - * This limit on the number of mark and clear request is, to a degree, - * arbitrary. However, there is some basis for the choice in the limits - * imposed on the size of data payload by dm-log-userspace-transfer.c: - * dm_consult_userspace(). - */ -#define MAX_FLUSH_GROUP_COUNT 32 - -struct log_c { - struct dm_target *ti; - struct dm_dev *log_dev; - uint32_t region_size; - region_t region_count; - uint64_t luid; - char uuid[DM_UUID_LEN]; - - char *usr_argv_str; - uint32_t usr_argc; - - /* - * in_sync_hint gets set when doing is_remote_recovering. It - * represents the first region that needs recovery. IOW, the - * first zero bit of sync_bits. This can be useful for to limit - * traffic for calls like is_remote_recovering and get_resync_work, - * but be take care in its use for anything else. - */ - uint64_t in_sync_hint; - - /* - * Mark and clear requests are held until a flush is issued - * so that we can group, and thereby limit, the amount of - * network traffic between kernel and userspace. The 'flush_lock' - * is used to protect these lists. 
- */ - spinlock_t flush_lock; - struct list_head mark_list; - struct list_head clear_list; -}; - -static mempool_t *flush_entry_pool; - -static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data) -{ - return kmalloc(sizeof(struct flush_entry), gfp_mask); -} - -static void flush_entry_free(void *element, void *pool_data) -{ - kfree(element); -} - -static int userspace_do_request(struct log_c *lc, const char *uuid, - int request_type, char *data, size_t data_size, - char *rdata, size_t *rdata_size) -{ - int r; - - /* - * If the server isn't there, -ESRCH is returned, - * and we must keep trying until the server is - * restored. - */ -retry: - r = dm_consult_userspace(uuid, lc->luid, request_type, data, - data_size, rdata, rdata_size); - - if (r != -ESRCH) - return r; - - DMERR(" Userspace log server not found."); - while (1) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(2*HZ); - DMWARN("Attempting to contact userspace log server..."); - r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR, - lc->usr_argv_str, - strlen(lc->usr_argv_str) + 1, - NULL, NULL); - if (!r) - break; - } - DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete"); - r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL, - 0, NULL, NULL); - if (!r) - goto retry; - - DMERR("Error trying to resume userspace log: %d", r); - - return -ESRCH; -} - -static int build_constructor_string(struct dm_target *ti, - unsigned argc, char **argv, - char **ctr_str) -{ - int i, str_size; - char *str = NULL; - - *ctr_str = NULL; - - for (i = 0, str_size = 0; i < argc; i++) - str_size += strlen(argv[i]) + 1; /* +1 for space between args */ - - str_size += 20; /* Max number of chars in a printed u64 number */ - - str = kzalloc(str_size, GFP_KERNEL); - if (!str) { - DMWARN("Unable to allocate memory for constructor string"); - return -ENOMEM; - } - - str_size = sprintf(str, "%llu", (unsigned long long)ti->len); - for (i = 0; i < argc; i++) - str_size += sprintf(str + str_size, " %s", argv[i]); - - *ctr_str = str; - return str_size; -} - -/* - * userspace_ctr - * - * argv contains: - * <UUID> <other args> - * Where 'other args' is the userspace implementation specific log - * arguments. An example might be: - * <UUID> clustered-disk <arg count> <log dev> <region_size> [[no]sync] - * - * So, this module will strip off the <UUID> for identification purposes - * when communicating with userspace about a log; but will pass on everything - * else. 
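
userspace_do_request() above wraps every round trip to the userspace log server in a reconnect loop: -ESRCH means the server is gone, so the code keeps re-sending the saved DM_ULOG_CTR constructor string until a server answers, issues DM_ULOG_RESUME, and only then replays the request that originally failed. Stripped of the dm_consult_userspace() plumbing, the control flow is the loop sketched below; send_request() is a stand-in transport invented for the example (it pretends the server is missing exactly once), and the kernel version also sleeps about two seconds between reconnect attempts.

#include <errno.h>
#include <stdio.h>

enum { CTR_REQUEST = 1, RESUME_REQUEST = 2, FLUSH_REQUEST = 3 };

/* Stand-in transport: pretends the server is absent for the very first call. */
static int send_request(int request_type, const void *data, void *reply)
{
    static int calls;

    (void)request_type;
    (void)data;
    (void)reply;
    return calls++ == 0 ? -ESRCH : 0;
}

static int do_request(int request_type, const void *data, void *reply,
                      const char *ctr_string)
{
    int r;

retry:
    r = send_request(request_type, data, reply);
    if (r != -ESRCH)
        return r;                /* success, or an error the caller deals with */

    /* The server vanished: keep re-registering until one answers... */
    while (send_request(CTR_REQUEST, ctr_string, NULL))
        ;                        /* the kernel sleeps ~2s between attempts */

    /* ...then resume the log and replay the request that failed. */
    if (!send_request(RESUME_REQUEST, NULL, NULL))
        goto retry;

    return -ESRCH;
}

int main(void)
{
    return do_request(FLUSH_REQUEST, NULL, NULL, "example ctr string");
}
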
- */ -static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, - unsigned argc, char **argv) -{ - int r = 0; - int str_size; - char *ctr_str = NULL; - struct log_c *lc = NULL; - uint64_t rdata; - size_t rdata_size = sizeof(rdata); - char *devices_rdata = NULL; - size_t devices_rdata_size = DM_NAME_LEN; - - if (argc < 3) { - DMWARN("Too few arguments to userspace dirty log"); - return -EINVAL; - } - - lc = kzalloc(sizeof(*lc), GFP_KERNEL); - if (!lc) { - DMWARN("Unable to allocate userspace log context."); - return -ENOMEM; - } - - /* The ptr value is sufficient for local unique id */ - lc->luid = (unsigned long)lc; - - lc->ti = ti; - - if (strlen(argv[0]) > (DM_UUID_LEN - 1)) { - DMWARN("UUID argument too long."); - kfree(lc); - return -EINVAL; - } - - strncpy(lc->uuid, argv[0], DM_UUID_LEN); - spin_lock_init(&lc->flush_lock); - INIT_LIST_HEAD(&lc->mark_list); - INIT_LIST_HEAD(&lc->clear_list); - - str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str); - if (str_size < 0) { - kfree(lc); - return str_size; - } - - devices_rdata = kzalloc(devices_rdata_size, GFP_KERNEL); - if (!devices_rdata) { - DMERR("Failed to allocate memory for device information"); - r = -ENOMEM; - goto out; - } - - /* - * Send table string and get back any opened device. - */ - r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR, - ctr_str, str_size, - devices_rdata, &devices_rdata_size); - - if (r < 0) { - if (r == -ESRCH) - DMERR("Userspace log server not found"); - else - DMERR("Userspace log server failed to create log"); - goto out; - } - - /* Since the region size does not change, get it now */ - rdata_size = sizeof(rdata); - r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE, - NULL, 0, (char *)&rdata, &rdata_size); - - if (r) { - DMERR("Failed to get region size of dirty log"); - goto out; - } - - lc->region_size = (uint32_t)rdata; - lc->region_count = dm_sector_div_up(ti->len, lc->region_size); - - if (devices_rdata_size) { - if (devices_rdata[devices_rdata_size - 1] != '\0') { - DMERR("DM_ULOG_CTR device return string not properly terminated"); - r = -EINVAL; - goto out; - } - r = dm_get_device(ti, devices_rdata, - dm_table_get_mode(ti->table), &lc->log_dev); - if (r) - DMERR("Failed to register %s with device-mapper", - devices_rdata); - } -out: - kfree(devices_rdata); - if (r) { - kfree(lc); - kfree(ctr_str); - } else { - lc->usr_argv_str = ctr_str; - lc->usr_argc = argc; - log->context = lc; - } - - return r; -} - -static void userspace_dtr(struct dm_dirty_log *log) -{ - struct log_c *lc = log->context; - - (void) dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR, - NULL, 0, - NULL, NULL); - - if (lc->log_dev) - dm_put_device(lc->ti, lc->log_dev); - - kfree(lc->usr_argv_str); - kfree(lc); - - return; -} - -static int userspace_presuspend(struct dm_dirty_log *log) -{ - int r; - struct log_c *lc = log->context; - - r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND, - NULL, 0, - NULL, NULL); - - return r; -} - -static int userspace_postsuspend(struct dm_dirty_log *log) -{ - int r; - struct log_c *lc = log->context; - - r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND, - NULL, 0, - NULL, NULL); - - return r; -} - -static int userspace_resume(struct dm_dirty_log *log) -{ - int r; - struct log_c *lc = log->context; - - lc->in_sync_hint = 0; - r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME, - NULL, 0, - NULL, NULL); - - return r; -} - -static uint32_t userspace_get_region_size(struct dm_dirty_log *log) -{ - 
struct log_c *lc = log->context; - - return lc->region_size; -} - -/* - * userspace_is_clean - * - * Check whether a region is clean. If there is any sort of - * failure when consulting the server, we return not clean. - * - * Returns: 1 if clean, 0 otherwise - */ -static int userspace_is_clean(struct dm_dirty_log *log, region_t region) -{ - int r; - uint64_t region64 = (uint64_t)region; - int64_t is_clean; - size_t rdata_size; - struct log_c *lc = log->context; - - rdata_size = sizeof(is_clean); - r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN, - (char *)®ion64, sizeof(region64), - (char *)&is_clean, &rdata_size); - - return (r) ? 0 : (int)is_clean; -} - -/* - * userspace_in_sync - * - * Check if the region is in-sync. If there is any sort - * of failure when consulting the server, we assume that - * the region is not in sync. - * - * If 'can_block' is set, return immediately - * - * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK - */ -static int userspace_in_sync(struct dm_dirty_log *log, region_t region, - int can_block) -{ - int r; - uint64_t region64 = region; - int64_t in_sync; - size_t rdata_size; - struct log_c *lc = log->context; - - /* - * We can never respond directly - even if in_sync_hint is - * set. This is because another machine could see a device - * failure and mark the region out-of-sync. If we don't go - * to userspace to ask, we might think the region is in-sync - * and allow a read to pick up data that is stale. (This is - * very unlikely if a device actually fails; but it is very - * likely if a connection to one device from one machine fails.) - * - * There still might be a problem if the mirror caches the region - * state as in-sync... but then this call would not be made. So, - * that is a mirror problem. - */ - if (!can_block) - return -EWOULDBLOCK; - - rdata_size = sizeof(in_sync); - r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC, - (char *)®ion64, sizeof(region64), - (char *)&in_sync, &rdata_size); - return (r) ? 0 : (int)in_sync; -} - -static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list) -{ - int r = 0; - struct flush_entry *fe; - - list_for_each_entry(fe, flush_list, list) { - r = userspace_do_request(lc, lc->uuid, fe->type, - (char *)&fe->region, - sizeof(fe->region), - NULL, NULL); - if (r) - break; - } - - return r; -} - -static int flush_by_group(struct log_c *lc, struct list_head *flush_list) -{ - int r = 0; - int count; - uint32_t type = 0; - struct flush_entry *fe, *tmp_fe; - LIST_HEAD(tmp_list); - uint64_t group[MAX_FLUSH_GROUP_COUNT]; - - /* - * Group process the requests - */ - while (!list_empty(flush_list)) { - count = 0; - - list_for_each_entry_safe(fe, tmp_fe, flush_list, list) { - group[count] = fe->region; - count++; - - list_move(&fe->list, &tmp_list); - - type = fe->type; - if (count >= MAX_FLUSH_GROUP_COUNT) - break; - } - - r = userspace_do_request(lc, lc->uuid, type, - (char *)(group), - count * sizeof(uint64_t), - NULL, NULL); - if (r) { - /* Group send failed. Attempt one-by-one. */ - list_splice_init(&tmp_list, flush_list); - r = flush_one_by_one(lc, flush_list); - break; - } - } - - /* - * Must collect flush_entrys that were successfully processed - * as a group so that they will be free'd by the caller. - */ - list_splice_init(&tmp_list, flush_list); - - return r; -} - -/* - * userspace_flush - * - * This function is ok to block. - * The flush happens in two stages. First, it sends all - * clear/mark requests that are on the list. Then it - * tells the server to commit them. 
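
flush_by_group() above packs the queued mark/clear entries into fixed-size batches so the server sees one request per group rather than one per region. A user-space sketch of that batching loop, assuming a group size of 32 and made-up region numbers; a non-zero send result would trigger the same one-by-one fallback.

#include <stdio.h>

#define GROUP_COUNT 32                  /* stands in for MAX_FLUSH_GROUP_COUNT */

/* One request covering a whole batch of regions. */
static int send_group(const unsigned long long *group, int count)
{
        printf("one request for %d region(s), first=%llu\n", count, group[0]);
        return 0;                       /* non-zero would mean: retry one-by-one */
}

int main(void)
{
        unsigned long long pending[100], group[GROUP_COUNT];
        int n = 100, i = 0, count;

        for (count = 0; count < n; count++)
                pending[count] = count; /* fake region numbers */

        while (i < n) {
                for (count = 0; count < GROUP_COUNT && i < n; count++, i++)
                        group[count] = pending[i];
                if (send_group(group, count))
                        return 1;
        }
        return 0;
}
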
This gives the - * server a chance to optimise the commit, instead of - * doing it for every request. - * - * Additionally, we could implement another thread that - * sends the requests up to the server - reducing the - * load on flush. Then the flush would have less in - * the list and be responsible for the finishing commit. - * - * Returns: 0 on success, < 0 on failure - */ -static int userspace_flush(struct dm_dirty_log *log) -{ - int r = 0; - unsigned long flags; - struct log_c *lc = log->context; - LIST_HEAD(mark_list); - LIST_HEAD(clear_list); - struct flush_entry *fe, *tmp_fe; - - spin_lock_irqsave(&lc->flush_lock, flags); - list_splice_init(&lc->mark_list, &mark_list); - list_splice_init(&lc->clear_list, &clear_list); - spin_unlock_irqrestore(&lc->flush_lock, flags); - - if (list_empty(&mark_list) && list_empty(&clear_list)) - return 0; - - r = flush_by_group(lc, &mark_list); - if (r) - goto fail; - - r = flush_by_group(lc, &clear_list); - if (r) - goto fail; - - r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, - NULL, 0, NULL, NULL); - -fail: - /* - * We can safely remove these entries, even if failure. - * Calling code will receive an error and will know that - * the log facility has failed. - */ - list_for_each_entry_safe(fe, tmp_fe, &mark_list, list) { - list_del(&fe->list); - mempool_free(fe, flush_entry_pool); - } - list_for_each_entry_safe(fe, tmp_fe, &clear_list, list) { - list_del(&fe->list); - mempool_free(fe, flush_entry_pool); - } - - if (r) - dm_table_event(lc->ti->table); - - return r; -} - -/* - * userspace_mark_region - * - * This function should avoid blocking unless absolutely required. - * (Memory allocation is valid for blocking.) - */ -static void userspace_mark_region(struct dm_dirty_log *log, region_t region) -{ - unsigned long flags; - struct log_c *lc = log->context; - struct flush_entry *fe; - - /* Wait for an allocation, but _never_ fail */ - fe = mempool_alloc(flush_entry_pool, GFP_NOIO); - BUG_ON(!fe); - - spin_lock_irqsave(&lc->flush_lock, flags); - fe->type = DM_ULOG_MARK_REGION; - fe->region = region; - list_add(&fe->list, &lc->mark_list); - spin_unlock_irqrestore(&lc->flush_lock, flags); - - return; -} - -/* - * userspace_clear_region - * - * This function must not block. - * So, the alloc can't block. In the worst case, it is ok to - * fail. It would simply mean we can't clear the region. - * Does nothing to current sync context, but does mean - * the region will be re-sync'ed on a reload of the mirror - * even though it is in-sync. - */ -static void userspace_clear_region(struct dm_dirty_log *log, region_t region) -{ - unsigned long flags; - struct log_c *lc = log->context; - struct flush_entry *fe; - - /* - * If we fail to allocate, we skip the clearing of - * the region. This doesn't hurt us in any way, except - * to cause the region to be resync'ed when the - * device is activated next time. - */ - fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC); - if (!fe) { - DMERR("Failed to allocate memory to clear region."); - return; - } - - spin_lock_irqsave(&lc->flush_lock, flags); - fe->type = DM_ULOG_CLEAR_REGION; - fe->region = region; - list_add(&fe->list, &lc->clear_list); - spin_unlock_irqrestore(&lc->flush_lock, flags); - - return; -} - -/* - * userspace_get_resync_work - * - * Get a region that needs recovery. It is valid to return - * an error for this function. 
- * - * Returns: 1 if region filled, 0 if no work, <0 on error - */ -static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region) -{ - int r; - size_t rdata_size; - struct log_c *lc = log->context; - struct { - int64_t i; /* 64-bit for mix arch compatibility */ - region_t r; - } pkg; - - if (lc->in_sync_hint >= lc->region_count) - return 0; - - rdata_size = sizeof(pkg); - r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK, - NULL, 0, - (char *)&pkg, &rdata_size); - - *region = pkg.r; - return (r) ? r : (int)pkg.i; -} - -/* - * userspace_set_region_sync - * - * Set the sync status of a given region. This function - * must not fail. - */ -static void userspace_set_region_sync(struct dm_dirty_log *log, - region_t region, int in_sync) -{ - int r; - struct log_c *lc = log->context; - struct { - region_t r; - int64_t i; - } pkg; - - pkg.r = region; - pkg.i = (int64_t)in_sync; - - r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC, - (char *)&pkg, sizeof(pkg), - NULL, NULL); - - /* - * It would be nice to be able to report failures. - * However, it is easy emough to detect and resolve. - */ - return; -} - -/* - * userspace_get_sync_count - * - * If there is any sort of failure when consulting the server, - * we assume that the sync count is zero. - * - * Returns: sync count on success, 0 on failure - */ -static region_t userspace_get_sync_count(struct dm_dirty_log *log) -{ - int r; - size_t rdata_size; - uint64_t sync_count; - struct log_c *lc = log->context; - - rdata_size = sizeof(sync_count); - r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT, - NULL, 0, - (char *)&sync_count, &rdata_size); - - if (r) - return 0; - - if (sync_count >= lc->region_count) - lc->in_sync_hint = lc->region_count; - - return (region_t)sync_count; -} - -/* - * userspace_status - * - * Returns: amount of space consumed - */ -static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, - char *result, unsigned maxlen) -{ - int r = 0; - char *table_args; - size_t sz = (size_t)maxlen; - struct log_c *lc = log->context; - - switch (status_type) { - case STATUSTYPE_INFO: - r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO, - NULL, 0, - result, &sz); - - if (r) { - sz = 0; - DMEMIT("%s 1 COM_FAILURE", log->type->name); - } - break; - case STATUSTYPE_TABLE: - sz = 0; - table_args = strchr(lc->usr_argv_str, ' '); - BUG_ON(!table_args); /* There will always be a ' ' */ - table_args++; - - DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc, - lc->uuid, table_args); - break; - } - return (r) ? 0 : (int)sz; -} - -/* - * userspace_is_remote_recovering - * - * Returns: 1 if region recovering, 0 otherwise - */ -static int userspace_is_remote_recovering(struct dm_dirty_log *log, - region_t region) -{ - int r; - uint64_t region64 = region; - struct log_c *lc = log->context; - static unsigned long long limit; - struct { - int64_t is_recovering; - uint64_t in_sync_hint; - } pkg; - size_t rdata_size = sizeof(pkg); - - /* - * Once the mirror has been reported to be in-sync, - * it will never again ask for recovery work. So, - * we can safely say there is not a remote machine - * recovering if the device is in-sync. (in_sync_hint - * must be reset at resume time.) 
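
The body just below throttles the server round trip: between real DM_ULOG_IS_REMOTE_RECOVERING queries it keeps answering "recovering" for up to HZ/4 jiffies. A user-space analogue of that throttle, using a 250 ms window and CLOCK_MONOTONIC in place of jiffies.

#define _POSIX_C_SOURCE 200809L
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* Ask the server at most once per 250 ms; otherwise give the
 * conservative answer ("still recovering"), as the kernel code does. */
static bool remote_recovering(void)
{
        static struct timespec limit;   /* earliest time we may ask again */
        struct timespec now;

        clock_gettime(CLOCK_MONOTONIC, &now);
        if (now.tv_sec < limit.tv_sec ||
            (now.tv_sec == limit.tv_sec && now.tv_nsec < limit.tv_nsec))
                return true;            /* too soon: assume the worst */

        limit = now;
        limit.tv_nsec += 250L * 1000 * 1000;
        if (limit.tv_nsec >= 1000000000L) {
                limit.tv_sec++;
                limit.tv_nsec -= 1000000000L;
        }

        /* ...the real query would be sent here... */
        return false;
}

int main(void)
{
        bool a = remote_recovering();
        bool b = remote_recovering();

        printf("%d %d\n", a, b);        /* 0 1: first call asks, second is throttled */
        return 0;
}
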
- */ - if (region < lc->in_sync_hint) - return 0; - else if (jiffies < limit) - return 1; - - limit = jiffies + (HZ / 4); - r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING, - (char *)®ion64, sizeof(region64), - (char *)&pkg, &rdata_size); - if (r) - return 1; - - lc->in_sync_hint = pkg.in_sync_hint; - - return (int)pkg.is_recovering; -} - -static struct dm_dirty_log_type _userspace_type = { - .name = "userspace", - .module = THIS_MODULE, - .ctr = userspace_ctr, - .dtr = userspace_dtr, - .presuspend = userspace_presuspend, - .postsuspend = userspace_postsuspend, - .resume = userspace_resume, - .get_region_size = userspace_get_region_size, - .is_clean = userspace_is_clean, - .in_sync = userspace_in_sync, - .flush = userspace_flush, - .mark_region = userspace_mark_region, - .clear_region = userspace_clear_region, - .get_resync_work = userspace_get_resync_work, - .set_region_sync = userspace_set_region_sync, - .get_sync_count = userspace_get_sync_count, - .status = userspace_status, - .is_remote_recovering = userspace_is_remote_recovering, -}; - -static int __init userspace_dirty_log_init(void) -{ - int r = 0; - - flush_entry_pool = mempool_create(100, flush_entry_alloc, - flush_entry_free, NULL); - - if (!flush_entry_pool) { - DMWARN("Unable to create flush_entry_pool: No memory."); - return -ENOMEM; - } - - r = dm_ulog_tfr_init(); - if (r) { - DMWARN("Unable to initialize userspace log communications"); - mempool_destroy(flush_entry_pool); - return r; - } - - r = dm_dirty_log_type_register(&_userspace_type); - if (r) { - DMWARN("Couldn't register userspace dirty log type"); - dm_ulog_tfr_exit(); - mempool_destroy(flush_entry_pool); - return r; - } - - DMINFO("version " DM_LOG_USERSPACE_VSN " loaded"); - return 0; -} - -static void __exit userspace_dirty_log_exit(void) -{ - dm_dirty_log_type_unregister(&_userspace_type); - dm_ulog_tfr_exit(); - mempool_destroy(flush_entry_pool); - - DMINFO("version " DM_LOG_USERSPACE_VSN " unloaded"); - return; -} - -module_init(userspace_dirty_log_init); -module_exit(userspace_dirty_log_exit); - -MODULE_DESCRIPTION(DM_NAME " userspace dirty log link"); -MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm-log-userspace-transfer.c b/ANDROID_3.4.5/drivers/md/dm-log-userspace-transfer.c deleted file mode 100644 index 08d9a207..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-log-userspace-transfer.c +++ /dev/null @@ -1,286 +0,0 @@ -/* - * Copyright (C) 2006-2009 Red Hat, Inc. - * - * This file is released under the LGPL. - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <net/sock.h> -#include <linux/workqueue.h> -#include <linux/connector.h> -#include <linux/device-mapper.h> -#include <linux/dm-log-userspace.h> - -#include "dm-log-userspace-transfer.h" - -static uint32_t dm_ulog_seq; - -/* - * Netlink/Connector is an unreliable protocol. How long should - * we wait for a response before assuming it was lost and retrying? - * (If we do receive a response after this time, it will be discarded - * and the response to the resent request will be waited for. 
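
Because the transport can drop messages, dm_consult_userspace() below waits a bounded time, then resends under a fresh sequence number; a late reply to the old number no longer matches any waiter and is dropped. A compact user-space sketch of that retry shape, with the lost first send simulated.

#include <stdio.h>

/* Pretend transport: the first request is "lost" (no reply in time),
 * every later one is answered.  Returns 1 if a matching reply arrived
 * within the timeout, 0 otherwise. */
static int send_and_wait(unsigned seq)
{
        static int sends;

        (void)seq;                      /* a real implementation would match on this */
        return sends++ != 0;
}

int main(void)
{
        unsigned next_seq = 0;

        for (;;) {
                unsigned seq = next_seq++;      /* fresh sequence number per attempt */

                if (send_and_wait(seq)) {
                        printf("reply matched seq %u\n", seq);
                        break;
                }
                /* Timed out: fall through and resend.  A reply to the old
                 * seq would find no waiter and simply be discarded. */
        }
        return 0;
}
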
- */ -#define DM_ULOG_RETRY_TIMEOUT (15 * HZ) - -/* - * Pre-allocated space for speed - */ -#define DM_ULOG_PREALLOCED_SIZE 512 -static struct cn_msg *prealloced_cn_msg; -static struct dm_ulog_request *prealloced_ulog_tfr; - -static struct cb_id ulog_cn_id = { - .idx = CN_IDX_DM, - .val = CN_VAL_DM_USERSPACE_LOG -}; - -static DEFINE_MUTEX(dm_ulog_lock); - -struct receiving_pkg { - struct list_head list; - struct completion complete; - - uint32_t seq; - - int error; - size_t *data_size; - char *data; -}; - -static DEFINE_SPINLOCK(receiving_list_lock); -static struct list_head receiving_list; - -static int dm_ulog_sendto_server(struct dm_ulog_request *tfr) -{ - int r; - struct cn_msg *msg = prealloced_cn_msg; - - memset(msg, 0, sizeof(struct cn_msg)); - - msg->id.idx = ulog_cn_id.idx; - msg->id.val = ulog_cn_id.val; - msg->ack = 0; - msg->seq = tfr->seq; - msg->len = sizeof(struct dm_ulog_request) + tfr->data_size; - - r = cn_netlink_send(msg, 0, gfp_any()); - - return r; -} - -/* - * Parameters for this function can be either msg or tfr, but not - * both. This function fills in the reply for a waiting request. - * If just msg is given, then the reply is simply an ACK from userspace - * that the request was received. - * - * Returns: 0 on success, -ENOENT on failure - */ -static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr) -{ - uint32_t rtn_seq = (msg) ? msg->seq : (tfr) ? tfr->seq : 0; - struct receiving_pkg *pkg; - - /* - * The 'receiving_pkg' entries in this list are statically - * allocated on the stack in 'dm_consult_userspace'. - * Each process that is waiting for a reply from the user - * space server will have an entry in this list. - * - * We are safe to do it this way because the stack space - * is unique to each process, but still addressable by - * other processes. - */ - list_for_each_entry(pkg, &receiving_list, list) { - if (rtn_seq != pkg->seq) - continue; - - if (msg) { - pkg->error = -msg->ack; - /* - * If we are trying again, we will need to know our - * storage capacity. Otherwise, along with the - * error code, we make explicit that we have no data. - */ - if (pkg->error != -EAGAIN) - *(pkg->data_size) = 0; - } else if (tfr->data_size > *(pkg->data_size)) { - DMERR("Insufficient space to receive package [%u] " - "(%u vs %zu)", tfr->request_type, - tfr->data_size, *(pkg->data_size)); - - *(pkg->data_size) = 0; - pkg->error = -ENOSPC; - } else { - pkg->error = tfr->error; - memcpy(pkg->data, tfr->data, tfr->data_size); - *(pkg->data_size) = tfr->data_size; - } - complete(&pkg->complete); - return 0; - } - - return -ENOENT; -} - -/* - * This is the connector callback that delivers data - * that was sent from userspace. 
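
fill_pkg() above pairs each reply with the waiter that posted the matching sequence number, copying the payload only if the waiter's buffer is large enough. A user-space sketch of that matching step, with an array standing in for the kernel's receiving_list and the completion replaced by a plain return.

#include <errno.h>
#include <stdio.h>
#include <string.h>

struct waiting_pkg {
        unsigned seq;
        int error;
        size_t *data_size;      /* in: capacity, out: bytes actually copied */
        char *data;
};

/* Find the waiter whose sequence number matches the reply and either copy
 * the payload or report that the caller's buffer is too small. */
static int deliver(struct waiting_pkg *pkgs, int npkgs,
                   unsigned seq, const char *payload, size_t len)
{
        int i;

        for (i = 0; i < npkgs; i++) {
                struct waiting_pkg *pkg = &pkgs[i];

                if (pkg->seq != seq)
                        continue;
                if (len > *pkg->data_size) {
                        *pkg->data_size = 0;
                        pkg->error = -ENOSPC;   /* reply larger than the buffer */
                } else {
                        memcpy(pkg->data, payload, len);
                        *pkg->data_size = len;
                        pkg->error = 0;
                }
                return 0;               /* in the kernel: complete(&pkg->complete) */
        }
        return -ENOENT;                 /* no waiter for this sequence number */
}

int main(void)
{
        char buf[16];
        size_t cap = sizeof(buf);
        struct waiting_pkg pkg = { .seq = 7, .data_size = &cap, .data = buf };

        deliver(&pkg, 1, 7, "ok", 3);
        printf("err=%d size=%zu data=%s\n", pkg.error, cap, buf);
        return 0;
}
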
- */ -static void cn_ulog_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) -{ - struct dm_ulog_request *tfr = (struct dm_ulog_request *)(msg + 1); - - if (!capable(CAP_SYS_ADMIN)) - return; - - spin_lock(&receiving_list_lock); - if (msg->len == 0) - fill_pkg(msg, NULL); - else if (msg->len < sizeof(*tfr)) - DMERR("Incomplete message received (expected %u, got %u): [%u]", - (unsigned)sizeof(*tfr), msg->len, msg->seq); - else - fill_pkg(NULL, tfr); - spin_unlock(&receiving_list_lock); -} - -/** - * dm_consult_userspace - * @uuid: log's universal unique identifier (must be DM_UUID_LEN in size) - * @luid: log's local unique identifier - * @request_type: found in include/linux/dm-log-userspace.h - * @data: data to tx to the server - * @data_size: size of data in bytes - * @rdata: place to put return data from server - * @rdata_size: value-result (amount of space given/amount of space used) - * - * rdata_size is undefined on failure. - * - * Memory used to communicate with userspace is zero'ed - * before populating to ensure that no unwanted bits leak - * from kernel space to user-space. All userspace log communications - * between kernel and user space go through this function. - * - * Returns: 0 on success, -EXXX on failure - **/ -int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type, - char *data, size_t data_size, - char *rdata, size_t *rdata_size) -{ - int r = 0; - size_t dummy = 0; - int overhead_size = sizeof(struct dm_ulog_request) + sizeof(struct cn_msg); - struct dm_ulog_request *tfr = prealloced_ulog_tfr; - struct receiving_pkg pkg; - - /* - * Given the space needed to hold the 'struct cn_msg' and - * 'struct dm_ulog_request' - do we have enough payload - * space remaining? - */ - if (data_size > (DM_ULOG_PREALLOCED_SIZE - overhead_size)) { - DMINFO("Size of tfr exceeds preallocated size"); - return -EINVAL; - } - - if (!rdata_size) - rdata_size = &dummy; -resend: - /* - * We serialize the sending of requests so we can - * use the preallocated space. - */ - mutex_lock(&dm_ulog_lock); - - memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - sizeof(struct cn_msg)); - memcpy(tfr->uuid, uuid, DM_UUID_LEN); - tfr->version = DM_ULOG_REQUEST_VERSION; - tfr->luid = luid; - tfr->seq = dm_ulog_seq++; - - /* - * Must be valid request type (all other bits set to - * zero). This reserves other bits for possible future - * use. - */ - tfr->request_type = request_type & DM_ULOG_REQUEST_MASK; - - tfr->data_size = data_size; - if (data && data_size) - memcpy(tfr->data, data, data_size); - - memset(&pkg, 0, sizeof(pkg)); - init_completion(&pkg.complete); - pkg.seq = tfr->seq; - pkg.data_size = rdata_size; - pkg.data = rdata; - spin_lock(&receiving_list_lock); - list_add(&(pkg.list), &receiving_list); - spin_unlock(&receiving_list_lock); - - r = dm_ulog_sendto_server(tfr); - - mutex_unlock(&dm_ulog_lock); - - if (r) { - DMERR("Unable to send log request [%u] to userspace: %d", - request_type, r); - spin_lock(&receiving_list_lock); - list_del_init(&(pkg.list)); - spin_unlock(&receiving_list_lock); - - goto out; - } - - r = wait_for_completion_timeout(&(pkg.complete), DM_ULOG_RETRY_TIMEOUT); - spin_lock(&receiving_list_lock); - list_del_init(&(pkg.list)); - spin_unlock(&receiving_list_lock); - if (!r) { - DMWARN("[%s] Request timed out: [%u/%u] - retrying", - (strlen(uuid) > 8) ? 
- (uuid + (strlen(uuid) - 8)) : (uuid), - request_type, pkg.seq); - goto resend; - } - - r = pkg.error; - if (r == -EAGAIN) - goto resend; - -out: - return r; -} - -int dm_ulog_tfr_init(void) -{ - int r; - void *prealloced; - - INIT_LIST_HEAD(&receiving_list); - - prealloced = kmalloc(DM_ULOG_PREALLOCED_SIZE, GFP_KERNEL); - if (!prealloced) - return -ENOMEM; - - prealloced_cn_msg = prealloced; - prealloced_ulog_tfr = prealloced + sizeof(struct cn_msg); - - r = cn_add_callback(&ulog_cn_id, "dmlogusr", cn_ulog_callback); - if (r) { - cn_del_callback(&ulog_cn_id); - return r; - } - - return 0; -} - -void dm_ulog_tfr_exit(void) -{ - cn_del_callback(&ulog_cn_id); - kfree(prealloced_cn_msg); -} diff --git a/ANDROID_3.4.5/drivers/md/dm-log-userspace-transfer.h b/ANDROID_3.4.5/drivers/md/dm-log-userspace-transfer.h deleted file mode 100644 index 04ee874f..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-log-userspace-transfer.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (C) 2006-2009 Red Hat, Inc. - * - * This file is released under the LGPL. - */ - -#ifndef __DM_LOG_USERSPACE_TRANSFER_H__ -#define __DM_LOG_USERSPACE_TRANSFER_H__ - -#define DM_MSG_PREFIX "dm-log-userspace" - -int dm_ulog_tfr_init(void); -void dm_ulog_tfr_exit(void); -int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type, - char *data, size_t data_size, - char *rdata, size_t *rdata_size); - -#endif /* __DM_LOG_USERSPACE_TRANSFER_H__ */ diff --git a/ANDROID_3.4.5/drivers/md/dm-log.c b/ANDROID_3.4.5/drivers/md/dm-log.c deleted file mode 100644 index 65ebaebf..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-log.c +++ /dev/null @@ -1,897 +0,0 @@ -/* - * Copyright (C) 2003 Sistina Software - * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This file is released under the LGPL. - */ - -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/module.h> -#include <linux/vmalloc.h> -#include <linux/dm-io.h> -#include <linux/dm-dirty-log.h> - -#include <linux/device-mapper.h> - -#define DM_MSG_PREFIX "dirty region log" - -static LIST_HEAD(_log_types); -static DEFINE_SPINLOCK(_lock); - -static struct dm_dirty_log_type *__find_dirty_log_type(const char *name) -{ - struct dm_dirty_log_type *log_type; - - list_for_each_entry(log_type, &_log_types, list) - if (!strcmp(name, log_type->name)) - return log_type; - - return NULL; -} - -static struct dm_dirty_log_type *_get_dirty_log_type(const char *name) -{ - struct dm_dirty_log_type *log_type; - - spin_lock(&_lock); - - log_type = __find_dirty_log_type(name); - if (log_type && !try_module_get(log_type->module)) - log_type = NULL; - - spin_unlock(&_lock); - - return log_type; -} - -/* - * get_type - * @type_name - * - * Attempt to retrieve the dm_dirty_log_type by name. If not already - * available, attempt to load the appropriate module. - * - * Log modules are named "dm-log-" followed by the 'type_name'. - * Modules may contain multiple types. - * This function will first try the module "dm-log-<type_name>", - * then truncate 'type_name' on the last '-' and try again. - * - * For example, if type_name was "clustered-disk", it would search - * 'dm-log-clustered-disk' then 'dm-log-clustered'. 
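
A user-space sketch of the name-truncation fallback described above: copy the type name, try the full module name, then keep chopping at the last '-' until a module loads or nothing is left to chop. Here every load attempt is assumed to fail, so the full fallback chain is printed.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void try_modules(const char *type_name)
{
        char *dup = malloc(strlen(type_name) + 1);
        char *p;

        if (!dup)
                return;
        strcpy(dup, type_name);

        for (;;) {
                printf("request_module(\"dm-log-%s\")\n", dup);
                p = strrchr(dup, '-');
                if (!p)
                        break;          /* nothing left to strip: give up */
                *p = '\0';              /* assume the load failed; truncate and retry */
        }
        free(dup);
}

int main(void)
{
        try_modules("clustered-disk");
        /* prints: dm-log-clustered-disk, then dm-log-clustered */
        return 0;
}
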
- * - * Returns: dirty_log_type* on success, NULL on failure - */ -static struct dm_dirty_log_type *get_type(const char *type_name) -{ - char *p, *type_name_dup; - struct dm_dirty_log_type *log_type; - - if (!type_name) - return NULL; - - log_type = _get_dirty_log_type(type_name); - if (log_type) - return log_type; - - type_name_dup = kstrdup(type_name, GFP_KERNEL); - if (!type_name_dup) { - DMWARN("No memory left to attempt log module load for \"%s\"", - type_name); - return NULL; - } - - while (request_module("dm-log-%s", type_name_dup) || - !(log_type = _get_dirty_log_type(type_name))) { - p = strrchr(type_name_dup, '-'); - if (!p) - break; - p[0] = '\0'; - } - - if (!log_type) - DMWARN("Module for logging type \"%s\" not found.", type_name); - - kfree(type_name_dup); - - return log_type; -} - -static void put_type(struct dm_dirty_log_type *type) -{ - if (!type) - return; - - spin_lock(&_lock); - if (!__find_dirty_log_type(type->name)) - goto out; - - module_put(type->module); - -out: - spin_unlock(&_lock); -} - -int dm_dirty_log_type_register(struct dm_dirty_log_type *type) -{ - int r = 0; - - spin_lock(&_lock); - if (!__find_dirty_log_type(type->name)) - list_add(&type->list, &_log_types); - else - r = -EEXIST; - spin_unlock(&_lock); - - return r; -} -EXPORT_SYMBOL(dm_dirty_log_type_register); - -int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type) -{ - spin_lock(&_lock); - - if (!__find_dirty_log_type(type->name)) { - spin_unlock(&_lock); - return -EINVAL; - } - - list_del(&type->list); - - spin_unlock(&_lock); - - return 0; -} -EXPORT_SYMBOL(dm_dirty_log_type_unregister); - -struct dm_dirty_log *dm_dirty_log_create(const char *type_name, - struct dm_target *ti, - int (*flush_callback_fn)(struct dm_target *ti), - unsigned int argc, char **argv) -{ - struct dm_dirty_log_type *type; - struct dm_dirty_log *log; - - log = kmalloc(sizeof(*log), GFP_KERNEL); - if (!log) - return NULL; - - type = get_type(type_name); - if (!type) { - kfree(log); - return NULL; - } - - log->flush_callback_fn = flush_callback_fn; - log->type = type; - if (type->ctr(log, ti, argc, argv)) { - kfree(log); - put_type(type); - return NULL; - } - - return log; -} -EXPORT_SYMBOL(dm_dirty_log_create); - -void dm_dirty_log_destroy(struct dm_dirty_log *log) -{ - log->type->dtr(log); - put_type(log->type); - kfree(log); -} -EXPORT_SYMBOL(dm_dirty_log_destroy); - -/*----------------------------------------------------------------- - * Persistent and core logs share a lot of their implementation. - * FIXME: need a reload method to be called from a resume - *---------------------------------------------------------------*/ -/* - * Magic for persistent mirrors: "MiRr" - */ -#define MIRROR_MAGIC 0x4D695272 - -/* - * The on-disk version of the metadata. - */ -#define MIRROR_DISK_VERSION 2 -#define LOG_OFFSET 2 - -struct log_header_disk { - __le32 magic; - - /* - * Simple, incrementing version. no backward - * compatibility. 
- */ - __le32 version; - __le64 nr_regions; -} __packed; - -struct log_header_core { - uint32_t magic; - uint32_t version; - uint64_t nr_regions; -}; - -struct log_c { - struct dm_target *ti; - int touched_dirtied; - int touched_cleaned; - int flush_failed; - uint32_t region_size; - unsigned int region_count; - region_t sync_count; - - unsigned bitset_uint32_count; - uint32_t *clean_bits; - uint32_t *sync_bits; - uint32_t *recovering_bits; /* FIXME: this seems excessive */ - - int sync_search; - - /* Resync flag */ - enum sync { - DEFAULTSYNC, /* Synchronize if necessary */ - NOSYNC, /* Devices known to be already in sync */ - FORCESYNC, /* Force a sync to happen */ - } sync; - - struct dm_io_request io_req; - - /* - * Disk log fields - */ - int log_dev_failed; - int log_dev_flush_failed; - struct dm_dev *log_dev; - struct log_header_core header; - - struct dm_io_region header_location; - struct log_header_disk *disk_header; -}; - -/* - * The touched member needs to be updated every time we access - * one of the bitsets. - */ -static inline int log_test_bit(uint32_t *bs, unsigned bit) -{ - return test_bit_le(bit, bs) ? 1 : 0; -} - -static inline void log_set_bit(struct log_c *l, - uint32_t *bs, unsigned bit) -{ - __set_bit_le(bit, bs); - l->touched_cleaned = 1; -} - -static inline void log_clear_bit(struct log_c *l, - uint32_t *bs, unsigned bit) -{ - __clear_bit_le(bit, bs); - l->touched_dirtied = 1; -} - -/*---------------------------------------------------------------- - * Header IO - *--------------------------------------------------------------*/ -static void header_to_disk(struct log_header_core *core, struct log_header_disk *disk) -{ - disk->magic = cpu_to_le32(core->magic); - disk->version = cpu_to_le32(core->version); - disk->nr_regions = cpu_to_le64(core->nr_regions); -} - -static void header_from_disk(struct log_header_core *core, struct log_header_disk *disk) -{ - core->magic = le32_to_cpu(disk->magic); - core->version = le32_to_cpu(disk->version); - core->nr_regions = le64_to_cpu(disk->nr_regions); -} - -static int rw_header(struct log_c *lc, int rw) -{ - lc->io_req.bi_rw = rw; - - return dm_io(&lc->io_req, 1, &lc->header_location, NULL); -} - -static int flush_header(struct log_c *lc) -{ - struct dm_io_region null_location = { - .bdev = lc->header_location.bdev, - .sector = 0, - .count = 0, - }; - - lc->io_req.bi_rw = WRITE_FLUSH; - - return dm_io(&lc->io_req, 1, &null_location, NULL); -} - -static int read_header(struct log_c *log) -{ - int r; - - r = rw_header(log, READ); - if (r) - return r; - - header_from_disk(&log->header, log->disk_header); - - /* New log required? 
*/ - if (log->sync != DEFAULTSYNC || log->header.magic != MIRROR_MAGIC) { - log->header.magic = MIRROR_MAGIC; - log->header.version = MIRROR_DISK_VERSION; - log->header.nr_regions = 0; - } - -#ifdef __LITTLE_ENDIAN - if (log->header.version == 1) - log->header.version = 2; -#endif - - if (log->header.version != MIRROR_DISK_VERSION) { - DMWARN("incompatible disk log version"); - return -EINVAL; - } - - return 0; -} - -static int _check_region_size(struct dm_target *ti, uint32_t region_size) -{ - if (region_size < 2 || region_size > ti->len) - return 0; - - if (!is_power_of_2(region_size)) - return 0; - - return 1; -} - -/*---------------------------------------------------------------- - * core log constructor/destructor - * - * argv contains region_size followed optionally by [no]sync - *--------------------------------------------------------------*/ -#define BYTE_SHIFT 3 -static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, - unsigned int argc, char **argv, - struct dm_dev *dev) -{ - enum sync sync = DEFAULTSYNC; - - struct log_c *lc; - uint32_t region_size; - unsigned int region_count; - size_t bitset_size, buf_size; - int r; - char dummy; - - if (argc < 1 || argc > 2) { - DMWARN("wrong number of arguments to dirty region log"); - return -EINVAL; - } - - if (argc > 1) { - if (!strcmp(argv[1], "sync")) - sync = FORCESYNC; - else if (!strcmp(argv[1], "nosync")) - sync = NOSYNC; - else { - DMWARN("unrecognised sync argument to " - "dirty region log: %s", argv[1]); - return -EINVAL; - } - } - - if (sscanf(argv[0], "%u%c", ®ion_size, &dummy) != 1 || - !_check_region_size(ti, region_size)) { - DMWARN("invalid region size %s", argv[0]); - return -EINVAL; - } - - region_count = dm_sector_div_up(ti->len, region_size); - - lc = kmalloc(sizeof(*lc), GFP_KERNEL); - if (!lc) { - DMWARN("couldn't allocate core log"); - return -ENOMEM; - } - - lc->ti = ti; - lc->touched_dirtied = 0; - lc->touched_cleaned = 0; - lc->flush_failed = 0; - lc->region_size = region_size; - lc->region_count = region_count; - lc->sync = sync; - - /* - * Work out how many "unsigned long"s we need to hold the bitset. - */ - bitset_size = dm_round_up(region_count, - sizeof(*lc->clean_bits) << BYTE_SHIFT); - bitset_size >>= BYTE_SHIFT; - - lc->bitset_uint32_count = bitset_size / sizeof(*lc->clean_bits); - - /* - * Disk log? - */ - if (!dev) { - lc->clean_bits = vmalloc(bitset_size); - if (!lc->clean_bits) { - DMWARN("couldn't allocate clean bitset"); - kfree(lc); - return -ENOMEM; - } - lc->disk_header = NULL; - } else { - lc->log_dev = dev; - lc->log_dev_failed = 0; - lc->log_dev_flush_failed = 0; - lc->header_location.bdev = lc->log_dev->bdev; - lc->header_location.sector = 0; - - /* - * Buffer holds both header and bitset. - */ - buf_size = - dm_round_up((LOG_OFFSET << SECTOR_SHIFT) + bitset_size, - bdev_logical_block_size(lc->header_location. 
- bdev)); - - if (buf_size > i_size_read(dev->bdev->bd_inode)) { - DMWARN("log device %s too small: need %llu bytes", - dev->name, (unsigned long long)buf_size); - kfree(lc); - return -EINVAL; - } - - lc->header_location.count = buf_size >> SECTOR_SHIFT; - - lc->io_req.mem.type = DM_IO_VMA; - lc->io_req.notify.fn = NULL; - lc->io_req.client = dm_io_client_create(); - if (IS_ERR(lc->io_req.client)) { - r = PTR_ERR(lc->io_req.client); - DMWARN("couldn't allocate disk io client"); - kfree(lc); - return r; - } - - lc->disk_header = vmalloc(buf_size); - if (!lc->disk_header) { - DMWARN("couldn't allocate disk log buffer"); - dm_io_client_destroy(lc->io_req.client); - kfree(lc); - return -ENOMEM; - } - - lc->io_req.mem.ptr.vma = lc->disk_header; - lc->clean_bits = (void *)lc->disk_header + - (LOG_OFFSET << SECTOR_SHIFT); - } - - memset(lc->clean_bits, -1, bitset_size); - - lc->sync_bits = vmalloc(bitset_size); - if (!lc->sync_bits) { - DMWARN("couldn't allocate sync bitset"); - if (!dev) - vfree(lc->clean_bits); - else - dm_io_client_destroy(lc->io_req.client); - vfree(lc->disk_header); - kfree(lc); - return -ENOMEM; - } - memset(lc->sync_bits, (sync == NOSYNC) ? -1 : 0, bitset_size); - lc->sync_count = (sync == NOSYNC) ? region_count : 0; - - lc->recovering_bits = vzalloc(bitset_size); - if (!lc->recovering_bits) { - DMWARN("couldn't allocate sync bitset"); - vfree(lc->sync_bits); - if (!dev) - vfree(lc->clean_bits); - else - dm_io_client_destroy(lc->io_req.client); - vfree(lc->disk_header); - kfree(lc); - return -ENOMEM; - } - lc->sync_search = 0; - log->context = lc; - - return 0; -} - -static int core_ctr(struct dm_dirty_log *log, struct dm_target *ti, - unsigned int argc, char **argv) -{ - return create_log_context(log, ti, argc, argv, NULL); -} - -static void destroy_log_context(struct log_c *lc) -{ - vfree(lc->sync_bits); - vfree(lc->recovering_bits); - kfree(lc); -} - -static void core_dtr(struct dm_dirty_log *log) -{ - struct log_c *lc = (struct log_c *) log->context; - - vfree(lc->clean_bits); - destroy_log_context(lc); -} - -/*---------------------------------------------------------------- - * disk log constructor/destructor - * - * argv contains log_device region_size followed optionally by [no]sync - *--------------------------------------------------------------*/ -static int disk_ctr(struct dm_dirty_log *log, struct dm_target *ti, - unsigned int argc, char **argv) -{ - int r; - struct dm_dev *dev; - - if (argc < 2 || argc > 3) { - DMWARN("wrong number of arguments to disk dirty region log"); - return -EINVAL; - } - - r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dev); - if (r) - return r; - - r = create_log_context(log, ti, argc - 1, argv + 1, dev); - if (r) { - dm_put_device(ti, dev); - return r; - } - - return 0; -} - -static void disk_dtr(struct dm_dirty_log *log) -{ - struct log_c *lc = (struct log_c *) log->context; - - dm_put_device(lc->ti, lc->log_dev); - vfree(lc->disk_header); - dm_io_client_destroy(lc->io_req.client); - destroy_log_context(lc); -} - -static int count_bits32(uint32_t *addr, unsigned size) -{ - int count = 0, i; - - for (i = 0; i < size; i++) { - count += hweight32(*(addr+i)); - } - return count; -} - -static void fail_log_device(struct log_c *lc) -{ - if (lc->log_dev_failed) - return; - - lc->log_dev_failed = 1; - dm_table_event(lc->ti->table); -} - -static int disk_resume(struct dm_dirty_log *log) -{ - int r; - unsigned i; - struct log_c *lc = (struct log_c *) log->context; - size_t size = lc->bitset_uint32_count * sizeof(uint32_t); - - 
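
The bitset sizing in create_log_context() above rounds the region count up to a whole number of 32-bit words before converting to bytes. A worked example with made-up numbers (dm_round_up(n, m) rounds n up to the next multiple of m):

#include <stdio.h>

int main(void)
{
        unsigned long long ti_len = 409600;     /* target length in sectors (made up) */
        unsigned region_size = 1024;            /* sectors per region (made up) */
        unsigned region_count = (ti_len + region_size - 1) / region_size;  /* 400 */

        unsigned bits_per_word = 32;            /* sizeof(uint32_t) << BYTE_SHIFT */
        unsigned bitset_bits = ((region_count + bits_per_word - 1) / bits_per_word)
                               * bits_per_word; /* 416 bits */
        unsigned bitset_bytes = bitset_bits >> 3;       /* 52 bytes */

        printf("%u regions -> %u-bit bitset -> %u bytes (%u uint32_t words)\n",
               region_count, bitset_bits, bitset_bytes, bitset_bytes / 4);
        return 0;
}
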
/* read the disk header */ - r = read_header(lc); - if (r) { - DMWARN("%s: Failed to read header on dirty region log device", - lc->log_dev->name); - fail_log_device(lc); - /* - * If the log device cannot be read, we must assume - * all regions are out-of-sync. If we simply return - * here, the state will be uninitialized and could - * lead us to return 'in-sync' status for regions - * that are actually 'out-of-sync'. - */ - lc->header.nr_regions = 0; - } - - /* set or clear any new bits -- device has grown */ - if (lc->sync == NOSYNC) - for (i = lc->header.nr_regions; i < lc->region_count; i++) - /* FIXME: amazingly inefficient */ - log_set_bit(lc, lc->clean_bits, i); - else - for (i = lc->header.nr_regions; i < lc->region_count; i++) - /* FIXME: amazingly inefficient */ - log_clear_bit(lc, lc->clean_bits, i); - - /* clear any old bits -- device has shrunk */ - for (i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++) - log_clear_bit(lc, lc->clean_bits, i); - - /* copy clean across to sync */ - memcpy(lc->sync_bits, lc->clean_bits, size); - lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count); - lc->sync_search = 0; - - /* set the correct number of regions in the header */ - lc->header.nr_regions = lc->region_count; - - header_to_disk(&lc->header, lc->disk_header); - - /* write the new header */ - r = rw_header(lc, WRITE); - if (!r) { - r = flush_header(lc); - if (r) - lc->log_dev_flush_failed = 1; - } - if (r) { - DMWARN("%s: Failed to write header on dirty region log device", - lc->log_dev->name); - fail_log_device(lc); - } - - return r; -} - -static uint32_t core_get_region_size(struct dm_dirty_log *log) -{ - struct log_c *lc = (struct log_c *) log->context; - return lc->region_size; -} - -static int core_resume(struct dm_dirty_log *log) -{ - struct log_c *lc = (struct log_c *) log->context; - lc->sync_search = 0; - return 0; -} - -static int core_is_clean(struct dm_dirty_log *log, region_t region) -{ - struct log_c *lc = (struct log_c *) log->context; - return log_test_bit(lc->clean_bits, region); -} - -static int core_in_sync(struct dm_dirty_log *log, region_t region, int block) -{ - struct log_c *lc = (struct log_c *) log->context; - return log_test_bit(lc->sync_bits, region); -} - -static int core_flush(struct dm_dirty_log *log) -{ - /* no op */ - return 0; -} - -static int disk_flush(struct dm_dirty_log *log) -{ - int r, i; - struct log_c *lc = log->context; - - /* only write if the log has changed */ - if (!lc->touched_cleaned && !lc->touched_dirtied) - return 0; - - if (lc->touched_cleaned && log->flush_callback_fn && - log->flush_callback_fn(lc->ti)) { - /* - * At this point it is impossible to determine which - * regions are clean and which are dirty (without - * re-reading the log off disk). So mark all of them - * dirty. 
- */ - lc->flush_failed = 1; - for (i = 0; i < lc->region_count; i++) - log_clear_bit(lc, lc->clean_bits, i); - } - - r = rw_header(lc, WRITE); - if (r) - fail_log_device(lc); - else { - if (lc->touched_dirtied) { - r = flush_header(lc); - if (r) { - lc->log_dev_flush_failed = 1; - fail_log_device(lc); - } else - lc->touched_dirtied = 0; - } - lc->touched_cleaned = 0; - } - - return r; -} - -static void core_mark_region(struct dm_dirty_log *log, region_t region) -{ - struct log_c *lc = (struct log_c *) log->context; - log_clear_bit(lc, lc->clean_bits, region); -} - -static void core_clear_region(struct dm_dirty_log *log, region_t region) -{ - struct log_c *lc = (struct log_c *) log->context; - if (likely(!lc->flush_failed)) - log_set_bit(lc, lc->clean_bits, region); -} - -static int core_get_resync_work(struct dm_dirty_log *log, region_t *region) -{ - struct log_c *lc = (struct log_c *) log->context; - - if (lc->sync_search >= lc->region_count) - return 0; - - do { - *region = find_next_zero_bit_le(lc->sync_bits, - lc->region_count, - lc->sync_search); - lc->sync_search = *region + 1; - - if (*region >= lc->region_count) - return 0; - - } while (log_test_bit(lc->recovering_bits, *region)); - - log_set_bit(lc, lc->recovering_bits, *region); - return 1; -} - -static void core_set_region_sync(struct dm_dirty_log *log, region_t region, - int in_sync) -{ - struct log_c *lc = (struct log_c *) log->context; - - log_clear_bit(lc, lc->recovering_bits, region); - if (in_sync) { - log_set_bit(lc, lc->sync_bits, region); - lc->sync_count++; - } else if (log_test_bit(lc->sync_bits, region)) { - lc->sync_count--; - log_clear_bit(lc, lc->sync_bits, region); - } -} - -static region_t core_get_sync_count(struct dm_dirty_log *log) -{ - struct log_c *lc = (struct log_c *) log->context; - - return lc->sync_count; -} - -#define DMEMIT_SYNC \ - if (lc->sync != DEFAULTSYNC) \ - DMEMIT("%ssync ", lc->sync == NOSYNC ? "no" : "") - -static int core_status(struct dm_dirty_log *log, status_type_t status, - char *result, unsigned int maxlen) -{ - int sz = 0; - struct log_c *lc = log->context; - - switch(status) { - case STATUSTYPE_INFO: - DMEMIT("1 %s", log->type->name); - break; - - case STATUSTYPE_TABLE: - DMEMIT("%s %u %u ", log->type->name, - lc->sync == DEFAULTSYNC ? 1 : 2, lc->region_size); - DMEMIT_SYNC; - } - - return sz; -} - -static int disk_status(struct dm_dirty_log *log, status_type_t status, - char *result, unsigned int maxlen) -{ - int sz = 0; - struct log_c *lc = log->context; - - switch(status) { - case STATUSTYPE_INFO: - DMEMIT("3 %s %s %c", log->type->name, lc->log_dev->name, - lc->log_dev_flush_failed ? 'F' : - lc->log_dev_failed ? 'D' : - 'A'); - break; - - case STATUSTYPE_TABLE: - DMEMIT("%s %u %s %u ", log->type->name, - lc->sync == DEFAULTSYNC ? 
2 : 3, lc->log_dev->name, - lc->region_size); - DMEMIT_SYNC; - } - - return sz; -} - -static struct dm_dirty_log_type _core_type = { - .name = "core", - .module = THIS_MODULE, - .ctr = core_ctr, - .dtr = core_dtr, - .resume = core_resume, - .get_region_size = core_get_region_size, - .is_clean = core_is_clean, - .in_sync = core_in_sync, - .flush = core_flush, - .mark_region = core_mark_region, - .clear_region = core_clear_region, - .get_resync_work = core_get_resync_work, - .set_region_sync = core_set_region_sync, - .get_sync_count = core_get_sync_count, - .status = core_status, -}; - -static struct dm_dirty_log_type _disk_type = { - .name = "disk", - .module = THIS_MODULE, - .ctr = disk_ctr, - .dtr = disk_dtr, - .postsuspend = disk_flush, - .resume = disk_resume, - .get_region_size = core_get_region_size, - .is_clean = core_is_clean, - .in_sync = core_in_sync, - .flush = disk_flush, - .mark_region = core_mark_region, - .clear_region = core_clear_region, - .get_resync_work = core_get_resync_work, - .set_region_sync = core_set_region_sync, - .get_sync_count = core_get_sync_count, - .status = disk_status, -}; - -static int __init dm_dirty_log_init(void) -{ - int r; - - r = dm_dirty_log_type_register(&_core_type); - if (r) - DMWARN("couldn't register core log"); - - r = dm_dirty_log_type_register(&_disk_type); - if (r) { - DMWARN("couldn't register disk type"); - dm_dirty_log_type_unregister(&_core_type); - } - - return r; -} - -static void __exit dm_dirty_log_exit(void) -{ - dm_dirty_log_type_unregister(&_disk_type); - dm_dirty_log_type_unregister(&_core_type); -} - -module_init(dm_dirty_log_init); -module_exit(dm_dirty_log_exit); - -MODULE_DESCRIPTION(DM_NAME " dirty region log"); -MODULE_AUTHOR("Joe Thornber, Heinz Mauelshagen <dm-devel@redhat.com>"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm-mpath.c b/ANDROID_3.4.5/drivers/md/dm-mpath.c deleted file mode 100644 index 754f38f8..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-mpath.c +++ /dev/null @@ -1,1723 +0,0 @@ -/* - * Copyright (C) 2003 Sistina Software Limited. - * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. - */ - -#include <linux/device-mapper.h> - -#include "dm-path-selector.h" -#include "dm-uevent.h" - -#include <linux/ctype.h> -#include <linux/init.h> -#include <linux/mempool.h> -#include <linux/module.h> -#include <linux/pagemap.h> -#include <linux/slab.h> -#include <linux/time.h> -#include <linux/workqueue.h> -#include <scsi/scsi_dh.h> -#include <linux/atomic.h> - -#define DM_MSG_PREFIX "multipath" -#define DM_PG_INIT_DELAY_MSECS 2000 -#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1) - -/* Path properties */ -struct pgpath { - struct list_head list; - - struct priority_group *pg; /* Owning PG */ - unsigned is_active; /* Path status */ - unsigned fail_count; /* Cumulative failure count */ - - struct dm_path path; - struct delayed_work activate_path; -}; - -#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path) - -/* - * Paths are grouped into Priority Groups and numbered from 1 upwards. - * Each has a path selector which controls which path gets used. - */ -struct priority_group { - struct list_head list; - - struct multipath *m; /* Owning multipath instance */ - struct path_selector ps; - - unsigned pg_num; /* Reference number */ - unsigned bypassed; /* Temporarily bypass this PG? 
*/ - - unsigned nr_pgpaths; /* Number of paths in PG */ - struct list_head pgpaths; -}; - -/* Multipath context */ -struct multipath { - struct list_head list; - struct dm_target *ti; - - spinlock_t lock; - - const char *hw_handler_name; - char *hw_handler_params; - - unsigned nr_priority_groups; - struct list_head priority_groups; - - wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */ - - unsigned pg_init_required; /* pg_init needs calling? */ - unsigned pg_init_in_progress; /* Only one pg_init allowed at once */ - unsigned pg_init_delay_retry; /* Delay pg_init retry? */ - - unsigned nr_valid_paths; /* Total number of usable paths */ - struct pgpath *current_pgpath; - struct priority_group *current_pg; - struct priority_group *next_pg; /* Switch to this PG if set */ - unsigned repeat_count; /* I/Os left before calling PS again */ - - unsigned queue_io; /* Must we queue all I/O? */ - unsigned queue_if_no_path; /* Queue I/O if last path fails? */ - unsigned saved_queue_if_no_path;/* Saved state during suspension */ - unsigned pg_init_retries; /* Number of times to retry pg_init */ - unsigned pg_init_count; /* Number of times pg_init called */ - unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */ - - struct work_struct process_queued_ios; - struct list_head queued_ios; - unsigned queue_size; - - struct work_struct trigger_event; - - /* - * We must use a mempool of dm_mpath_io structs so that we - * can resubmit bios on error. - */ - mempool_t *mpio_pool; - - struct mutex work_mutex; -}; - -/* - * Context information attached to each bio we process. - */ -struct dm_mpath_io { - struct pgpath *pgpath; - size_t nr_bytes; -}; - -typedef int (*action_fn) (struct pgpath *pgpath); - -#define MIN_IOS 256 /* Mempool size */ - -static struct kmem_cache *_mpio_cache; - -static struct workqueue_struct *kmultipathd, *kmpath_handlerd; -static void process_queued_ios(struct work_struct *work); -static void trigger_event(struct work_struct *work); -static void activate_path(struct work_struct *work); - - -/*----------------------------------------------- - * Allocation routines - *-----------------------------------------------*/ - -static struct pgpath *alloc_pgpath(void) -{ - struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL); - - if (pgpath) { - pgpath->is_active = 1; - INIT_DELAYED_WORK(&pgpath->activate_path, activate_path); - } - - return pgpath; -} - -static void free_pgpath(struct pgpath *pgpath) -{ - kfree(pgpath); -} - -static struct priority_group *alloc_priority_group(void) -{ - struct priority_group *pg; - - pg = kzalloc(sizeof(*pg), GFP_KERNEL); - - if (pg) - INIT_LIST_HEAD(&pg->pgpaths); - - return pg; -} - -static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti) -{ - struct pgpath *pgpath, *tmp; - struct multipath *m = ti->private; - - list_for_each_entry_safe(pgpath, tmp, pgpaths, list) { - list_del(&pgpath->list); - if (m->hw_handler_name) - scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev)); - dm_put_device(ti, pgpath->path.dev); - free_pgpath(pgpath); - } -} - -static void free_priority_group(struct priority_group *pg, - struct dm_target *ti) -{ - struct path_selector *ps = &pg->ps; - - if (ps->type) { - ps->type->destroy(ps); - dm_put_path_selector(ps->type); - } - - free_pgpaths(&pg->pgpaths, ti); - kfree(pg); -} - -static struct multipath *alloc_multipath(struct dm_target *ti) -{ - struct multipath *m; - - m = kzalloc(sizeof(*m), GFP_KERNEL); - if (m) { - INIT_LIST_HEAD(&m->priority_groups); - 
INIT_LIST_HEAD(&m->queued_ios); - spin_lock_init(&m->lock); - m->queue_io = 1; - m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT; - INIT_WORK(&m->process_queued_ios, process_queued_ios); - INIT_WORK(&m->trigger_event, trigger_event); - init_waitqueue_head(&m->pg_init_wait); - mutex_init(&m->work_mutex); - m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache); - if (!m->mpio_pool) { - kfree(m); - return NULL; - } - m->ti = ti; - ti->private = m; - } - - return m; -} - -static void free_multipath(struct multipath *m) -{ - struct priority_group *pg, *tmp; - - list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) { - list_del(&pg->list); - free_priority_group(pg, m->ti); - } - - kfree(m->hw_handler_name); - kfree(m->hw_handler_params); - mempool_destroy(m->mpio_pool); - kfree(m); -} - -static int set_mapinfo(struct multipath *m, union map_info *info) -{ - struct dm_mpath_io *mpio; - - mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC); - if (!mpio) - return -ENOMEM; - - memset(mpio, 0, sizeof(*mpio)); - info->ptr = mpio; - - return 0; -} - -static void clear_mapinfo(struct multipath *m, union map_info *info) -{ - struct dm_mpath_io *mpio = info->ptr; - - info->ptr = NULL; - mempool_free(mpio, m->mpio_pool); -} - -/*----------------------------------------------- - * Path selection - *-----------------------------------------------*/ - -static void __pg_init_all_paths(struct multipath *m) -{ - struct pgpath *pgpath; - unsigned long pg_init_delay = 0; - - m->pg_init_count++; - m->pg_init_required = 0; - if (m->pg_init_delay_retry) - pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ? - m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS); - list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) { - /* Skip failed paths */ - if (!pgpath->is_active) - continue; - if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path, - pg_init_delay)) - m->pg_init_in_progress++; - } -} - -static void __switch_pg(struct multipath *m, struct pgpath *pgpath) -{ - m->current_pg = pgpath->pg; - - /* Must we initialise the PG first, and queue I/O till it's ready? */ - if (m->hw_handler_name) { - m->pg_init_required = 1; - m->queue_io = 1; - } else { - m->pg_init_required = 0; - m->queue_io = 0; - } - - m->pg_init_count = 0; -} - -static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg, - size_t nr_bytes) -{ - struct dm_path *path; - - path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes); - if (!path) - return -ENXIO; - - m->current_pgpath = path_to_pgpath(path); - - if (m->current_pg != pg) - __switch_pg(m, m->current_pgpath); - - return 0; -} - -static void __choose_pgpath(struct multipath *m, size_t nr_bytes) -{ - struct priority_group *pg; - unsigned bypassed = 1; - - if (!m->nr_valid_paths) - goto failed; - - /* Were we instructed to switch PG? */ - if (m->next_pg) { - pg = m->next_pg; - m->next_pg = NULL; - if (!__choose_path_in_pg(m, pg, nr_bytes)) - return; - } - - /* Don't change PG until it has no remaining paths */ - if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes)) - return; - - /* - * Loop through priority groups until we find a valid path. - * First time we skip PGs marked 'bypassed'. - * Second time we only try the ones we skipped. 
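
A user-space sketch of the two-pass scan in the loop just below: the first pass skips bypassed groups, the second considers only the groups the first pass skipped. The group layout here is made up.

#include <stdio.h>

struct pg {
        int bypassed;
        int has_valid_path;
};

static int choose_pg(struct pg *pgs, int n)
{
        int bypassed = 1;
        int i;

        do {
                for (i = 0; i < n; i++) {
                        if (pgs[i].bypassed == bypassed)
                                continue;       /* wrong pass for this group */
                        if (pgs[i].has_valid_path)
                                return i;
                }
        } while (bypassed--);

        return -1;                              /* no usable priority group */
}

int main(void)
{
        /* Only the bypassed group has a working path, so pass two picks it. */
        struct pg pgs[] = { { 0, 0 }, { 1, 1 } };

        printf("chose PG index %d\n", choose_pg(pgs, 2));       /* 1 */
        return 0;
}
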
- */ - do { - list_for_each_entry(pg, &m->priority_groups, list) { - if (pg->bypassed == bypassed) - continue; - if (!__choose_path_in_pg(m, pg, nr_bytes)) - return; - } - } while (bypassed--); - -failed: - m->current_pgpath = NULL; - m->current_pg = NULL; -} - -/* - * Check whether bios must be queued in the device-mapper core rather - * than here in the target. - * - * m->lock must be held on entry. - * - * If m->queue_if_no_path and m->saved_queue_if_no_path hold the - * same value then we are not between multipath_presuspend() - * and multipath_resume() calls and we have no need to check - * for the DMF_NOFLUSH_SUSPENDING flag. - */ -static int __must_push_back(struct multipath *m) -{ - return (m->queue_if_no_path != m->saved_queue_if_no_path && - dm_noflush_suspending(m->ti)); -} - -static int map_io(struct multipath *m, struct request *clone, - union map_info *map_context, unsigned was_queued) -{ - int r = DM_MAPIO_REMAPPED; - size_t nr_bytes = blk_rq_bytes(clone); - unsigned long flags; - struct pgpath *pgpath; - struct block_device *bdev; - struct dm_mpath_io *mpio = map_context->ptr; - - spin_lock_irqsave(&m->lock, flags); - - /* Do we need to select a new pgpath? */ - if (!m->current_pgpath || - (!m->queue_io && (m->repeat_count && --m->repeat_count == 0))) - __choose_pgpath(m, nr_bytes); - - pgpath = m->current_pgpath; - - if (was_queued) - m->queue_size--; - - if ((pgpath && m->queue_io) || - (!pgpath && m->queue_if_no_path)) { - /* Queue for the daemon to resubmit */ - list_add_tail(&clone->queuelist, &m->queued_ios); - m->queue_size++; - if ((m->pg_init_required && !m->pg_init_in_progress) || - !m->queue_io) - queue_work(kmultipathd, &m->process_queued_ios); - pgpath = NULL; - r = DM_MAPIO_SUBMITTED; - } else if (pgpath) { - bdev = pgpath->path.dev->bdev; - clone->q = bdev_get_queue(bdev); - clone->rq_disk = bdev->bd_disk; - } else if (__must_push_back(m)) - r = DM_MAPIO_REQUEUE; - else - r = -EIO; /* Failed */ - - mpio->pgpath = pgpath; - mpio->nr_bytes = nr_bytes; - - if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io) - pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path, - nr_bytes); - - spin_unlock_irqrestore(&m->lock, flags); - - return r; -} - -/* - * If we run out of usable paths, should we queue I/O or error it? - */ -static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path, - unsigned save_old_value) -{ - unsigned long flags; - - spin_lock_irqsave(&m->lock, flags); - - if (save_old_value) - m->saved_queue_if_no_path = m->queue_if_no_path; - else - m->saved_queue_if_no_path = queue_if_no_path; - m->queue_if_no_path = queue_if_no_path; - if (!m->queue_if_no_path && m->queue_size) - queue_work(kmultipathd, &m->process_queued_ios); - - spin_unlock_irqrestore(&m->lock, flags); - - return 0; -} - -/*----------------------------------------------------------------- - * The multipath daemon is responsible for resubmitting queued ios. 
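
dispatch_queued_ios() just below uses a common pattern: take the whole queued list while holding the lock (list_splice_init), then remap and dispatch each request after dropping it, so submitters never wait behind the dispatch work. A small pthread sketch of the same splice-then-drain shape, with an array standing in for the kernel list.

#include <pthread.h>
#include <stdio.h>

#define MAX_PENDING 8

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int pending[MAX_PENDING];
static int npending;

static void dispatch(void)
{
        int local[MAX_PENDING];
        int n, i;

        pthread_mutex_lock(&lock);
        n = npending;
        for (i = 0; i < n; i++)
                local[i] = pending[i];  /* list_splice_init() in the kernel code */
        npending = 0;
        pthread_mutex_unlock(&lock);

        for (i = 0; i < n; i++)         /* slow per-item work happens unlocked */
                printf("resubmitting queued io %d\n", local[i]);
}

int main(void)
{
        pthread_mutex_lock(&lock);
        pending[npending++] = 1;
        pending[npending++] = 2;
        pthread_mutex_unlock(&lock);

        dispatch();
        return 0;
}
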
- *---------------------------------------------------------------*/ - -static void dispatch_queued_ios(struct multipath *m) -{ - int r; - unsigned long flags; - union map_info *info; - struct request *clone, *n; - LIST_HEAD(cl); - - spin_lock_irqsave(&m->lock, flags); - list_splice_init(&m->queued_ios, &cl); - spin_unlock_irqrestore(&m->lock, flags); - - list_for_each_entry_safe(clone, n, &cl, queuelist) { - list_del_init(&clone->queuelist); - - info = dm_get_rq_mapinfo(clone); - - r = map_io(m, clone, info, 1); - if (r < 0) { - clear_mapinfo(m, info); - dm_kill_unmapped_request(clone, r); - } else if (r == DM_MAPIO_REMAPPED) - dm_dispatch_request(clone); - else if (r == DM_MAPIO_REQUEUE) { - clear_mapinfo(m, info); - dm_requeue_unmapped_request(clone); - } - } -} - -static void process_queued_ios(struct work_struct *work) -{ - struct multipath *m = - container_of(work, struct multipath, process_queued_ios); - struct pgpath *pgpath = NULL; - unsigned must_queue = 1; - unsigned long flags; - - spin_lock_irqsave(&m->lock, flags); - - if (!m->queue_size) - goto out; - - if (!m->current_pgpath) - __choose_pgpath(m, 0); - - pgpath = m->current_pgpath; - - if ((pgpath && !m->queue_io) || - (!pgpath && !m->queue_if_no_path)) - must_queue = 0; - - if (m->pg_init_required && !m->pg_init_in_progress && pgpath) - __pg_init_all_paths(m); - -out: - spin_unlock_irqrestore(&m->lock, flags); - if (!must_queue) - dispatch_queued_ios(m); -} - -/* - * An event is triggered whenever a path is taken out of use. - * Includes path failure and PG bypass. - */ -static void trigger_event(struct work_struct *work) -{ - struct multipath *m = - container_of(work, struct multipath, trigger_event); - - dm_table_event(m->ti->table); -} - -/*----------------------------------------------------------------- - * Constructor/argument parsing: - * <#multipath feature args> [<arg>]* - * <#hw_handler args> [hw_handler [<arg>]*] - * <#priority groups> - * <initial priority group> - * [<selector> <#selector args> [<arg>]* - * <#paths> <#per-path selector args> - * [<path> [<arg>]* ]+ ]+ - *---------------------------------------------------------------*/ -static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, - struct dm_target *ti) -{ - int r; - struct path_selector_type *pst; - unsigned ps_argc; - - static struct dm_arg _args[] = { - {0, 1024, "invalid number of path selector args"}, - }; - - pst = dm_get_path_selector(dm_shift_arg(as)); - if (!pst) { - ti->error = "unknown path selector type"; - return -EINVAL; - } - - r = dm_read_arg_group(_args, as, &ps_argc, &ti->error); - if (r) { - dm_put_path_selector(pst); - return -EINVAL; - } - - r = pst->create(&pg->ps, ps_argc, as->argv); - if (r) { - dm_put_path_selector(pst); - ti->error = "path selector constructor failed"; - return r; - } - - pg->ps.type = pst; - dm_consume_args(as, ps_argc); - - return 0; -} - -static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps, - struct dm_target *ti) -{ - int r; - struct pgpath *p; - struct multipath *m = ti->private; - - /* we need at least a path arg */ - if (as->argc < 1) { - ti->error = "no device given"; - return ERR_PTR(-EINVAL); - } - - p = alloc_pgpath(); - if (!p) - return ERR_PTR(-ENOMEM); - - r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table), - &p->path.dev); - if (r) { - ti->error = "error getting device"; - goto bad; - } - - if (m->hw_handler_name) { - struct request_queue *q = bdev_get_queue(p->path.dev->bdev); - - r = scsi_dh_attach(q, 
m->hw_handler_name); - if (r == -EBUSY) { - /* - * Already attached to different hw_handler, - * try to reattach with correct one. - */ - scsi_dh_detach(q); - r = scsi_dh_attach(q, m->hw_handler_name); - } - - if (r < 0) { - ti->error = "error attaching hardware handler"; - dm_put_device(ti, p->path.dev); - goto bad; - } - - if (m->hw_handler_params) { - r = scsi_dh_set_params(q, m->hw_handler_params); - if (r < 0) { - ti->error = "unable to set hardware " - "handler parameters"; - scsi_dh_detach(q); - dm_put_device(ti, p->path.dev); - goto bad; - } - } - } - - r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error); - if (r) { - dm_put_device(ti, p->path.dev); - goto bad; - } - - return p; - - bad: - free_pgpath(p); - return ERR_PTR(r); -} - -static struct priority_group *parse_priority_group(struct dm_arg_set *as, - struct multipath *m) -{ - static struct dm_arg _args[] = { - {1, 1024, "invalid number of paths"}, - {0, 1024, "invalid number of selector args"} - }; - - int r; - unsigned i, nr_selector_args, nr_args; - struct priority_group *pg; - struct dm_target *ti = m->ti; - - if (as->argc < 2) { - as->argc = 0; - ti->error = "not enough priority group arguments"; - return ERR_PTR(-EINVAL); - } - - pg = alloc_priority_group(); - if (!pg) { - ti->error = "couldn't allocate priority group"; - return ERR_PTR(-ENOMEM); - } - pg->m = m; - - r = parse_path_selector(as, pg, ti); - if (r) - goto bad; - - /* - * read the paths - */ - r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error); - if (r) - goto bad; - - r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error); - if (r) - goto bad; - - nr_args = 1 + nr_selector_args; - for (i = 0; i < pg->nr_pgpaths; i++) { - struct pgpath *pgpath; - struct dm_arg_set path_args; - - if (as->argc < nr_args) { - ti->error = "not enough path parameters"; - r = -EINVAL; - goto bad; - } - - path_args.argc = nr_args; - path_args.argv = as->argv; - - pgpath = parse_path(&path_args, &pg->ps, ti); - if (IS_ERR(pgpath)) { - r = PTR_ERR(pgpath); - goto bad; - } - - pgpath->pg = pg; - list_add_tail(&pgpath->list, &pg->pgpaths); - dm_consume_args(as, nr_args); - } - - return pg; - - bad: - free_priority_group(pg, ti); - return ERR_PTR(r); -} - -static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) -{ - unsigned hw_argc; - int ret; - struct dm_target *ti = m->ti; - - static struct dm_arg _args[] = { - {0, 1024, "invalid number of hardware handler args"}, - }; - - if (dm_read_arg_group(_args, as, &hw_argc, &ti->error)) - return -EINVAL; - - if (!hw_argc) - return 0; - - m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL); - if (!try_then_request_module(scsi_dh_handler_exist(m->hw_handler_name), - "scsi_dh_%s", m->hw_handler_name)) { - ti->error = "unknown hardware handler type"; - ret = -EINVAL; - goto fail; - } - - if (hw_argc > 1) { - char *p; - int i, j, len = 4; - - for (i = 0; i <= hw_argc - 2; i++) - len += strlen(as->argv[i]) + 1; - p = m->hw_handler_params = kzalloc(len, GFP_KERNEL); - if (!p) { - ti->error = "memory allocation failed"; - ret = -ENOMEM; - goto fail; - } - j = sprintf(p, "%d", hw_argc - 1); - for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1) - j = sprintf(p, "%s", as->argv[i]); - } - dm_consume_args(as, hw_argc - 1); - - return 0; -fail: - kfree(m->hw_handler_name); - m->hw_handler_name = NULL; - return ret; -} - -static int parse_features(struct dm_arg_set *as, struct multipath *m) -{ - int r; - unsigned argc; - struct dm_target *ti = m->ti; - const char *arg_name; - - static struct dm_arg _args[] 
= { - {0, 5, "invalid number of feature args"}, - {1, 50, "pg_init_retries must be between 1 and 50"}, - {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, - }; - - r = dm_read_arg_group(_args, as, &argc, &ti->error); - if (r) - return -EINVAL; - - if (!argc) - return 0; - - do { - arg_name = dm_shift_arg(as); - argc--; - - if (!strcasecmp(arg_name, "queue_if_no_path")) { - r = queue_if_no_path(m, 1, 0); - continue; - } - - if (!strcasecmp(arg_name, "pg_init_retries") && - (argc >= 1)) { - r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error); - argc--; - continue; - } - - if (!strcasecmp(arg_name, "pg_init_delay_msecs") && - (argc >= 1)) { - r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error); - argc--; - continue; - } - - ti->error = "Unrecognised multipath feature request"; - r = -EINVAL; - } while (argc && !r); - - return r; -} - -static int multipath_ctr(struct dm_target *ti, unsigned int argc, - char **argv) -{ - /* target arguments */ - static struct dm_arg _args[] = { - {0, 1024, "invalid number of priority groups"}, - {0, 1024, "invalid initial priority group number"}, - }; - - int r; - struct multipath *m; - struct dm_arg_set as; - unsigned pg_count = 0; - unsigned next_pg_num; - - as.argc = argc; - as.argv = argv; - - m = alloc_multipath(ti); - if (!m) { - ti->error = "can't allocate multipath"; - return -EINVAL; - } - - r = parse_features(&as, m); - if (r) - goto bad; - - r = parse_hw_handler(&as, m); - if (r) - goto bad; - - r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error); - if (r) - goto bad; - - r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error); - if (r) - goto bad; - - if ((!m->nr_priority_groups && next_pg_num) || - (m->nr_priority_groups && !next_pg_num)) { - ti->error = "invalid initial priority group"; - r = -EINVAL; - goto bad; - } - - /* parse the priority groups */ - while (as.argc) { - struct priority_group *pg; - - pg = parse_priority_group(&as, m); - if (IS_ERR(pg)) { - r = PTR_ERR(pg); - goto bad; - } - - m->nr_valid_paths += pg->nr_pgpaths; - list_add_tail(&pg->list, &m->priority_groups); - pg_count++; - pg->pg_num = pg_count; - if (!--next_pg_num) - m->next_pg = pg; - } - - if (pg_count != m->nr_priority_groups) { - ti->error = "priority group count mismatch"; - r = -EINVAL; - goto bad; - } - - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; - - return 0; - - bad: - free_multipath(m); - return r; -} - -static void multipath_wait_for_pg_init_completion(struct multipath *m) -{ - DECLARE_WAITQUEUE(wait, current); - unsigned long flags; - - add_wait_queue(&m->pg_init_wait, &wait); - - while (1) { - set_current_state(TASK_UNINTERRUPTIBLE); - - spin_lock_irqsave(&m->lock, flags); - if (!m->pg_init_in_progress) { - spin_unlock_irqrestore(&m->lock, flags); - break; - } - spin_unlock_irqrestore(&m->lock, flags); - - io_schedule(); - } - set_current_state(TASK_RUNNING); - - remove_wait_queue(&m->pg_init_wait, &wait); -} - -static void flush_multipath_work(struct multipath *m) -{ - flush_workqueue(kmpath_handlerd); - multipath_wait_for_pg_init_completion(m); - flush_workqueue(kmultipathd); - flush_work_sync(&m->trigger_event); -} - -static void multipath_dtr(struct dm_target *ti) -{ - struct multipath *m = ti->private; - - flush_multipath_work(m); - free_multipath(m); -} - -/* - * Map cloned requests - */ -static int multipath_map(struct dm_target *ti, struct request *clone, - union map_info *map_context) -{ - int r; - struct multipath *m = (struct multipath *) ti->private; - - if (set_mapinfo(m, 
map_context) < 0) - /* ENOMEM, requeue */ - return DM_MAPIO_REQUEUE; - - clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; - r = map_io(m, clone, map_context, 0); - if (r < 0 || r == DM_MAPIO_REQUEUE) - clear_mapinfo(m, map_context); - - return r; -} - -/* - * Take a path out of use. - */ -static int fail_path(struct pgpath *pgpath) -{ - unsigned long flags; - struct multipath *m = pgpath->pg->m; - - spin_lock_irqsave(&m->lock, flags); - - if (!pgpath->is_active) - goto out; - - DMWARN("Failing path %s.", pgpath->path.dev->name); - - pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path); - pgpath->is_active = 0; - pgpath->fail_count++; - - m->nr_valid_paths--; - - if (pgpath == m->current_pgpath) - m->current_pgpath = NULL; - - dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti, - pgpath->path.dev->name, m->nr_valid_paths); - - schedule_work(&m->trigger_event); - -out: - spin_unlock_irqrestore(&m->lock, flags); - - return 0; -} - -/* - * Reinstate a previously-failed path - */ -static int reinstate_path(struct pgpath *pgpath) -{ - int r = 0; - unsigned long flags; - struct multipath *m = pgpath->pg->m; - - spin_lock_irqsave(&m->lock, flags); - - if (pgpath->is_active) - goto out; - - if (!pgpath->pg->ps.type->reinstate_path) { - DMWARN("Reinstate path not supported by path selector %s", - pgpath->pg->ps.type->name); - r = -EINVAL; - goto out; - } - - r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path); - if (r) - goto out; - - pgpath->is_active = 1; - - if (!m->nr_valid_paths++ && m->queue_size) { - m->current_pgpath = NULL; - queue_work(kmultipathd, &m->process_queued_ios); - } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) { - if (queue_work(kmpath_handlerd, &pgpath->activate_path.work)) - m->pg_init_in_progress++; - } - - dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti, - pgpath->path.dev->name, m->nr_valid_paths); - - schedule_work(&m->trigger_event); - -out: - spin_unlock_irqrestore(&m->lock, flags); - - return r; -} - -/* - * Fail or reinstate all paths that match the provided struct dm_dev. 
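 *
 * (Editorial illustration, not part of the original source: fail_path()
 *  and reinstate_path() above are reachable from user space through the
 *  message interface handled in multipath_message() below, e.g.
 *
 *      dmsetup message mpatha 0 fail_path 8:32
 *      dmsetup message mpatha 0 reinstate_path 8:32
 *
 *  "mpatha" and "8:32" are assumed names; the path argument is resolved
 *  with dm_get_device() and then passed to action_dev().)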
- */ -static int action_dev(struct multipath *m, struct dm_dev *dev, - action_fn action) -{ - int r = -EINVAL; - struct pgpath *pgpath; - struct priority_group *pg; - - list_for_each_entry(pg, &m->priority_groups, list) { - list_for_each_entry(pgpath, &pg->pgpaths, list) { - if (pgpath->path.dev == dev) - r = action(pgpath); - } - } - - return r; -} - -/* - * Temporarily try to avoid having to use the specified PG - */ -static void bypass_pg(struct multipath *m, struct priority_group *pg, - int bypassed) -{ - unsigned long flags; - - spin_lock_irqsave(&m->lock, flags); - - pg->bypassed = bypassed; - m->current_pgpath = NULL; - m->current_pg = NULL; - - spin_unlock_irqrestore(&m->lock, flags); - - schedule_work(&m->trigger_event); -} - -/* - * Switch to using the specified PG from the next I/O that gets mapped - */ -static int switch_pg_num(struct multipath *m, const char *pgstr) -{ - struct priority_group *pg; - unsigned pgnum; - unsigned long flags; - char dummy; - - if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum || - (pgnum > m->nr_priority_groups)) { - DMWARN("invalid PG number supplied to switch_pg_num"); - return -EINVAL; - } - - spin_lock_irqsave(&m->lock, flags); - list_for_each_entry(pg, &m->priority_groups, list) { - pg->bypassed = 0; - if (--pgnum) - continue; - - m->current_pgpath = NULL; - m->current_pg = NULL; - m->next_pg = pg; - } - spin_unlock_irqrestore(&m->lock, flags); - - schedule_work(&m->trigger_event); - return 0; -} - -/* - * Set/clear bypassed status of a PG. - * PGs are numbered upwards from 1 in the order they were declared. - */ -static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed) -{ - struct priority_group *pg; - unsigned pgnum; - char dummy; - - if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum || - (pgnum > m->nr_priority_groups)) { - DMWARN("invalid PG number supplied to bypass_pg"); - return -EINVAL; - } - - list_for_each_entry(pg, &m->priority_groups, list) { - if (!--pgnum) - break; - } - - bypass_pg(m, pg, bypassed); - return 0; -} - -/* - * Should we retry pg_init immediately? - */ -static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath) -{ - unsigned long flags; - int limit_reached = 0; - - spin_lock_irqsave(&m->lock, flags); - - if (m->pg_init_count <= m->pg_init_retries) - m->pg_init_required = 1; - else - limit_reached = 1; - - spin_unlock_irqrestore(&m->lock, flags); - - return limit_reached; -} - -static void pg_init_done(void *data, int errors) -{ - struct pgpath *pgpath = data; - struct priority_group *pg = pgpath->pg; - struct multipath *m = pg->m; - unsigned long flags; - unsigned delay_retry = 0; - - /* device or driver problems */ - switch (errors) { - case SCSI_DH_OK: - break; - case SCSI_DH_NOSYS: - if (!m->hw_handler_name) { - errors = 0; - break; - } - DMERR("Could not failover the device: Handler scsi_dh_%s " - "Error %d.", m->hw_handler_name, errors); - /* - * Fail path for now, so we do not ping pong - */ - fail_path(pgpath); - break; - case SCSI_DH_DEV_TEMP_BUSY: - /* - * Probably doing something like FW upgrade on the - * controller so try the other pg. - */ - bypass_pg(m, pg, 1); - break; - case SCSI_DH_RETRY: - /* Wait before retrying. */ - delay_retry = 1; - case SCSI_DH_IMM_RETRY: - case SCSI_DH_RES_TEMP_UNAVAIL: - if (pg_init_limit_reached(m, pgpath)) - fail_path(pgpath); - errors = 0; - break; - default: - /* - * We probably do not want to fail the path for a device - * error, but this is what the old dm did. 
In future - * patches we can do more advanced handling. - */ - fail_path(pgpath); - } - - spin_lock_irqsave(&m->lock, flags); - if (errors) { - if (pgpath == m->current_pgpath) { - DMERR("Could not failover device. Error %d.", errors); - m->current_pgpath = NULL; - m->current_pg = NULL; - } - } else if (!m->pg_init_required) - pg->bypassed = 0; - - if (--m->pg_init_in_progress) - /* Activations of other paths are still on going */ - goto out; - - if (!m->pg_init_required) - m->queue_io = 0; - - m->pg_init_delay_retry = delay_retry; - queue_work(kmultipathd, &m->process_queued_ios); - - /* - * Wake up any thread waiting to suspend. - */ - wake_up(&m->pg_init_wait); - -out: - spin_unlock_irqrestore(&m->lock, flags); -} - -static void activate_path(struct work_struct *work) -{ - struct pgpath *pgpath = - container_of(work, struct pgpath, activate_path.work); - - scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev), - pg_init_done, pgpath); -} - -/* - * end_io handling - */ -static int do_end_io(struct multipath *m, struct request *clone, - int error, struct dm_mpath_io *mpio) -{ - /* - * We don't queue any clone request inside the multipath target - * during end I/O handling, since those clone requests don't have - * bio clones. If we queue them inside the multipath target, - * we need to make bio clones, that requires memory allocation. - * (See drivers/md/dm.c:end_clone_bio() about why the clone requests - * don't have bio clones.) - * Instead of queueing the clone request here, we queue the original - * request into dm core, which will remake a clone request and - * clone bios for it and resubmit it later. - */ - int r = DM_ENDIO_REQUEUE; - unsigned long flags; - - if (!error && !clone->errors) - return 0; /* I/O complete */ - - if (error == -EOPNOTSUPP || error == -EREMOTEIO || error == -EILSEQ) - return error; - - if (mpio->pgpath) - fail_path(mpio->pgpath); - - spin_lock_irqsave(&m->lock, flags); - if (!m->nr_valid_paths) { - if (!m->queue_if_no_path) { - if (!__must_push_back(m)) - r = -EIO; - } else { - if (error == -EBADE) - r = error; - } - } - spin_unlock_irqrestore(&m->lock, flags); - - return r; -} - -static int multipath_end_io(struct dm_target *ti, struct request *clone, - int error, union map_info *map_context) -{ - struct multipath *m = ti->private; - struct dm_mpath_io *mpio = map_context->ptr; - struct pgpath *pgpath = mpio->pgpath; - struct path_selector *ps; - int r; - - BUG_ON(!mpio); - - r = do_end_io(m, clone, error, mpio); - if (pgpath) { - ps = &pgpath->pg->ps; - if (ps->type->end_io) - ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes); - } - clear_mapinfo(m, map_context); - - return r; -} - -/* - * Suspend can't complete until all the I/O is processed so if - * the last path fails we must error any remaining I/O. - * Note that if the freeze_bdev fails while suspending, the - * queue_if_no_path state is lost - userspace should reset it. - */ -static void multipath_presuspend(struct dm_target *ti) -{ - struct multipath *m = (struct multipath *) ti->private; - - queue_if_no_path(m, 0, 1); -} - -static void multipath_postsuspend(struct dm_target *ti) -{ - struct multipath *m = ti->private; - - mutex_lock(&m->work_mutex); - flush_multipath_work(m); - mutex_unlock(&m->work_mutex); -} - -/* - * Restore the queue_if_no_path setting. 
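 *
 * (Editorial note, a sketch of the suspend/resume sequence rather than
 *  part of the original source:
 *
 *      multipath_presuspend()  -> queue_if_no_path(m, 0, 1)   save + clear
 *      multipath_postsuspend() -> flush_multipath_work(m)
 *      multipath_resume()      -> restore saved_queue_if_no_path
 *
 *  While the current and saved flags differ and a no-flush suspend is in
 *  progress, __must_push_back() returns true so requests are requeued to
 *  the dm core instead of being errored.)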
- */ -static void multipath_resume(struct dm_target *ti) -{ - struct multipath *m = (struct multipath *) ti->private; - unsigned long flags; - - spin_lock_irqsave(&m->lock, flags); - m->queue_if_no_path = m->saved_queue_if_no_path; - spin_unlock_irqrestore(&m->lock, flags); -} - -/* - * Info output has the following format: - * num_multipath_feature_args [multipath_feature_args]* - * num_handler_status_args [handler_status_args]* - * num_groups init_group_number - * [A|D|E num_ps_status_args [ps_status_args]* - * num_paths num_selector_args - * [path_dev A|F fail_count [selector_args]* ]+ ]+ - * - * Table output has the following format (identical to the constructor string): - * num_feature_args [features_args]* - * num_handler_args hw_handler [hw_handler_args]* - * num_groups init_group_number - * [priority selector-name num_ps_args [ps_args]* - * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+ - */ -static int multipath_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) -{ - int sz = 0; - unsigned long flags; - struct multipath *m = (struct multipath *) ti->private; - struct priority_group *pg; - struct pgpath *p; - unsigned pg_num; - char state; - - spin_lock_irqsave(&m->lock, flags); - - /* Features */ - if (type == STATUSTYPE_INFO) - DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count); - else { - DMEMIT("%u ", m->queue_if_no_path + - (m->pg_init_retries > 0) * 2 + - (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2); - if (m->queue_if_no_path) - DMEMIT("queue_if_no_path "); - if (m->pg_init_retries) - DMEMIT("pg_init_retries %u ", m->pg_init_retries); - if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) - DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs); - } - - if (!m->hw_handler_name || type == STATUSTYPE_INFO) - DMEMIT("0 "); - else - DMEMIT("1 %s ", m->hw_handler_name); - - DMEMIT("%u ", m->nr_priority_groups); - - if (m->next_pg) - pg_num = m->next_pg->pg_num; - else if (m->current_pg) - pg_num = m->current_pg->pg_num; - else - pg_num = (m->nr_priority_groups ? 1 : 0); - - DMEMIT("%u ", pg_num); - - switch (type) { - case STATUSTYPE_INFO: - list_for_each_entry(pg, &m->priority_groups, list) { - if (pg->bypassed) - state = 'D'; /* Disabled */ - else if (pg == m->current_pg) - state = 'A'; /* Currently Active */ - else - state = 'E'; /* Enabled */ - - DMEMIT("%c ", state); - - if (pg->ps.type->status) - sz += pg->ps.type->status(&pg->ps, NULL, type, - result + sz, - maxlen - sz); - else - DMEMIT("0 "); - - DMEMIT("%u %u ", pg->nr_pgpaths, - pg->ps.type->info_args); - - list_for_each_entry(p, &pg->pgpaths, list) { - DMEMIT("%s %s %u ", p->path.dev->name, - p->is_active ? 
"A" : "F", - p->fail_count); - if (pg->ps.type->status) - sz += pg->ps.type->status(&pg->ps, - &p->path, type, result + sz, - maxlen - sz); - } - } - break; - - case STATUSTYPE_TABLE: - list_for_each_entry(pg, &m->priority_groups, list) { - DMEMIT("%s ", pg->ps.type->name); - - if (pg->ps.type->status) - sz += pg->ps.type->status(&pg->ps, NULL, type, - result + sz, - maxlen - sz); - else - DMEMIT("0 "); - - DMEMIT("%u %u ", pg->nr_pgpaths, - pg->ps.type->table_args); - - list_for_each_entry(p, &pg->pgpaths, list) { - DMEMIT("%s ", p->path.dev->name); - if (pg->ps.type->status) - sz += pg->ps.type->status(&pg->ps, - &p->path, type, result + sz, - maxlen - sz); - } - } - break; - } - - spin_unlock_irqrestore(&m->lock, flags); - - return 0; -} - -static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) -{ - int r = -EINVAL; - struct dm_dev *dev; - struct multipath *m = (struct multipath *) ti->private; - action_fn action; - - mutex_lock(&m->work_mutex); - - if (dm_suspended(ti)) { - r = -EBUSY; - goto out; - } - - if (argc == 1) { - if (!strcasecmp(argv[0], "queue_if_no_path")) { - r = queue_if_no_path(m, 1, 0); - goto out; - } else if (!strcasecmp(argv[0], "fail_if_no_path")) { - r = queue_if_no_path(m, 0, 0); - goto out; - } - } - - if (argc != 2) { - DMWARN("Unrecognised multipath message received."); - goto out; - } - - if (!strcasecmp(argv[0], "disable_group")) { - r = bypass_pg_num(m, argv[1], 1); - goto out; - } else if (!strcasecmp(argv[0], "enable_group")) { - r = bypass_pg_num(m, argv[1], 0); - goto out; - } else if (!strcasecmp(argv[0], "switch_group")) { - r = switch_pg_num(m, argv[1]); - goto out; - } else if (!strcasecmp(argv[0], "reinstate_path")) - action = reinstate_path; - else if (!strcasecmp(argv[0], "fail_path")) - action = fail_path; - else { - DMWARN("Unrecognised multipath message received."); - goto out; - } - - r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev); - if (r) { - DMWARN("message: error getting device %s", - argv[1]); - goto out; - } - - r = action_dev(m, dev, action); - - dm_put_device(ti, dev); - -out: - mutex_unlock(&m->work_mutex); - return r; -} - -static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, - unsigned long arg) -{ - struct multipath *m = (struct multipath *) ti->private; - struct block_device *bdev = NULL; - fmode_t mode = 0; - unsigned long flags; - int r = 0; - - spin_lock_irqsave(&m->lock, flags); - - if (!m->current_pgpath) - __choose_pgpath(m, 0); - - if (m->current_pgpath) { - bdev = m->current_pgpath->path.dev->bdev; - mode = m->current_pgpath->path.dev->mode; - } - - if (m->queue_io) - r = -EAGAIN; - else if (!bdev) - r = -EIO; - - spin_unlock_irqrestore(&m->lock, flags); - - /* - * Only pass ioctls through if the device sizes match exactly. - */ - if (!r && ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT) - r = scsi_verify_blk_ioctl(NULL, cmd); - - return r ? 
: __blkdev_driver_ioctl(bdev, mode, cmd, arg); -} - -static int multipath_iterate_devices(struct dm_target *ti, - iterate_devices_callout_fn fn, void *data) -{ - struct multipath *m = ti->private; - struct priority_group *pg; - struct pgpath *p; - int ret = 0; - - list_for_each_entry(pg, &m->priority_groups, list) { - list_for_each_entry(p, &pg->pgpaths, list) { - ret = fn(ti, p->path.dev, ti->begin, ti->len, data); - if (ret) - goto out; - } - } - -out: - return ret; -} - -static int __pgpath_busy(struct pgpath *pgpath) -{ - struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev); - - return dm_underlying_device_busy(q); -} - -/* - * We return "busy", only when we can map I/Os but underlying devices - * are busy (so even if we map I/Os now, the I/Os will wait on - * the underlying queue). - * In other words, if we want to kill I/Os or queue them inside us - * due to map unavailability, we don't return "busy". Otherwise, - * dm core won't give us the I/Os and we can't do what we want. - */ -static int multipath_busy(struct dm_target *ti) -{ - int busy = 0, has_active = 0; - struct multipath *m = ti->private; - struct priority_group *pg; - struct pgpath *pgpath; - unsigned long flags; - - spin_lock_irqsave(&m->lock, flags); - - /* Guess which priority_group will be used at next mapping time */ - if (unlikely(!m->current_pgpath && m->next_pg)) - pg = m->next_pg; - else if (likely(m->current_pg)) - pg = m->current_pg; - else - /* - * We don't know which pg will be used at next mapping time. - * We don't call __choose_pgpath() here to avoid to trigger - * pg_init just by busy checking. - * So we don't know whether underlying devices we will be using - * at next mapping time are busy or not. Just try mapping. - */ - goto out; - - /* - * If there is one non-busy active path at least, the path selector - * will be able to select it. So we consider such a pg as not busy. - */ - busy = 1; - list_for_each_entry(pgpath, &pg->pgpaths, list) - if (pgpath->is_active) { - has_active = 1; - - if (!__pgpath_busy(pgpath)) { - busy = 0; - break; - } - } - - if (!has_active) - /* - * No active path in this pg, so this pg won't be used and - * the current_pg will be changed at next mapping time. - * We need to try mapping to determine it. 
- */ - busy = 0; - -out: - spin_unlock_irqrestore(&m->lock, flags); - - return busy; -} - -/*----------------------------------------------------------------- - * Module setup - *---------------------------------------------------------------*/ -static struct target_type multipath_target = { - .name = "multipath", - .version = {1, 3, 0}, - .module = THIS_MODULE, - .ctr = multipath_ctr, - .dtr = multipath_dtr, - .map_rq = multipath_map, - .rq_end_io = multipath_end_io, - .presuspend = multipath_presuspend, - .postsuspend = multipath_postsuspend, - .resume = multipath_resume, - .status = multipath_status, - .message = multipath_message, - .ioctl = multipath_ioctl, - .iterate_devices = multipath_iterate_devices, - .busy = multipath_busy, -}; - -static int __init dm_multipath_init(void) -{ - int r; - - /* allocate a slab for the dm_ios */ - _mpio_cache = KMEM_CACHE(dm_mpath_io, 0); - if (!_mpio_cache) - return -ENOMEM; - - r = dm_register_target(&multipath_target); - if (r < 0) { - DMERR("register failed %d", r); - kmem_cache_destroy(_mpio_cache); - return -EINVAL; - } - - kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0); - if (!kmultipathd) { - DMERR("failed to create workqueue kmpathd"); - dm_unregister_target(&multipath_target); - kmem_cache_destroy(_mpio_cache); - return -ENOMEM; - } - - /* - * A separate workqueue is used to handle the device handlers - * to avoid overloading existing workqueue. Overloading the - * old workqueue would also create a bottleneck in the - * path of the storage hardware device activation. - */ - kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd", - WQ_MEM_RECLAIM); - if (!kmpath_handlerd) { - DMERR("failed to create workqueue kmpath_handlerd"); - destroy_workqueue(kmultipathd); - dm_unregister_target(&multipath_target); - kmem_cache_destroy(_mpio_cache); - return -ENOMEM; - } - - DMINFO("version %u.%u.%u loaded", - multipath_target.version[0], multipath_target.version[1], - multipath_target.version[2]); - - return r; -} - -static void __exit dm_multipath_exit(void) -{ - destroy_workqueue(kmpath_handlerd); - destroy_workqueue(kmultipathd); - - dm_unregister_target(&multipath_target); - kmem_cache_destroy(_mpio_cache); -} - -module_init(dm_multipath_init); -module_exit(dm_multipath_exit); - -MODULE_DESCRIPTION(DM_NAME " multipath target"); -MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm-mpath.h b/ANDROID_3.4.5/drivers/md/dm-mpath.h deleted file mode 100644 index e230f719..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-mpath.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. - * - * Multipath. - */ - -#ifndef DM_MPATH_H -#define DM_MPATH_H - -struct dm_dev; - -struct dm_path { - struct dm_dev *dev; /* Read-only */ - void *pscontext; /* For path-selector use */ -}; - -/* Callback for hwh_pg_init_fn to use when complete */ -void dm_pg_init_complete(struct dm_path *path, unsigned err_flags); - -#endif diff --git a/ANDROID_3.4.5/drivers/md/dm-path-selector.c b/ANDROID_3.4.5/drivers/md/dm-path-selector.c deleted file mode 100644 index fa0ccc58..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-path-selector.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (C) 2003 Sistina Software. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * Module Author: Heinz Mauelshagen - * - * This file is released under the GPL. - * - * Path selector registration. 
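 *
 * (Editorial sketch, not part of the original source: a path selector
 *  module typically registers its path_selector_type from module init,
 *  e.g.
 *
 *      static int __init dm_example_ps_init(void)
 *      {
 *              return dm_register_path_selector(&example_ps);
 *      }
 *
 *  where "example_ps" is a hypothetical selector type; dm_ql_init() in
 *  dm-queue-length.c below is a real in-tree instance of this pattern.)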
- */ - -#include <linux/device-mapper.h> -#include <linux/module.h> - -#include "dm-path-selector.h" - -#include <linux/slab.h> - -struct ps_internal { - struct path_selector_type pst; - struct list_head list; -}; - -#define pst_to_psi(__pst) container_of((__pst), struct ps_internal, pst) - -static LIST_HEAD(_path_selectors); -static DECLARE_RWSEM(_ps_lock); - -static struct ps_internal *__find_path_selector_type(const char *name) -{ - struct ps_internal *psi; - - list_for_each_entry(psi, &_path_selectors, list) { - if (!strcmp(name, psi->pst.name)) - return psi; - } - - return NULL; -} - -static struct ps_internal *get_path_selector(const char *name) -{ - struct ps_internal *psi; - - down_read(&_ps_lock); - psi = __find_path_selector_type(name); - if (psi && !try_module_get(psi->pst.module)) - psi = NULL; - up_read(&_ps_lock); - - return psi; -} - -struct path_selector_type *dm_get_path_selector(const char *name) -{ - struct ps_internal *psi; - - if (!name) - return NULL; - - psi = get_path_selector(name); - if (!psi) { - request_module("dm-%s", name); - psi = get_path_selector(name); - } - - return psi ? &psi->pst : NULL; -} - -void dm_put_path_selector(struct path_selector_type *pst) -{ - struct ps_internal *psi; - - if (!pst) - return; - - down_read(&_ps_lock); - psi = __find_path_selector_type(pst->name); - if (!psi) - goto out; - - module_put(psi->pst.module); -out: - up_read(&_ps_lock); -} - -static struct ps_internal *_alloc_path_selector(struct path_selector_type *pst) -{ - struct ps_internal *psi = kzalloc(sizeof(*psi), GFP_KERNEL); - - if (psi) - psi->pst = *pst; - - return psi; -} - -int dm_register_path_selector(struct path_selector_type *pst) -{ - int r = 0; - struct ps_internal *psi = _alloc_path_selector(pst); - - if (!psi) - return -ENOMEM; - - down_write(&_ps_lock); - - if (__find_path_selector_type(pst->name)) { - kfree(psi); - r = -EEXIST; - } else - list_add(&psi->list, &_path_selectors); - - up_write(&_ps_lock); - - return r; -} - -int dm_unregister_path_selector(struct path_selector_type *pst) -{ - struct ps_internal *psi; - - down_write(&_ps_lock); - - psi = __find_path_selector_type(pst->name); - if (!psi) { - up_write(&_ps_lock); - return -EINVAL; - } - - list_del(&psi->list); - - up_write(&_ps_lock); - - kfree(psi); - - return 0; -} - -EXPORT_SYMBOL_GPL(dm_register_path_selector); -EXPORT_SYMBOL_GPL(dm_unregister_path_selector); diff --git a/ANDROID_3.4.5/drivers/md/dm-path-selector.h b/ANDROID_3.4.5/drivers/md/dm-path-selector.h deleted file mode 100644 index e7d1fa8b..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-path-selector.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (C) 2003 Sistina Software. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * Module Author: Heinz Mauelshagen - * - * This file is released under the GPL. - * - * Path-Selector registration. - */ - -#ifndef DM_PATH_SELECTOR_H -#define DM_PATH_SELECTOR_H - -#include <linux/device-mapper.h> - -#include "dm-mpath.h" - -/* - * We provide an abstraction for the code that chooses which path - * to send some io down. 
- */ -struct path_selector_type; -struct path_selector { - struct path_selector_type *type; - void *context; -}; - -/* Information about a path selector type */ -struct path_selector_type { - char *name; - struct module *module; - - unsigned int table_args; - unsigned int info_args; - - /* - * Constructs a path selector object, takes custom arguments - */ - int (*create) (struct path_selector *ps, unsigned argc, char **argv); - void (*destroy) (struct path_selector *ps); - - /* - * Add an opaque path object, along with some selector specific - * path args (eg, path priority). - */ - int (*add_path) (struct path_selector *ps, struct dm_path *path, - int argc, char **argv, char **error); - - /* - * Chooses a path for this io, if no paths are available then - * NULL will be returned. - * - * repeat_count is the number of times to use the path before - * calling the function again. 0 means don't call it again unless - * the path fails. - */ - struct dm_path *(*select_path) (struct path_selector *ps, - unsigned *repeat_count, - size_t nr_bytes); - - /* - * Notify the selector that a path has failed. - */ - void (*fail_path) (struct path_selector *ps, struct dm_path *p); - - /* - * Ask selector to reinstate a path. - */ - int (*reinstate_path) (struct path_selector *ps, struct dm_path *p); - - /* - * Table content based on parameters added in ps_add_path_fn - * or path selector status - */ - int (*status) (struct path_selector *ps, struct dm_path *path, - status_type_t type, char *result, unsigned int maxlen); - - int (*start_io) (struct path_selector *ps, struct dm_path *path, - size_t nr_bytes); - int (*end_io) (struct path_selector *ps, struct dm_path *path, - size_t nr_bytes); -}; - -/* Register a path selector */ -int dm_register_path_selector(struct path_selector_type *type); - -/* Unregister a path selector */ -int dm_unregister_path_selector(struct path_selector_type *type); - -/* Returns a registered path selector type */ -struct path_selector_type *dm_get_path_selector(const char *name); - -/* Releases a path selector */ -void dm_put_path_selector(struct path_selector_type *pst); - -#endif diff --git a/ANDROID_3.4.5/drivers/md/dm-queue-length.c b/ANDROID_3.4.5/drivers/md/dm-queue-length.c deleted file mode 100644 index 3941fae0..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-queue-length.c +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Copyright (C) 2004-2005 IBM Corp. All Rights Reserved. - * Copyright (C) 2006-2009 NEC Corporation. - * - * dm-queue-length.c - * - * Module Author: Stefan Bader, IBM - * Modified by: Kiyoshi Ueda, NEC - * - * This file is released under the GPL. - * - * queue-length path selector - choose a path with the least number of - * in-flight I/Os. 
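 *
 * (Editorial example, not part of the original source: a multipath table
 *  line using this policy for one priority group of two paths might read
 *
 *      0 2097152 multipath 0 0 1 1 queue-length 0 2 1 8:16 128 8:32 128
 *
 *  i.e. no feature or hardware-handler arguments, one priority group,
 *  zero selector arguments, two paths with one per-path argument each
 *  (the repeat_count, 128). Device numbers and length are illustrative.)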
- */ - -#include "dm.h" -#include "dm-path-selector.h" - -#include <linux/slab.h> -#include <linux/ctype.h> -#include <linux/errno.h> -#include <linux/module.h> -#include <linux/atomic.h> - -#define DM_MSG_PREFIX "multipath queue-length" -#define QL_MIN_IO 128 -#define QL_VERSION "0.1.0" - -struct selector { - struct list_head valid_paths; - struct list_head failed_paths; -}; - -struct path_info { - struct list_head list; - struct dm_path *path; - unsigned repeat_count; - atomic_t qlen; /* the number of in-flight I/Os */ -}; - -static struct selector *alloc_selector(void) -{ - struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); - - if (s) { - INIT_LIST_HEAD(&s->valid_paths); - INIT_LIST_HEAD(&s->failed_paths); - } - - return s; -} - -static int ql_create(struct path_selector *ps, unsigned argc, char **argv) -{ - struct selector *s = alloc_selector(); - - if (!s) - return -ENOMEM; - - ps->context = s; - return 0; -} - -static void ql_free_paths(struct list_head *paths) -{ - struct path_info *pi, *next; - - list_for_each_entry_safe(pi, next, paths, list) { - list_del(&pi->list); - kfree(pi); - } -} - -static void ql_destroy(struct path_selector *ps) -{ - struct selector *s = ps->context; - - ql_free_paths(&s->valid_paths); - ql_free_paths(&s->failed_paths); - kfree(s); - ps->context = NULL; -} - -static int ql_status(struct path_selector *ps, struct dm_path *path, - status_type_t type, char *result, unsigned maxlen) -{ - unsigned sz = 0; - struct path_info *pi; - - /* When called with NULL path, return selector status/args. */ - if (!path) - DMEMIT("0 "); - else { - pi = path->pscontext; - - switch (type) { - case STATUSTYPE_INFO: - DMEMIT("%d ", atomic_read(&pi->qlen)); - break; - case STATUSTYPE_TABLE: - DMEMIT("%u ", pi->repeat_count); - break; - } - } - - return sz; -} - -static int ql_add_path(struct path_selector *ps, struct dm_path *path, - int argc, char **argv, char **error) -{ - struct selector *s = ps->context; - struct path_info *pi; - unsigned repeat_count = QL_MIN_IO; - char dummy; - - /* - * Arguments: [<repeat_count>] - * <repeat_count>: The number of I/Os before switching path. - * If not given, default (QL_MIN_IO) is used. 
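 *
 * (Editorial note: in the multipath table this argument follows the path
 *  device, so "... 2 1 8:16 10 8:32 10" gives each path a repeat_count of
 *  10, while "... 2 0 8:16 8:32" leaves it at QL_MIN_IO, i.e. 128.)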
- */ - if (argc > 1) { - *error = "queue-length ps: incorrect number of arguments"; - return -EINVAL; - } - - if ((argc == 1) && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) { - *error = "queue-length ps: invalid repeat count"; - return -EINVAL; - } - - /* Allocate the path information structure */ - pi = kmalloc(sizeof(*pi), GFP_KERNEL); - if (!pi) { - *error = "queue-length ps: Error allocating path information"; - return -ENOMEM; - } - - pi->path = path; - pi->repeat_count = repeat_count; - atomic_set(&pi->qlen, 0); - - path->pscontext = pi; - - list_add_tail(&pi->list, &s->valid_paths); - - return 0; -} - -static void ql_fail_path(struct path_selector *ps, struct dm_path *path) -{ - struct selector *s = ps->context; - struct path_info *pi = path->pscontext; - - list_move(&pi->list, &s->failed_paths); -} - -static int ql_reinstate_path(struct path_selector *ps, struct dm_path *path) -{ - struct selector *s = ps->context; - struct path_info *pi = path->pscontext; - - list_move_tail(&pi->list, &s->valid_paths); - - return 0; -} - -/* - * Select a path having the minimum number of in-flight I/Os - */ -static struct dm_path *ql_select_path(struct path_selector *ps, - unsigned *repeat_count, size_t nr_bytes) -{ - struct selector *s = ps->context; - struct path_info *pi = NULL, *best = NULL; - - if (list_empty(&s->valid_paths)) - return NULL; - - /* Change preferred (first in list) path to evenly balance. */ - list_move_tail(s->valid_paths.next, &s->valid_paths); - - list_for_each_entry(pi, &s->valid_paths, list) { - if (!best || - (atomic_read(&pi->qlen) < atomic_read(&best->qlen))) - best = pi; - - if (!atomic_read(&best->qlen)) - break; - } - - if (!best) - return NULL; - - *repeat_count = best->repeat_count; - - return best->path; -} - -static int ql_start_io(struct path_selector *ps, struct dm_path *path, - size_t nr_bytes) -{ - struct path_info *pi = path->pscontext; - - atomic_inc(&pi->qlen); - - return 0; -} - -static int ql_end_io(struct path_selector *ps, struct dm_path *path, - size_t nr_bytes) -{ - struct path_info *pi = path->pscontext; - - atomic_dec(&pi->qlen); - - return 0; -} - -static struct path_selector_type ql_ps = { - .name = "queue-length", - .module = THIS_MODULE, - .table_args = 1, - .info_args = 1, - .create = ql_create, - .destroy = ql_destroy, - .status = ql_status, - .add_path = ql_add_path, - .fail_path = ql_fail_path, - .reinstate_path = ql_reinstate_path, - .select_path = ql_select_path, - .start_io = ql_start_io, - .end_io = ql_end_io, -}; - -static int __init dm_ql_init(void) -{ - int r = dm_register_path_selector(&ql_ps); - - if (r < 0) - DMERR("register failed %d", r); - - DMINFO("version " QL_VERSION " loaded"); - - return r; -} - -static void __exit dm_ql_exit(void) -{ - int r = dm_unregister_path_selector(&ql_ps); - - if (r < 0) - DMERR("unregister failed %d", r); -} - -module_init(dm_ql_init); -module_exit(dm_ql_exit); - -MODULE_AUTHOR("Stefan Bader <Stefan.Bader at de.ibm.com>"); -MODULE_DESCRIPTION( - "(C) Copyright IBM Corp. 2004,2005 All Rights Reserved.\n" - DM_NAME " path selector to balance the number of in-flight I/Os" -); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm-raid.c b/ANDROID_3.4.5/drivers/md/dm-raid.c deleted file mode 100644 index 68965e66..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-raid.c +++ /dev/null @@ -1,1297 +0,0 @@ -/* - * Copyright (C) 2010-2011 Neil Brown - * Copyright (C) 2010-2011 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. 
- */ - -#include <linux/slab.h> -#include <linux/module.h> - -#include "md.h" -#include "raid1.h" -#include "raid5.h" -#include "bitmap.h" - -#include <linux/device-mapper.h> - -#define DM_MSG_PREFIX "raid" - -/* - * The following flags are used by dm-raid.c to set up the array state. - * They must be cleared before md_run is called. - */ -#define FirstUse 10 /* rdev flag */ - -struct raid_dev { - /* - * Two DM devices, one to hold metadata and one to hold the - * actual data/parity. The reason for this is to not confuse - * ti->len and give more flexibility in altering size and - * characteristics. - * - * While it is possible for this device to be associated - * with a different physical device than the data_dev, it - * is intended for it to be the same. - * |--------- Physical Device ---------| - * |- meta_dev -|------ data_dev ------| - */ - struct dm_dev *meta_dev; - struct dm_dev *data_dev; - struct md_rdev rdev; -}; - -/* - * Flags for rs->print_flags field. - */ -#define DMPF_SYNC 0x1 -#define DMPF_NOSYNC 0x2 -#define DMPF_REBUILD 0x4 -#define DMPF_DAEMON_SLEEP 0x8 -#define DMPF_MIN_RECOVERY_RATE 0x10 -#define DMPF_MAX_RECOVERY_RATE 0x20 -#define DMPF_MAX_WRITE_BEHIND 0x40 -#define DMPF_STRIPE_CACHE 0x80 -#define DMPF_REGION_SIZE 0X100 -struct raid_set { - struct dm_target *ti; - - uint32_t bitmap_loaded; - uint32_t print_flags; - - struct mddev md; - struct raid_type *raid_type; - struct dm_target_callbacks callbacks; - - struct raid_dev dev[0]; -}; - -/* Supported raid types and properties. */ -static struct raid_type { - const char *name; /* RAID algorithm. */ - const char *descr; /* Descriptor text for logging. */ - const unsigned parity_devs; /* # of parity devices. */ - const unsigned minimal_devs; /* minimal # of devices in set. */ - const unsigned level; /* RAID level. */ - const unsigned algorithm; /* RAID algorithm. 
*/ -} raid_types[] = { - {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */}, - {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, - {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, - {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, - {"raid5_ls", "RAID5 (left symmetric)", 1, 2, 5, ALGORITHM_LEFT_SYMMETRIC}, - {"raid5_rs", "RAID5 (right symmetric)", 1, 2, 5, ALGORITHM_RIGHT_SYMMETRIC}, - {"raid6_zr", "RAID6 (zero restart)", 2, 4, 6, ALGORITHM_ROTATING_ZERO_RESTART}, - {"raid6_nr", "RAID6 (N restart)", 2, 4, 6, ALGORITHM_ROTATING_N_RESTART}, - {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} -}; - -static struct raid_type *get_raid_type(char *name) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(raid_types); i++) - if (!strcmp(raid_types[i].name, name)) - return &raid_types[i]; - - return NULL; -} - -static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *raid_type, unsigned raid_devs) -{ - unsigned i; - struct raid_set *rs; - sector_t sectors_per_dev; - - if (raid_devs <= raid_type->parity_devs) { - ti->error = "Insufficient number of devices"; - return ERR_PTR(-EINVAL); - } - - sectors_per_dev = ti->len; - if ((raid_type->level > 1) && - sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { - ti->error = "Target length not divisible by number of data devices"; - return ERR_PTR(-EINVAL); - } - - rs = kzalloc(sizeof(*rs) + raid_devs * sizeof(rs->dev[0]), GFP_KERNEL); - if (!rs) { - ti->error = "Cannot allocate raid context"; - return ERR_PTR(-ENOMEM); - } - - mddev_init(&rs->md); - - rs->ti = ti; - rs->raid_type = raid_type; - rs->md.raid_disks = raid_devs; - rs->md.level = raid_type->level; - rs->md.new_level = rs->md.level; - rs->md.dev_sectors = sectors_per_dev; - rs->md.layout = raid_type->algorithm; - rs->md.new_layout = rs->md.layout; - rs->md.delta_disks = 0; - rs->md.recovery_cp = 0; - - for (i = 0; i < raid_devs; i++) - md_rdev_init(&rs->dev[i].rdev); - - /* - * Remaining items to be initialized by further RAID params: - * rs->md.persistent - * rs->md.external - * rs->md.chunk_sectors - * rs->md.new_chunk_sectors - */ - - return rs; -} - -static void context_free(struct raid_set *rs) -{ - int i; - - for (i = 0; i < rs->md.raid_disks; i++) { - if (rs->dev[i].meta_dev) - dm_put_device(rs->ti, rs->dev[i].meta_dev); - if (rs->dev[i].rdev.sb_page) - put_page(rs->dev[i].rdev.sb_page); - rs->dev[i].rdev.sb_page = NULL; - rs->dev[i].rdev.sb_loaded = 0; - if (rs->dev[i].data_dev) - dm_put_device(rs->ti, rs->dev[i].data_dev); - } - - kfree(rs); -} - -/* - * For every device we have two words - * <meta_dev>: meta device name or '-' if missing - * <data_dev>: data device name or '-' if missing - * - * The following are permitted: - * - - - * - <data_dev> - * <meta_dev> <data_dev> - * - * The following is not allowed: - * <meta_dev> - - * - * This code parses those words. If there is a failure, - * the caller must use context_free to unwind the operations. - */ -static int dev_parms(struct raid_set *rs, char **argv) -{ - int i; - int rebuild = 0; - int metadata_available = 0; - int ret = 0; - - for (i = 0; i < rs->md.raid_disks; i++, argv += 2) { - rs->dev[i].rdev.raid_disk = i; - - rs->dev[i].meta_dev = NULL; - rs->dev[i].data_dev = NULL; - - /* - * There are no offsets, since there is a separate device - * for data and metadata. 
- */ - rs->dev[i].rdev.data_offset = 0; - rs->dev[i].rdev.mddev = &rs->md; - - if (strcmp(argv[0], "-")) { - ret = dm_get_device(rs->ti, argv[0], - dm_table_get_mode(rs->ti->table), - &rs->dev[i].meta_dev); - rs->ti->error = "RAID metadata device lookup failure"; - if (ret) - return ret; - - rs->dev[i].rdev.sb_page = alloc_page(GFP_KERNEL); - if (!rs->dev[i].rdev.sb_page) - return -ENOMEM; - } - - if (!strcmp(argv[1], "-")) { - if (!test_bit(In_sync, &rs->dev[i].rdev.flags) && - (!rs->dev[i].rdev.recovery_offset)) { - rs->ti->error = "Drive designated for rebuild not specified"; - return -EINVAL; - } - - rs->ti->error = "No data device supplied with metadata device"; - if (rs->dev[i].meta_dev) - return -EINVAL; - - continue; - } - - ret = dm_get_device(rs->ti, argv[1], - dm_table_get_mode(rs->ti->table), - &rs->dev[i].data_dev); - if (ret) { - rs->ti->error = "RAID device lookup failure"; - return ret; - } - - if (rs->dev[i].meta_dev) { - metadata_available = 1; - rs->dev[i].rdev.meta_bdev = rs->dev[i].meta_dev->bdev; - } - rs->dev[i].rdev.bdev = rs->dev[i].data_dev->bdev; - list_add(&rs->dev[i].rdev.same_set, &rs->md.disks); - if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) - rebuild++; - } - - if (metadata_available) { - rs->md.external = 0; - rs->md.persistent = 1; - rs->md.major_version = 2; - } else if (rebuild && !rs->md.recovery_cp) { - /* - * Without metadata, we will not be able to tell if the array - * is in-sync or not - we must assume it is not. Therefore, - * it is impossible to rebuild a drive. - * - * Even if there is metadata, the on-disk information may - * indicate that the array is not in-sync and it will then - * fail at that time. - * - * User could specify 'nosync' option if desperate. - */ - DMERR("Unable to rebuild drive while array is not in-sync"); - rs->ti->error = "RAID device lookup failure"; - return -EINVAL; - } - - return 0; -} - -/* - * validate_region_size - * @rs - * @region_size: region size in sectors. If 0, pick a size (4MiB default). - * - * Set rs->md.bitmap_info.chunksize (which really refers to 'region size'). - * Ensure that (ti->len/region_size < 2^21) - required by MD bitmap. - * - * Returns: 0 on success, -EINVAL on failure. - */ -static int validate_region_size(struct raid_set *rs, unsigned long region_size) -{ - unsigned long min_region_size = rs->ti->len / (1 << 21); - - if (!region_size) { - /* - * Choose a reasonable default. All figures in sectors. - */ - if (min_region_size > (1 << 13)) { - DMINFO("Choosing default region size of %lu sectors", - region_size); - region_size = min_region_size; - } else { - DMINFO("Choosing default region size of 4MiB"); - region_size = 1 << 13; /* sectors */ - } - } else { - /* - * Validate user-supplied value. - */ - if (region_size > rs->ti->len) { - rs->ti->error = "Supplied region size is too large"; - return -EINVAL; - } - - if (region_size < min_region_size) { - DMERR("Supplied region_size (%lu sectors) below minimum (%lu)", - region_size, min_region_size); - rs->ti->error = "Supplied region size is too small"; - return -EINVAL; - } - - if (!is_power_of_2(region_size)) { - rs->ti->error = "Region size is not a power of 2"; - return -EINVAL; - } - - if (region_size < rs->md.chunk_sectors) { - rs->ti->error = "Region size is smaller than the chunk size"; - return -EINVAL; - } - } - - /* - * Convert sectors to bytes. - */ - rs->md.bitmap_info.chunksize = (region_size << 9); - - return 0; -} - -/* - * Possible arguments are... 
- * <chunk_size> [optional_args] - * - * Argument definitions - * <chunk_size> The number of sectors per disk that - * will form the "stripe" - * [[no]sync] Force or prevent recovery of the - * entire array - * [rebuild <idx>] Rebuild the drive indicated by the index - * [daemon_sleep <ms>] Time between bitmap daemon work to - * clear bits - * [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization - * [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization - * [write_mostly <idx>] Indicate a write mostly drive via index - * [max_write_behind <sectors>] See '-write-behind=' (man mdadm) - * [stripe_cache <sectors>] Stripe cache size for higher RAIDs - * [region_size <sectors>] Defines granularity of bitmap - */ -static int parse_raid_params(struct raid_set *rs, char **argv, - unsigned num_raid_params) -{ - unsigned i, rebuild_cnt = 0; - unsigned long value, region_size = 0; - char *key; - - /* - * First, parse the in-order required arguments - * "chunk_size" is the only argument of this type. - */ - if ((strict_strtoul(argv[0], 10, &value) < 0)) { - rs->ti->error = "Bad chunk size"; - return -EINVAL; - } else if (rs->raid_type->level == 1) { - if (value) - DMERR("Ignoring chunk size parameter for RAID 1"); - value = 0; - } else if (!is_power_of_2(value)) { - rs->ti->error = "Chunk size must be a power of 2"; - return -EINVAL; - } else if (value < 8) { - rs->ti->error = "Chunk size value is too small"; - return -EINVAL; - } - - rs->md.new_chunk_sectors = rs->md.chunk_sectors = value; - argv++; - num_raid_params--; - - /* - * We set each individual device as In_sync with a completed - * 'recovery_offset'. If there has been a device failure or - * replacement then one of the following cases applies: - * - * 1) User specifies 'rebuild'. - * - Device is reset when param is read. - * 2) A new device is supplied. - * - No matching superblock found, resets device. - * 3) Device failure was transient and returns on reload. - * - Failure noticed, resets device for bitmap replay. - * 4) Device hadn't completed recovery after previous failure. - * - Superblock is read and overrides recovery_offset. - * - * What is found in the superblocks of the devices is always - * authoritative, unless 'rebuild' or '[no]sync' was specified. 
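 *
 * (Editorial example, not part of the original source: a RAID1 table that
 *  marks the second mirror for rebuild could look like
 *
 *      0 3907584 raid raid1 3 0 rebuild 1 2 8:16 8:17 8:32 8:33
 *
 *  i.e. three raid parameters (the mandatory chunk_size, forced to 0 for
 *  RAID1, plus "rebuild 1") followed by two <meta_dev> <data_dev> pairs.
 *  Device numbers and length are illustrative; note that dev_parms()
 *  above rejects a rebuild without metadata devices unless 'nosync' is
 *  also given.)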
- */ - for (i = 0; i < rs->md.raid_disks; i++) { - set_bit(In_sync, &rs->dev[i].rdev.flags); - rs->dev[i].rdev.recovery_offset = MaxSector; - } - - /* - * Second, parse the unordered optional arguments - */ - for (i = 0; i < num_raid_params; i++) { - if (!strcasecmp(argv[i], "nosync")) { - rs->md.recovery_cp = MaxSector; - rs->print_flags |= DMPF_NOSYNC; - continue; - } - if (!strcasecmp(argv[i], "sync")) { - rs->md.recovery_cp = 0; - rs->print_flags |= DMPF_SYNC; - continue; - } - - /* The rest of the optional arguments come in key/value pairs */ - if ((i + 1) >= num_raid_params) { - rs->ti->error = "Wrong number of raid parameters given"; - return -EINVAL; - } - - key = argv[i++]; - if (strict_strtoul(argv[i], 10, &value) < 0) { - rs->ti->error = "Bad numerical argument given in raid params"; - return -EINVAL; - } - - if (!strcasecmp(key, "rebuild")) { - rebuild_cnt++; - if (((rs->raid_type->level != 1) && - (rebuild_cnt > rs->raid_type->parity_devs)) || - ((rs->raid_type->level == 1) && - (rebuild_cnt > (rs->md.raid_disks - 1)))) { - rs->ti->error = "Too many rebuild devices specified for given RAID type"; - return -EINVAL; - } - if (value > rs->md.raid_disks) { - rs->ti->error = "Invalid rebuild index given"; - return -EINVAL; - } - clear_bit(In_sync, &rs->dev[value].rdev.flags); - rs->dev[value].rdev.recovery_offset = 0; - rs->print_flags |= DMPF_REBUILD; - } else if (!strcasecmp(key, "write_mostly")) { - if (rs->raid_type->level != 1) { - rs->ti->error = "write_mostly option is only valid for RAID1"; - return -EINVAL; - } - if (value >= rs->md.raid_disks) { - rs->ti->error = "Invalid write_mostly drive index given"; - return -EINVAL; - } - set_bit(WriteMostly, &rs->dev[value].rdev.flags); - } else if (!strcasecmp(key, "max_write_behind")) { - if (rs->raid_type->level != 1) { - rs->ti->error = "max_write_behind option is only valid for RAID1"; - return -EINVAL; - } - rs->print_flags |= DMPF_MAX_WRITE_BEHIND; - - /* - * In device-mapper, we specify things in sectors, but - * MD records this value in kB - */ - value /= 2; - if (value > COUNTER_MAX) { - rs->ti->error = "Max write-behind limit out of range"; - return -EINVAL; - } - rs->md.bitmap_info.max_write_behind = value; - } else if (!strcasecmp(key, "daemon_sleep")) { - rs->print_flags |= DMPF_DAEMON_SLEEP; - if (!value || (value > MAX_SCHEDULE_TIMEOUT)) { - rs->ti->error = "daemon sleep period out of range"; - return -EINVAL; - } - rs->md.bitmap_info.daemon_sleep = value; - } else if (!strcasecmp(key, "stripe_cache")) { - rs->print_flags |= DMPF_STRIPE_CACHE; - - /* - * In device-mapper, we specify things in sectors, but - * MD records this value in kB - */ - value /= 2; - - if (rs->raid_type->level < 5) { - rs->ti->error = "Inappropriate argument: stripe_cache"; - return -EINVAL; - } - if (raid5_set_cache_size(&rs->md, (int)value)) { - rs->ti->error = "Bad stripe_cache size"; - return -EINVAL; - } - } else if (!strcasecmp(key, "min_recovery_rate")) { - rs->print_flags |= DMPF_MIN_RECOVERY_RATE; - if (value > INT_MAX) { - rs->ti->error = "min_recovery_rate out of range"; - return -EINVAL; - } - rs->md.sync_speed_min = (int)value; - } else if (!strcasecmp(key, "max_recovery_rate")) { - rs->print_flags |= DMPF_MAX_RECOVERY_RATE; - if (value > INT_MAX) { - rs->ti->error = "max_recovery_rate out of range"; - return -EINVAL; - } - rs->md.sync_speed_max = (int)value; - } else if (!strcasecmp(key, "region_size")) { - rs->print_flags |= DMPF_REGION_SIZE; - region_size = value; - } else { - DMERR("Unable to parse RAID parameter: %s", key); 
- rs->ti->error = "Unable to parse RAID parameters"; - return -EINVAL; - } - } - - if (validate_region_size(rs, region_size)) - return -EINVAL; - - if (rs->md.chunk_sectors) - rs->ti->split_io = rs->md.chunk_sectors; - else - rs->ti->split_io = region_size; - - if (rs->md.chunk_sectors) - rs->ti->split_io = rs->md.chunk_sectors; - else - rs->ti->split_io = region_size; - - /* Assume there are no metadata devices until the drives are parsed */ - rs->md.persistent = 0; - rs->md.external = 1; - - return 0; -} - -static void do_table_event(struct work_struct *ws) -{ - struct raid_set *rs = container_of(ws, struct raid_set, md.event_work); - - dm_table_event(rs->ti->table); -} - -static int raid_is_congested(struct dm_target_callbacks *cb, int bits) -{ - struct raid_set *rs = container_of(cb, struct raid_set, callbacks); - - if (rs->raid_type->level == 1) - return md_raid1_congested(&rs->md, bits); - - return md_raid5_congested(&rs->md, bits); -} - -/* - * This structure is never routinely used by userspace, unlike md superblocks. - * Devices with this superblock should only ever be accessed via device-mapper. - */ -#define DM_RAID_MAGIC 0x64526D44 -struct dm_raid_superblock { - __le32 magic; /* "DmRd" */ - __le32 features; /* Used to indicate possible future changes */ - - __le32 num_devices; /* Number of devices in this array. (Max 64) */ - __le32 array_position; /* The position of this drive in the array */ - - __le64 events; /* Incremented by md when superblock updated */ - __le64 failed_devices; /* Bit field of devices to indicate failures */ - - /* - * This offset tracks the progress of the repair or replacement of - * an individual drive. - */ - __le64 disk_recovery_offset; - - /* - * This offset tracks the progress of the initial array - * synchronisation/parity calculation. - */ - __le64 array_resync_offset; - - /* - * RAID characteristics - */ - __le32 level; - __le32 layout; - __le32 stripe_sectors; - - __u8 pad[452]; /* Round struct to 512 bytes. */ - /* Always set to 0 when writing. 
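 *
 * (Editorial note: the fixed fields above occupy 4*4 + 2*8 + 2*8 + 3*4 =
 *  60 bytes, so the 452-byte pad rounds the structure to exactly one
 *  512-byte sector, matching rdev->sb_size = sizeof(*sb) in super_load()
 *  below.)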
*/ -} __packed; - -static int read_disk_sb(struct md_rdev *rdev, int size) -{ - BUG_ON(!rdev->sb_page); - - if (rdev->sb_loaded) - return 0; - - if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) { - DMERR("Failed to read superblock of device at position %d", - rdev->raid_disk); - set_bit(Faulty, &rdev->flags); - return -EINVAL; - } - - rdev->sb_loaded = 1; - - return 0; -} - -static void super_sync(struct mddev *mddev, struct md_rdev *rdev) -{ - struct md_rdev *r; - uint64_t failed_devices; - struct dm_raid_superblock *sb; - - sb = page_address(rdev->sb_page); - failed_devices = le64_to_cpu(sb->failed_devices); - - rdev_for_each(r, mddev) - if ((r->raid_disk >= 0) && test_bit(Faulty, &r->flags)) - failed_devices |= (1ULL << r->raid_disk); - - memset(sb, 0, sizeof(*sb)); - - sb->magic = cpu_to_le32(DM_RAID_MAGIC); - sb->features = cpu_to_le32(0); /* No features yet */ - - sb->num_devices = cpu_to_le32(mddev->raid_disks); - sb->array_position = cpu_to_le32(rdev->raid_disk); - - sb->events = cpu_to_le64(mddev->events); - sb->failed_devices = cpu_to_le64(failed_devices); - - sb->disk_recovery_offset = cpu_to_le64(rdev->recovery_offset); - sb->array_resync_offset = cpu_to_le64(mddev->recovery_cp); - - sb->level = cpu_to_le32(mddev->level); - sb->layout = cpu_to_le32(mddev->layout); - sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors); -} - -/* - * super_load - * - * This function creates a superblock if one is not found on the device - * and will decide which superblock to use if there's a choice. - * - * Return: 1 if use rdev, 0 if use refdev, -Exxx otherwise - */ -static int super_load(struct md_rdev *rdev, struct md_rdev *refdev) -{ - int ret; - struct dm_raid_superblock *sb; - struct dm_raid_superblock *refsb; - uint64_t events_sb, events_refsb; - - rdev->sb_start = 0; - rdev->sb_size = sizeof(*sb); - - ret = read_disk_sb(rdev, rdev->sb_size); - if (ret) - return ret; - - sb = page_address(rdev->sb_page); - - /* - * Two cases that we want to write new superblocks and rebuild: - * 1) New device (no matching magic number) - * 2) Device specified for rebuild (!In_sync w/ offset == 0) - */ - if ((sb->magic != cpu_to_le32(DM_RAID_MAGIC)) || - (!test_bit(In_sync, &rdev->flags) && !rdev->recovery_offset)) { - super_sync(rdev->mddev, rdev); - - set_bit(FirstUse, &rdev->flags); - - /* Force writing of superblocks to disk */ - set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags); - - /* Any superblock is better than none, choose that if given */ - return refdev ? 0 : 1; - } - - if (!refdev) - return 1; - - events_sb = le64_to_cpu(sb->events); - - refsb = page_address(refdev->sb_page); - events_refsb = le64_to_cpu(refsb->events); - - return (events_sb > events_refsb) ? 1 : 0; -} - -static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev) -{ - int role; - struct raid_set *rs = container_of(mddev, struct raid_set, md); - uint64_t events_sb; - uint64_t failed_devices; - struct dm_raid_superblock *sb; - uint32_t new_devs = 0; - uint32_t rebuilds = 0; - struct md_rdev *r; - struct dm_raid_superblock *sb2; - - sb = page_address(rdev->sb_page); - events_sb = le64_to_cpu(sb->events); - failed_devices = le64_to_cpu(sb->failed_devices); - - /* - * Initialise to 1 if this is a new superblock. - */ - mddev->events = events_sb ? 
: 1; - - /* - * Reshaping is not currently allowed - */ - if ((le32_to_cpu(sb->level) != mddev->level) || - (le32_to_cpu(sb->layout) != mddev->layout) || - (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors)) { - DMERR("Reshaping arrays not yet supported."); - return -EINVAL; - } - - /* We can only change the number of devices in RAID1 right now */ - if ((rs->raid_type->level != 1) && - (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) { - DMERR("Reshaping arrays not yet supported."); - return -EINVAL; - } - - if (!(rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC))) - mddev->recovery_cp = le64_to_cpu(sb->array_resync_offset); - - /* - * During load, we set FirstUse if a new superblock was written. - * There are two reasons we might not have a superblock: - * 1) The array is brand new - in which case, all of the - * devices must have their In_sync bit set. Also, - * recovery_cp must be 0, unless forced. - * 2) This is a new device being added to an old array - * and the new device needs to be rebuilt - in which - * case the In_sync bit will /not/ be set and - * recovery_cp must be MaxSector. - */ - rdev_for_each(r, mddev) { - if (!test_bit(In_sync, &r->flags)) { - DMINFO("Device %d specified for rebuild: " - "Clearing superblock", r->raid_disk); - rebuilds++; - } else if (test_bit(FirstUse, &r->flags)) - new_devs++; - } - - if (!rebuilds) { - if (new_devs == mddev->raid_disks) { - DMINFO("Superblocks created for new array"); - set_bit(MD_ARRAY_FIRST_USE, &mddev->flags); - } else if (new_devs) { - DMERR("New device injected " - "into existing array without 'rebuild' " - "parameter specified"); - return -EINVAL; - } - } else if (new_devs) { - DMERR("'rebuild' devices cannot be " - "injected into an array with other first-time devices"); - return -EINVAL; - } else if (mddev->recovery_cp != MaxSector) { - DMERR("'rebuild' specified while array is not in-sync"); - return -EINVAL; - } - - /* - * Now we set the Faulty bit for those devices that are - * recorded in the superblock as failed. - */ - rdev_for_each(r, mddev) { - if (!r->sb_page) - continue; - sb2 = page_address(r->sb_page); - sb2->failed_devices = 0; - - /* - * Check for any device re-ordering. - */ - if (!test_bit(FirstUse, &r->flags) && (r->raid_disk >= 0)) { - role = le32_to_cpu(sb2->array_position); - if (role != r->raid_disk) { - if (rs->raid_type->level != 1) { - rs->ti->error = "Cannot change device " - "positions in RAID array"; - return -EINVAL; - } - DMINFO("RAID1 device #%d now at position #%d", - role, r->raid_disk); - } - - /* - * Partial recovery is performed on - * returning failed devices. - */ - if (failed_devices & (1 << role)) - set_bit(Faulty, &r->flags); - } - } - - return 0; -} - -static int super_validate(struct mddev *mddev, struct md_rdev *rdev) -{ - struct dm_raid_superblock *sb = page_address(rdev->sb_page); - - /* - * If mddev->events is not set, we know we have not yet initialized - * the array. - */ - if (!mddev->events && super_init_validation(mddev, rdev)) - return -EINVAL; - - mddev->bitmap_info.offset = 4096 >> 9; /* Enable bitmap creation */ - rdev->mddev->bitmap_info.default_offset = 4096 >> 9; - if (!test_bit(FirstUse, &rdev->flags)) { - rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset); - if (rdev->recovery_offset != MaxSector) - clear_bit(In_sync, &rdev->flags); - } - - /* - * If a device comes back, set it as not In_sync and no longer faulty. 
- */ - if (test_bit(Faulty, &rdev->flags)) { - clear_bit(Faulty, &rdev->flags); - clear_bit(In_sync, &rdev->flags); - rdev->saved_raid_disk = rdev->raid_disk; - rdev->recovery_offset = 0; - } - - clear_bit(FirstUse, &rdev->flags); - - return 0; -} - -/* - * Analyse superblocks and select the freshest. - */ -static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) -{ - int ret; - unsigned redundancy = 0; - struct raid_dev *dev; - struct md_rdev *rdev, *tmp, *freshest; - struct mddev *mddev = &rs->md; - - switch (rs->raid_type->level) { - case 1: - redundancy = rs->md.raid_disks - 1; - break; - case 4: - case 5: - case 6: - redundancy = rs->raid_type->parity_devs; - break; - default: - ti->error = "Unknown RAID type"; - return -EINVAL; - } - - freshest = NULL; - rdev_for_each_safe(rdev, tmp, mddev) { - if (!rdev->meta_bdev) - continue; - - ret = super_load(rdev, freshest); - - switch (ret) { - case 1: - freshest = rdev; - break; - case 0: - break; - default: - dev = container_of(rdev, struct raid_dev, rdev); - if (redundancy--) { - if (dev->meta_dev) - dm_put_device(ti, dev->meta_dev); - - dev->meta_dev = NULL; - rdev->meta_bdev = NULL; - - if (rdev->sb_page) - put_page(rdev->sb_page); - - rdev->sb_page = NULL; - - rdev->sb_loaded = 0; - - /* - * We might be able to salvage the data device - * even though the meta device has failed. For - * now, we behave as though '- -' had been - * set for this device in the table. - */ - if (dev->data_dev) - dm_put_device(ti, dev->data_dev); - - dev->data_dev = NULL; - rdev->bdev = NULL; - - list_del(&rdev->same_set); - - continue; - } - ti->error = "Failed to load superblock"; - return ret; - } - } - - if (!freshest) - return 0; - - /* - * Validation of the freshest device provides the source of - * validation for the remaining devices. - */ - ti->error = "Unable to assemble array: Invalid superblocks"; - if (super_validate(mddev, freshest)) - return -EINVAL; - - rdev_for_each(rdev, mddev) - if ((rdev != freshest) && super_validate(mddev, rdev)) - return -EINVAL; - - return 0; -} - -/* - * Construct a RAID4/5/6 mapping: - * Args: - * <raid_type> <#raid_params> <raid_params> \ - * <#raid_devs> { <meta_dev1> <dev1> .. <meta_devN> <devN> } - * - * <raid_params> varies by <raid_type>. See 'parse_raid_params' for - * details on possible <raid_params>. 
- */ -static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) -{ - int ret; - struct raid_type *rt; - unsigned long num_raid_params, num_raid_devs; - struct raid_set *rs = NULL; - - /* Must have at least <raid_type> <#raid_params> */ - if (argc < 2) { - ti->error = "Too few arguments"; - return -EINVAL; - } - - /* raid type */ - rt = get_raid_type(argv[0]); - if (!rt) { - ti->error = "Unrecognised raid_type"; - return -EINVAL; - } - argc--; - argv++; - - /* number of RAID parameters */ - if (strict_strtoul(argv[0], 10, &num_raid_params) < 0) { - ti->error = "Cannot understand number of RAID parameters"; - return -EINVAL; - } - argc--; - argv++; - - /* Skip over RAID params for now and find out # of devices */ - if (num_raid_params + 1 > argc) { - ti->error = "Arguments do not agree with counts given"; - return -EINVAL; - } - - if ((strict_strtoul(argv[num_raid_params], 10, &num_raid_devs) < 0) || - (num_raid_devs >= INT_MAX)) { - ti->error = "Cannot understand number of raid devices"; - return -EINVAL; - } - - rs = context_alloc(ti, rt, (unsigned)num_raid_devs); - if (IS_ERR(rs)) - return PTR_ERR(rs); - - ret = parse_raid_params(rs, argv, (unsigned)num_raid_params); - if (ret) - goto bad; - - ret = -EINVAL; - - argc -= num_raid_params + 1; /* +1: we already have num_raid_devs */ - argv += num_raid_params + 1; - - if (argc != (num_raid_devs * 2)) { - ti->error = "Supplied RAID devices does not match the count given"; - goto bad; - } - - ret = dev_parms(rs, argv); - if (ret) - goto bad; - - rs->md.sync_super = super_sync; - ret = analyse_superblocks(ti, rs); - if (ret) - goto bad; - - INIT_WORK(&rs->md.event_work, do_table_event); - ti->private = rs; - ti->num_flush_requests = 1; - - mutex_lock(&rs->md.reconfig_mutex); - ret = md_run(&rs->md); - rs->md.in_sync = 0; /* Assume already marked dirty */ - mutex_unlock(&rs->md.reconfig_mutex); - - if (ret) { - ti->error = "Fail to run raid array"; - goto bad; - } - - rs->callbacks.congested_fn = raid_is_congested; - dm_table_add_target_callbacks(ti->table, &rs->callbacks); - - mddev_suspend(&rs->md); - return 0; - -bad: - context_free(rs); - - return ret; -} - -static void raid_dtr(struct dm_target *ti) -{ - struct raid_set *rs = ti->private; - - list_del_init(&rs->callbacks.list); - md_stop(&rs->md); - context_free(rs); -} - -static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) -{ - struct raid_set *rs = ti->private; - struct mddev *mddev = &rs->md; - - mddev->pers->make_request(mddev, bio); - - return DM_MAPIO_SUBMITTED; -} - -static int raid_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) -{ - struct raid_set *rs = ti->private; - unsigned raid_param_cnt = 1; /* at least 1 for chunksize */ - unsigned sz = 0; - int i, array_in_sync = 0; - sector_t sync; - - switch (type) { - case STATUSTYPE_INFO: - DMEMIT("%s %d ", rs->raid_type->name, rs->md.raid_disks); - - if (test_bit(MD_RECOVERY_RUNNING, &rs->md.recovery)) - sync = rs->md.curr_resync_completed; - else - sync = rs->md.recovery_cp; - - if (sync >= rs->md.resync_max_sectors) { - array_in_sync = 1; - sync = rs->md.resync_max_sectors; - } else { - /* - * The array may be doing an initial sync, or it may - * be rebuilding individual components. If all the - * devices are In_sync, then it is the array that is - * being initialized. 
- */ - for (i = 0; i < rs->md.raid_disks; i++) - if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) - array_in_sync = 1; - } - /* - * Status characters: - * 'D' = Dead/Failed device - * 'a' = Alive but not in-sync - * 'A' = Alive and in-sync - */ - for (i = 0; i < rs->md.raid_disks; i++) { - if (test_bit(Faulty, &rs->dev[i].rdev.flags)) - DMEMIT("D"); - else if (!array_in_sync || - !test_bit(In_sync, &rs->dev[i].rdev.flags)) - DMEMIT("a"); - else - DMEMIT("A"); - } - - /* - * In-sync ratio: - * The in-sync ratio shows the progress of: - * - Initializing the array - * - Rebuilding a subset of devices of the array - * The user can distinguish between the two by referring - * to the status characters. - */ - DMEMIT(" %llu/%llu", - (unsigned long long) sync, - (unsigned long long) rs->md.resync_max_sectors); - - break; - case STATUSTYPE_TABLE: - /* The string you would use to construct this array */ - for (i = 0; i < rs->md.raid_disks; i++) { - if ((rs->print_flags & DMPF_REBUILD) && - rs->dev[i].data_dev && - !test_bit(In_sync, &rs->dev[i].rdev.flags)) - raid_param_cnt += 2; /* for rebuilds */ - if (rs->dev[i].data_dev && - test_bit(WriteMostly, &rs->dev[i].rdev.flags)) - raid_param_cnt += 2; - } - - raid_param_cnt += (hweight32(rs->print_flags & ~DMPF_REBUILD) * 2); - if (rs->print_flags & (DMPF_SYNC | DMPF_NOSYNC)) - raid_param_cnt--; - - DMEMIT("%s %u %u", rs->raid_type->name, - raid_param_cnt, rs->md.chunk_sectors); - - if ((rs->print_flags & DMPF_SYNC) && - (rs->md.recovery_cp == MaxSector)) - DMEMIT(" sync"); - if (rs->print_flags & DMPF_NOSYNC) - DMEMIT(" nosync"); - - for (i = 0; i < rs->md.raid_disks; i++) - if ((rs->print_flags & DMPF_REBUILD) && - rs->dev[i].data_dev && - !test_bit(In_sync, &rs->dev[i].rdev.flags)) - DMEMIT(" rebuild %u", i); - - if (rs->print_flags & DMPF_DAEMON_SLEEP) - DMEMIT(" daemon_sleep %lu", - rs->md.bitmap_info.daemon_sleep); - - if (rs->print_flags & DMPF_MIN_RECOVERY_RATE) - DMEMIT(" min_recovery_rate %d", rs->md.sync_speed_min); - - if (rs->print_flags & DMPF_MAX_RECOVERY_RATE) - DMEMIT(" max_recovery_rate %d", rs->md.sync_speed_max); - - for (i = 0; i < rs->md.raid_disks; i++) - if (rs->dev[i].data_dev && - test_bit(WriteMostly, &rs->dev[i].rdev.flags)) - DMEMIT(" write_mostly %u", i); - - if (rs->print_flags & DMPF_MAX_WRITE_BEHIND) - DMEMIT(" max_write_behind %lu", - rs->md.bitmap_info.max_write_behind); - - if (rs->print_flags & DMPF_STRIPE_CACHE) { - struct r5conf *conf = rs->md.private; - - /* convert from kiB to sectors */ - DMEMIT(" stripe_cache %d", - conf ? 
conf->max_nr_stripes * 2 : 0); - } - - if (rs->print_flags & DMPF_REGION_SIZE) - DMEMIT(" region_size %lu", - rs->md.bitmap_info.chunksize >> 9); - - DMEMIT(" %d", rs->md.raid_disks); - for (i = 0; i < rs->md.raid_disks; i++) { - if (rs->dev[i].meta_dev) - DMEMIT(" %s", rs->dev[i].meta_dev->name); - else - DMEMIT(" -"); - - if (rs->dev[i].data_dev) - DMEMIT(" %s", rs->dev[i].data_dev->name); - else - DMEMIT(" -"); - } - } - - return 0; -} - -static int raid_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) -{ - struct raid_set *rs = ti->private; - unsigned i; - int ret = 0; - - for (i = 0; !ret && i < rs->md.raid_disks; i++) - if (rs->dev[i].data_dev) - ret = fn(ti, - rs->dev[i].data_dev, - 0, /* No offset on data devs */ - rs->md.dev_sectors, - data); - - return ret; -} - -static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) -{ - struct raid_set *rs = ti->private; - unsigned chunk_size = rs->md.chunk_sectors << 9; - struct r5conf *conf = rs->md.private; - - blk_limits_io_min(limits, chunk_size); - blk_limits_io_opt(limits, chunk_size * (conf->raid_disks - conf->max_degraded)); -} - -static void raid_presuspend(struct dm_target *ti) -{ - struct raid_set *rs = ti->private; - - md_stop_writes(&rs->md); -} - -static void raid_postsuspend(struct dm_target *ti) -{ - struct raid_set *rs = ti->private; - - mddev_suspend(&rs->md); -} - -static void raid_resume(struct dm_target *ti) -{ - struct raid_set *rs = ti->private; - - if (!rs->bitmap_loaded) { - bitmap_load(&rs->md); - rs->bitmap_loaded = 1; - } else - md_wakeup_thread(rs->md.thread); - - mddev_resume(&rs->md); -} - -static struct target_type raid_target = { - .name = "raid", - .version = {1, 2, 0}, - .module = THIS_MODULE, - .ctr = raid_ctr, - .dtr = raid_dtr, - .map = raid_map, - .status = raid_status, - .iterate_devices = raid_iterate_devices, - .io_hints = raid_io_hints, - .presuspend = raid_presuspend, - .postsuspend = raid_postsuspend, - .resume = raid_resume, -}; - -static int __init dm_raid_init(void) -{ - return dm_register_target(&raid_target); -} - -static void __exit dm_raid_exit(void) -{ - dm_unregister_target(&raid_target); -} - -module_init(dm_raid_init); -module_exit(dm_raid_exit); - -MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target"); -MODULE_ALIAS("dm-raid4"); -MODULE_ALIAS("dm-raid5"); -MODULE_ALIAS("dm-raid6"); -MODULE_AUTHOR("Neil Brown <dm-devel@redhat.com>"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm-raid1.c b/ANDROID_3.4.5/drivers/md/dm-raid1.c deleted file mode 100644 index d039de83..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-raid1.c +++ /dev/null @@ -1,1470 +0,0 @@ -/* - * Copyright (C) 2003 Sistina Software Limited. - * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. - */ - -#include "dm-bio-record.h" - -#include <linux/init.h> -#include <linux/mempool.h> -#include <linux/module.h> -#include <linux/pagemap.h> -#include <linux/slab.h> -#include <linux/workqueue.h> -#include <linux/device-mapper.h> -#include <linux/dm-io.h> -#include <linux/dm-dirty-log.h> -#include <linux/dm-kcopyd.h> -#include <linux/dm-region-hash.h> - -#define DM_MSG_PREFIX "raid1" - -#define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. 
*/ - -#define DM_RAID1_HANDLE_ERRORS 0x01 -#define errors_handled(p) ((p)->features & DM_RAID1_HANDLE_ERRORS) - -static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped); - -/*----------------------------------------------------------------- - * Mirror set structures. - *---------------------------------------------------------------*/ -enum dm_raid1_error { - DM_RAID1_WRITE_ERROR, - DM_RAID1_FLUSH_ERROR, - DM_RAID1_SYNC_ERROR, - DM_RAID1_READ_ERROR -}; - -struct mirror { - struct mirror_set *ms; - atomic_t error_count; - unsigned long error_type; - struct dm_dev *dev; - sector_t offset; -}; - -struct mirror_set { - struct dm_target *ti; - struct list_head list; - - uint64_t features; - - spinlock_t lock; /* protects the lists */ - struct bio_list reads; - struct bio_list writes; - struct bio_list failures; - struct bio_list holds; /* bios are waiting until suspend */ - - struct dm_region_hash *rh; - struct dm_kcopyd_client *kcopyd_client; - struct dm_io_client *io_client; - mempool_t *read_record_pool; - - /* recovery */ - region_t nr_regions; - int in_sync; - int log_failure; - int leg_failure; - atomic_t suspend; - - atomic_t default_mirror; /* Default mirror */ - - struct workqueue_struct *kmirrord_wq; - struct work_struct kmirrord_work; - struct timer_list timer; - unsigned long timer_pending; - - struct work_struct trigger_event; - - unsigned nr_mirrors; - struct mirror mirror[0]; -}; - -static void wakeup_mirrord(void *context) -{ - struct mirror_set *ms = context; - - queue_work(ms->kmirrord_wq, &ms->kmirrord_work); -} - -static void delayed_wake_fn(unsigned long data) -{ - struct mirror_set *ms = (struct mirror_set *) data; - - clear_bit(0, &ms->timer_pending); - wakeup_mirrord(ms); -} - -static void delayed_wake(struct mirror_set *ms) -{ - if (test_and_set_bit(0, &ms->timer_pending)) - return; - - ms->timer.expires = jiffies + HZ / 5; - ms->timer.data = (unsigned long) ms; - ms->timer.function = delayed_wake_fn; - add_timer(&ms->timer); -} - -static void wakeup_all_recovery_waiters(void *context) -{ - wake_up_all(&_kmirrord_recovery_stopped); -} - -static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw) -{ - unsigned long flags; - int should_wake = 0; - struct bio_list *bl; - - bl = (rw == WRITE) ? &ms->writes : &ms->reads; - spin_lock_irqsave(&ms->lock, flags); - should_wake = !(bl->head); - bio_list_add(bl, bio); - spin_unlock_irqrestore(&ms->lock, flags); - - if (should_wake) - wakeup_mirrord(ms); -} - -static void dispatch_bios(void *context, struct bio_list *bio_list) -{ - struct mirror_set *ms = context; - struct bio *bio; - - while ((bio = bio_list_pop(bio_list))) - queue_bio(ms, bio, WRITE); -} - -#define MIN_READ_RECORDS 20 -struct dm_raid1_read_record { - struct mirror *m; - struct dm_bio_details details; -}; - -static struct kmem_cache *_dm_raid1_read_record_cache; - -/* - * Every mirror should look like this one. - */ -#define DEFAULT_MIRROR 0 - -/* - * This is yucky. We squirrel the mirror struct away inside - * bi_next for read/write buffers. This is safe since the bh - * doesn't get submitted to the lower levels of block layer. 
- */ -static struct mirror *bio_get_m(struct bio *bio) -{ - return (struct mirror *) bio->bi_next; -} - -static void bio_set_m(struct bio *bio, struct mirror *m) -{ - bio->bi_next = (struct bio *) m; -} - -static struct mirror *get_default_mirror(struct mirror_set *ms) -{ - return &ms->mirror[atomic_read(&ms->default_mirror)]; -} - -static void set_default_mirror(struct mirror *m) -{ - struct mirror_set *ms = m->ms; - struct mirror *m0 = &(ms->mirror[0]); - - atomic_set(&ms->default_mirror, m - m0); -} - -static struct mirror *get_valid_mirror(struct mirror_set *ms) -{ - struct mirror *m; - - for (m = ms->mirror; m < ms->mirror + ms->nr_mirrors; m++) - if (!atomic_read(&m->error_count)) - return m; - - return NULL; -} - -/* fail_mirror - * @m: mirror device to fail - * @error_type: one of the enum's, DM_RAID1_*_ERROR - * - * If errors are being handled, record the type of - * error encountered for this device. If this type - * of error has already been recorded, we can return; - * otherwise, we must signal userspace by triggering - * an event. Additionally, if the device is the - * primary device, we must choose a new primary, but - * only if the mirror is in-sync. - * - * This function must not block. - */ -static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type) -{ - struct mirror_set *ms = m->ms; - struct mirror *new; - - ms->leg_failure = 1; - - /* - * error_count is used for nothing more than a - * simple way to tell if a device has encountered - * errors. - */ - atomic_inc(&m->error_count); - - if (test_and_set_bit(error_type, &m->error_type)) - return; - - if (!errors_handled(ms)) - return; - - if (m != get_default_mirror(ms)) - goto out; - - if (!ms->in_sync) { - /* - * Better to issue requests to same failing device - * than to risk returning corrupt data. - */ - DMERR("Primary mirror (%s) failed while out-of-sync: " - "Reads may fail.", m->dev->name); - goto out; - } - - new = get_valid_mirror(ms); - if (new) - set_default_mirror(new); - else - DMWARN("All sides of mirror have failed."); - -out: - schedule_work(&ms->trigger_event); -} - -static int mirror_flush(struct dm_target *ti) -{ - struct mirror_set *ms = ti->private; - unsigned long error_bits; - - unsigned int i; - struct dm_io_region io[ms->nr_mirrors]; - struct mirror *m; - struct dm_io_request io_req = { - .bi_rw = WRITE_FLUSH, - .mem.type = DM_IO_KMEM, - .mem.ptr.addr = NULL, - .client = ms->io_client, - }; - - for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++) { - io[i].bdev = m->dev->bdev; - io[i].sector = 0; - io[i].count = 0; - } - - error_bits = -1; - dm_io(&io_req, ms->nr_mirrors, io, &error_bits); - if (unlikely(error_bits != 0)) { - for (i = 0; i < ms->nr_mirrors; i++) - if (test_bit(i, &error_bits)) - fail_mirror(ms->mirror + i, - DM_RAID1_FLUSH_ERROR); - return -EIO; - } - - return 0; -} - -/*----------------------------------------------------------------- - * Recovery. - * - * When a mirror is first activated we may find that some regions - * are in the no-sync state. We have to recover these by - * recopying from the default mirror to all the others. - *---------------------------------------------------------------*/ -static void recovery_complete(int read_err, unsigned long write_err, - void *context) -{ - struct dm_region *reg = context; - struct mirror_set *ms = dm_rh_region_context(reg); - int m, bit = 0; - - if (read_err) { - /* Read error means the failure of default mirror. 
*/ - DMERR_LIMIT("Unable to read primary mirror during recovery"); - fail_mirror(get_default_mirror(ms), DM_RAID1_SYNC_ERROR); - } - - if (write_err) { - DMERR_LIMIT("Write error during recovery (error = 0x%lx)", - write_err); - /* - * Bits correspond to devices (excluding default mirror). - * The default mirror cannot change during recovery. - */ - for (m = 0; m < ms->nr_mirrors; m++) { - if (&ms->mirror[m] == get_default_mirror(ms)) - continue; - if (test_bit(bit, &write_err)) - fail_mirror(ms->mirror + m, - DM_RAID1_SYNC_ERROR); - bit++; - } - } - - dm_rh_recovery_end(reg, !(read_err || write_err)); -} - -static int recover(struct mirror_set *ms, struct dm_region *reg) -{ - int r; - unsigned i; - struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest; - struct mirror *m; - unsigned long flags = 0; - region_t key = dm_rh_get_region_key(reg); - sector_t region_size = dm_rh_get_region_size(ms->rh); - - /* fill in the source */ - m = get_default_mirror(ms); - from.bdev = m->dev->bdev; - from.sector = m->offset + dm_rh_region_to_sector(ms->rh, key); - if (key == (ms->nr_regions - 1)) { - /* - * The final region may be smaller than - * region_size. - */ - from.count = ms->ti->len & (region_size - 1); - if (!from.count) - from.count = region_size; - } else - from.count = region_size; - - /* fill in the destinations */ - for (i = 0, dest = to; i < ms->nr_mirrors; i++) { - if (&ms->mirror[i] == get_default_mirror(ms)) - continue; - - m = ms->mirror + i; - dest->bdev = m->dev->bdev; - dest->sector = m->offset + dm_rh_region_to_sector(ms->rh, key); - dest->count = from.count; - dest++; - } - - /* hand to kcopyd */ - if (!errors_handled(ms)) - set_bit(DM_KCOPYD_IGNORE_ERROR, &flags); - - r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, - flags, recovery_complete, reg); - - return r; -} - -static void do_recovery(struct mirror_set *ms) -{ - struct dm_region *reg; - struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); - int r; - - /* - * Start quiescing some regions. - */ - dm_rh_recovery_prepare(ms->rh); - - /* - * Copy any already quiesced regions. - */ - while ((reg = dm_rh_recovery_start(ms->rh))) { - r = recover(ms, reg); - if (r) - dm_rh_recovery_end(reg, 0); - } - - /* - * Update the in sync flag. - */ - if (!ms->in_sync && - (log->type->get_sync_count(log) == ms->nr_regions)) { - /* the sync is complete */ - dm_table_event(ms->ti->table); - ms->in_sync = 1; - } -} - -/*----------------------------------------------------------------- - * Reads - *---------------------------------------------------------------*/ -static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector) -{ - struct mirror *m = get_default_mirror(ms); - - do { - if (likely(!atomic_read(&m->error_count))) - return m; - - if (m-- == ms->mirror) - m += ms->nr_mirrors; - } while (m != get_default_mirror(ms)); - - return NULL; -} - -static int default_ok(struct mirror *m) -{ - struct mirror *default_mirror = get_default_mirror(m->ms); - - return !atomic_read(&default_mirror->error_count); -} - -static int mirror_available(struct mirror_set *ms, struct bio *bio) -{ - struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); - region_t region = dm_rh_bio_to_region(ms->rh, bio); - - if (log->type->in_sync(log, region, 0)) - return choose_mirror(ms, bio->bi_sector) ? 1 : 0; - - return 0; -} - -/* - * remap a buffer to a particular mirror. 
- */ -static sector_t map_sector(struct mirror *m, struct bio *bio) -{ - if (unlikely(!bio->bi_size)) - return 0; - return m->offset + dm_target_offset(m->ms->ti, bio->bi_sector); -} - -static void map_bio(struct mirror *m, struct bio *bio) -{ - bio->bi_bdev = m->dev->bdev; - bio->bi_sector = map_sector(m, bio); -} - -static void map_region(struct dm_io_region *io, struct mirror *m, - struct bio *bio) -{ - io->bdev = m->dev->bdev; - io->sector = map_sector(m, bio); - io->count = bio->bi_size >> 9; -} - -static void hold_bio(struct mirror_set *ms, struct bio *bio) -{ - /* - * Lock is required to avoid race condition during suspend - * process. - */ - spin_lock_irq(&ms->lock); - - if (atomic_read(&ms->suspend)) { - spin_unlock_irq(&ms->lock); - - /* - * If device is suspended, complete the bio. - */ - if (dm_noflush_suspending(ms->ti)) - bio_endio(bio, DM_ENDIO_REQUEUE); - else - bio_endio(bio, -EIO); - return; - } - - /* - * Hold bio until the suspend is complete. - */ - bio_list_add(&ms->holds, bio); - spin_unlock_irq(&ms->lock); -} - -/*----------------------------------------------------------------- - * Reads - *---------------------------------------------------------------*/ -static void read_callback(unsigned long error, void *context) -{ - struct bio *bio = context; - struct mirror *m; - - m = bio_get_m(bio); - bio_set_m(bio, NULL); - - if (likely(!error)) { - bio_endio(bio, 0); - return; - } - - fail_mirror(m, DM_RAID1_READ_ERROR); - - if (likely(default_ok(m)) || mirror_available(m->ms, bio)) { - DMWARN_LIMIT("Read failure on mirror device %s. " - "Trying alternative device.", - m->dev->name); - queue_bio(m->ms, bio, bio_rw(bio)); - return; - } - - DMERR_LIMIT("Read failure on mirror device %s. Failing I/O.", - m->dev->name); - bio_endio(bio, -EIO); -} - -/* Asynchronous read. */ -static void read_async_bio(struct mirror *m, struct bio *bio) -{ - struct dm_io_region io; - struct dm_io_request io_req = { - .bi_rw = READ, - .mem.type = DM_IO_BVEC, - .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx, - .notify.fn = read_callback, - .notify.context = bio, - .client = m->ms->io_client, - }; - - map_region(&io, m, bio); - bio_set_m(bio, m); - BUG_ON(dm_io(&io_req, 1, &io, NULL)); -} - -static inline int region_in_sync(struct mirror_set *ms, region_t region, - int may_block) -{ - int state = dm_rh_get_state(ms->rh, region, may_block); - return state == DM_RH_CLEAN || state == DM_RH_DIRTY; -} - -static void do_reads(struct mirror_set *ms, struct bio_list *reads) -{ - region_t region; - struct bio *bio; - struct mirror *m; - - while ((bio = bio_list_pop(reads))) { - region = dm_rh_bio_to_region(ms->rh, bio); - m = get_default_mirror(ms); - - /* - * We can only read balance if the region is in sync. - */ - if (likely(region_in_sync(ms, region, 1))) - m = choose_mirror(ms, bio->bi_sector); - else if (m && atomic_read(&m->error_count)) - m = NULL; - - if (likely(m)) - read_async_bio(m, bio); - else - bio_endio(bio, -EIO); - } -} - -/*----------------------------------------------------------------- - * Writes. 
- * - * We do different things with the write io depending on the - * state of the region that it's in: - * - * SYNC: increment pending, use kcopyd to write to *all* mirrors - * RECOVERING: delay the io until recovery completes - * NOSYNC: increment pending, just write to the default mirror - *---------------------------------------------------------------*/ - - -static void write_callback(unsigned long error, void *context) -{ - unsigned i, ret = 0; - struct bio *bio = (struct bio *) context; - struct mirror_set *ms; - int should_wake = 0; - unsigned long flags; - - ms = bio_get_m(bio)->ms; - bio_set_m(bio, NULL); - - /* - * NOTE: We don't decrement the pending count here, - * instead it is done by the targets endio function. - * This way we handle both writes to SYNC and NOSYNC - * regions with the same code. - */ - if (likely(!error)) { - bio_endio(bio, ret); - return; - } - - for (i = 0; i < ms->nr_mirrors; i++) - if (test_bit(i, &error)) - fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR); - - /* - * Need to raise event. Since raising - * events can block, we need to do it in - * the main thread. - */ - spin_lock_irqsave(&ms->lock, flags); - if (!ms->failures.head) - should_wake = 1; - bio_list_add(&ms->failures, bio); - spin_unlock_irqrestore(&ms->lock, flags); - if (should_wake) - wakeup_mirrord(ms); -} - -static void do_write(struct mirror_set *ms, struct bio *bio) -{ - unsigned int i; - struct dm_io_region io[ms->nr_mirrors], *dest = io; - struct mirror *m; - struct dm_io_request io_req = { - .bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA), - .mem.type = DM_IO_BVEC, - .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx, - .notify.fn = write_callback, - .notify.context = bio, - .client = ms->io_client, - }; - - if (bio->bi_rw & REQ_DISCARD) { - io_req.bi_rw |= REQ_DISCARD; - io_req.mem.type = DM_IO_KMEM; - io_req.mem.ptr.addr = NULL; - } - - for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++) - map_region(dest++, m, bio); - - /* - * Use default mirror because we only need it to retrieve the reference - * to the mirror set in write_callback(). - */ - bio_set_m(bio, get_default_mirror(ms)); - - BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL)); -} - -static void do_writes(struct mirror_set *ms, struct bio_list *writes) -{ - int state; - struct bio *bio; - struct bio_list sync, nosync, recover, *this_list = NULL; - struct bio_list requeue; - struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); - region_t region; - - if (!writes->head) - return; - - /* - * Classify each write. 
- */ - bio_list_init(&sync); - bio_list_init(&nosync); - bio_list_init(&recover); - bio_list_init(&requeue); - - while ((bio = bio_list_pop(writes))) { - if ((bio->bi_rw & REQ_FLUSH) || - (bio->bi_rw & REQ_DISCARD)) { - bio_list_add(&sync, bio); - continue; - } - - region = dm_rh_bio_to_region(ms->rh, bio); - - if (log->type->is_remote_recovering && - log->type->is_remote_recovering(log, region)) { - bio_list_add(&requeue, bio); - continue; - } - - state = dm_rh_get_state(ms->rh, region, 1); - switch (state) { - case DM_RH_CLEAN: - case DM_RH_DIRTY: - this_list = &sync; - break; - - case DM_RH_NOSYNC: - this_list = &nosync; - break; - - case DM_RH_RECOVERING: - this_list = &recover; - break; - } - - bio_list_add(this_list, bio); - } - - /* - * Add bios that are delayed due to remote recovery - * back on to the write queue - */ - if (unlikely(requeue.head)) { - spin_lock_irq(&ms->lock); - bio_list_merge(&ms->writes, &requeue); - spin_unlock_irq(&ms->lock); - delayed_wake(ms); - } - - /* - * Increment the pending counts for any regions that will - * be written to (writes to recover regions are going to - * be delayed). - */ - dm_rh_inc_pending(ms->rh, &sync); - dm_rh_inc_pending(ms->rh, &nosync); - - /* - * If the flush fails on a previous call and succeeds here, - * we must not reset the log_failure variable. We need - * userspace interaction to do that. - */ - ms->log_failure = dm_rh_flush(ms->rh) ? 1 : ms->log_failure; - - /* - * Dispatch io. - */ - if (unlikely(ms->log_failure) && errors_handled(ms)) { - spin_lock_irq(&ms->lock); - bio_list_merge(&ms->failures, &sync); - spin_unlock_irq(&ms->lock); - wakeup_mirrord(ms); - } else - while ((bio = bio_list_pop(&sync))) - do_write(ms, bio); - - while ((bio = bio_list_pop(&recover))) - dm_rh_delay(ms->rh, bio); - - while ((bio = bio_list_pop(&nosync))) { - if (unlikely(ms->leg_failure) && errors_handled(ms)) { - spin_lock_irq(&ms->lock); - bio_list_add(&ms->failures, bio); - spin_unlock_irq(&ms->lock); - wakeup_mirrord(ms); - } else { - map_bio(get_default_mirror(ms), bio); - generic_make_request(bio); - } - } -} - -static void do_failures(struct mirror_set *ms, struct bio_list *failures) -{ - struct bio *bio; - - if (likely(!failures->head)) - return; - - /* - * If the log has failed, unattempted writes are being - * put on the holds list. We can't issue those writes - * until a log has been marked, so we must store them. - * - * If a 'noflush' suspend is in progress, we can requeue - * the I/O's to the core. This give userspace a chance - * to reconfigure the mirror, at which point the core - * will reissue the writes. If the 'noflush' flag is - * not set, we have no choice but to return errors. - * - * Some writes on the failures list may have been - * submitted before the log failure and represent a - * failure to write to one of the devices. It is ok - * for us to treat them the same and requeue them - * as well. - */ - while ((bio = bio_list_pop(failures))) { - if (!ms->log_failure) { - ms->in_sync = 0; - dm_rh_mark_nosync(ms->rh, bio); - } - - /* - * If all the legs are dead, fail the I/O. - * If we have been told to handle errors, hold the bio - * and wait for userspace to deal with the problem. - * Otherwise pretend that the I/O succeeded. (This would - * be wrong if the failed leg returned after reboot and - * got replicated back to the good legs.) 
- */ - if (!get_valid_mirror(ms)) - bio_endio(bio, -EIO); - else if (errors_handled(ms)) - hold_bio(ms, bio); - else - bio_endio(bio, 0); - } -} - -static void trigger_event(struct work_struct *work) -{ - struct mirror_set *ms = - container_of(work, struct mirror_set, trigger_event); - - dm_table_event(ms->ti->table); -} - -/*----------------------------------------------------------------- - * kmirrord - *---------------------------------------------------------------*/ -static void do_mirror(struct work_struct *work) -{ - struct mirror_set *ms = container_of(work, struct mirror_set, - kmirrord_work); - struct bio_list reads, writes, failures; - unsigned long flags; - - spin_lock_irqsave(&ms->lock, flags); - reads = ms->reads; - writes = ms->writes; - failures = ms->failures; - bio_list_init(&ms->reads); - bio_list_init(&ms->writes); - bio_list_init(&ms->failures); - spin_unlock_irqrestore(&ms->lock, flags); - - dm_rh_update_states(ms->rh, errors_handled(ms)); - do_recovery(ms); - do_reads(ms, &reads); - do_writes(ms, &writes); - do_failures(ms, &failures); -} - -/*----------------------------------------------------------------- - * Target functions - *---------------------------------------------------------------*/ -static struct mirror_set *alloc_context(unsigned int nr_mirrors, - uint32_t region_size, - struct dm_target *ti, - struct dm_dirty_log *dl) -{ - size_t len; - struct mirror_set *ms = NULL; - - len = sizeof(*ms) + (sizeof(ms->mirror[0]) * nr_mirrors); - - ms = kzalloc(len, GFP_KERNEL); - if (!ms) { - ti->error = "Cannot allocate mirror context"; - return NULL; - } - - spin_lock_init(&ms->lock); - bio_list_init(&ms->reads); - bio_list_init(&ms->writes); - bio_list_init(&ms->failures); - bio_list_init(&ms->holds); - - ms->ti = ti; - ms->nr_mirrors = nr_mirrors; - ms->nr_regions = dm_sector_div_up(ti->len, region_size); - ms->in_sync = 0; - ms->log_failure = 0; - ms->leg_failure = 0; - atomic_set(&ms->suspend, 0); - atomic_set(&ms->default_mirror, DEFAULT_MIRROR); - - ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS, - _dm_raid1_read_record_cache); - - if (!ms->read_record_pool) { - ti->error = "Error creating mirror read_record_pool"; - kfree(ms); - return NULL; - } - - ms->io_client = dm_io_client_create(); - if (IS_ERR(ms->io_client)) { - ti->error = "Error creating dm_io client"; - mempool_destroy(ms->read_record_pool); - kfree(ms); - return NULL; - } - - ms->rh = dm_region_hash_create(ms, dispatch_bios, wakeup_mirrord, - wakeup_all_recovery_waiters, - ms->ti->begin, MAX_RECOVERY, - dl, region_size, ms->nr_regions); - if (IS_ERR(ms->rh)) { - ti->error = "Error creating dirty region hash"; - dm_io_client_destroy(ms->io_client); - mempool_destroy(ms->read_record_pool); - kfree(ms); - return NULL; - } - - return ms; -} - -static void free_context(struct mirror_set *ms, struct dm_target *ti, - unsigned int m) -{ - while (m--) - dm_put_device(ti, ms->mirror[m].dev); - - dm_io_client_destroy(ms->io_client); - dm_region_hash_destroy(ms->rh); - mempool_destroy(ms->read_record_pool); - kfree(ms); -} - -static int get_mirror(struct mirror_set *ms, struct dm_target *ti, - unsigned int mirror, char **argv) -{ - unsigned long long offset; - char dummy; - - if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1) { - ti->error = "Invalid offset"; - return -EINVAL; - } - - if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), - &ms->mirror[mirror].dev)) { - ti->error = "Device lookup failure"; - return -ENXIO; - } - - ms->mirror[mirror].ms = ms; - 
atomic_set(&(ms->mirror[mirror].error_count), 0); - ms->mirror[mirror].error_type = 0; - ms->mirror[mirror].offset = offset; - - return 0; -} - -/* - * Create dirty log: log_type #log_params <log_params> - */ -static struct dm_dirty_log *create_dirty_log(struct dm_target *ti, - unsigned argc, char **argv, - unsigned *args_used) -{ - unsigned param_count; - struct dm_dirty_log *dl; - char dummy; - - if (argc < 2) { - ti->error = "Insufficient mirror log arguments"; - return NULL; - } - - if (sscanf(argv[1], "%u%c", ¶m_count, &dummy) != 1) { - ti->error = "Invalid mirror log argument count"; - return NULL; - } - - *args_used = 2 + param_count; - - if (argc < *args_used) { - ti->error = "Insufficient mirror log arguments"; - return NULL; - } - - dl = dm_dirty_log_create(argv[0], ti, mirror_flush, param_count, - argv + 2); - if (!dl) { - ti->error = "Error creating mirror dirty log"; - return NULL; - } - - return dl; -} - -static int parse_features(struct mirror_set *ms, unsigned argc, char **argv, - unsigned *args_used) -{ - unsigned num_features; - struct dm_target *ti = ms->ti; - char dummy; - - *args_used = 0; - - if (!argc) - return 0; - - if (sscanf(argv[0], "%u%c", &num_features, &dummy) != 1) { - ti->error = "Invalid number of features"; - return -EINVAL; - } - - argc--; - argv++; - (*args_used)++; - - if (num_features > argc) { - ti->error = "Not enough arguments to support feature count"; - return -EINVAL; - } - - if (!strcmp("handle_errors", argv[0])) - ms->features |= DM_RAID1_HANDLE_ERRORS; - else { - ti->error = "Unrecognised feature requested"; - return -EINVAL; - } - - (*args_used)++; - - return 0; -} - -/* - * Construct a mirror mapping: - * - * log_type #log_params <log_params> - * #mirrors [mirror_path offset]{2,} - * [#features <features>] - * - * log_type is "core" or "disk" - * #log_params is between 1 and 3 - * - * If present, features must be "handle_errors". 
- */ -static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) -{ - int r; - unsigned int nr_mirrors, m, args_used; - struct mirror_set *ms; - struct dm_dirty_log *dl; - char dummy; - - dl = create_dirty_log(ti, argc, argv, &args_used); - if (!dl) - return -EINVAL; - - argv += args_used; - argc -= args_used; - - if (!argc || sscanf(argv[0], "%u%c", &nr_mirrors, &dummy) != 1 || - nr_mirrors < 2 || nr_mirrors > DM_KCOPYD_MAX_REGIONS + 1) { - ti->error = "Invalid number of mirrors"; - dm_dirty_log_destroy(dl); - return -EINVAL; - } - - argv++, argc--; - - if (argc < nr_mirrors * 2) { - ti->error = "Too few mirror arguments"; - dm_dirty_log_destroy(dl); - return -EINVAL; - } - - ms = alloc_context(nr_mirrors, dl->type->get_region_size(dl), ti, dl); - if (!ms) { - dm_dirty_log_destroy(dl); - return -ENOMEM; - } - - /* Get the mirror parameter sets */ - for (m = 0; m < nr_mirrors; m++) { - r = get_mirror(ms, ti, m, argv); - if (r) { - free_context(ms, ti, m); - return r; - } - argv += 2; - argc -= 2; - } - - ti->private = ms; - ti->split_io = dm_rh_get_region_size(ms->rh); - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; - - ms->kmirrord_wq = alloc_workqueue("kmirrord", - WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); - if (!ms->kmirrord_wq) { - DMERR("couldn't start kmirrord"); - r = -ENOMEM; - goto err_free_context; - } - INIT_WORK(&ms->kmirrord_work, do_mirror); - init_timer(&ms->timer); - ms->timer_pending = 0; - INIT_WORK(&ms->trigger_event, trigger_event); - - r = parse_features(ms, argc, argv, &args_used); - if (r) - goto err_destroy_wq; - - argv += args_used; - argc -= args_used; - - /* - * Any read-balancing addition depends on the - * DM_RAID1_HANDLE_ERRORS flag being present. - * This is because the decision to balance depends - * on the sync state of a region. If the above - * flag is not present, we ignore errors; and - * the sync state may be inaccurate. - */ - - if (argc) { - ti->error = "Too many mirror arguments"; - r = -EINVAL; - goto err_destroy_wq; - } - - ms->kcopyd_client = dm_kcopyd_client_create(); - if (IS_ERR(ms->kcopyd_client)) { - r = PTR_ERR(ms->kcopyd_client); - goto err_destroy_wq; - } - - wakeup_mirrord(ms); - return 0; - -err_destroy_wq: - destroy_workqueue(ms->kmirrord_wq); -err_free_context: - free_context(ms, ti, ms->nr_mirrors); - return r; -} - -static void mirror_dtr(struct dm_target *ti) -{ - struct mirror_set *ms = (struct mirror_set *) ti->private; - - del_timer_sync(&ms->timer); - flush_workqueue(ms->kmirrord_wq); - flush_work_sync(&ms->trigger_event); - dm_kcopyd_client_destroy(ms->kcopyd_client); - destroy_workqueue(ms->kmirrord_wq); - free_context(ms, ti, ms->nr_mirrors); -} - -/* - * Mirror mapping function - */ -static int mirror_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) -{ - int r, rw = bio_rw(bio); - struct mirror *m; - struct mirror_set *ms = ti->private; - struct dm_raid1_read_record *read_record = NULL; - struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); - - if (rw == WRITE) { - /* Save region for mirror_end_io() handler */ - map_context->ll = dm_rh_bio_to_region(ms->rh, bio); - queue_bio(ms, bio, rw); - return DM_MAPIO_SUBMITTED; - } - - r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0); - if (r < 0 && r != -EWOULDBLOCK) - return r; - - /* - * If region is not in-sync queue the bio. 
- */ - if (!r || (r == -EWOULDBLOCK)) { - if (rw == READA) - return -EWOULDBLOCK; - - queue_bio(ms, bio, rw); - return DM_MAPIO_SUBMITTED; - } - - /* - * The region is in-sync and we can perform reads directly. - * Store enough information so we can retry if it fails. - */ - m = choose_mirror(ms, bio->bi_sector); - if (unlikely(!m)) - return -EIO; - - read_record = mempool_alloc(ms->read_record_pool, GFP_NOIO); - if (likely(read_record)) { - dm_bio_record(&read_record->details, bio); - map_context->ptr = read_record; - read_record->m = m; - } - - map_bio(m, bio); - - return DM_MAPIO_REMAPPED; -} - -static int mirror_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) -{ - int rw = bio_rw(bio); - struct mirror_set *ms = (struct mirror_set *) ti->private; - struct mirror *m = NULL; - struct dm_bio_details *bd = NULL; - struct dm_raid1_read_record *read_record = map_context->ptr; - - /* - * We need to dec pending if this was a write. - */ - if (rw == WRITE) { - if (!(bio->bi_rw & REQ_FLUSH)) - dm_rh_dec(ms->rh, map_context->ll); - return error; - } - - if (error == -EOPNOTSUPP) - goto out; - - if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD)) - goto out; - - if (unlikely(error)) { - if (!read_record) { - /* - * There wasn't enough memory to record necessary - * information for a retry or there was no other - * mirror in-sync. - */ - DMERR_LIMIT("Mirror read failed."); - return -EIO; - } - - m = read_record->m; - - DMERR("Mirror read failed from %s. Trying alternative device.", - m->dev->name); - - fail_mirror(m, DM_RAID1_READ_ERROR); - - /* - * A failed read is requeued for another attempt using an intact - * mirror. - */ - if (default_ok(m) || mirror_available(ms, bio)) { - bd = &read_record->details; - - dm_bio_restore(bd, bio); - mempool_free(read_record, ms->read_record_pool); - map_context->ptr = NULL; - queue_bio(ms, bio, rw); - return 1; - } - DMERR("All replicated volumes dead, failing I/O"); - } - -out: - if (read_record) { - mempool_free(read_record, ms->read_record_pool); - map_context->ptr = NULL; - } - - return error; -} - -static void mirror_presuspend(struct dm_target *ti) -{ - struct mirror_set *ms = (struct mirror_set *) ti->private; - struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); - - struct bio_list holds; - struct bio *bio; - - atomic_set(&ms->suspend, 1); - - /* - * Process bios in the hold list to start recovery waiting - * for bios in the hold list. After the process, no bio has - * a chance to be added in the hold list because ms->suspend - * is set. - */ - spin_lock_irq(&ms->lock); - holds = ms->holds; - bio_list_init(&ms->holds); - spin_unlock_irq(&ms->lock); - - while ((bio = bio_list_pop(&holds))) - hold_bio(ms, bio); - - /* - * We must finish up all the work that we've - * generated (i.e. recovery work). - */ - dm_rh_stop_recovery(ms->rh); - - wait_event(_kmirrord_recovery_stopped, - !dm_rh_recovery_in_flight(ms->rh)); - - if (log->type->presuspend && log->type->presuspend(log)) - /* FIXME: need better error handling */ - DMWARN("log presuspend failed"); - - /* - * Now that recovery is complete/stopped and the - * delayed bios are queued, we need to wait for - * the worker thread to complete. This way, - * we know that all of our I/O has been pushed. 
- */ - flush_workqueue(ms->kmirrord_wq); -} - -static void mirror_postsuspend(struct dm_target *ti) -{ - struct mirror_set *ms = ti->private; - struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); - - if (log->type->postsuspend && log->type->postsuspend(log)) - /* FIXME: need better error handling */ - DMWARN("log postsuspend failed"); -} - -static void mirror_resume(struct dm_target *ti) -{ - struct mirror_set *ms = ti->private; - struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); - - atomic_set(&ms->suspend, 0); - if (log->type->resume && log->type->resume(log)) - /* FIXME: need better error handling */ - DMWARN("log resume failed"); - dm_rh_start_recovery(ms->rh); -} - -/* - * device_status_char - * @m: mirror device/leg we want the status of - * - * We return one character representing the most severe error - * we have encountered. - * A => Alive - No failures - * D => Dead - A write failure occurred leaving mirror out-of-sync - * S => Sync - A sychronization failure occurred, mirror out-of-sync - * R => Read - A read failure occurred, mirror data unaffected - * - * Returns: <char> - */ -static char device_status_char(struct mirror *m) -{ - if (!atomic_read(&(m->error_count))) - return 'A'; - - return (test_bit(DM_RAID1_FLUSH_ERROR, &(m->error_type))) ? 'F' : - (test_bit(DM_RAID1_WRITE_ERROR, &(m->error_type))) ? 'D' : - (test_bit(DM_RAID1_SYNC_ERROR, &(m->error_type))) ? 'S' : - (test_bit(DM_RAID1_READ_ERROR, &(m->error_type))) ? 'R' : 'U'; -} - - -static int mirror_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) -{ - unsigned int m, sz = 0; - struct mirror_set *ms = (struct mirror_set *) ti->private; - struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); - char buffer[ms->nr_mirrors + 1]; - - switch (type) { - case STATUSTYPE_INFO: - DMEMIT("%d ", ms->nr_mirrors); - for (m = 0; m < ms->nr_mirrors; m++) { - DMEMIT("%s ", ms->mirror[m].dev->name); - buffer[m] = device_status_char(&(ms->mirror[m])); - } - buffer[m] = '\0'; - - DMEMIT("%llu/%llu 1 %s ", - (unsigned long long)log->type->get_sync_count(log), - (unsigned long long)ms->nr_regions, buffer); - - sz += log->type->status(log, type, result+sz, maxlen-sz); - - break; - - case STATUSTYPE_TABLE: - sz = log->type->status(log, type, result, maxlen); - - DMEMIT("%d", ms->nr_mirrors); - for (m = 0; m < ms->nr_mirrors; m++) - DMEMIT(" %s %llu", ms->mirror[m].dev->name, - (unsigned long long)ms->mirror[m].offset); - - if (ms->features & DM_RAID1_HANDLE_ERRORS) - DMEMIT(" 1 handle_errors"); - } - - return 0; -} - -static int mirror_iterate_devices(struct dm_target *ti, - iterate_devices_callout_fn fn, void *data) -{ - struct mirror_set *ms = ti->private; - int ret = 0; - unsigned i; - - for (i = 0; !ret && i < ms->nr_mirrors; i++) - ret = fn(ti, ms->mirror[i].dev, - ms->mirror[i].offset, ti->len, data); - - return ret; -} - -static struct target_type mirror_target = { - .name = "mirror", - .version = {1, 12, 1}, - .module = THIS_MODULE, - .ctr = mirror_ctr, - .dtr = mirror_dtr, - .map = mirror_map, - .end_io = mirror_end_io, - .presuspend = mirror_presuspend, - .postsuspend = mirror_postsuspend, - .resume = mirror_resume, - .status = mirror_status, - .iterate_devices = mirror_iterate_devices, -}; - -static int __init dm_mirror_init(void) -{ - int r; - - _dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0); - if (!_dm_raid1_read_record_cache) { - DMERR("Can't allocate dm_raid1_read_record cache"); - r = -ENOMEM; - goto bad_cache; - } - - r = dm_register_target(&mirror_target); - if (r < 0) { 
- DMERR("Failed to register mirror target"); - goto bad_target; - } - - return 0; - -bad_target: - kmem_cache_destroy(_dm_raid1_read_record_cache); -bad_cache: - return r; -} - -static void __exit dm_mirror_exit(void) -{ - dm_unregister_target(&mirror_target); - kmem_cache_destroy(_dm_raid1_read_record_cache); -} - -/* Module hooks */ -module_init(dm_mirror_init); -module_exit(dm_mirror_exit); - -MODULE_DESCRIPTION(DM_NAME " mirror target"); -MODULE_AUTHOR("Joe Thornber"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm-region-hash.c b/ANDROID_3.4.5/drivers/md/dm-region-hash.c deleted file mode 100644 index 7771ed21..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-region-hash.c +++ /dev/null @@ -1,720 +0,0 @@ -/* - * Copyright (C) 2003 Sistina Software Limited. - * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. - */ - -#include <linux/dm-dirty-log.h> -#include <linux/dm-region-hash.h> - -#include <linux/ctype.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> - -#include "dm.h" - -#define DM_MSG_PREFIX "region hash" - -/*----------------------------------------------------------------- - * Region hash - * - * The mirror splits itself up into discrete regions. Each - * region can be in one of three states: clean, dirty, - * nosync. There is no need to put clean regions in the hash. - * - * In addition to being present in the hash table a region _may_ - * be present on one of three lists. - * - * clean_regions: Regions on this list have no io pending to - * them, they are in sync, we are no longer interested in them, - * they are dull. dm_rh_update_states() will remove them from the - * hash table. - * - * quiesced_regions: These regions have been spun down, ready - * for recovery. rh_recovery_start() will remove regions from - * this list and hand them to kmirrord, which will schedule the - * recovery io with kcopyd. - * - * recovered_regions: Regions that kcopyd has successfully - * recovered. dm_rh_update_states() will now schedule any delayed - * io, up the recovery_count, and remove the region from the - * hash. - * - * There are 2 locks: - * A rw spin lock 'hash_lock' protects just the hash table, - * this is never held in write mode from interrupt context, - * which I believe means that we only have to disable irqs when - * doing a write lock. - * - * An ordinary spin lock 'region_lock' that protects the three - * lists in the region_hash, with the 'state', 'list' and - * 'delayed_bios' fields of the regions. This is used from irq - * context, so all other uses will have to suspend local irqs. - *---------------------------------------------------------------*/ -struct dm_region_hash { - uint32_t region_size; - unsigned region_shift; - - /* holds persistent region state */ - struct dm_dirty_log *log; - - /* hash table */ - rwlock_t hash_lock; - mempool_t *region_pool; - unsigned mask; - unsigned nr_buckets; - unsigned prime; - unsigned shift; - struct list_head *buckets; - - unsigned max_recovery; /* Max # of regions to recover in parallel */ - - spinlock_t region_lock; - atomic_t recovery_in_flight; - struct semaphore recovery_count; - struct list_head clean_regions; - struct list_head quiesced_regions; - struct list_head recovered_regions; - struct list_head failed_recovered_regions; - - /* - * If there was a flush failure no regions can be marked clean. 
- */ - int flush_failure; - - void *context; - sector_t target_begin; - - /* Callback function to schedule bios writes */ - void (*dispatch_bios)(void *context, struct bio_list *bios); - - /* Callback function to wakeup callers worker thread. */ - void (*wakeup_workers)(void *context); - - /* Callback function to wakeup callers recovery waiters. */ - void (*wakeup_all_recovery_waiters)(void *context); -}; - -struct dm_region { - struct dm_region_hash *rh; /* FIXME: can we get rid of this ? */ - region_t key; - int state; - - struct list_head hash_list; - struct list_head list; - - atomic_t pending; - struct bio_list delayed_bios; -}; - -/* - * Conversion fns - */ -static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector) -{ - return sector >> rh->region_shift; -} - -sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region) -{ - return region << rh->region_shift; -} -EXPORT_SYMBOL_GPL(dm_rh_region_to_sector); - -region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio) -{ - return dm_rh_sector_to_region(rh, bio->bi_sector - rh->target_begin); -} -EXPORT_SYMBOL_GPL(dm_rh_bio_to_region); - -void *dm_rh_region_context(struct dm_region *reg) -{ - return reg->rh->context; -} -EXPORT_SYMBOL_GPL(dm_rh_region_context); - -region_t dm_rh_get_region_key(struct dm_region *reg) -{ - return reg->key; -} -EXPORT_SYMBOL_GPL(dm_rh_get_region_key); - -sector_t dm_rh_get_region_size(struct dm_region_hash *rh) -{ - return rh->region_size; -} -EXPORT_SYMBOL_GPL(dm_rh_get_region_size); - -/* - * FIXME: shall we pass in a structure instead of all these args to - * dm_region_hash_create()???? - */ -#define RH_HASH_MULT 2654435387U -#define RH_HASH_SHIFT 12 - -#define MIN_REGIONS 64 -struct dm_region_hash *dm_region_hash_create( - void *context, void (*dispatch_bios)(void *context, - struct bio_list *bios), - void (*wakeup_workers)(void *context), - void (*wakeup_all_recovery_waiters)(void *context), - sector_t target_begin, unsigned max_recovery, - struct dm_dirty_log *log, uint32_t region_size, - region_t nr_regions) -{ - struct dm_region_hash *rh; - unsigned nr_buckets, max_buckets; - size_t i; - - /* - * Calculate a suitable number of buckets for our hash - * table. 
- */ - max_buckets = nr_regions >> 6; - for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1) - ; - nr_buckets >>= 1; - - rh = kmalloc(sizeof(*rh), GFP_KERNEL); - if (!rh) { - DMERR("unable to allocate region hash memory"); - return ERR_PTR(-ENOMEM); - } - - rh->context = context; - rh->dispatch_bios = dispatch_bios; - rh->wakeup_workers = wakeup_workers; - rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters; - rh->target_begin = target_begin; - rh->max_recovery = max_recovery; - rh->log = log; - rh->region_size = region_size; - rh->region_shift = ffs(region_size) - 1; - rwlock_init(&rh->hash_lock); - rh->mask = nr_buckets - 1; - rh->nr_buckets = nr_buckets; - - rh->shift = RH_HASH_SHIFT; - rh->prime = RH_HASH_MULT; - - rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets)); - if (!rh->buckets) { - DMERR("unable to allocate region hash bucket memory"); - kfree(rh); - return ERR_PTR(-ENOMEM); - } - - for (i = 0; i < nr_buckets; i++) - INIT_LIST_HEAD(rh->buckets + i); - - spin_lock_init(&rh->region_lock); - sema_init(&rh->recovery_count, 0); - atomic_set(&rh->recovery_in_flight, 0); - INIT_LIST_HEAD(&rh->clean_regions); - INIT_LIST_HEAD(&rh->quiesced_regions); - INIT_LIST_HEAD(&rh->recovered_regions); - INIT_LIST_HEAD(&rh->failed_recovered_regions); - rh->flush_failure = 0; - - rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS, - sizeof(struct dm_region)); - if (!rh->region_pool) { - vfree(rh->buckets); - kfree(rh); - rh = ERR_PTR(-ENOMEM); - } - - return rh; -} -EXPORT_SYMBOL_GPL(dm_region_hash_create); - -void dm_region_hash_destroy(struct dm_region_hash *rh) -{ - unsigned h; - struct dm_region *reg, *nreg; - - BUG_ON(!list_empty(&rh->quiesced_regions)); - for (h = 0; h < rh->nr_buckets; h++) { - list_for_each_entry_safe(reg, nreg, rh->buckets + h, - hash_list) { - BUG_ON(atomic_read(®->pending)); - mempool_free(reg, rh->region_pool); - } - } - - if (rh->log) - dm_dirty_log_destroy(rh->log); - - if (rh->region_pool) - mempool_destroy(rh->region_pool); - - vfree(rh->buckets); - kfree(rh); -} -EXPORT_SYMBOL_GPL(dm_region_hash_destroy); - -struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh) -{ - return rh->log; -} -EXPORT_SYMBOL_GPL(dm_rh_dirty_log); - -static unsigned rh_hash(struct dm_region_hash *rh, region_t region) -{ - return (unsigned) ((region * rh->prime) >> rh->shift) & rh->mask; -} - -static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region) -{ - struct dm_region *reg; - struct list_head *bucket = rh->buckets + rh_hash(rh, region); - - list_for_each_entry(reg, bucket, hash_list) - if (reg->key == region) - return reg; - - return NULL; -} - -static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg) -{ - list_add(®->hash_list, rh->buckets + rh_hash(rh, reg->key)); -} - -static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region) -{ - struct dm_region *reg, *nreg; - - nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC); - if (unlikely(!nreg)) - nreg = kmalloc(sizeof(*nreg), GFP_NOIO | __GFP_NOFAIL); - - nreg->state = rh->log->type->in_sync(rh->log, region, 1) ? - DM_RH_CLEAN : DM_RH_NOSYNC; - nreg->rh = rh; - nreg->key = region; - INIT_LIST_HEAD(&nreg->list); - atomic_set(&nreg->pending, 0); - bio_list_init(&nreg->delayed_bios); - - write_lock_irq(&rh->hash_lock); - reg = __rh_lookup(rh, region); - if (reg) - /* We lost the race. 
*/ - mempool_free(nreg, rh->region_pool); - else { - __rh_insert(rh, nreg); - if (nreg->state == DM_RH_CLEAN) { - spin_lock(&rh->region_lock); - list_add(&nreg->list, &rh->clean_regions); - spin_unlock(&rh->region_lock); - } - - reg = nreg; - } - write_unlock_irq(&rh->hash_lock); - - return reg; -} - -static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region) -{ - struct dm_region *reg; - - reg = __rh_lookup(rh, region); - if (!reg) { - read_unlock(&rh->hash_lock); - reg = __rh_alloc(rh, region); - read_lock(&rh->hash_lock); - } - - return reg; -} - -int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block) -{ - int r; - struct dm_region *reg; - - read_lock(&rh->hash_lock); - reg = __rh_lookup(rh, region); - read_unlock(&rh->hash_lock); - - if (reg) - return reg->state; - - /* - * The region wasn't in the hash, so we fall back to the - * dirty log. - */ - r = rh->log->type->in_sync(rh->log, region, may_block); - - /* - * Any error from the dirty log (eg. -EWOULDBLOCK) gets - * taken as a DM_RH_NOSYNC - */ - return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC; -} -EXPORT_SYMBOL_GPL(dm_rh_get_state); - -static void complete_resync_work(struct dm_region *reg, int success) -{ - struct dm_region_hash *rh = reg->rh; - - rh->log->type->set_region_sync(rh->log, reg->key, success); - - /* - * Dispatch the bios before we call 'wake_up_all'. - * This is important because if we are suspending, - * we want to know that recovery is complete and - * the work queue is flushed. If we wake_up_all - * before we dispatch_bios (queue bios and call wake()), - * then we risk suspending before the work queue - * has been properly flushed. - */ - rh->dispatch_bios(rh->context, ®->delayed_bios); - if (atomic_dec_and_test(&rh->recovery_in_flight)) - rh->wakeup_all_recovery_waiters(rh->context); - up(&rh->recovery_count); -} - -/* dm_rh_mark_nosync - * @ms - * @bio - * - * The bio was written on some mirror(s) but failed on other mirror(s). - * We can successfully endio the bio but should avoid the region being - * marked clean by setting the state DM_RH_NOSYNC. - * - * This function is _not_ safe in interrupt context! - */ -void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio) -{ - unsigned long flags; - struct dm_dirty_log *log = rh->log; - struct dm_region *reg; - region_t region = dm_rh_bio_to_region(rh, bio); - int recovering = 0; - - if (bio->bi_rw & REQ_FLUSH) { - rh->flush_failure = 1; - return; - } - - /* We must inform the log that the sync count has changed. */ - log->type->set_region_sync(log, region, 0); - - read_lock(&rh->hash_lock); - reg = __rh_find(rh, region); - read_unlock(&rh->hash_lock); - - /* region hash entry should exist because write was in-flight */ - BUG_ON(!reg); - BUG_ON(!list_empty(®->list)); - - spin_lock_irqsave(&rh->region_lock, flags); - /* - * Possible cases: - * 1) DM_RH_DIRTY - * 2) DM_RH_NOSYNC: was dirty, other preceding writes failed - * 3) DM_RH_RECOVERING: flushing pending writes - * Either case, the region should have not been connected to list. - */ - recovering = (reg->state == DM_RH_RECOVERING); - reg->state = DM_RH_NOSYNC; - BUG_ON(!list_empty(®->list)); - spin_unlock_irqrestore(&rh->region_lock, flags); - - if (recovering) - complete_resync_work(reg, 0); -} -EXPORT_SYMBOL_GPL(dm_rh_mark_nosync); - -void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled) -{ - struct dm_region *reg, *next; - - LIST_HEAD(clean); - LIST_HEAD(recovered); - LIST_HEAD(failed_recovered); - - /* - * Quickly grab the lists. 
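__rh_find() and __rh_alloc() above use a classic pattern: drop the read lock, allocate with no lock held, then retake the lock as a writer and re-check in case another CPU inserted the same region first ("We lost the race"). A compact userspace analogue using a pthread rwlock; the list and the names are hypothetical:

#include <pthread.h>
#include <stdlib.h>

struct node {
	unsigned long key;
	struct node *next;
};

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
static struct node *head;

static struct node *lookup(unsigned long key)
{
	struct node *n;

	for (n = head; n; n = n->next)
		if (n->key == key)
			return n;
	return NULL;
}

/* Called with the read lock held, like __rh_find(); returns with it held. */
static struct node *find_or_create(unsigned long key)
{
	struct node *n = lookup(key);

	if (!n) {
		/* Drop the lock so the (possibly sleeping) allocation is not
		 * done under it and the write lock can be taken afterwards. */
		pthread_rwlock_unlock(&lock);
		n = malloc(sizeof(*n));		/* failure handling omitted */
		n->key = key;

		/* Retake as a writer and re-check: someone may have won the race. */
		pthread_rwlock_wrlock(&lock);
		if (lookup(key)) {
			free(n);
			n = lookup(key);
		} else {
			n->next = head;
			head = n;
		}
		pthread_rwlock_unlock(&lock);
		pthread_rwlock_rdlock(&lock);
	}
	return n;
}

int main(void)
{
	pthread_rwlock_rdlock(&lock);
	struct node *n = find_or_create(42);
	pthread_rwlock_unlock(&lock);
	return n ? 0 : 1;
}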
- */ - write_lock_irq(&rh->hash_lock); - spin_lock(&rh->region_lock); - if (!list_empty(&rh->clean_regions)) { - list_splice_init(&rh->clean_regions, &clean); - - list_for_each_entry(reg, &clean, list) - list_del(®->hash_list); - } - - if (!list_empty(&rh->recovered_regions)) { - list_splice_init(&rh->recovered_regions, &recovered); - - list_for_each_entry(reg, &recovered, list) - list_del(®->hash_list); - } - - if (!list_empty(&rh->failed_recovered_regions)) { - list_splice_init(&rh->failed_recovered_regions, - &failed_recovered); - - list_for_each_entry(reg, &failed_recovered, list) - list_del(®->hash_list); - } - - spin_unlock(&rh->region_lock); - write_unlock_irq(&rh->hash_lock); - - /* - * All the regions on the recovered and clean lists have - * now been pulled out of the system, so no need to do - * any more locking. - */ - list_for_each_entry_safe(reg, next, &recovered, list) { - rh->log->type->clear_region(rh->log, reg->key); - complete_resync_work(reg, 1); - mempool_free(reg, rh->region_pool); - } - - list_for_each_entry_safe(reg, next, &failed_recovered, list) { - complete_resync_work(reg, errors_handled ? 0 : 1); - mempool_free(reg, rh->region_pool); - } - - list_for_each_entry_safe(reg, next, &clean, list) { - rh->log->type->clear_region(rh->log, reg->key); - mempool_free(reg, rh->region_pool); - } - - rh->log->type->flush(rh->log); -} -EXPORT_SYMBOL_GPL(dm_rh_update_states); - -static void rh_inc(struct dm_region_hash *rh, region_t region) -{ - struct dm_region *reg; - - read_lock(&rh->hash_lock); - reg = __rh_find(rh, region); - - spin_lock_irq(&rh->region_lock); - atomic_inc(®->pending); - - if (reg->state == DM_RH_CLEAN) { - reg->state = DM_RH_DIRTY; - list_del_init(®->list); /* take off the clean list */ - spin_unlock_irq(&rh->region_lock); - - rh->log->type->mark_region(rh->log, reg->key); - } else - spin_unlock_irq(&rh->region_lock); - - - read_unlock(&rh->hash_lock); -} - -void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios) -{ - struct bio *bio; - - for (bio = bios->head; bio; bio = bio->bi_next) { - if (bio->bi_rw & REQ_FLUSH) - continue; - rh_inc(rh, dm_rh_bio_to_region(rh, bio)); - } -} -EXPORT_SYMBOL_GPL(dm_rh_inc_pending); - -void dm_rh_dec(struct dm_region_hash *rh, region_t region) -{ - unsigned long flags; - struct dm_region *reg; - int should_wake = 0; - - read_lock(&rh->hash_lock); - reg = __rh_lookup(rh, region); - read_unlock(&rh->hash_lock); - - spin_lock_irqsave(&rh->region_lock, flags); - if (atomic_dec_and_test(®->pending)) { - /* - * There is no pending I/O for this region. - * We can move the region to corresponding list for next action. - * At this point, the region is not yet connected to any list. - * - * If the state is DM_RH_NOSYNC, the region should be kept off - * from clean list. - * The hash entry for DM_RH_NOSYNC will remain in memory - * until the region is recovered or the map is reloaded. - */ - - /* do nothing for DM_RH_NOSYNC */ - if (unlikely(rh->flush_failure)) { - /* - * If a write flush failed some time ago, we - * don't know whether or not this write made it - * to the disk, so we must resync the device. 
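Together with dm_rh_dec(), which completes just below, the helpers above move each region through a small state machine. The summary below restates those transitions using the state names from this file; the enum values themselves are illustrative:

/* Region states as used by the surrounding code; numeric values illustrative. */
enum rh_region_state {
	DM_RH_CLEAN,		/* in sync, no writes pending                  */
	DM_RH_DIRTY,		/* writes in flight, marked in the dirty log   */
	DM_RH_NOSYNC,		/* a write failed; the region must be resynced */
	DM_RH_RECOVERING,	/* resync in progress, new writes are delayed  */
};

/*
 * Transitions driven by the surrounding code:
 *   rh_inc():             CLEAN      -> DIRTY   (mark_region() in the log)
 *   dm_rh_dec():          DIRTY      -> CLEAN   (last pending write done)
 *                         RECOVERING -> quiesced list (ready to resync)
 *                         any        -> NOSYNC  (if an earlier flush failed)
 *   dm_rh_mark_nosync():  DIRTY/RECOVERING -> NOSYNC (write failed on a mirror)
 *   dm_rh_recovery_end(): RECOVERING -> recovered or failed_recovered list
 */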
- */ - reg->state = DM_RH_NOSYNC; - } else if (reg->state == DM_RH_RECOVERING) { - list_add_tail(®->list, &rh->quiesced_regions); - } else if (reg->state == DM_RH_DIRTY) { - reg->state = DM_RH_CLEAN; - list_add(®->list, &rh->clean_regions); - } - should_wake = 1; - } - spin_unlock_irqrestore(&rh->region_lock, flags); - - if (should_wake) - rh->wakeup_workers(rh->context); -} -EXPORT_SYMBOL_GPL(dm_rh_dec); - -/* - * Starts quiescing a region in preparation for recovery. - */ -static int __rh_recovery_prepare(struct dm_region_hash *rh) -{ - int r; - region_t region; - struct dm_region *reg; - - /* - * Ask the dirty log what's next. - */ - r = rh->log->type->get_resync_work(rh->log, ®ion); - if (r <= 0) - return r; - - /* - * Get this region, and start it quiescing by setting the - * recovering flag. - */ - read_lock(&rh->hash_lock); - reg = __rh_find(rh, region); - read_unlock(&rh->hash_lock); - - spin_lock_irq(&rh->region_lock); - reg->state = DM_RH_RECOVERING; - - /* Already quiesced ? */ - if (atomic_read(®->pending)) - list_del_init(®->list); - else - list_move(®->list, &rh->quiesced_regions); - - spin_unlock_irq(&rh->region_lock); - - return 1; -} - -void dm_rh_recovery_prepare(struct dm_region_hash *rh) -{ - /* Extra reference to avoid race with dm_rh_stop_recovery */ - atomic_inc(&rh->recovery_in_flight); - - while (!down_trylock(&rh->recovery_count)) { - atomic_inc(&rh->recovery_in_flight); - if (__rh_recovery_prepare(rh) <= 0) { - atomic_dec(&rh->recovery_in_flight); - up(&rh->recovery_count); - break; - } - } - - /* Drop the extra reference */ - if (atomic_dec_and_test(&rh->recovery_in_flight)) - rh->wakeup_all_recovery_waiters(rh->context); -} -EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare); - -/* - * Returns any quiesced regions. - */ -struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh) -{ - struct dm_region *reg = NULL; - - spin_lock_irq(&rh->region_lock); - if (!list_empty(&rh->quiesced_regions)) { - reg = list_entry(rh->quiesced_regions.next, - struct dm_region, list); - list_del_init(®->list); /* remove from the quiesced list */ - } - spin_unlock_irq(&rh->region_lock); - - return reg; -} -EXPORT_SYMBOL_GPL(dm_rh_recovery_start); - -void dm_rh_recovery_end(struct dm_region *reg, int success) -{ - struct dm_region_hash *rh = reg->rh; - - spin_lock_irq(&rh->region_lock); - if (success) - list_add(®->list, ®->rh->recovered_regions); - else - list_add(®->list, ®->rh->failed_recovered_regions); - - spin_unlock_irq(&rh->region_lock); - - rh->wakeup_workers(rh->context); -} -EXPORT_SYMBOL_GPL(dm_rh_recovery_end); - -/* Return recovery in flight count. 
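The recovery_count semaphore acts as a throttle: dm_rh_start_recovery() and dm_rh_stop_recovery(), just below, add and drain max_recovery permits, __rh_recovery_prepare() above claims one per region with down_trylock(), and complete_resync_work() returns it. A userspace analogue with POSIX semaphores and hypothetical numbers:

#include <semaphore.h>
#include <stdio.h>

static sem_t recovery_count;

/* dm_rh_start_recovery(): hand out max_recovery concurrent recovery slots. */
static void start_recovery(unsigned max_recovery)
{
	while (max_recovery--)
		sem_post(&recovery_count);
}

/* __rh_recovery_prepare() via down_trylock(): claim a slot without blocking. */
static int try_claim_slot(void)
{
	return sem_trywait(&recovery_count) == 0;
}

/* complete_resync_work(): give the slot back when a region finishes. */
static void recovery_done(void)
{
	sem_post(&recovery_count);
}

int main(void)
{
	sem_init(&recovery_count, 0, 0);	/* starts drained, like sema_init(..., 0) */
	start_recovery(2);			/* allow two recoveries at once */

	printf("%d %d %d\n", try_claim_slot(), try_claim_slot(), try_claim_slot());
	/* prints "1 1 0": a third region must wait until recovery_done() runs */
	recovery_done();
	return 0;
}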
*/ -int dm_rh_recovery_in_flight(struct dm_region_hash *rh) -{ - return atomic_read(&rh->recovery_in_flight); -} -EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight); - -int dm_rh_flush(struct dm_region_hash *rh) -{ - return rh->log->type->flush(rh->log); -} -EXPORT_SYMBOL_GPL(dm_rh_flush); - -void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio) -{ - struct dm_region *reg; - - read_lock(&rh->hash_lock); - reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio)); - bio_list_add(®->delayed_bios, bio); - read_unlock(&rh->hash_lock); -} -EXPORT_SYMBOL_GPL(dm_rh_delay); - -void dm_rh_stop_recovery(struct dm_region_hash *rh) -{ - int i; - - /* wait for any recovering regions */ - for (i = 0; i < rh->max_recovery; i++) - down(&rh->recovery_count); -} -EXPORT_SYMBOL_GPL(dm_rh_stop_recovery); - -void dm_rh_start_recovery(struct dm_region_hash *rh) -{ - int i; - - for (i = 0; i < rh->max_recovery; i++) - up(&rh->recovery_count); - - rh->wakeup_workers(rh->context); -} -EXPORT_SYMBOL_GPL(dm_rh_start_recovery); - -MODULE_DESCRIPTION(DM_NAME " region hash"); -MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm-round-robin.c b/ANDROID_3.4.5/drivers/md/dm-round-robin.c deleted file mode 100644 index 6ab1192c..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-round-robin.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright (C) 2003 Sistina Software. - * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. - * - * Module Author: Heinz Mauelshagen - * - * This file is released under the GPL. - * - * Round-robin path selector. - */ - -#include <linux/device-mapper.h> - -#include "dm-path-selector.h" - -#include <linux/slab.h> -#include <linux/module.h> - -#define DM_MSG_PREFIX "multipath round-robin" - -/*----------------------------------------------------------------- - * Path-handling code, paths are held in lists - *---------------------------------------------------------------*/ -struct path_info { - struct list_head list; - struct dm_path *path; - unsigned repeat_count; -}; - -static void free_paths(struct list_head *paths) -{ - struct path_info *pi, *next; - - list_for_each_entry_safe(pi, next, paths, list) { - list_del(&pi->list); - kfree(pi); - } -} - -/*----------------------------------------------------------------- - * Round-robin selector - *---------------------------------------------------------------*/ - -#define RR_MIN_IO 1000 - -struct selector { - struct list_head valid_paths; - struct list_head invalid_paths; -}; - -static struct selector *alloc_selector(void) -{ - struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); - - if (s) { - INIT_LIST_HEAD(&s->valid_paths); - INIT_LIST_HEAD(&s->invalid_paths); - } - - return s; -} - -static int rr_create(struct path_selector *ps, unsigned argc, char **argv) -{ - struct selector *s; - - s = alloc_selector(); - if (!s) - return -ENOMEM; - - ps->context = s; - return 0; -} - -static void rr_destroy(struct path_selector *ps) -{ - struct selector *s = (struct selector *) ps->context; - - free_paths(&s->valid_paths); - free_paths(&s->invalid_paths); - kfree(s); - ps->context = NULL; -} - -static int rr_status(struct path_selector *ps, struct dm_path *path, - status_type_t type, char *result, unsigned int maxlen) -{ - struct path_info *pi; - int sz = 0; - - if (!path) - DMEMIT("0 "); - else { - switch(type) { - case STATUSTYPE_INFO: - break; - case STATUSTYPE_TABLE: - pi = path->pscontext; - DMEMIT("%u ", pi->repeat_count); - break; - } - } - - return sz; -} - -/* - * Called 
during initialisation to register each path with an - * optional repeat_count. - */ -static int rr_add_path(struct path_selector *ps, struct dm_path *path, - int argc, char **argv, char **error) -{ - struct selector *s = (struct selector *) ps->context; - struct path_info *pi; - unsigned repeat_count = RR_MIN_IO; - char dummy; - - if (argc > 1) { - *error = "round-robin ps: incorrect number of arguments"; - return -EINVAL; - } - - /* First path argument is number of I/Os before switching path */ - if ((argc == 1) && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) { - *error = "round-robin ps: invalid repeat count"; - return -EINVAL; - } - - /* allocate the path */ - pi = kmalloc(sizeof(*pi), GFP_KERNEL); - if (!pi) { - *error = "round-robin ps: Error allocating path context"; - return -ENOMEM; - } - - pi->path = path; - pi->repeat_count = repeat_count; - - path->pscontext = pi; - - list_add_tail(&pi->list, &s->valid_paths); - - return 0; -} - -static void rr_fail_path(struct path_selector *ps, struct dm_path *p) -{ - struct selector *s = (struct selector *) ps->context; - struct path_info *pi = p->pscontext; - - list_move(&pi->list, &s->invalid_paths); -} - -static int rr_reinstate_path(struct path_selector *ps, struct dm_path *p) -{ - struct selector *s = (struct selector *) ps->context; - struct path_info *pi = p->pscontext; - - list_move(&pi->list, &s->valid_paths); - - return 0; -} - -static struct dm_path *rr_select_path(struct path_selector *ps, - unsigned *repeat_count, size_t nr_bytes) -{ - struct selector *s = (struct selector *) ps->context; - struct path_info *pi = NULL; - - if (!list_empty(&s->valid_paths)) { - pi = list_entry(s->valid_paths.next, struct path_info, list); - list_move_tail(&pi->list, &s->valid_paths); - *repeat_count = pi->repeat_count; - } - - return pi ? pi->path : NULL; -} - -static struct path_selector_type rr_ps = { - .name = "round-robin", - .module = THIS_MODULE, - .table_args = 1, - .info_args = 0, - .create = rr_create, - .destroy = rr_destroy, - .status = rr_status, - .add_path = rr_add_path, - .fail_path = rr_fail_path, - .reinstate_path = rr_reinstate_path, - .select_path = rr_select_path, -}; - -static int __init dm_rr_init(void) -{ - int r = dm_register_path_selector(&rr_ps); - - if (r < 0) - DMERR("register failed %d", r); - - DMINFO("version 1.0.0 loaded"); - - return r; -} - -static void __exit dm_rr_exit(void) -{ - int r = dm_unregister_path_selector(&rr_ps); - - if (r < 0) - DMERR("unregister failed %d", r); -} - -module_init(dm_rr_init); -module_exit(dm_rr_exit); - -MODULE_DESCRIPTION(DM_NAME " round-robin multipath path selector"); -MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm-service-time.c b/ANDROID_3.4.5/drivers/md/dm-service-time.c deleted file mode 100644 index 9df8f6bd..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-service-time.c +++ /dev/null @@ -1,343 +0,0 @@ -/* - * Copyright (C) 2007-2009 NEC Corporation. All Rights Reserved. - * - * Module Author: Kiyoshi Ueda - * - * This file is released under the GPL. - * - * Throughput oriented path selector. 
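rr_select_path() in the round-robin selector above always returns the first entry of the valid list and rotates it to the tail, so paths are used strictly in turn every repeat_count I/Os. A trivial userspace sketch of that rotation; the device numbers are made up:

#include <stdio.h>

int main(void)
{
	/* Hypothetical paths; the real selector stores struct dm_path entries. */
	const char *paths[] = { "8:16", "8:32", "8:48" };
	unsigned nr = 3, head = 0, i;

	for (i = 0; i < 6; i++) {
		/* take the current head ... */
		printf("I/O group %u -> path %s\n", i, paths[head]);
		/* ... then rotate it to the tail, as list_move_tail() does above */
		head = (head + 1) % nr;
	}
	return 0;
}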
- */ - -#include "dm.h" -#include "dm-path-selector.h" - -#include <linux/slab.h> -#include <linux/module.h> - -#define DM_MSG_PREFIX "multipath service-time" -#define ST_MIN_IO 1 -#define ST_MAX_RELATIVE_THROUGHPUT 100 -#define ST_MAX_RELATIVE_THROUGHPUT_SHIFT 7 -#define ST_MAX_INFLIGHT_SIZE ((size_t)-1 >> ST_MAX_RELATIVE_THROUGHPUT_SHIFT) -#define ST_VERSION "0.2.0" - -struct selector { - struct list_head valid_paths; - struct list_head failed_paths; -}; - -struct path_info { - struct list_head list; - struct dm_path *path; - unsigned repeat_count; - unsigned relative_throughput; - atomic_t in_flight_size; /* Total size of in-flight I/Os */ -}; - -static struct selector *alloc_selector(void) -{ - struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); - - if (s) { - INIT_LIST_HEAD(&s->valid_paths); - INIT_LIST_HEAD(&s->failed_paths); - } - - return s; -} - -static int st_create(struct path_selector *ps, unsigned argc, char **argv) -{ - struct selector *s = alloc_selector(); - - if (!s) - return -ENOMEM; - - ps->context = s; - return 0; -} - -static void free_paths(struct list_head *paths) -{ - struct path_info *pi, *next; - - list_for_each_entry_safe(pi, next, paths, list) { - list_del(&pi->list); - kfree(pi); - } -} - -static void st_destroy(struct path_selector *ps) -{ - struct selector *s = ps->context; - - free_paths(&s->valid_paths); - free_paths(&s->failed_paths); - kfree(s); - ps->context = NULL; -} - -static int st_status(struct path_selector *ps, struct dm_path *path, - status_type_t type, char *result, unsigned maxlen) -{ - unsigned sz = 0; - struct path_info *pi; - - if (!path) - DMEMIT("0 "); - else { - pi = path->pscontext; - - switch (type) { - case STATUSTYPE_INFO: - DMEMIT("%d %u ", atomic_read(&pi->in_flight_size), - pi->relative_throughput); - break; - case STATUSTYPE_TABLE: - DMEMIT("%u %u ", pi->repeat_count, - pi->relative_throughput); - break; - } - } - - return sz; -} - -static int st_add_path(struct path_selector *ps, struct dm_path *path, - int argc, char **argv, char **error) -{ - struct selector *s = ps->context; - struct path_info *pi; - unsigned repeat_count = ST_MIN_IO; - unsigned relative_throughput = 1; - char dummy; - - /* - * Arguments: [<repeat_count> [<relative_throughput>]] - * <repeat_count>: The number of I/Os before switching path. - * If not given, default (ST_MIN_IO) is used. - * <relative_throughput>: The relative throughput value of - * the path among all paths in the path-group. - * The valid range: 0-<ST_MAX_RELATIVE_THROUGHPUT> - * If not given, minimum value '1' is used. - * If '0' is given, the path isn't selected while - * other paths having a positive value are - * available. 
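The sscanf(arg, "%u%c", &val, &dummy) checks used by these constructors accept an argument only when it is a bare unsigned integer: a second successful conversion would mean trailing junk. A quick illustration of the idiom:

#include <stdio.h>

static int parse_unsigned(const char *s, unsigned *val)
{
	char dummy;

	/* Exactly one conversion must succeed; trailing text bumps it to 2. */
	return sscanf(s, "%u%c", val, &dummy) == 1;
}

int main(void)
{
	unsigned v;

	printf("%d %d %d\n",
	       parse_unsigned("100", &v),	/* 1: valid repeat count */
	       parse_unsigned("100x", &v),	/* 0: trailing character */
	       parse_unsigned("", &v));		/* 0: nothing converted  */
	return 0;
}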
- */ - if (argc > 2) { - *error = "service-time ps: incorrect number of arguments"; - return -EINVAL; - } - - if (argc && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) { - *error = "service-time ps: invalid repeat count"; - return -EINVAL; - } - - if ((argc == 2) && - (sscanf(argv[1], "%u%c", &relative_throughput, &dummy) != 1 || - relative_throughput > ST_MAX_RELATIVE_THROUGHPUT)) { - *error = "service-time ps: invalid relative_throughput value"; - return -EINVAL; - } - - /* allocate the path */ - pi = kmalloc(sizeof(*pi), GFP_KERNEL); - if (!pi) { - *error = "service-time ps: Error allocating path context"; - return -ENOMEM; - } - - pi->path = path; - pi->repeat_count = repeat_count; - pi->relative_throughput = relative_throughput; - atomic_set(&pi->in_flight_size, 0); - - path->pscontext = pi; - - list_add_tail(&pi->list, &s->valid_paths); - - return 0; -} - -static void st_fail_path(struct path_selector *ps, struct dm_path *path) -{ - struct selector *s = ps->context; - struct path_info *pi = path->pscontext; - - list_move(&pi->list, &s->failed_paths); -} - -static int st_reinstate_path(struct path_selector *ps, struct dm_path *path) -{ - struct selector *s = ps->context; - struct path_info *pi = path->pscontext; - - list_move_tail(&pi->list, &s->valid_paths); - - return 0; -} - -/* - * Compare the estimated service time of 2 paths, pi1 and pi2, - * for the incoming I/O. - * - * Returns: - * < 0 : pi1 is better - * 0 : no difference between pi1 and pi2 - * > 0 : pi2 is better - * - * Description: - * Basically, the service time is estimated by: - * ('pi->in-flight-size' + 'incoming') / 'pi->relative_throughput' - * To reduce the calculation, some optimizations are made. - * (See comments inline) - */ -static int st_compare_load(struct path_info *pi1, struct path_info *pi2, - size_t incoming) -{ - size_t sz1, sz2, st1, st2; - - sz1 = atomic_read(&pi1->in_flight_size); - sz2 = atomic_read(&pi2->in_flight_size); - - /* - * Case 1: Both have same throughput value. Choose less loaded path. - */ - if (pi1->relative_throughput == pi2->relative_throughput) - return sz1 - sz2; - - /* - * Case 2a: Both have same load. Choose higher throughput path. - * Case 2b: One path has no throughput value. Choose the other one. - */ - if (sz1 == sz2 || - !pi1->relative_throughput || !pi2->relative_throughput) - return pi2->relative_throughput - pi1->relative_throughput; - - /* - * Case 3: Calculate service time. Choose faster path. - * Service time using pi1: - * st1 = (sz1 + incoming) / pi1->relative_throughput - * Service time using pi2: - * st2 = (sz2 + incoming) / pi2->relative_throughput - * - * To avoid the division, transform the expression to use - * multiplication. - * Because ->relative_throughput > 0 here, if st1 < st2, - * the expressions below are the same meaning: - * (sz1 + incoming) / pi1->relative_throughput < - * (sz2 + incoming) / pi2->relative_throughput - * (sz1 + incoming) * pi2->relative_throughput < - * (sz2 + incoming) * pi1->relative_throughput - * So use the later one. - */ - sz1 += incoming; - sz2 += incoming; - if (unlikely(sz1 >= ST_MAX_INFLIGHT_SIZE || - sz2 >= ST_MAX_INFLIGHT_SIZE)) { - /* - * Size may be too big for multiplying pi->relative_throughput - * and overflow. - * To avoid the overflow and mis-selection, shift down both. 
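st_compare_load() replaces the two divisions of the estimated service times with a single cross-multiplication, which is valid because both throughputs are positive at that point. A standalone sketch of the core comparison, leaving out the equal-throughput, zero-throughput and overflow special cases; the byte counts in main() are hypothetical:

#include <stddef.h>
#include <stdio.h>

/* Same sign convention as above: < 0 means path 1 is the better choice. */
static long compare_load(size_t in_flight1, unsigned throughput1,
			 size_t in_flight2, unsigned throughput2,
			 size_t incoming)
{
	/*
	 * (sz1 + incoming) / tp1  <  (sz2 + incoming) / tp2
	 *   <=>  (sz1 + incoming) * tp2  <  (sz2 + incoming) * tp1
	 * which holds because both throughputs are > 0 here.
	 */
	return (long)((in_flight1 + incoming) * throughput2) -
	       (long)((in_flight2 + incoming) * throughput1);
}

int main(void)
{
	/* 64KiB queued on a fast path vs. 16KiB on a slow one, 4KiB incoming. */
	long r = compare_load(65536, 100, 16384, 10, 4096);

	printf("%s\n", r < 0 ? "fast path wins" : "slow path wins");
	return 0;
}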
- */ - sz1 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT; - sz2 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT; - } - st1 = sz1 * pi2->relative_throughput; - st2 = sz2 * pi1->relative_throughput; - if (st1 != st2) - return st1 - st2; - - /* - * Case 4: Service time is equal. Choose higher throughput path. - */ - return pi2->relative_throughput - pi1->relative_throughput; -} - -static struct dm_path *st_select_path(struct path_selector *ps, - unsigned *repeat_count, size_t nr_bytes) -{ - struct selector *s = ps->context; - struct path_info *pi = NULL, *best = NULL; - - if (list_empty(&s->valid_paths)) - return NULL; - - /* Change preferred (first in list) path to evenly balance. */ - list_move_tail(s->valid_paths.next, &s->valid_paths); - - list_for_each_entry(pi, &s->valid_paths, list) - if (!best || (st_compare_load(pi, best, nr_bytes) < 0)) - best = pi; - - if (!best) - return NULL; - - *repeat_count = best->repeat_count; - - return best->path; -} - -static int st_start_io(struct path_selector *ps, struct dm_path *path, - size_t nr_bytes) -{ - struct path_info *pi = path->pscontext; - - atomic_add(nr_bytes, &pi->in_flight_size); - - return 0; -} - -static int st_end_io(struct path_selector *ps, struct dm_path *path, - size_t nr_bytes) -{ - struct path_info *pi = path->pscontext; - - atomic_sub(nr_bytes, &pi->in_flight_size); - - return 0; -} - -static struct path_selector_type st_ps = { - .name = "service-time", - .module = THIS_MODULE, - .table_args = 2, - .info_args = 2, - .create = st_create, - .destroy = st_destroy, - .status = st_status, - .add_path = st_add_path, - .fail_path = st_fail_path, - .reinstate_path = st_reinstate_path, - .select_path = st_select_path, - .start_io = st_start_io, - .end_io = st_end_io, -}; - -static int __init dm_st_init(void) -{ - int r = dm_register_path_selector(&st_ps); - - if (r < 0) - DMERR("register failed %d", r); - - DMINFO("version " ST_VERSION " loaded"); - - return r; -} - -static void __exit dm_st_exit(void) -{ - int r = dm_unregister_path_selector(&st_ps); - - if (r < 0) - DMERR("unregister failed %d", r); -} - -module_init(dm_st_init); -module_exit(dm_st_exit); - -MODULE_DESCRIPTION(DM_NAME " throughput oriented path selector"); -MODULE_AUTHOR("Kiyoshi Ueda <k-ueda@ct.jp.nec.com>"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm-snap-persistent.c b/ANDROID_3.4.5/drivers/md/dm-snap-persistent.c deleted file mode 100644 index 3ac41567..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-snap-persistent.c +++ /dev/null @@ -1,898 +0,0 @@ -/* - * Copyright (C) 2001-2002 Sistina Software (UK) Limited. - * Copyright (C) 2006-2008 Red Hat GmbH - * - * This file is released under the GPL. - */ - -#include "dm-exception-store.h" - -#include <linux/mm.h> -#include <linux/pagemap.h> -#include <linux/vmalloc.h> -#include <linux/export.h> -#include <linux/slab.h> -#include <linux/dm-io.h> - -#define DM_MSG_PREFIX "persistent snapshot" -#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */ - -/*----------------------------------------------------------------- - * Persistent snapshots, by persistent we mean that the snapshot - * will survive a reboot. - *---------------------------------------------------------------*/ - -/* - * We need to store a record of which parts of the origin have - * been copied to the snapshot device. The snapshot code - * requires that we copy exception chunks to chunk aligned areas - * of the COW store. It makes sense therefore, to store the - * metadata in chunk size blocks. 
- * - * There is no backward or forward compatibility implemented, - * snapshots with different disk versions than the kernel will - * not be usable. It is expected that "lvcreate" will blank out - * the start of a fresh COW device before calling the snapshot - * constructor. - * - * The first chunk of the COW device just contains the header. - * After this there is a chunk filled with exception metadata, - * followed by as many exception chunks as can fit in the - * metadata areas. - * - * All on disk structures are in little-endian format. The end - * of the exceptions info is indicated by an exception with a - * new_chunk of 0, which is invalid since it would point to the - * header chunk. - */ - -/* - * Magic for persistent snapshots: "SnAp" - Feeble isn't it. - */ -#define SNAP_MAGIC 0x70416e53 - -/* - * The on-disk version of the metadata. - */ -#define SNAPSHOT_DISK_VERSION 1 - -#define NUM_SNAPSHOT_HDR_CHUNKS 1 - -struct disk_header { - __le32 magic; - - /* - * Is this snapshot valid. There is no way of recovering - * an invalid snapshot. - */ - __le32 valid; - - /* - * Simple, incrementing version. no backward - * compatibility. - */ - __le32 version; - - /* In sectors */ - __le32 chunk_size; -} __packed; - -struct disk_exception { - __le64 old_chunk; - __le64 new_chunk; -} __packed; - -struct core_exception { - uint64_t old_chunk; - uint64_t new_chunk; -}; - -struct commit_callback { - void (*callback)(void *, int success); - void *context; -}; - -/* - * The top level structure for a persistent exception store. - */ -struct pstore { - struct dm_exception_store *store; - int version; - int valid; - uint32_t exceptions_per_area; - - /* - * Now that we have an asynchronous kcopyd there is no - * need for large chunk sizes, so it wont hurt to have a - * whole chunks worth of metadata in memory at once. - */ - void *area; - - /* - * An area of zeros used to clear the next area. - */ - void *zero_area; - - /* - * An area used for header. The header can be written - * concurrently with metadata (when invalidating the snapshot), - * so it needs a separate buffer. - */ - void *header_area; - - /* - * Used to keep track of which metadata area the data in - * 'chunk' refers to. - */ - chunk_t current_area; - - /* - * The next free chunk for an exception. - * - * When creating exceptions, all the chunks here and above are - * free. It holds the next chunk to be allocated. On rare - * occasions (e.g. after a system crash) holes can be left in - * the exception store because chunks can be committed out of - * order. - * - * When merging exceptions, it does not necessarily mean all the - * chunks here and above are free. It holds the value it would - * have held if all chunks had been committed in order of - * allocation. Consequently the value may occasionally be - * slightly too low, but since it's only used for 'status' and - * it can never reach its minimum value too early this doesn't - * matter. - */ - - chunk_t next_free; - - /* - * The index of next free exception in the current - * metadata area. - */ - uint32_t current_committed; - - atomic_t pending_count; - uint32_t callback_count; - struct commit_callback *callbacks; - struct dm_io_client *io_client; - - struct workqueue_struct *metadata_wq; -}; - -static int alloc_area(struct pstore *ps) -{ - int r = -ENOMEM; - size_t len; - - len = ps->store->chunk_size << SECTOR_SHIFT; - - /* - * Allocate the chunk_size block of memory that will hold - * a single metadata area. 
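Concretely, the header chunk described above starts with the four little-endian fields of struct disk_header. The sketch below parses illustrative bytes for a valid version-1 header with the default 32-sector chunk size; it assumes a little-endian host, whereas the kernel code converts with le32_to_cpu():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct disk_header {
	uint32_t magic;		/* __le32 "SnAp" (0x70416e53)             */
	uint32_t valid;		/* __le32, 1 while the snapshot is usable */
	uint32_t version;	/* __le32, SNAPSHOT_DISK_VERSION = 1      */
	uint32_t chunk_size;	/* __le32, in 512-byte sectors            */
};

int main(void)
{
	/* Illustrative bytes of a valid v1 header with a 32-sector chunk. */
	unsigned char buf[16] = {
		0x53, 0x6e, 0x41, 0x70,		/* magic, little endian */
		0x01, 0x00, 0x00, 0x00,		/* valid                */
		0x01, 0x00, 0x00, 0x00,		/* version              */
		0x20, 0x00, 0x00, 0x00,		/* chunk_size = 32      */
	};
	struct disk_header dh;

	memcpy(&dh, buf, sizeof(dh));	/* no byte swap: little-endian host assumed */
	printf("magic 0x%x valid %u version %u chunk_size %u sectors\n",
	       dh.magic, dh.valid, dh.version, dh.chunk_size);
	return 0;
}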
- */ - ps->area = vmalloc(len); - if (!ps->area) - goto err_area; - - ps->zero_area = vzalloc(len); - if (!ps->zero_area) - goto err_zero_area; - - ps->header_area = vmalloc(len); - if (!ps->header_area) - goto err_header_area; - - return 0; - -err_header_area: - vfree(ps->zero_area); - -err_zero_area: - vfree(ps->area); - -err_area: - return r; -} - -static void free_area(struct pstore *ps) -{ - if (ps->area) - vfree(ps->area); - ps->area = NULL; - - if (ps->zero_area) - vfree(ps->zero_area); - ps->zero_area = NULL; - - if (ps->header_area) - vfree(ps->header_area); - ps->header_area = NULL; -} - -struct mdata_req { - struct dm_io_region *where; - struct dm_io_request *io_req; - struct work_struct work; - int result; -}; - -static void do_metadata(struct work_struct *work) -{ - struct mdata_req *req = container_of(work, struct mdata_req, work); - - req->result = dm_io(req->io_req, 1, req->where, NULL); -} - -/* - * Read or write a chunk aligned and sized block of data from a device. - */ -static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw, - int metadata) -{ - struct dm_io_region where = { - .bdev = dm_snap_cow(ps->store->snap)->bdev, - .sector = ps->store->chunk_size * chunk, - .count = ps->store->chunk_size, - }; - struct dm_io_request io_req = { - .bi_rw = rw, - .mem.type = DM_IO_VMA, - .mem.ptr.vma = area, - .client = ps->io_client, - .notify.fn = NULL, - }; - struct mdata_req req; - - if (!metadata) - return dm_io(&io_req, 1, &where, NULL); - - req.where = &where; - req.io_req = &io_req; - - /* - * Issue the synchronous I/O from a different thread - * to avoid generic_make_request recursion. - */ - INIT_WORK_ONSTACK(&req.work, do_metadata); - queue_work(ps->metadata_wq, &req.work); - flush_work(&req.work); - - return req.result; -} - -/* - * Convert a metadata area index to a chunk index. - */ -static chunk_t area_location(struct pstore *ps, chunk_t area) -{ - return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area); -} - -/* - * Read or write a metadata area. Remembering to skip the first - * chunk which holds the header. 
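area_location() above encodes the layout just described: one header chunk, then repeating groups of one metadata chunk followed by exceptions_per_area data chunks. A small standalone illustration; 1024 exceptions per area corresponds to the default 16KiB chunk divided by the 16-byte struct disk_exception:

#include <stdint.h>
#include <stdio.h>

#define NUM_SNAPSHOT_HDR_CHUNKS 1

typedef uint64_t chunk_t;

/* Same formula as above: skip the header, then whole groups of
 * (one metadata chunk + exceptions_per_area data chunks). */
static chunk_t area_location(uint32_t exceptions_per_area, chunk_t area)
{
	return NUM_SNAPSHOT_HDR_CHUNKS + (chunk_t)(exceptions_per_area + 1) * area;
}

int main(void)
{
	uint32_t exceptions_per_area = 1024;	/* 16KiB chunk / 16-byte disk_exception */
	chunk_t area;

	for (area = 0; area < 3; area++)
		printf("metadata area %llu -> COW chunk %llu\n",
		       (unsigned long long)area,
		       (unsigned long long)area_location(exceptions_per_area, area));
	return 0;
}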
- */ -static int area_io(struct pstore *ps, int rw) -{ - int r; - chunk_t chunk; - - chunk = area_location(ps, ps->current_area); - - r = chunk_io(ps, ps->area, chunk, rw, 0); - if (r) - return r; - - return 0; -} - -static void zero_memory_area(struct pstore *ps) -{ - memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT); -} - -static int zero_disk_area(struct pstore *ps, chunk_t area) -{ - return chunk_io(ps, ps->zero_area, area_location(ps, area), WRITE, 0); -} - -static int read_header(struct pstore *ps, int *new_snapshot) -{ - int r; - struct disk_header *dh; - unsigned chunk_size; - int chunk_size_supplied = 1; - char *chunk_err; - - /* - * Use default chunk size (or logical_block_size, if larger) - * if none supplied - */ - if (!ps->store->chunk_size) { - ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, - bdev_logical_block_size(dm_snap_cow(ps->store->snap)-> - bdev) >> 9); - ps->store->chunk_mask = ps->store->chunk_size - 1; - ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1; - chunk_size_supplied = 0; - } - - ps->io_client = dm_io_client_create(); - if (IS_ERR(ps->io_client)) - return PTR_ERR(ps->io_client); - - r = alloc_area(ps); - if (r) - return r; - - r = chunk_io(ps, ps->header_area, 0, READ, 1); - if (r) - goto bad; - - dh = ps->header_area; - - if (le32_to_cpu(dh->magic) == 0) { - *new_snapshot = 1; - return 0; - } - - if (le32_to_cpu(dh->magic) != SNAP_MAGIC) { - DMWARN("Invalid or corrupt snapshot"); - r = -ENXIO; - goto bad; - } - - *new_snapshot = 0; - ps->valid = le32_to_cpu(dh->valid); - ps->version = le32_to_cpu(dh->version); - chunk_size = le32_to_cpu(dh->chunk_size); - - if (ps->store->chunk_size == chunk_size) - return 0; - - if (chunk_size_supplied) - DMWARN("chunk size %u in device metadata overrides " - "table chunk size of %u.", - chunk_size, ps->store->chunk_size); - - /* We had a bogus chunk_size. Fix stuff up. */ - free_area(ps); - - r = dm_exception_store_set_chunk_size(ps->store, chunk_size, - &chunk_err); - if (r) { - DMERR("invalid on-disk chunk size %u: %s.", - chunk_size, chunk_err); - return r; - } - - r = alloc_area(ps); - return r; - -bad: - free_area(ps); - return r; -} - -static int write_header(struct pstore *ps) -{ - struct disk_header *dh; - - memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT); - - dh = ps->header_area; - dh->magic = cpu_to_le32(SNAP_MAGIC); - dh->valid = cpu_to_le32(ps->valid); - dh->version = cpu_to_le32(ps->version); - dh->chunk_size = cpu_to_le32(ps->store->chunk_size); - - return chunk_io(ps, ps->header_area, 0, WRITE, 1); -} - -/* - * Access functions for the disk exceptions, these do the endian conversions. 
- */ -static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) -{ - BUG_ON(index >= ps->exceptions_per_area); - - return ((struct disk_exception *) ps->area) + index; -} - -static void read_exception(struct pstore *ps, - uint32_t index, struct core_exception *result) -{ - struct disk_exception *de = get_exception(ps, index); - - /* copy it */ - result->old_chunk = le64_to_cpu(de->old_chunk); - result->new_chunk = le64_to_cpu(de->new_chunk); -} - -static void write_exception(struct pstore *ps, - uint32_t index, struct core_exception *e) -{ - struct disk_exception *de = get_exception(ps, index); - - /* copy it */ - de->old_chunk = cpu_to_le64(e->old_chunk); - de->new_chunk = cpu_to_le64(e->new_chunk); -} - -static void clear_exception(struct pstore *ps, uint32_t index) -{ - struct disk_exception *de = get_exception(ps, index); - - /* clear it */ - de->old_chunk = 0; - de->new_chunk = 0; -} - -/* - * Registers the exceptions that are present in the current area. - * 'full' is filled in to indicate if the area has been - * filled. - */ -static int insert_exceptions(struct pstore *ps, - int (*callback)(void *callback_context, - chunk_t old, chunk_t new), - void *callback_context, - int *full) -{ - int r; - unsigned int i; - struct core_exception e; - - /* presume the area is full */ - *full = 1; - - for (i = 0; i < ps->exceptions_per_area; i++) { - read_exception(ps, i, &e); - - /* - * If the new_chunk is pointing at the start of - * the COW device, where the first metadata area - * is we know that we've hit the end of the - * exceptions. Therefore the area is not full. - */ - if (e.new_chunk == 0LL) { - ps->current_committed = i; - *full = 0; - break; - } - - /* - * Keep track of the start of the free chunks. - */ - if (ps->next_free <= e.new_chunk) - ps->next_free = e.new_chunk + 1; - - /* - * Otherwise we add the exception to the snapshot. - */ - r = callback(callback_context, e.old_chunk, e.new_chunk); - if (r) - return r; - } - - return 0; -} - -static int read_exceptions(struct pstore *ps, - int (*callback)(void *callback_context, chunk_t old, - chunk_t new), - void *callback_context) -{ - int r, full = 1; - - /* - * Keeping reading chunks and inserting exceptions until - * we find a partially full area. - */ - for (ps->current_area = 0; full; ps->current_area++) { - r = area_io(ps, READ); - if (r) - return r; - - r = insert_exceptions(ps, callback, callback_context, &full); - if (r) - return r; - } - - ps->current_area--; - - return 0; -} - -static struct pstore *get_info(struct dm_exception_store *store) -{ - return (struct pstore *) store->context; -} - -static void persistent_usage(struct dm_exception_store *store, - sector_t *total_sectors, - sector_t *sectors_allocated, - sector_t *metadata_sectors) -{ - struct pstore *ps = get_info(store); - - *sectors_allocated = ps->next_free * store->chunk_size; - *total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev); - - /* - * First chunk is the fixed header. - * Then there are (ps->current_area + 1) metadata chunks, each one - * separated from the next by ps->exceptions_per_area data chunks. 
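A worked example of the accounting persistent_usage() describes above, using the default 32-sector chunk and hypothetical progress values for current_area and next_free:

#include <stdint.h>
#include <stdio.h>

#define NUM_SNAPSHOT_HDR_CHUNKS 1

int main(void)
{
	uint64_t chunk_size = 32;	/* sectors: the 16KiB default               */
	uint64_t current_area = 2;	/* hypothetical: third metadata area in use */
	uint64_t next_free = 2100;	/* hypothetical: next chunk to be allocated */

	/* Same arithmetic persistent_usage() applies to the real pstore. */
	uint64_t sectors_allocated = next_free * chunk_size;
	uint64_t metadata_sectors =
		(current_area + 1 + NUM_SNAPSHOT_HDR_CHUNKS) * chunk_size;

	printf("allocated %llu sectors, %llu of them header+metadata\n",
	       (unsigned long long)sectors_allocated,
	       (unsigned long long)metadata_sectors);
	return 0;
}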
- */ - *metadata_sectors = (ps->current_area + 1 + NUM_SNAPSHOT_HDR_CHUNKS) * - store->chunk_size; -} - -static void persistent_dtr(struct dm_exception_store *store) -{ - struct pstore *ps = get_info(store); - - destroy_workqueue(ps->metadata_wq); - - /* Created in read_header */ - if (ps->io_client) - dm_io_client_destroy(ps->io_client); - free_area(ps); - - /* Allocated in persistent_read_metadata */ - if (ps->callbacks) - vfree(ps->callbacks); - - kfree(ps); -} - -static int persistent_read_metadata(struct dm_exception_store *store, - int (*callback)(void *callback_context, - chunk_t old, chunk_t new), - void *callback_context) -{ - int r, uninitialized_var(new_snapshot); - struct pstore *ps = get_info(store); - - /* - * Read the snapshot header. - */ - r = read_header(ps, &new_snapshot); - if (r) - return r; - - /* - * Now we know correct chunk_size, complete the initialisation. - */ - ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) / - sizeof(struct disk_exception); - ps->callbacks = dm_vcalloc(ps->exceptions_per_area, - sizeof(*ps->callbacks)); - if (!ps->callbacks) - return -ENOMEM; - - /* - * Do we need to setup a new snapshot ? - */ - if (new_snapshot) { - r = write_header(ps); - if (r) { - DMWARN("write_header failed"); - return r; - } - - ps->current_area = 0; - zero_memory_area(ps); - r = zero_disk_area(ps, 0); - if (r) - DMWARN("zero_disk_area(0) failed"); - return r; - } - /* - * Sanity checks. - */ - if (ps->version != SNAPSHOT_DISK_VERSION) { - DMWARN("unable to handle snapshot disk version %d", - ps->version); - return -EINVAL; - } - - /* - * Metadata are valid, but snapshot is invalidated - */ - if (!ps->valid) - return 1; - - /* - * Read the metadata. - */ - r = read_exceptions(ps, callback, callback_context); - - return r; -} - -static int persistent_prepare_exception(struct dm_exception_store *store, - struct dm_exception *e) -{ - struct pstore *ps = get_info(store); - uint32_t stride; - chunk_t next_free; - sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev); - - /* Is there enough room ? */ - if (size < ((ps->next_free + 1) * store->chunk_size)) - return -ENOSPC; - - e->new_chunk = ps->next_free; - - /* - * Move onto the next free pending, making sure to take - * into account the location of the metadata chunks. - */ - stride = (ps->exceptions_per_area + 1); - next_free = ++ps->next_free; - if (sector_div(next_free, stride) == 1) - ps->next_free++; - - atomic_inc(&ps->pending_count); - return 0; -} - -static void persistent_commit_exception(struct dm_exception_store *store, - struct dm_exception *e, - void (*callback) (void *, int success), - void *callback_context) -{ - unsigned int i; - struct pstore *ps = get_info(store); - struct core_exception ce; - struct commit_callback *cb; - - ce.old_chunk = e->old_chunk; - ce.new_chunk = e->new_chunk; - write_exception(ps, ps->current_committed++, &ce); - - /* - * Add the callback to the back of the array. This code - * is the only place where the callback array is - * manipulated, and we know that it will never be called - * multiple times concurrently. - */ - cb = ps->callbacks + ps->callback_count++; - cb->callback = callback; - cb->context = callback_context; - - /* - * If there are exceptions in flight and we have not yet - * filled this metadata area there's nothing more to do. - */ - if (!atomic_dec_and_test(&ps->pending_count) && - (ps->current_committed != ps->exceptions_per_area)) - return; - - /* - * If we completely filled the current area, then wipe the next one. 
- */ - if ((ps->current_committed == ps->exceptions_per_area) && - zero_disk_area(ps, ps->current_area + 1)) - ps->valid = 0; - - /* - * Commit exceptions to disk. - */ - if (ps->valid && area_io(ps, WRITE_FLUSH_FUA)) - ps->valid = 0; - - /* - * Advance to the next area if this one is full. - */ - if (ps->current_committed == ps->exceptions_per_area) { - ps->current_committed = 0; - ps->current_area++; - zero_memory_area(ps); - } - - for (i = 0; i < ps->callback_count; i++) { - cb = ps->callbacks + i; - cb->callback(cb->context, ps->valid); - } - - ps->callback_count = 0; -} - -static int persistent_prepare_merge(struct dm_exception_store *store, - chunk_t *last_old_chunk, - chunk_t *last_new_chunk) -{ - struct pstore *ps = get_info(store); - struct core_exception ce; - int nr_consecutive; - int r; - - /* - * When current area is empty, move back to preceding area. - */ - if (!ps->current_committed) { - /* - * Have we finished? - */ - if (!ps->current_area) - return 0; - - ps->current_area--; - r = area_io(ps, READ); - if (r < 0) - return r; - ps->current_committed = ps->exceptions_per_area; - } - - read_exception(ps, ps->current_committed - 1, &ce); - *last_old_chunk = ce.old_chunk; - *last_new_chunk = ce.new_chunk; - - /* - * Find number of consecutive chunks within the current area, - * working backwards. - */ - for (nr_consecutive = 1; nr_consecutive < ps->current_committed; - nr_consecutive++) { - read_exception(ps, ps->current_committed - 1 - nr_consecutive, - &ce); - if (ce.old_chunk != *last_old_chunk - nr_consecutive || - ce.new_chunk != *last_new_chunk - nr_consecutive) - break; - } - - return nr_consecutive; -} - -static int persistent_commit_merge(struct dm_exception_store *store, - int nr_merged) -{ - int r, i; - struct pstore *ps = get_info(store); - - BUG_ON(nr_merged > ps->current_committed); - - for (i = 0; i < nr_merged; i++) - clear_exception(ps, ps->current_committed - 1 - i); - - r = area_io(ps, WRITE_FLUSH_FUA); - if (r < 0) - return r; - - ps->current_committed -= nr_merged; - - /* - * At this stage, only persistent_usage() uses ps->next_free, so - * we make no attempt to keep ps->next_free strictly accurate - * as exceptions may have been committed out-of-order originally. - * Once a snapshot has become merging, we set it to the value it - * would have held had all the exceptions been committed in order. - * - * ps->current_area does not get reduced by prepare_merge() until - * after commit_merge() has removed the nr_merged previous exceptions. 
- */ - ps->next_free = area_location(ps, ps->current_area) + - ps->current_committed + 1; - - return 0; -} - -static void persistent_drop_snapshot(struct dm_exception_store *store) -{ - struct pstore *ps = get_info(store); - - ps->valid = 0; - if (write_header(ps)) - DMWARN("write header failed"); -} - -static int persistent_ctr(struct dm_exception_store *store, - unsigned argc, char **argv) -{ - struct pstore *ps; - - /* allocate the pstore */ - ps = kzalloc(sizeof(*ps), GFP_KERNEL); - if (!ps) - return -ENOMEM; - - ps->store = store; - ps->valid = 1; - ps->version = SNAPSHOT_DISK_VERSION; - ps->area = NULL; - ps->zero_area = NULL; - ps->header_area = NULL; - ps->next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1; /* header and 1st area */ - ps->current_committed = 0; - - ps->callback_count = 0; - atomic_set(&ps->pending_count, 0); - ps->callbacks = NULL; - - ps->metadata_wq = alloc_workqueue("ksnaphd", WQ_MEM_RECLAIM, 0); - if (!ps->metadata_wq) { - kfree(ps); - DMERR("couldn't start header metadata update thread"); - return -ENOMEM; - } - - store->context = ps; - - return 0; -} - -static unsigned persistent_status(struct dm_exception_store *store, - status_type_t status, char *result, - unsigned maxlen) -{ - unsigned sz = 0; - - switch (status) { - case STATUSTYPE_INFO: - break; - case STATUSTYPE_TABLE: - DMEMIT(" P %llu", (unsigned long long)store->chunk_size); - } - - return sz; -} - -static struct dm_exception_store_type _persistent_type = { - .name = "persistent", - .module = THIS_MODULE, - .ctr = persistent_ctr, - .dtr = persistent_dtr, - .read_metadata = persistent_read_metadata, - .prepare_exception = persistent_prepare_exception, - .commit_exception = persistent_commit_exception, - .prepare_merge = persistent_prepare_merge, - .commit_merge = persistent_commit_merge, - .drop_snapshot = persistent_drop_snapshot, - .usage = persistent_usage, - .status = persistent_status, -}; - -static struct dm_exception_store_type _persistent_compat_type = { - .name = "P", - .module = THIS_MODULE, - .ctr = persistent_ctr, - .dtr = persistent_dtr, - .read_metadata = persistent_read_metadata, - .prepare_exception = persistent_prepare_exception, - .commit_exception = persistent_commit_exception, - .prepare_merge = persistent_prepare_merge, - .commit_merge = persistent_commit_merge, - .drop_snapshot = persistent_drop_snapshot, - .usage = persistent_usage, - .status = persistent_status, -}; - -int dm_persistent_snapshot_init(void) -{ - int r; - - r = dm_exception_store_type_register(&_persistent_type); - if (r) { - DMERR("Unable to register persistent exception store type"); - return r; - } - - r = dm_exception_store_type_register(&_persistent_compat_type); - if (r) { - DMERR("Unable to register old-style persistent exception " - "store type"); - dm_exception_store_type_unregister(&_persistent_type); - return r; - } - - return r; -} - -void dm_persistent_snapshot_exit(void) -{ - dm_exception_store_type_unregister(&_persistent_type); - dm_exception_store_type_unregister(&_persistent_compat_type); -} diff --git a/ANDROID_3.4.5/drivers/md/dm-snap-transient.c b/ANDROID_3.4.5/drivers/md/dm-snap-transient.c deleted file mode 100644 index 1ce9a258..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-snap-transient.c +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (C) 2001-2002 Sistina Software (UK) Limited. - * Copyright (C) 2006-2008 Red Hat GmbH - * - * This file is released under the GPL. 
- */ - -#include "dm-exception-store.h" - -#include <linux/mm.h> -#include <linux/pagemap.h> -#include <linux/vmalloc.h> -#include <linux/export.h> -#include <linux/slab.h> -#include <linux/dm-io.h> - -#define DM_MSG_PREFIX "transient snapshot" - -/*----------------------------------------------------------------- - * Implementation of the store for non-persistent snapshots. - *---------------------------------------------------------------*/ -struct transient_c { - sector_t next_free; -}; - -static void transient_dtr(struct dm_exception_store *store) -{ - kfree(store->context); -} - -static int transient_read_metadata(struct dm_exception_store *store, - int (*callback)(void *callback_context, - chunk_t old, chunk_t new), - void *callback_context) -{ - return 0; -} - -static int transient_prepare_exception(struct dm_exception_store *store, - struct dm_exception *e) -{ - struct transient_c *tc = store->context; - sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev); - - if (size < (tc->next_free + store->chunk_size)) - return -1; - - e->new_chunk = sector_to_chunk(store, tc->next_free); - tc->next_free += store->chunk_size; - - return 0; -} - -static void transient_commit_exception(struct dm_exception_store *store, - struct dm_exception *e, - void (*callback) (void *, int success), - void *callback_context) -{ - /* Just succeed */ - callback(callback_context, 1); -} - -static void transient_usage(struct dm_exception_store *store, - sector_t *total_sectors, - sector_t *sectors_allocated, - sector_t *metadata_sectors) -{ - *sectors_allocated = ((struct transient_c *) store->context)->next_free; - *total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev); - *metadata_sectors = 0; -} - -static int transient_ctr(struct dm_exception_store *store, - unsigned argc, char **argv) -{ - struct transient_c *tc; - - tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); - if (!tc) - return -ENOMEM; - - tc->next_free = 0; - store->context = tc; - - return 0; -} - -static unsigned transient_status(struct dm_exception_store *store, - status_type_t status, char *result, - unsigned maxlen) -{ - unsigned sz = 0; - - switch (status) { - case STATUSTYPE_INFO: - break; - case STATUSTYPE_TABLE: - DMEMIT(" N %llu", (unsigned long long)store->chunk_size); - } - - return sz; -} - -static struct dm_exception_store_type _transient_type = { - .name = "transient", - .module = THIS_MODULE, - .ctr = transient_ctr, - .dtr = transient_dtr, - .read_metadata = transient_read_metadata, - .prepare_exception = transient_prepare_exception, - .commit_exception = transient_commit_exception, - .usage = transient_usage, - .status = transient_status, -}; - -static struct dm_exception_store_type _transient_compat_type = { - .name = "N", - .module = THIS_MODULE, - .ctr = transient_ctr, - .dtr = transient_dtr, - .read_metadata = transient_read_metadata, - .prepare_exception = transient_prepare_exception, - .commit_exception = transient_commit_exception, - .usage = transient_usage, - .status = transient_status, -}; - -int dm_transient_snapshot_init(void) -{ - int r; - - r = dm_exception_store_type_register(&_transient_type); - if (r) { - DMWARN("Unable to register transient exception store type"); - return r; - } - - r = dm_exception_store_type_register(&_transient_compat_type); - if (r) { - DMWARN("Unable to register old-style transient " - "exception store type"); - dm_exception_store_type_unregister(&_transient_type); - return r; - } - - return r; -} - -void dm_transient_snapshot_exit(void) -{ - 
dm_exception_store_type_unregister(&_transient_type); - dm_exception_store_type_unregister(&_transient_compat_type); -} diff --git a/ANDROID_3.4.5/drivers/md/dm-snap.c b/ANDROID_3.4.5/drivers/md/dm-snap.c deleted file mode 100644 index 6f758870..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-snap.c +++ /dev/null @@ -1,2329 +0,0 @@ -/* - * dm-snapshot.c - * - * Copyright (C) 2001-2002 Sistina Software (UK) Limited. - * - * This file is released under the GPL. - */ - -#include <linux/blkdev.h> -#include <linux/device-mapper.h> -#include <linux/delay.h> -#include <linux/fs.h> -#include <linux/init.h> -#include <linux/kdev_t.h> -#include <linux/list.h> -#include <linux/mempool.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/log2.h> -#include <linux/dm-kcopyd.h> - -#include "dm-exception-store.h" - -#define DM_MSG_PREFIX "snapshots" - -static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; - -#define dm_target_is_snapshot_merge(ti) \ - ((ti)->type->name == dm_snapshot_merge_target_name) - -/* - * The size of the mempool used to track chunks in use. - */ -#define MIN_IOS 256 - -#define DM_TRACKED_CHUNK_HASH_SIZE 16 -#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ - (DM_TRACKED_CHUNK_HASH_SIZE - 1)) - -struct dm_exception_table { - uint32_t hash_mask; - unsigned hash_shift; - struct list_head *table; -}; - -struct dm_snapshot { - struct rw_semaphore lock; - - struct dm_dev *origin; - struct dm_dev *cow; - - struct dm_target *ti; - - /* List of snapshots per Origin */ - struct list_head list; - - /* - * You can't use a snapshot if this is 0 (e.g. if full). - * A snapshot-merge target never clears this. - */ - int valid; - - /* Origin writes don't trigger exceptions until this is set */ - int active; - - atomic_t pending_exceptions_count; - - mempool_t *pending_pool; - - struct dm_exception_table pending; - struct dm_exception_table complete; - - /* - * pe_lock protects all pending_exception operations and access - * as well as the snapshot_bios list. - */ - spinlock_t pe_lock; - - /* Chunks with outstanding reads */ - spinlock_t tracked_chunk_lock; - mempool_t *tracked_chunk_pool; - struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; - - /* The on disk metadata handler */ - struct dm_exception_store *store; - - struct dm_kcopyd_client *kcopyd_client; - - /* Wait for events based on state_bits */ - unsigned long state_bits; - - /* Range of chunks currently being merged. */ - chunk_t first_merging_chunk; - int num_merging_chunks; - - /* - * The merge operation failed if this flag is set. - * Failure modes are handled as follows: - * - I/O error reading the header - * => don't load the target; abort. - * - Header does not have "valid" flag set - * => use the origin; forget about the snapshot. - * - I/O error when reading exceptions - * => don't load the target; abort. - * (We can't use the intermediate origin state.) - * - I/O error while merging - * => stop merging; set merge_failed; process I/O normally. - */ - int merge_failed; - - /* - * Incoming bios that overlap with chunks being merged must wait - * for them to be committed. - */ - struct bio_list bios_queued_during_merge; -}; - -/* - * state_bits: - * RUNNING_MERGE - Merge operation is in progress. - * SHUTDOWN_MERGE - Set to signal that merge needs to be stopped; - * cleared afterwards. 
- */ -#define RUNNING_MERGE 0 -#define SHUTDOWN_MERGE 1 - -struct dm_dev *dm_snap_origin(struct dm_snapshot *s) -{ - return s->origin; -} -EXPORT_SYMBOL(dm_snap_origin); - -struct dm_dev *dm_snap_cow(struct dm_snapshot *s) -{ - return s->cow; -} -EXPORT_SYMBOL(dm_snap_cow); - -static sector_t chunk_to_sector(struct dm_exception_store *store, - chunk_t chunk) -{ - return chunk << store->chunk_shift; -} - -static int bdev_equal(struct block_device *lhs, struct block_device *rhs) -{ - /* - * There is only ever one instance of a particular block - * device so we can compare pointers safely. - */ - return lhs == rhs; -} - -struct dm_snap_pending_exception { - struct dm_exception e; - - /* - * Origin buffers waiting for this to complete are held - * in a bio list - */ - struct bio_list origin_bios; - struct bio_list snapshot_bios; - - /* Pointer back to snapshot context */ - struct dm_snapshot *snap; - - /* - * 1 indicates the exception has already been sent to - * kcopyd. - */ - int started; - - /* - * For writing a complete chunk, bypassing the copy. - */ - struct bio *full_bio; - bio_end_io_t *full_bio_end_io; - void *full_bio_private; -}; - -/* - * Hash table mapping origin volumes to lists of snapshots and - * a lock to protect it - */ -static struct kmem_cache *exception_cache; -static struct kmem_cache *pending_cache; - -struct dm_snap_tracked_chunk { - struct hlist_node node; - chunk_t chunk; -}; - -static struct kmem_cache *tracked_chunk_cache; - -static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, - chunk_t chunk) -{ - struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, - GFP_NOIO); - unsigned long flags; - - c->chunk = chunk; - - spin_lock_irqsave(&s->tracked_chunk_lock, flags); - hlist_add_head(&c->node, - &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); - spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); - - return c; -} - -static void stop_tracking_chunk(struct dm_snapshot *s, - struct dm_snap_tracked_chunk *c) -{ - unsigned long flags; - - spin_lock_irqsave(&s->tracked_chunk_lock, flags); - hlist_del(&c->node); - spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); - - mempool_free(c, s->tracked_chunk_pool); -} - -static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) -{ - struct dm_snap_tracked_chunk *c; - struct hlist_node *hn; - int found = 0; - - spin_lock_irq(&s->tracked_chunk_lock); - - hlist_for_each_entry(c, hn, - &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) { - if (c->chunk == chunk) { - found = 1; - break; - } - } - - spin_unlock_irq(&s->tracked_chunk_lock); - - return found; -} - -/* - * This conflicting I/O is extremely improbable in the caller, - * so msleep(1) is sufficient and there is no need for a wait queue. - */ -static void __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk) -{ - while (__chunk_is_tracked(s, chunk)) - msleep(1); -} - -/* - * One of these per registered origin, held in the snapshot_origins hash - */ -struct origin { - /* The origin device */ - struct block_device *bdev; - - struct list_head hash_list; - - /* List of snapshots for this origin */ - struct list_head snapshots; -}; - -/* - * Size of the hash table for origin volumes. 
If we make this - * the size of the minors list then it should be nearly perfect - */ -#define ORIGIN_HASH_SIZE 256 -#define ORIGIN_MASK 0xFF -static struct list_head *_origins; -static struct rw_semaphore _origins_lock; - -static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done); -static DEFINE_SPINLOCK(_pending_exceptions_done_spinlock); -static uint64_t _pending_exceptions_done_count; - -static int init_origin_hash(void) -{ - int i; - - _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), - GFP_KERNEL); - if (!_origins) { - DMERR("unable to allocate memory"); - return -ENOMEM; - } - - for (i = 0; i < ORIGIN_HASH_SIZE; i++) - INIT_LIST_HEAD(_origins + i); - init_rwsem(&_origins_lock); - - return 0; -} - -static void exit_origin_hash(void) -{ - kfree(_origins); -} - -static unsigned origin_hash(struct block_device *bdev) -{ - return bdev->bd_dev & ORIGIN_MASK; -} - -static struct origin *__lookup_origin(struct block_device *origin) -{ - struct list_head *ol; - struct origin *o; - - ol = &_origins[origin_hash(origin)]; - list_for_each_entry (o, ol, hash_list) - if (bdev_equal(o->bdev, origin)) - return o; - - return NULL; -} - -static void __insert_origin(struct origin *o) -{ - struct list_head *sl = &_origins[origin_hash(o->bdev)]; - list_add_tail(&o->hash_list, sl); -} - -/* - * _origins_lock must be held when calling this function. - * Returns number of snapshots registered using the supplied cow device, plus: - * snap_src - a snapshot suitable for use as a source of exception handover - * snap_dest - a snapshot capable of receiving exception handover. - * snap_merge - an existing snapshot-merge target linked to the same origin. - * There can be at most one snapshot-merge target. The parameter is optional. - * - * Possible return values and states of snap_src and snap_dest. - * 0: NULL, NULL - first new snapshot - * 1: snap_src, NULL - normal snapshot - * 2: snap_src, snap_dest - waiting for handover - * 2: snap_src, NULL - handed over, waiting for old to be deleted - * 1: NULL, snap_dest - source got destroyed without handover - */ -static int __find_snapshots_sharing_cow(struct dm_snapshot *snap, - struct dm_snapshot **snap_src, - struct dm_snapshot **snap_dest, - struct dm_snapshot **snap_merge) -{ - struct dm_snapshot *s; - struct origin *o; - int count = 0; - int active; - - o = __lookup_origin(snap->origin->bdev); - if (!o) - goto out; - - list_for_each_entry(s, &o->snapshots, list) { - if (dm_target_is_snapshot_merge(s->ti) && snap_merge) - *snap_merge = s; - if (!bdev_equal(s->cow->bdev, snap->cow->bdev)) - continue; - - down_read(&s->lock); - active = s->active; - up_read(&s->lock); - - if (active) { - if (snap_src) - *snap_src = s; - } else if (snap_dest) - *snap_dest = s; - - count++; - } - -out: - return count; -} - -/* - * On success, returns 1 if this snapshot is a handover destination, - * otherwise returns 0. - */ -static int __validate_exception_handover(struct dm_snapshot *snap) -{ - struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; - struct dm_snapshot *snap_merge = NULL; - - /* Does snapshot need exceptions handed over to it? */ - if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, - &snap_merge) == 2) || - snap_dest) { - snap->ti->error = "Snapshot cow pairing for exception " - "table handover failed"; - return -EINVAL; - } - - /* - * If no snap_src was found, snap cannot become a handover - * destination. - */ - if (!snap_src) - return 0; - - /* - * Non-snapshot-merge handover? 
- */ - if (!dm_target_is_snapshot_merge(snap->ti)) - return 1; - - /* - * Do not allow more than one merging snapshot. - */ - if (snap_merge) { - snap->ti->error = "A snapshot is already merging."; - return -EINVAL; - } - - if (!snap_src->store->type->prepare_merge || - !snap_src->store->type->commit_merge) { - snap->ti->error = "Snapshot exception store does not " - "support snapshot-merge."; - return -EINVAL; - } - - return 1; -} - -static void __insert_snapshot(struct origin *o, struct dm_snapshot *s) -{ - struct dm_snapshot *l; - - /* Sort the list according to chunk size, largest-first smallest-last */ - list_for_each_entry(l, &o->snapshots, list) - if (l->store->chunk_size < s->store->chunk_size) - break; - list_add_tail(&s->list, &l->list); -} - -/* - * Make a note of the snapshot and its origin so we can look it - * up when the origin has a write on it. - * - * Also validate snapshot exception store handovers. - * On success, returns 1 if this registration is a handover destination, - * otherwise returns 0. - */ -static int register_snapshot(struct dm_snapshot *snap) -{ - struct origin *o, *new_o = NULL; - struct block_device *bdev = snap->origin->bdev; - int r = 0; - - new_o = kmalloc(sizeof(*new_o), GFP_KERNEL); - if (!new_o) - return -ENOMEM; - - down_write(&_origins_lock); - - r = __validate_exception_handover(snap); - if (r < 0) { - kfree(new_o); - goto out; - } - - o = __lookup_origin(bdev); - if (o) - kfree(new_o); - else { - /* New origin */ - o = new_o; - - /* Initialise the struct */ - INIT_LIST_HEAD(&o->snapshots); - o->bdev = bdev; - - __insert_origin(o); - } - - __insert_snapshot(o, snap); - -out: - up_write(&_origins_lock); - - return r; -} - -/* - * Move snapshot to correct place in list according to chunk size. - */ -static void reregister_snapshot(struct dm_snapshot *s) -{ - struct block_device *bdev = s->origin->bdev; - - down_write(&_origins_lock); - - list_del(&s->list); - __insert_snapshot(__lookup_origin(bdev), s); - - up_write(&_origins_lock); -} - -static void unregister_snapshot(struct dm_snapshot *s) -{ - struct origin *o; - - down_write(&_origins_lock); - o = __lookup_origin(s->origin->bdev); - - list_del(&s->list); - if (o && list_empty(&o->snapshots)) { - list_del(&o->hash_list); - kfree(o); - } - - up_write(&_origins_lock); -} - -/* - * Implementation of the exception hash tables. - * The lowest hash_shift bits of the chunk number are ignored, allowing - * some consecutive chunks to be grouped together. - */ -static int dm_exception_table_init(struct dm_exception_table *et, - uint32_t size, unsigned hash_shift) -{ - unsigned int i; - - et->hash_shift = hash_shift; - et->hash_mask = size - 1; - et->table = dm_vcalloc(size, sizeof(struct list_head)); - if (!et->table) - return -ENOMEM; - - for (i = 0; i < size; i++) - INIT_LIST_HEAD(et->table + i); - - return 0; -} - -static void dm_exception_table_exit(struct dm_exception_table *et, - struct kmem_cache *mem) -{ - struct list_head *slot; - struct dm_exception *ex, *next; - int i, size; - - size = et->hash_mask + 1; - for (i = 0; i < size; i++) { - slot = et->table + i; - - list_for_each_entry_safe (ex, next, slot, hash_list) - kmem_cache_free(mem, ex); - } - - vfree(et->table); -} - -static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk) -{ - return (chunk >> et->hash_shift) & et->hash_mask; -} - -static void dm_remove_exception(struct dm_exception *e) -{ - list_del(&e->hash_list); -} - -/* - * Return the exception data for a sector, or NULL if not - * remapped. 
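 *
 * (A minimal sketch of the lookup below, using only the helpers above:
 *      slot = (chunk >> et->hash_shift) & et->hash_mask;
 *  and an entry matches any chunk in the inclusive range
 *      [e->old_chunk, e->old_chunk + dm_consecutive_chunk_count(e)]
 *  because runs of consecutive chunks share a single exception.)
 *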
- */ -static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et, - chunk_t chunk) -{ - struct list_head *slot; - struct dm_exception *e; - - slot = &et->table[exception_hash(et, chunk)]; - list_for_each_entry (e, slot, hash_list) - if (chunk >= e->old_chunk && - chunk <= e->old_chunk + dm_consecutive_chunk_count(e)) - return e; - - return NULL; -} - -static struct dm_exception *alloc_completed_exception(void) -{ - struct dm_exception *e; - - e = kmem_cache_alloc(exception_cache, GFP_NOIO); - if (!e) - e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); - - return e; -} - -static void free_completed_exception(struct dm_exception *e) -{ - kmem_cache_free(exception_cache, e); -} - -static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s) -{ - struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool, - GFP_NOIO); - - atomic_inc(&s->pending_exceptions_count); - pe->snap = s; - - return pe; -} - -static void free_pending_exception(struct dm_snap_pending_exception *pe) -{ - struct dm_snapshot *s = pe->snap; - - mempool_free(pe, s->pending_pool); - smp_mb__before_atomic_dec(); - atomic_dec(&s->pending_exceptions_count); -} - -static void dm_insert_exception(struct dm_exception_table *eh, - struct dm_exception *new_e) -{ - struct list_head *l; - struct dm_exception *e = NULL; - - l = &eh->table[exception_hash(eh, new_e->old_chunk)]; - - /* Add immediately if this table doesn't support consecutive chunks */ - if (!eh->hash_shift) - goto out; - - /* List is ordered by old_chunk */ - list_for_each_entry_reverse(e, l, hash_list) { - /* Insert after an existing chunk? */ - if (new_e->old_chunk == (e->old_chunk + - dm_consecutive_chunk_count(e) + 1) && - new_e->new_chunk == (dm_chunk_number(e->new_chunk) + - dm_consecutive_chunk_count(e) + 1)) { - dm_consecutive_chunk_count_inc(e); - free_completed_exception(new_e); - return; - } - - /* Insert before an existing chunk? */ - if (new_e->old_chunk == (e->old_chunk - 1) && - new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) { - dm_consecutive_chunk_count_inc(e); - e->old_chunk--; - e->new_chunk--; - free_completed_exception(new_e); - return; - } - - if (new_e->old_chunk > e->old_chunk) - break; - } - -out: - list_add(&new_e->hash_list, e ? &e->hash_list : l); -} - -/* - * Callback used by the exception stores to load exceptions when - * initialising. - */ -static int dm_add_exception(void *context, chunk_t old, chunk_t new) -{ - struct dm_snapshot *s = context; - struct dm_exception *e; - - e = alloc_completed_exception(); - if (!e) - return -ENOMEM; - - e->old_chunk = old; - - /* Consecutive_count is implicitly initialised to zero */ - e->new_chunk = new; - - dm_insert_exception(&s->complete, e); - - return 0; -} - -/* - * Return a minimum chunk size of all snapshots that have the specified origin. - * Return zero if the origin has no snapshots. - */ -static sector_t __minimum_chunk_size(struct origin *o) -{ - struct dm_snapshot *snap; - unsigned chunk_size = 0; - - if (o) - list_for_each_entry(snap, &o->snapshots, list) - chunk_size = min_not_zero(chunk_size, - snap->store->chunk_size); - - return chunk_size; -} - -/* - * Hard coded magic. - */ -static int calc_max_buckets(void) -{ - /* use a fixed size of 2MB */ - unsigned long mem = 2 * 1024 * 1024; - mem /= sizeof(struct list_head); - - return mem; -} - -/* - * Allocate room for a suitable hash table. 
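 *
 * (Rough numbers, assuming a 64-bit build where struct list_head is
 *  16 bytes: calc_max_buckets() above caps the table at
 *  2MB / 16 = 131072 buckets, and the code below clamps the result to
 *  at least 64 and rounds it down to a power of two.)
 *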
- */ -static int init_hash_tables(struct dm_snapshot *s) -{ - sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; - - /* - * Calculate based on the size of the original volume or - * the COW volume... - */ - cow_dev_size = get_dev_size(s->cow->bdev); - origin_dev_size = get_dev_size(s->origin->bdev); - max_buckets = calc_max_buckets(); - - hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift; - hash_size = min(hash_size, max_buckets); - - if (hash_size < 64) - hash_size = 64; - hash_size = rounddown_pow_of_two(hash_size); - if (dm_exception_table_init(&s->complete, hash_size, - DM_CHUNK_CONSECUTIVE_BITS)) - return -ENOMEM; - - /* - * Allocate hash table for in-flight exceptions - * Make this smaller than the real hash table - */ - hash_size >>= 3; - if (hash_size < 64) - hash_size = 64; - - if (dm_exception_table_init(&s->pending, hash_size, 0)) { - dm_exception_table_exit(&s->complete, exception_cache); - return -ENOMEM; - } - - return 0; -} - -static void merge_shutdown(struct dm_snapshot *s) -{ - clear_bit_unlock(RUNNING_MERGE, &s->state_bits); - smp_mb__after_clear_bit(); - wake_up_bit(&s->state_bits, RUNNING_MERGE); -} - -static struct bio *__release_queued_bios_after_merge(struct dm_snapshot *s) -{ - s->first_merging_chunk = 0; - s->num_merging_chunks = 0; - - return bio_list_get(&s->bios_queued_during_merge); -} - -/* - * Remove one chunk from the index of completed exceptions. - */ -static int __remove_single_exception_chunk(struct dm_snapshot *s, - chunk_t old_chunk) -{ - struct dm_exception *e; - - e = dm_lookup_exception(&s->complete, old_chunk); - if (!e) { - DMERR("Corruption detected: exception for block %llu is " - "on disk but not in memory", - (unsigned long long)old_chunk); - return -EINVAL; - } - - /* - * If this is the only chunk using this exception, remove exception. - */ - if (!dm_consecutive_chunk_count(e)) { - dm_remove_exception(e); - free_completed_exception(e); - return 0; - } - - /* - * The chunk may be either at the beginning or the end of a - * group of consecutive chunks - never in the middle. We are - * removing chunks in the opposite order to that in which they - * were added, so this should always be true. - * Decrement the consecutive chunk counter and adjust the - * starting point if necessary. - */ - if (old_chunk == e->old_chunk) { - e->old_chunk++; - e->new_chunk++; - } else if (old_chunk != e->old_chunk + - dm_consecutive_chunk_count(e)) { - DMERR("Attempt to merge block %llu from the " - "middle of a chunk range [%llu - %llu]", - (unsigned long long)old_chunk, - (unsigned long long)e->old_chunk, - (unsigned long long) - e->old_chunk + dm_consecutive_chunk_count(e)); - return -EINVAL; - } - - dm_consecutive_chunk_count_dec(e); - - return 0; -} - -static void flush_bios(struct bio *bio); - -static int remove_single_exception_chunk(struct dm_snapshot *s) -{ - struct bio *b = NULL; - int r; - chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1; - - down_write(&s->lock); - - /* - * Process chunks (and associated exceptions) in reverse order - * so that dm_consecutive_chunk_count_dec() accounting works. 
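 *
 * (Worked example with made-up numbers: if chunks 10..13 were just
 *  merged, first_merging_chunk is 10 and num_merging_chunks is 4, so
 *  old_chunk starts at 13 and the loop walks down to 10; each step
 *  therefore trims only the start or the end of a consecutive group,
 *  never the middle, as __remove_single_exception_chunk() requires.)
 *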
- */ - do { - r = __remove_single_exception_chunk(s, old_chunk); - if (r) - goto out; - } while (old_chunk-- > s->first_merging_chunk); - - b = __release_queued_bios_after_merge(s); - -out: - up_write(&s->lock); - if (b) - flush_bios(b); - - return r; -} - -static int origin_write_extent(struct dm_snapshot *merging_snap, - sector_t sector, unsigned chunk_size); - -static void merge_callback(int read_err, unsigned long write_err, - void *context); - -static uint64_t read_pending_exceptions_done_count(void) -{ - uint64_t pending_exceptions_done; - - spin_lock(&_pending_exceptions_done_spinlock); - pending_exceptions_done = _pending_exceptions_done_count; - spin_unlock(&_pending_exceptions_done_spinlock); - - return pending_exceptions_done; -} - -static void increment_pending_exceptions_done_count(void) -{ - spin_lock(&_pending_exceptions_done_spinlock); - _pending_exceptions_done_count++; - spin_unlock(&_pending_exceptions_done_spinlock); - - wake_up_all(&_pending_exceptions_done); -} - -static void snapshot_merge_next_chunks(struct dm_snapshot *s) -{ - int i, linear_chunks; - chunk_t old_chunk, new_chunk; - struct dm_io_region src, dest; - sector_t io_size; - uint64_t previous_count; - - BUG_ON(!test_bit(RUNNING_MERGE, &s->state_bits)); - if (unlikely(test_bit(SHUTDOWN_MERGE, &s->state_bits))) - goto shut; - - /* - * valid flag never changes during merge, so no lock required. - */ - if (!s->valid) { - DMERR("Snapshot is invalid: can't merge"); - goto shut; - } - - linear_chunks = s->store->type->prepare_merge(s->store, &old_chunk, - &new_chunk); - if (linear_chunks <= 0) { - if (linear_chunks < 0) { - DMERR("Read error in exception store: " - "shutting down merge"); - down_write(&s->lock); - s->merge_failed = 1; - up_write(&s->lock); - } - goto shut; - } - - /* Adjust old_chunk and new_chunk to reflect start of linear region */ - old_chunk = old_chunk + 1 - linear_chunks; - new_chunk = new_chunk + 1 - linear_chunks; - - /* - * Use one (potentially large) I/O to copy all 'linear_chunks' - * from the exception store to the origin - */ - io_size = linear_chunks * s->store->chunk_size; - - dest.bdev = s->origin->bdev; - dest.sector = chunk_to_sector(s->store, old_chunk); - dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector); - - src.bdev = s->cow->bdev; - src.sector = chunk_to_sector(s->store, new_chunk); - src.count = dest.count; - - /* - * Reallocate any exceptions needed in other snapshots then - * wait for the pending exceptions to complete. - * Each time any pending exception (globally on the system) - * completes we are woken and repeat the process to find out - * if we can proceed. While this may not seem a particularly - * efficient algorithm, it is not expected to have any - * significant impact on performance. - */ - previous_count = read_pending_exceptions_done_count(); - while (origin_write_extent(s, dest.sector, io_size)) { - wait_event(_pending_exceptions_done, - (read_pending_exceptions_done_count() != - previous_count)); - /* Retry after the wait, until all exceptions are done. 
*/ - previous_count = read_pending_exceptions_done_count(); - } - - down_write(&s->lock); - s->first_merging_chunk = old_chunk; - s->num_merging_chunks = linear_chunks; - up_write(&s->lock); - - /* Wait until writes to all 'linear_chunks' drain */ - for (i = 0; i < linear_chunks; i++) - __check_for_conflicting_io(s, old_chunk + i); - - dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s); - return; - -shut: - merge_shutdown(s); -} - -static void error_bios(struct bio *bio); - -static void merge_callback(int read_err, unsigned long write_err, void *context) -{ - struct dm_snapshot *s = context; - struct bio *b = NULL; - - if (read_err || write_err) { - if (read_err) - DMERR("Read error: shutting down merge."); - else - DMERR("Write error: shutting down merge."); - goto shut; - } - - if (s->store->type->commit_merge(s->store, - s->num_merging_chunks) < 0) { - DMERR("Write error in exception store: shutting down merge"); - goto shut; - } - - if (remove_single_exception_chunk(s) < 0) - goto shut; - - snapshot_merge_next_chunks(s); - - return; - -shut: - down_write(&s->lock); - s->merge_failed = 1; - b = __release_queued_bios_after_merge(s); - up_write(&s->lock); - error_bios(b); - - merge_shutdown(s); -} - -static void start_merge(struct dm_snapshot *s) -{ - if (!test_and_set_bit(RUNNING_MERGE, &s->state_bits)) - snapshot_merge_next_chunks(s); -} - -static int wait_schedule(void *ptr) -{ - schedule(); - - return 0; -} - -/* - * Stop the merging process and wait until it finishes. - */ -static void stop_merge(struct dm_snapshot *s) -{ - set_bit(SHUTDOWN_MERGE, &s->state_bits); - wait_on_bit(&s->state_bits, RUNNING_MERGE, wait_schedule, - TASK_UNINTERRUPTIBLE); - clear_bit(SHUTDOWN_MERGE, &s->state_bits); -} - -/* - * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size> - */ -static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) -{ - struct dm_snapshot *s; - int i; - int r = -EINVAL; - char *origin_path, *cow_path; - unsigned args_used, num_flush_requests = 1; - fmode_t origin_mode = FMODE_READ; - - if (argc != 4) { - ti->error = "requires exactly 4 arguments"; - r = -EINVAL; - goto bad; - } - - if (dm_target_is_snapshot_merge(ti)) { - num_flush_requests = 2; - origin_mode = FMODE_WRITE; - } - - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (!s) { - ti->error = "Cannot allocate private snapshot structure"; - r = -ENOMEM; - goto bad; - } - - origin_path = argv[0]; - argv++; - argc--; - - r = dm_get_device(ti, origin_path, origin_mode, &s->origin); - if (r) { - ti->error = "Cannot get origin device"; - goto bad_origin; - } - - cow_path = argv[0]; - argv++; - argc--; - - r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow); - if (r) { - ti->error = "Cannot get COW device"; - goto bad_cow; - } - - r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store); - if (r) { - ti->error = "Couldn't create exception store"; - r = -EINVAL; - goto bad_store; - } - - argv += args_used; - argc -= args_used; - - s->ti = ti; - s->valid = 1; - s->active = 0; - atomic_set(&s->pending_exceptions_count, 0); - init_rwsem(&s->lock); - INIT_LIST_HEAD(&s->list); - spin_lock_init(&s->pe_lock); - s->state_bits = 0; - s->merge_failed = 0; - s->first_merging_chunk = 0; - s->num_merging_chunks = 0; - bio_list_init(&s->bios_queued_during_merge); - - /* Allocate hash table for COW data */ - if (init_hash_tables(s)) { - ti->error = "Unable to allocate hash table space"; - r = -ENOMEM; - goto bad_hash_tables; - } - - s->kcopyd_client = 
dm_kcopyd_client_create(); - if (IS_ERR(s->kcopyd_client)) { - r = PTR_ERR(s->kcopyd_client); - ti->error = "Could not create kcopyd client"; - goto bad_kcopyd; - } - - s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); - if (!s->pending_pool) { - ti->error = "Could not allocate mempool for pending exceptions"; - goto bad_pending_pool; - } - - s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, - tracked_chunk_cache); - if (!s->tracked_chunk_pool) { - ti->error = "Could not allocate tracked_chunk mempool for " - "tracking reads"; - goto bad_tracked_chunk_pool; - } - - for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) - INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); - - spin_lock_init(&s->tracked_chunk_lock); - - ti->private = s; - ti->num_flush_requests = num_flush_requests; - - /* Add snapshot to the list of snapshots for this origin */ - /* Exceptions aren't triggered till snapshot_resume() is called */ - r = register_snapshot(s); - if (r == -ENOMEM) { - ti->error = "Snapshot origin struct allocation failed"; - goto bad_load_and_register; - } else if (r < 0) { - /* invalid handover, register_snapshot has set ti->error */ - goto bad_load_and_register; - } - - /* - * Metadata must only be loaded into one table at once, so skip this - * if metadata will be handed over during resume. - * Chunk size will be set during the handover - set it to zero to - * ensure it's ignored. - */ - if (r > 0) { - s->store->chunk_size = 0; - return 0; - } - - r = s->store->type->read_metadata(s->store, dm_add_exception, - (void *)s); - if (r < 0) { - ti->error = "Failed to read snapshot metadata"; - goto bad_read_metadata; - } else if (r > 0) { - s->valid = 0; - DMWARN("Snapshot is marked invalid."); - } - - if (!s->store->chunk_size) { - ti->error = "Chunk size not set"; - goto bad_read_metadata; - } - ti->split_io = s->store->chunk_size; - - return 0; - -bad_read_metadata: - unregister_snapshot(s); - -bad_load_and_register: - mempool_destroy(s->tracked_chunk_pool); - -bad_tracked_chunk_pool: - mempool_destroy(s->pending_pool); - -bad_pending_pool: - dm_kcopyd_client_destroy(s->kcopyd_client); - -bad_kcopyd: - dm_exception_table_exit(&s->pending, pending_cache); - dm_exception_table_exit(&s->complete, exception_cache); - -bad_hash_tables: - dm_exception_store_destroy(s->store); - -bad_store: - dm_put_device(ti, s->cow); - -bad_cow: - dm_put_device(ti, s->origin); - -bad_origin: - kfree(s); - -bad: - return r; -} - -static void __free_exceptions(struct dm_snapshot *s) -{ - dm_kcopyd_client_destroy(s->kcopyd_client); - s->kcopyd_client = NULL; - - dm_exception_table_exit(&s->pending, pending_cache); - dm_exception_table_exit(&s->complete, exception_cache); -} - -static void __handover_exceptions(struct dm_snapshot *snap_src, - struct dm_snapshot *snap_dest) -{ - union { - struct dm_exception_table table_swap; - struct dm_exception_store *store_swap; - } u; - - /* - * Swap all snapshot context information between the two instances. - */ - u.table_swap = snap_dest->complete; - snap_dest->complete = snap_src->complete; - snap_src->complete = u.table_swap; - - u.store_swap = snap_dest->store; - snap_dest->store = snap_src->store; - snap_src->store = u.store_swap; - - snap_dest->store->snap = snap_dest; - snap_src->store->snap = snap_src; - - snap_dest->ti->split_io = snap_dest->store->chunk_size; - snap_dest->valid = snap_src->valid; - - /* - * Set source invalid to ensure it receives no further I/O. 
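 *
 * (This corresponds to the "handed over, waiting for old to be
 *  deleted" state listed above __find_snapshots_sharing_cow(): after
 *  the swap the source is left holding the destination's old store,
 *  so marking it invalid keeps any stray I/O away from the live
 *  exception table.)
 *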
- */ - snap_src->valid = 0; -} - -static void snapshot_dtr(struct dm_target *ti) -{ -#ifdef CONFIG_DM_DEBUG - int i; -#endif - struct dm_snapshot *s = ti->private; - struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; - - down_read(&_origins_lock); - /* Check whether exception handover must be cancelled */ - (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); - if (snap_src && snap_dest && (s == snap_src)) { - down_write(&snap_dest->lock); - snap_dest->valid = 0; - up_write(&snap_dest->lock); - DMERR("Cancelling snapshot handover."); - } - up_read(&_origins_lock); - - if (dm_target_is_snapshot_merge(ti)) - stop_merge(s); - - /* Prevent further origin writes from using this snapshot. */ - /* After this returns there can be no new kcopyd jobs. */ - unregister_snapshot(s); - - while (atomic_read(&s->pending_exceptions_count)) - msleep(1); - /* - * Ensure instructions in mempool_destroy aren't reordered - * before atomic_read. - */ - smp_mb(); - -#ifdef CONFIG_DM_DEBUG - for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) - BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); -#endif - - mempool_destroy(s->tracked_chunk_pool); - - __free_exceptions(s); - - mempool_destroy(s->pending_pool); - - dm_exception_store_destroy(s->store); - - dm_put_device(ti, s->cow); - - dm_put_device(ti, s->origin); - - kfree(s); -} - -/* - * Flush a list of buffers. - */ -static void flush_bios(struct bio *bio) -{ - struct bio *n; - - while (bio) { - n = bio->bi_next; - bio->bi_next = NULL; - generic_make_request(bio); - bio = n; - } -} - -static int do_origin(struct dm_dev *origin, struct bio *bio); - -/* - * Flush a list of buffers. - */ -static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio) -{ - struct bio *n; - int r; - - while (bio) { - n = bio->bi_next; - bio->bi_next = NULL; - r = do_origin(s->origin, bio); - if (r == DM_MAPIO_REMAPPED) - generic_make_request(bio); - bio = n; - } -} - -/* - * Error a list of buffers. - */ -static void error_bios(struct bio *bio) -{ - struct bio *n; - - while (bio) { - n = bio->bi_next; - bio->bi_next = NULL; - bio_io_error(bio); - bio = n; - } -} - -static void __invalidate_snapshot(struct dm_snapshot *s, int err) -{ - if (!s->valid) - return; - - if (err == -EIO) - DMERR("Invalidating snapshot: Error reading/writing."); - else if (err == -ENOMEM) - DMERR("Invalidating snapshot: Unable to allocate exception."); - - if (s->store->type->drop_snapshot) - s->store->type->drop_snapshot(s->store); - - s->valid = 0; - - dm_table_event(s->ti->table); -} - -static void pending_complete(struct dm_snap_pending_exception *pe, int success) -{ - struct dm_exception *e; - struct dm_snapshot *s = pe->snap; - struct bio *origin_bios = NULL; - struct bio *snapshot_bios = NULL; - struct bio *full_bio = NULL; - int error = 0; - - if (!success) { - /* Read/write error - snapshot is unusable */ - down_write(&s->lock); - __invalidate_snapshot(s, -EIO); - error = 1; - goto out; - } - - e = alloc_completed_exception(); - if (!e) { - down_write(&s->lock); - __invalidate_snapshot(s, -ENOMEM); - error = 1; - goto out; - } - *e = pe->e; - - down_write(&s->lock); - if (!s->valid) { - free_completed_exception(e); - error = 1; - goto out; - } - - /* Check for conflicting reads */ - __check_for_conflicting_io(s, pe->e.old_chunk); - - /* - * Add a proper exception, and remove the - * in-flight exception from the list. 
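 *
 * (Ordering note: __check_for_conflicting_io() above has already
 *  drained any tracked reads of this chunk -- those redirected to the
 *  origin in snapshot_map() -- so by the time the completed exception
 *  becomes visible in s->complete, no reader can still be relying on
 *  the origin copy of the data.)
 *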
- */ - dm_insert_exception(&s->complete, e); - -out: - dm_remove_exception(&pe->e); - snapshot_bios = bio_list_get(&pe->snapshot_bios); - origin_bios = bio_list_get(&pe->origin_bios); - full_bio = pe->full_bio; - if (full_bio) { - full_bio->bi_end_io = pe->full_bio_end_io; - full_bio->bi_private = pe->full_bio_private; - } - free_pending_exception(pe); - - increment_pending_exceptions_done_count(); - - up_write(&s->lock); - - /* Submit any pending write bios */ - if (error) { - if (full_bio) - bio_io_error(full_bio); - error_bios(snapshot_bios); - } else { - if (full_bio) - bio_endio(full_bio, 0); - flush_bios(snapshot_bios); - } - - retry_origin_bios(s, origin_bios); -} - -static void commit_callback(void *context, int success) -{ - struct dm_snap_pending_exception *pe = context; - - pending_complete(pe, success); -} - -/* - * Called when the copy I/O has finished. kcopyd actually runs - * this code so don't block. - */ -static void copy_callback(int read_err, unsigned long write_err, void *context) -{ - struct dm_snap_pending_exception *pe = context; - struct dm_snapshot *s = pe->snap; - - if (read_err || write_err) - pending_complete(pe, 0); - - else - /* Update the metadata if we are persistent */ - s->store->type->commit_exception(s->store, &pe->e, - commit_callback, pe); -} - -/* - * Dispatches the copy operation to kcopyd. - */ -static void start_copy(struct dm_snap_pending_exception *pe) -{ - struct dm_snapshot *s = pe->snap; - struct dm_io_region src, dest; - struct block_device *bdev = s->origin->bdev; - sector_t dev_size; - - dev_size = get_dev_size(bdev); - - src.bdev = bdev; - src.sector = chunk_to_sector(s->store, pe->e.old_chunk); - src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector); - - dest.bdev = s->cow->bdev; - dest.sector = chunk_to_sector(s->store, pe->e.new_chunk); - dest.count = src.count; - - /* Hand over to kcopyd */ - dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); -} - -static void full_bio_end_io(struct bio *bio, int error) -{ - void *callback_data = bio->bi_private; - - dm_kcopyd_do_callback(callback_data, 0, error ? 1 : 0); -} - -static void start_full_bio(struct dm_snap_pending_exception *pe, - struct bio *bio) -{ - struct dm_snapshot *s = pe->snap; - void *callback_data; - - pe->full_bio = bio; - pe->full_bio_end_io = bio->bi_end_io; - pe->full_bio_private = bio->bi_private; - - callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, - copy_callback, pe); - - bio->bi_end_io = full_bio_end_io; - bio->bi_private = callback_data; - - generic_make_request(bio); -} - -static struct dm_snap_pending_exception * -__lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk) -{ - struct dm_exception *e = dm_lookup_exception(&s->pending, chunk); - - if (!e) - return NULL; - - return container_of(e, struct dm_snap_pending_exception, e); -} - -/* - * Looks to see if this snapshot already has a pending exception - * for this chunk, otherwise it allocates a new one and inserts - * it into the pending table. - * - * NOTE: a write lock must be held on snap->lock before calling - * this. 
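 *
 * (Callers follow a drop-and-recheck pattern around this: they release
 *  s->lock to call alloc_pending_exception(), which may sleep in the
 *  mempool, retake the lock, re-check the complete table, and only
 *  then call this function; if another thread won the race, the
 *  pre-allocated pe is freed and the existing pending exception is
 *  returned instead.)
 *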
- */ -static struct dm_snap_pending_exception * -__find_pending_exception(struct dm_snapshot *s, - struct dm_snap_pending_exception *pe, chunk_t chunk) -{ - struct dm_snap_pending_exception *pe2; - - pe2 = __lookup_pending_exception(s, chunk); - if (pe2) { - free_pending_exception(pe); - return pe2; - } - - pe->e.old_chunk = chunk; - bio_list_init(&pe->origin_bios); - bio_list_init(&pe->snapshot_bios); - pe->started = 0; - pe->full_bio = NULL; - - if (s->store->type->prepare_exception(s->store, &pe->e)) { - free_pending_exception(pe); - return NULL; - } - - dm_insert_exception(&s->pending, &pe->e); - - return pe; -} - -static void remap_exception(struct dm_snapshot *s, struct dm_exception *e, - struct bio *bio, chunk_t chunk) -{ - bio->bi_bdev = s->cow->bdev; - bio->bi_sector = chunk_to_sector(s->store, - dm_chunk_number(e->new_chunk) + - (chunk - e->old_chunk)) + - (bio->bi_sector & - s->store->chunk_mask); -} - -static int snapshot_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) -{ - struct dm_exception *e; - struct dm_snapshot *s = ti->private; - int r = DM_MAPIO_REMAPPED; - chunk_t chunk; - struct dm_snap_pending_exception *pe = NULL; - - if (bio->bi_rw & REQ_FLUSH) { - bio->bi_bdev = s->cow->bdev; - return DM_MAPIO_REMAPPED; - } - - chunk = sector_to_chunk(s->store, bio->bi_sector); - - /* Full snapshots are not usable */ - /* To get here the table must be live so s->active is always set. */ - if (!s->valid) - return -EIO; - - /* FIXME: should only take write lock if we need - * to copy an exception */ - down_write(&s->lock); - - if (!s->valid) { - r = -EIO; - goto out_unlock; - } - - /* If the block is already remapped - use that, else remap it */ - e = dm_lookup_exception(&s->complete, chunk); - if (e) { - remap_exception(s, e, bio, chunk); - goto out_unlock; - } - - /* - * Write to snapshot - higher level takes care of RW/RO - * flags so we should only get this if we are - * writeable. - */ - if (bio_rw(bio) == WRITE) { - pe = __lookup_pending_exception(s, chunk); - if (!pe) { - up_write(&s->lock); - pe = alloc_pending_exception(s); - down_write(&s->lock); - - if (!s->valid) { - free_pending_exception(pe); - r = -EIO; - goto out_unlock; - } - - e = dm_lookup_exception(&s->complete, chunk); - if (e) { - free_pending_exception(pe); - remap_exception(s, e, bio, chunk); - goto out_unlock; - } - - pe = __find_pending_exception(s, pe, chunk); - if (!pe) { - __invalidate_snapshot(s, -ENOMEM); - r = -EIO; - goto out_unlock; - } - } - - remap_exception(s, &pe->e, bio, chunk); - - r = DM_MAPIO_SUBMITTED; - - if (!pe->started && - bio->bi_size == (s->store->chunk_size << SECTOR_SHIFT)) { - pe->started = 1; - up_write(&s->lock); - start_full_bio(pe, bio); - goto out; - } - - bio_list_add(&pe->snapshot_bios, bio); - - if (!pe->started) { - /* this is protected by snap->lock */ - pe->started = 1; - up_write(&s->lock); - start_copy(pe); - goto out; - } - } else { - bio->bi_bdev = s->origin->bdev; - map_context->ptr = track_chunk(s, chunk); - } - -out_unlock: - up_write(&s->lock); -out: - return r; -} - -/* - * A snapshot-merge target behaves like a combination of a snapshot - * target and a snapshot-origin target. It only generates new - * exceptions in other snapshots and not in the one that is being - * merged. - * - * For each chunk, if there is an existing exception, it is used to - * redirect I/O to the cow device. Otherwise I/O is sent to the origin, - * which in turn might generate exceptions in other snapshots. 
- * If merging is currently taking place on the chunk in question, the - * I/O is deferred by adding it to s->bios_queued_during_merge. - */ -static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) -{ - struct dm_exception *e; - struct dm_snapshot *s = ti->private; - int r = DM_MAPIO_REMAPPED; - chunk_t chunk; - - if (bio->bi_rw & REQ_FLUSH) { - if (!map_context->target_request_nr) - bio->bi_bdev = s->origin->bdev; - else - bio->bi_bdev = s->cow->bdev; - map_context->ptr = NULL; - return DM_MAPIO_REMAPPED; - } - - chunk = sector_to_chunk(s->store, bio->bi_sector); - - down_write(&s->lock); - - /* Full merging snapshots are redirected to the origin */ - if (!s->valid) - goto redirect_to_origin; - - /* If the block is already remapped - use that */ - e = dm_lookup_exception(&s->complete, chunk); - if (e) { - /* Queue writes overlapping with chunks being merged */ - if (bio_rw(bio) == WRITE && - chunk >= s->first_merging_chunk && - chunk < (s->first_merging_chunk + - s->num_merging_chunks)) { - bio->bi_bdev = s->origin->bdev; - bio_list_add(&s->bios_queued_during_merge, bio); - r = DM_MAPIO_SUBMITTED; - goto out_unlock; - } - - remap_exception(s, e, bio, chunk); - - if (bio_rw(bio) == WRITE) - map_context->ptr = track_chunk(s, chunk); - goto out_unlock; - } - -redirect_to_origin: - bio->bi_bdev = s->origin->bdev; - - if (bio_rw(bio) == WRITE) { - up_write(&s->lock); - return do_origin(s->origin, bio); - } - -out_unlock: - up_write(&s->lock); - - return r; -} - -static int snapshot_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) -{ - struct dm_snapshot *s = ti->private; - struct dm_snap_tracked_chunk *c = map_context->ptr; - - if (c) - stop_tracking_chunk(s, c); - - return 0; -} - -static void snapshot_merge_presuspend(struct dm_target *ti) -{ - struct dm_snapshot *s = ti->private; - - stop_merge(s); -} - -static int snapshot_preresume(struct dm_target *ti) -{ - int r = 0; - struct dm_snapshot *s = ti->private; - struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; - - down_read(&_origins_lock); - (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); - if (snap_src && snap_dest) { - down_read(&snap_src->lock); - if (s == snap_src) { - DMERR("Unable to resume snapshot source until " - "handover completes."); - r = -EINVAL; - } else if (!dm_suspended(snap_src->ti)) { - DMERR("Unable to perform snapshot handover until " - "source is suspended."); - r = -EINVAL; - } - up_read(&snap_src->lock); - } - up_read(&_origins_lock); - - return r; -} - -static void snapshot_resume(struct dm_target *ti) -{ - struct dm_snapshot *s = ti->private; - struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; - - down_read(&_origins_lock); - (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); - if (snap_src && snap_dest) { - down_write(&snap_src->lock); - down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING); - __handover_exceptions(snap_src, snap_dest); - up_write(&snap_dest->lock); - up_write(&snap_src->lock); - } - up_read(&_origins_lock); - - /* Now we have correct chunk size, reregister */ - reregister_snapshot(s); - - down_write(&s->lock); - s->active = 1; - up_write(&s->lock); -} - -static sector_t get_origin_minimum_chunksize(struct block_device *bdev) -{ - sector_t min_chunksize; - - down_read(&_origins_lock); - min_chunksize = __minimum_chunk_size(__lookup_origin(bdev)); - up_read(&_origins_lock); - - return min_chunksize; -} - -static void snapshot_merge_resume(struct dm_target *ti) 
-{ - struct dm_snapshot *s = ti->private; - - /* - * Handover exceptions from existing snapshot. - */ - snapshot_resume(ti); - - /* - * snapshot-merge acts as an origin, so set ti->split_io - */ - ti->split_io = get_origin_minimum_chunksize(s->origin->bdev); - - start_merge(s); -} - -static int snapshot_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) -{ - unsigned sz = 0; - struct dm_snapshot *snap = ti->private; - - switch (type) { - case STATUSTYPE_INFO: - - down_write(&snap->lock); - - if (!snap->valid) - DMEMIT("Invalid"); - else if (snap->merge_failed) - DMEMIT("Merge failed"); - else { - if (snap->store->type->usage) { - sector_t total_sectors, sectors_allocated, - metadata_sectors; - snap->store->type->usage(snap->store, - &total_sectors, - §ors_allocated, - &metadata_sectors); - DMEMIT("%llu/%llu %llu", - (unsigned long long)sectors_allocated, - (unsigned long long)total_sectors, - (unsigned long long)metadata_sectors); - } - else - DMEMIT("Unknown"); - } - - up_write(&snap->lock); - - break; - - case STATUSTYPE_TABLE: - /* - * kdevname returns a static pointer so we need - * to make private copies if the output is to - * make sense. - */ - DMEMIT("%s %s", snap->origin->name, snap->cow->name); - snap->store->type->status(snap->store, type, result + sz, - maxlen - sz); - break; - } - - return 0; -} - -static int snapshot_iterate_devices(struct dm_target *ti, - iterate_devices_callout_fn fn, void *data) -{ - struct dm_snapshot *snap = ti->private; - int r; - - r = fn(ti, snap->origin, 0, ti->len, data); - - if (!r) - r = fn(ti, snap->cow, 0, get_dev_size(snap->cow->bdev), data); - - return r; -} - - -/*----------------------------------------------------------------- - * Origin methods - *---------------------------------------------------------------*/ - -/* - * If no exceptions need creating, DM_MAPIO_REMAPPED is returned and any - * supplied bio was ignored. The caller may submit it immediately. - * (No remapping actually occurs as the origin is always a direct linear - * map.) - * - * If further exceptions are required, DM_MAPIO_SUBMITTED is returned - * and any supplied bio is added to a list to be submitted once all - * the necessary exceptions exist. - */ -static int __origin_write(struct list_head *snapshots, sector_t sector, - struct bio *bio) -{ - int r = DM_MAPIO_REMAPPED; - struct dm_snapshot *snap; - struct dm_exception *e; - struct dm_snap_pending_exception *pe; - struct dm_snap_pending_exception *pe_to_start_now = NULL; - struct dm_snap_pending_exception *pe_to_start_last = NULL; - chunk_t chunk; - - /* Do all the snapshots on this origin */ - list_for_each_entry (snap, snapshots, list) { - /* - * Don't make new exceptions in a merging snapshot - * because it has effectively been deleted - */ - if (dm_target_is_snapshot_merge(snap->ti)) - continue; - - down_write(&snap->lock); - - /* Only deal with valid and active snapshots */ - if (!snap->valid || !snap->active) - goto next_snapshot; - - /* Nothing to do if writing beyond end of snapshot */ - if (sector >= dm_table_get_size(snap->ti->table)) - goto next_snapshot; - - /* - * Remember, different snapshots can have - * different chunk sizes. - */ - chunk = sector_to_chunk(snap->store, sector); - - /* - * Check exception table to see if block - * is already remapped in this snapshot - * and trigger an exception if not. 
- */ - e = dm_lookup_exception(&snap->complete, chunk); - if (e) - goto next_snapshot; - - pe = __lookup_pending_exception(snap, chunk); - if (!pe) { - up_write(&snap->lock); - pe = alloc_pending_exception(snap); - down_write(&snap->lock); - - if (!snap->valid) { - free_pending_exception(pe); - goto next_snapshot; - } - - e = dm_lookup_exception(&snap->complete, chunk); - if (e) { - free_pending_exception(pe); - goto next_snapshot; - } - - pe = __find_pending_exception(snap, pe, chunk); - if (!pe) { - __invalidate_snapshot(snap, -ENOMEM); - goto next_snapshot; - } - } - - r = DM_MAPIO_SUBMITTED; - - /* - * If an origin bio was supplied, queue it to wait for the - * completion of this exception, and start this one last, - * at the end of the function. - */ - if (bio) { - bio_list_add(&pe->origin_bios, bio); - bio = NULL; - - if (!pe->started) { - pe->started = 1; - pe_to_start_last = pe; - } - } - - if (!pe->started) { - pe->started = 1; - pe_to_start_now = pe; - } - -next_snapshot: - up_write(&snap->lock); - - if (pe_to_start_now) { - start_copy(pe_to_start_now); - pe_to_start_now = NULL; - } - } - - /* - * Submit the exception against which the bio is queued last, - * to give the other exceptions a head start. - */ - if (pe_to_start_last) - start_copy(pe_to_start_last); - - return r; -} - -/* - * Called on a write from the origin driver. - */ -static int do_origin(struct dm_dev *origin, struct bio *bio) -{ - struct origin *o; - int r = DM_MAPIO_REMAPPED; - - down_read(&_origins_lock); - o = __lookup_origin(origin->bdev); - if (o) - r = __origin_write(&o->snapshots, bio->bi_sector, bio); - up_read(&_origins_lock); - - return r; -} - -/* - * Trigger exceptions in all non-merging snapshots. - * - * The chunk size of the merging snapshot may be larger than the chunk - * size of some other snapshot so we may need to reallocate multiple - * chunks in other snapshots. - * - * We scan all the overlapping exceptions in the other snapshots. - * Returns 1 if anything was reallocated and must be waited for, - * otherwise returns 0. - * - * size must be a multiple of merging_snap's chunk_size. - */ -static int origin_write_extent(struct dm_snapshot *merging_snap, - sector_t sector, unsigned size) -{ - int must_wait = 0; - sector_t n; - struct origin *o; - - /* - * The origin's __minimum_chunk_size() got stored in split_io - * by snapshot_merge_resume(). - */ - down_read(&_origins_lock); - o = __lookup_origin(merging_snap->origin->bdev); - for (n = 0; n < size; n += merging_snap->ti->split_io) - if (__origin_write(&o->snapshots, sector + n, NULL) == - DM_MAPIO_SUBMITTED) - must_wait = 1; - up_read(&_origins_lock); - - return must_wait; -} - -/* - * Origin: maps a linear range of a device, with hooks for snapshotting. - */ - -/* - * Construct an origin mapping: <dev_path> - * The context for an origin is merely a 'struct dm_dev *' - * pointing to the real device. 
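 *
 * (Hypothetical user-space sketch, matching the argument counts in
 *  origin_ctr() below and snapshot_ctr() above; the device names,
 *  sector count placeholder and 32-sector chunk size are made up:
 *      dmsetup create base-origin --table \
 *          "0 <origin_sectors> snapshot-origin /dev/vg/base"
 *      dmsetup create snap1 --table \
 *          "0 <origin_sectors> snapshot /dev/vg/base /dev/vg/cow P 32"
 *  )
 *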
- */ -static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) -{ - int r; - struct dm_dev *dev; - - if (argc != 1) { - ti->error = "origin: incorrect number of arguments"; - return -EINVAL; - } - - r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dev); - if (r) { - ti->error = "Cannot get target device"; - return r; - } - - ti->private = dev; - ti->num_flush_requests = 1; - - return 0; -} - -static void origin_dtr(struct dm_target *ti) -{ - struct dm_dev *dev = ti->private; - dm_put_device(ti, dev); -} - -static int origin_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) -{ - struct dm_dev *dev = ti->private; - bio->bi_bdev = dev->bdev; - - if (bio->bi_rw & REQ_FLUSH) - return DM_MAPIO_REMAPPED; - - /* Only tell snapshots if this is a write */ - return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED; -} - -/* - * Set the target "split_io" field to the minimum of all the snapshots' - * chunk sizes. - */ -static void origin_resume(struct dm_target *ti) -{ - struct dm_dev *dev = ti->private; - - ti->split_io = get_origin_minimum_chunksize(dev->bdev); -} - -static int origin_status(struct dm_target *ti, status_type_t type, char *result, - unsigned int maxlen) -{ - struct dm_dev *dev = ti->private; - - switch (type) { - case STATUSTYPE_INFO: - result[0] = '\0'; - break; - - case STATUSTYPE_TABLE: - snprintf(result, maxlen, "%s", dev->name); - break; - } - - return 0; -} - -static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct dm_dev *dev = ti->private; - struct request_queue *q = bdev_get_queue(dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = dev->bdev; - bvm->bi_sector = bvm->bi_sector; - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - -static int origin_iterate_devices(struct dm_target *ti, - iterate_devices_callout_fn fn, void *data) -{ - struct dm_dev *dev = ti->private; - - return fn(ti, dev, 0, ti->len, data); -} - -static struct target_type origin_target = { - .name = "snapshot-origin", - .version = {1, 7, 1}, - .module = THIS_MODULE, - .ctr = origin_ctr, - .dtr = origin_dtr, - .map = origin_map, - .resume = origin_resume, - .status = origin_status, - .merge = origin_merge, - .iterate_devices = origin_iterate_devices, -}; - -static struct target_type snapshot_target = { - .name = "snapshot", - .version = {1, 10, 0}, - .module = THIS_MODULE, - .ctr = snapshot_ctr, - .dtr = snapshot_dtr, - .map = snapshot_map, - .end_io = snapshot_end_io, - .preresume = snapshot_preresume, - .resume = snapshot_resume, - .status = snapshot_status, - .iterate_devices = snapshot_iterate_devices, -}; - -static struct target_type merge_target = { - .name = dm_snapshot_merge_target_name, - .version = {1, 1, 0}, - .module = THIS_MODULE, - .ctr = snapshot_ctr, - .dtr = snapshot_dtr, - .map = snapshot_merge_map, - .end_io = snapshot_end_io, - .presuspend = snapshot_merge_presuspend, - .preresume = snapshot_preresume, - .resume = snapshot_merge_resume, - .status = snapshot_status, - .iterate_devices = snapshot_iterate_devices, -}; - -static int __init dm_snapshot_init(void) -{ - int r; - - r = dm_exception_store_init(); - if (r) { - DMERR("Failed to initialize exception stores"); - return r; - } - - r = dm_register_target(&snapshot_target); - if (r < 0) { - DMERR("snapshot target register failed %d", r); - goto bad_register_snapshot_target; - } - - r = dm_register_target(&origin_target); - if (r < 0) { - DMERR("Origin target 
register failed %d", r); - goto bad_register_origin_target; - } - - r = dm_register_target(&merge_target); - if (r < 0) { - DMERR("Merge target register failed %d", r); - goto bad_register_merge_target; - } - - r = init_origin_hash(); - if (r) { - DMERR("init_origin_hash failed."); - goto bad_origin_hash; - } - - exception_cache = KMEM_CACHE(dm_exception, 0); - if (!exception_cache) { - DMERR("Couldn't create exception cache."); - r = -ENOMEM; - goto bad_exception_cache; - } - - pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0); - if (!pending_cache) { - DMERR("Couldn't create pending cache."); - r = -ENOMEM; - goto bad_pending_cache; - } - - tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0); - if (!tracked_chunk_cache) { - DMERR("Couldn't create cache to track chunks in use."); - r = -ENOMEM; - goto bad_tracked_chunk_cache; - } - - return 0; - -bad_tracked_chunk_cache: - kmem_cache_destroy(pending_cache); -bad_pending_cache: - kmem_cache_destroy(exception_cache); -bad_exception_cache: - exit_origin_hash(); -bad_origin_hash: - dm_unregister_target(&merge_target); -bad_register_merge_target: - dm_unregister_target(&origin_target); -bad_register_origin_target: - dm_unregister_target(&snapshot_target); -bad_register_snapshot_target: - dm_exception_store_exit(); - - return r; -} - -static void __exit dm_snapshot_exit(void) -{ - dm_unregister_target(&snapshot_target); - dm_unregister_target(&origin_target); - dm_unregister_target(&merge_target); - - exit_origin_hash(); - kmem_cache_destroy(pending_cache); - kmem_cache_destroy(exception_cache); - kmem_cache_destroy(tracked_chunk_cache); - - dm_exception_store_exit(); -} - -/* Module hooks */ -module_init(dm_snapshot_init); -module_exit(dm_snapshot_exit); - -MODULE_DESCRIPTION(DM_NAME " snapshot target"); -MODULE_AUTHOR("Joe Thornber"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm-stripe.c b/ANDROID_3.4.5/drivers/md/dm-stripe.c deleted file mode 100644 index 35c94ff2..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-stripe.c +++ /dev/null @@ -1,450 +0,0 @@ -/* - * Copyright (C) 2001-2003 Sistina Software (UK) Limited. - * - * This file is released under the GPL. - */ - -#include <linux/device-mapper.h> - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/blkdev.h> -#include <linux/bio.h> -#include <linux/slab.h> -#include <linux/log2.h> - -#define DM_MSG_PREFIX "striped" -#define DM_IO_ERROR_THRESHOLD 15 - -struct stripe { - struct dm_dev *dev; - sector_t physical_start; - - atomic_t error_count; -}; - -struct stripe_c { - uint32_t stripes; - int stripes_shift; - sector_t stripes_mask; - - /* The size of this target / num. stripes */ - sector_t stripe_width; - - /* stripe chunk size */ - uint32_t chunk_shift; - sector_t chunk_mask; - - /* Needed for handling events */ - struct dm_target *ti; - - /* Work struct used for triggering events*/ - struct work_struct trigger_event; - - struct stripe stripe[0]; -}; - -/* - * An event is triggered whenever a drive - * drops out of a stripe volume. 
- */ -static void trigger_event(struct work_struct *work) -{ - struct stripe_c *sc = container_of(work, struct stripe_c, - trigger_event); - dm_table_event(sc->ti->table); -} - -static inline struct stripe_c *alloc_context(unsigned int stripes) -{ - size_t len; - - if (dm_array_too_big(sizeof(struct stripe_c), sizeof(struct stripe), - stripes)) - return NULL; - - len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes); - - return kmalloc(len, GFP_KERNEL); -} - -/* - * Parse a single <dev> <sector> pair - */ -static int get_stripe(struct dm_target *ti, struct stripe_c *sc, - unsigned int stripe, char **argv) -{ - unsigned long long start; - char dummy; - - if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1) - return -EINVAL; - - if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), - &sc->stripe[stripe].dev)) - return -ENXIO; - - sc->stripe[stripe].physical_start = start; - - return 0; -} - -/* - * Construct a striped mapping. - * <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+ - */ -static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) -{ - struct stripe_c *sc; - sector_t width; - uint32_t stripes; - uint32_t chunk_size; - char *end; - int r; - unsigned int i; - - if (argc < 2) { - ti->error = "Not enough arguments"; - return -EINVAL; - } - - stripes = simple_strtoul(argv[0], &end, 10); - if (!stripes || *end) { - ti->error = "Invalid stripe count"; - return -EINVAL; - } - - chunk_size = simple_strtoul(argv[1], &end, 10); - if (*end) { - ti->error = "Invalid chunk_size"; - return -EINVAL; - } - - /* - * chunk_size is a power of two - */ - if (!is_power_of_2(chunk_size) || - (chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) { - ti->error = "Invalid chunk size"; - return -EINVAL; - } - - if (ti->len & (chunk_size - 1)) { - ti->error = "Target length not divisible by " - "chunk size"; - return -EINVAL; - } - - width = ti->len; - if (sector_div(width, stripes)) { - ti->error = "Target length not divisible by " - "number of stripes"; - return -EINVAL; - } - - /* - * Do we have enough arguments for that many stripes ? - */ - if (argc != (2 + 2 * stripes)) { - ti->error = "Not enough destinations " - "specified"; - return -EINVAL; - } - - sc = alloc_context(stripes); - if (!sc) { - ti->error = "Memory allocation for striped context " - "failed"; - return -ENOMEM; - } - - INIT_WORK(&sc->trigger_event, trigger_event); - - /* Set pointer to dm target; used in trigger_event */ - sc->ti = ti; - sc->stripes = stripes; - sc->stripe_width = width; - - if (stripes & (stripes - 1)) - sc->stripes_shift = -1; - else { - sc->stripes_shift = ffs(stripes) - 1; - sc->stripes_mask = ((sector_t) stripes) - 1; - } - - ti->split_io = chunk_size; - ti->num_flush_requests = stripes; - ti->num_discard_requests = stripes; - - sc->chunk_shift = ffs(chunk_size) - 1; - sc->chunk_mask = ((sector_t) chunk_size) - 1; - - /* - * Get the stripe destinations. 
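 *
 * (Worked example for the shift/mask values computed above, with
 *  made-up numbers: 4 stripes and a 128-sector chunk give
 *  chunk_shift = 7, chunk_mask = 127, stripes_shift = 2, so in
 *  stripe_map_sector() a target offset of 1000 sectors maps to
 *      chunk = 1000 >> 7 = 7, stripe = 7 & 3 = 3,
 *      result = ((7 >> 2) << 7) | (1000 & 127) = 232
 *  i.e. sector 232 within stripe 3, before physical_start is added.)
 *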
- */ - for (i = 0; i < stripes; i++) { - argv += 2; - - r = get_stripe(ti, sc, i, argv); - if (r < 0) { - ti->error = "Couldn't parse stripe destination"; - while (i--) - dm_put_device(ti, sc->stripe[i].dev); - kfree(sc); - return r; - } - atomic_set(&(sc->stripe[i].error_count), 0); - } - - ti->private = sc; - - return 0; -} - -static void stripe_dtr(struct dm_target *ti) -{ - unsigned int i; - struct stripe_c *sc = (struct stripe_c *) ti->private; - - for (i = 0; i < sc->stripes; i++) - dm_put_device(ti, sc->stripe[i].dev); - - flush_work_sync(&sc->trigger_event); - kfree(sc); -} - -static void stripe_map_sector(struct stripe_c *sc, sector_t sector, - uint32_t *stripe, sector_t *result) -{ - sector_t offset = dm_target_offset(sc->ti, sector); - sector_t chunk = offset >> sc->chunk_shift; - - if (sc->stripes_shift < 0) - *stripe = sector_div(chunk, sc->stripes); - else { - *stripe = chunk & sc->stripes_mask; - chunk >>= sc->stripes_shift; - } - - *result = (chunk << sc->chunk_shift) | (offset & sc->chunk_mask); -} - -static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, - uint32_t target_stripe, sector_t *result) -{ - uint32_t stripe; - - stripe_map_sector(sc, sector, &stripe, result); - if (stripe == target_stripe) - return; - *result &= ~sc->chunk_mask; /* round down */ - if (target_stripe < stripe) - *result += sc->chunk_mask + 1; /* next chunk */ -} - -static int stripe_map_discard(struct stripe_c *sc, struct bio *bio, - uint32_t target_stripe) -{ - sector_t begin, end; - - stripe_map_range_sector(sc, bio->bi_sector, target_stripe, &begin); - stripe_map_range_sector(sc, bio->bi_sector + bio_sectors(bio), - target_stripe, &end); - if (begin < end) { - bio->bi_bdev = sc->stripe[target_stripe].dev->bdev; - bio->bi_sector = begin + sc->stripe[target_stripe].physical_start; - bio->bi_size = to_bytes(end - begin); - return DM_MAPIO_REMAPPED; - } else { - /* The range doesn't map to the target stripe */ - bio_endio(bio, 0); - return DM_MAPIO_SUBMITTED; - } -} - -static int stripe_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) -{ - struct stripe_c *sc = ti->private; - uint32_t stripe; - unsigned target_request_nr; - - if (bio->bi_rw & REQ_FLUSH) { - target_request_nr = map_context->target_request_nr; - BUG_ON(target_request_nr >= sc->stripes); - bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; - return DM_MAPIO_REMAPPED; - } - if (unlikely(bio->bi_rw & REQ_DISCARD)) { - target_request_nr = map_context->target_request_nr; - BUG_ON(target_request_nr >= sc->stripes); - return stripe_map_discard(sc, bio, target_request_nr); - } - - stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector); - - bio->bi_sector += sc->stripe[stripe].physical_start; - bio->bi_bdev = sc->stripe[stripe].dev->bdev; - - return DM_MAPIO_REMAPPED; -} - -/* - * Stripe status: - * - * INFO - * #stripes [stripe_name <stripe_name>] [group word count] - * [error count 'A|D' <error count 'A|D'>] - * - * TABLE - * #stripes [stripe chunk size] - * [stripe_name physical_start <stripe_name physical_start>] - * - */ - -static int stripe_status(struct dm_target *ti, - status_type_t type, char *result, unsigned int maxlen) -{ - struct stripe_c *sc = (struct stripe_c *) ti->private; - char buffer[sc->stripes + 1]; - unsigned int sz = 0; - unsigned int i; - - switch (type) { - case STATUSTYPE_INFO: - DMEMIT("%d ", sc->stripes); - for (i = 0; i < sc->stripes; i++) { - DMEMIT("%s ", sc->stripe[i].dev->name); - buffer[i] = atomic_read(&(sc->stripe[i].error_count)) ? 
- 'D' : 'A'; - } - buffer[i] = '\0'; - DMEMIT("1 %s", buffer); - break; - - case STATUSTYPE_TABLE: - DMEMIT("%d %llu", sc->stripes, - (unsigned long long)sc->chunk_mask + 1); - for (i = 0; i < sc->stripes; i++) - DMEMIT(" %s %llu", sc->stripe[i].dev->name, - (unsigned long long)sc->stripe[i].physical_start); - break; - } - return 0; -} - -static int stripe_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) -{ - unsigned i; - char major_minor[16]; - struct stripe_c *sc = ti->private; - - if (!error) - return 0; /* I/O complete */ - - if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD)) - return error; - - if (error == -EOPNOTSUPP) - return error; - - memset(major_minor, 0, sizeof(major_minor)); - sprintf(major_minor, "%d:%d", - MAJOR(disk_devt(bio->bi_bdev->bd_disk)), - MINOR(disk_devt(bio->bi_bdev->bd_disk))); - - /* - * Test to see which stripe drive triggered the event - * and increment error count for all stripes on that device. - * If the error count for a given device exceeds the threshold - * value we will no longer trigger any further events. - */ - for (i = 0; i < sc->stripes; i++) - if (!strcmp(sc->stripe[i].dev->name, major_minor)) { - atomic_inc(&(sc->stripe[i].error_count)); - if (atomic_read(&(sc->stripe[i].error_count)) < - DM_IO_ERROR_THRESHOLD) - schedule_work(&sc->trigger_event); - } - - return error; -} - -static int stripe_iterate_devices(struct dm_target *ti, - iterate_devices_callout_fn fn, void *data) -{ - struct stripe_c *sc = ti->private; - int ret = 0; - unsigned i = 0; - - do { - ret = fn(ti, sc->stripe[i].dev, - sc->stripe[i].physical_start, - sc->stripe_width, data); - } while (!ret && ++i < sc->stripes); - - return ret; -} - -static void stripe_io_hints(struct dm_target *ti, - struct queue_limits *limits) -{ - struct stripe_c *sc = ti->private; - unsigned chunk_size = (sc->chunk_mask + 1) << 9; - - blk_limits_io_min(limits, chunk_size); - blk_limits_io_opt(limits, chunk_size * sc->stripes); -} - -static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct stripe_c *sc = ti->private; - sector_t bvm_sector = bvm->bi_sector; - uint32_t stripe; - struct request_queue *q; - - stripe_map_sector(sc, bvm_sector, &stripe, &bvm_sector); - - q = bdev_get_queue(sc->stripe[stripe].dev->bdev); - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = sc->stripe[stripe].dev->bdev; - bvm->bi_sector = sc->stripe[stripe].physical_start + bvm_sector; - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - -static struct target_type stripe_target = { - .name = "striped", - .version = {1, 4, 0}, - .module = THIS_MODULE, - .ctr = stripe_ctr, - .dtr = stripe_dtr, - .map = stripe_map, - .end_io = stripe_end_io, - .status = stripe_status, - .iterate_devices = stripe_iterate_devices, - .io_hints = stripe_io_hints, - .merge = stripe_merge, -}; - -int __init dm_stripe_init(void) -{ - int r; - - r = dm_register_target(&stripe_target); - if (r < 0) { - DMWARN("target registration failed"); - return r; - } - - return r; -} - -void dm_stripe_exit(void) -{ - dm_unregister_target(&stripe_target); -} diff --git a/ANDROID_3.4.5/drivers/md/dm-sysfs.c b/ANDROID_3.4.5/drivers/md/dm-sysfs.c deleted file mode 100644 index 84d2b91e..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-sysfs.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (C) 2008 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. 
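 *
 * (Orientation note: the three read-only attributes defined in this
 *  file are registered in a kobject named "dm" under the disk's sysfs
 *  directory, so they appear as e.g. /sys/block/dm-0/dm/name,
 *  /sys/block/dm-0/dm/uuid and /sys/block/dm-0/dm/suspended.)
 *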
- */ - -#include <linux/sysfs.h> -#include <linux/dm-ioctl.h> -#include "dm.h" - -struct dm_sysfs_attr { - struct attribute attr; - ssize_t (*show)(struct mapped_device *, char *); - ssize_t (*store)(struct mapped_device *, char *); -}; - -#define DM_ATTR_RO(_name) \ -struct dm_sysfs_attr dm_attr_##_name = \ - __ATTR(_name, S_IRUGO, dm_attr_##_name##_show, NULL) - -static ssize_t dm_attr_show(struct kobject *kobj, struct attribute *attr, - char *page) -{ - struct dm_sysfs_attr *dm_attr; - struct mapped_device *md; - ssize_t ret; - - dm_attr = container_of(attr, struct dm_sysfs_attr, attr); - if (!dm_attr->show) - return -EIO; - - md = dm_get_from_kobject(kobj); - if (!md) - return -EINVAL; - - ret = dm_attr->show(md, page); - dm_put(md); - - return ret; -} - -static ssize_t dm_attr_name_show(struct mapped_device *md, char *buf) -{ - if (dm_copy_name_and_uuid(md, buf, NULL)) - return -EIO; - - strcat(buf, "\n"); - return strlen(buf); -} - -static ssize_t dm_attr_uuid_show(struct mapped_device *md, char *buf) -{ - if (dm_copy_name_and_uuid(md, NULL, buf)) - return -EIO; - - strcat(buf, "\n"); - return strlen(buf); -} - -static ssize_t dm_attr_suspended_show(struct mapped_device *md, char *buf) -{ - sprintf(buf, "%d\n", dm_suspended_md(md)); - - return strlen(buf); -} - -static DM_ATTR_RO(name); -static DM_ATTR_RO(uuid); -static DM_ATTR_RO(suspended); - -static struct attribute *dm_attrs[] = { - &dm_attr_name.attr, - &dm_attr_uuid.attr, - &dm_attr_suspended.attr, - NULL, -}; - -static const struct sysfs_ops dm_sysfs_ops = { - .show = dm_attr_show, -}; - -/* - * dm kobject is embedded in mapped_device structure - * no need to define release function here - */ -static struct kobj_type dm_ktype = { - .sysfs_ops = &dm_sysfs_ops, - .default_attrs = dm_attrs, -}; - -/* - * Initialize kobj - * because nobody using md yet, no need to call explicit dm_get/put - */ -int dm_sysfs_init(struct mapped_device *md) -{ - return kobject_init_and_add(dm_kobject(md), &dm_ktype, - &disk_to_dev(dm_disk(md))->kobj, - "%s", "dm"); -} - -/* - * Remove kobj, called after all references removed - */ -void dm_sysfs_exit(struct mapped_device *md) -{ - kobject_put(dm_kobject(md)); -} diff --git a/ANDROID_3.4.5/drivers/md/dm-table.c b/ANDROID_3.4.5/drivers/md/dm-table.c deleted file mode 100644 index 2e227fbf..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-table.c +++ /dev/null @@ -1,1577 +0,0 @@ -/* - * Copyright (C) 2001 Sistina Software (UK) Limited. - * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. - */ - -#include "dm.h" - -#include <linux/module.h> -#include <linux/vmalloc.h> -#include <linux/blkdev.h> -#include <linux/namei.h> -#include <linux/ctype.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <linux/interrupt.h> -#include <linux/mutex.h> -#include <linux/delay.h> -#include <linux/atomic.h> - -#define DM_MSG_PREFIX "table" - -#define MAX_DEPTH 16 -#define NODE_SIZE L1_CACHE_BYTES -#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) -#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) - -/* - * The table has always exactly one reference from either mapped_device->map - * or hash_cell->new_map. This reference is not counted in table->holders. - * A pair of dm_create_table/dm_destroy_table functions is used for table - * creation/destruction. - * - * Temporary references from the other code increase table->holders. A pair - * of dm_table_get/dm_table_put functions is used to manipulate it. 
- * - * When the table is about to be destroyed, we wait for table->holders to - * drop to zero. - */ - -struct dm_table { - struct mapped_device *md; - atomic_t holders; - unsigned type; - - /* btree table */ - unsigned int depth; - unsigned int counts[MAX_DEPTH]; /* in nodes */ - sector_t *index[MAX_DEPTH]; - - unsigned int num_targets; - unsigned int num_allocated; - sector_t *highs; - struct dm_target *targets; - - struct target_type *immutable_target_type; - unsigned integrity_supported:1; - unsigned singleton:1; - - /* - * Indicates the rw permissions for the new logical - * device. This should be a combination of FMODE_READ - * and FMODE_WRITE. - */ - fmode_t mode; - - /* a list of devices used by this table */ - struct list_head devices; - - /* events get handed up using this callback */ - void (*event_fn)(void *); - void *event_context; - - struct dm_md_mempools *mempools; - - struct list_head target_callbacks; -}; - -/* - * Similar to ceiling(log_size(n)) - */ -static unsigned int int_log(unsigned int n, unsigned int base) -{ - int result = 0; - - while (n > 1) { - n = dm_div_up(n, base); - result++; - } - - return result; -} - -/* - * Calculate the index of the child node of the n'th node k'th key. - */ -static inline unsigned int get_child(unsigned int n, unsigned int k) -{ - return (n * CHILDREN_PER_NODE) + k; -} - -/* - * Return the n'th node of level l from table t. - */ -static inline sector_t *get_node(struct dm_table *t, - unsigned int l, unsigned int n) -{ - return t->index[l] + (n * KEYS_PER_NODE); -} - -/* - * Return the highest key that you could lookup from the n'th - * node on level l of the btree. - */ -static sector_t high(struct dm_table *t, unsigned int l, unsigned int n) -{ - for (; l < t->depth - 1; l++) - n = get_child(n, CHILDREN_PER_NODE - 1); - - if (n >= t->counts[l]) - return (sector_t) - 1; - - return get_node(t, l, n)[KEYS_PER_NODE - 1]; -} - -/* - * Fills in a level of the btree based on the highs of the level - * below it. - */ -static int setup_btree_index(unsigned int l, struct dm_table *t) -{ - unsigned int n, k; - sector_t *node; - - for (n = 0U; n < t->counts[l]; n++) { - node = get_node(t, l, n); - - for (k = 0U; k < KEYS_PER_NODE; k++) - node[k] = high(t, l + 1, get_child(n, k)); - } - - return 0; -} - -void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size) -{ - unsigned long size; - void *addr; - - /* - * Check that we're not going to overflow. - */ - if (nmemb > (ULONG_MAX / elem_size)) - return NULL; - - size = nmemb * elem_size; - addr = vzalloc(size); - - return addr; -} -EXPORT_SYMBOL(dm_vcalloc); - -/* - * highs, and targets are managed as dynamic arrays during a - * table load. - */ -static int alloc_targets(struct dm_table *t, unsigned int num) -{ - sector_t *n_highs; - struct dm_target *n_targets; - int n = t->num_targets; - - /* - * Allocate both the target array and offset array at once. - * Append an empty entry to catch sectors beyond the end of - * the device. 
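dm_vcalloc() above guards the nmemb * elem_size multiplication against overflow before allocating. A userspace sketch of the same check, using malloc/memset as a stand-in for vzalloc (the extra elem_size test only avoids dividing by zero):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

/* Zeroed memory for nmemb elements of elem_size bytes, or NULL if the
 * multiplication would overflow -- the same ULONG_MAX check dm_vcalloc()
 * makes before calling vzalloc(). */
static void *vcalloc_like(unsigned long nmemb, unsigned long elem_size)
{
	if (elem_size && nmemb > ULONG_MAX / elem_size)
		return NULL;

	void *p = malloc(nmemb * elem_size);
	if (p)
		memset(p, 0, nmemb * elem_size);
	return p;
}

int main(void)
{
	unsigned long *p = vcalloc_like(16, sizeof(unsigned long));

	printf("%s\n", p ? "allocated" : "overflow or allocation failure");
	free(p);
	return 0;
}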
- */ - n_highs = (sector_t *) dm_vcalloc(num + 1, sizeof(struct dm_target) + - sizeof(sector_t)); - if (!n_highs) - return -ENOMEM; - - n_targets = (struct dm_target *) (n_highs + num); - - if (n) { - memcpy(n_highs, t->highs, sizeof(*n_highs) * n); - memcpy(n_targets, t->targets, sizeof(*n_targets) * n); - } - - memset(n_highs + n, -1, sizeof(*n_highs) * (num - n)); - vfree(t->highs); - - t->num_allocated = num; - t->highs = n_highs; - t->targets = n_targets; - - return 0; -} - -int dm_table_create(struct dm_table **result, fmode_t mode, - unsigned num_targets, struct mapped_device *md) -{ - struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL); - - if (!t) - return -ENOMEM; - - INIT_LIST_HEAD(&t->devices); - INIT_LIST_HEAD(&t->target_callbacks); - atomic_set(&t->holders, 0); - - if (!num_targets) - num_targets = KEYS_PER_NODE; - - num_targets = dm_round_up(num_targets, KEYS_PER_NODE); - - if (alloc_targets(t, num_targets)) { - kfree(t); - t = NULL; - return -ENOMEM; - } - - t->mode = mode; - t->md = md; - *result = t; - return 0; -} - -static void free_devices(struct list_head *devices) -{ - struct list_head *tmp, *next; - - list_for_each_safe(tmp, next, devices) { - struct dm_dev_internal *dd = - list_entry(tmp, struct dm_dev_internal, list); - DMWARN("dm_table_destroy: dm_put_device call missing for %s", - dd->dm_dev.name); - kfree(dd); - } -} - -void dm_table_destroy(struct dm_table *t) -{ - unsigned int i; - - if (!t) - return; - - while (atomic_read(&t->holders)) - msleep(1); - smp_mb(); - - /* free the indexes */ - if (t->depth >= 2) - vfree(t->index[t->depth - 2]); - - /* free the targets */ - for (i = 0; i < t->num_targets; i++) { - struct dm_target *tgt = t->targets + i; - - if (tgt->type->dtr) - tgt->type->dtr(tgt); - - dm_put_target_type(tgt->type); - } - - vfree(t->highs); - - /* free the device list */ - free_devices(&t->devices); - - dm_free_md_mempools(t->mempools); - - kfree(t); -} - -void dm_table_get(struct dm_table *t) -{ - atomic_inc(&t->holders); -} -EXPORT_SYMBOL(dm_table_get); - -void dm_table_put(struct dm_table *t) -{ - if (!t) - return; - - smp_mb__before_atomic_dec(); - atomic_dec(&t->holders); -} -EXPORT_SYMBOL(dm_table_put); - -/* - * Checks to see if we need to extend highs or targets. - */ -static inline int check_space(struct dm_table *t) -{ - if (t->num_targets >= t->num_allocated) - return alloc_targets(t, t->num_allocated * 2); - - return 0; -} - -/* - * See if we've already got a device in the list. - */ -static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev) -{ - struct dm_dev_internal *dd; - - list_for_each_entry (dd, l, list) - if (dd->dm_dev.bdev->bd_dev == dev) - return dd; - - return NULL; -} - -/* - * Open a device so we can use it as a map destination. - */ -static int open_dev(struct dm_dev_internal *d, dev_t dev, - struct mapped_device *md) -{ - static char *_claim_ptr = "I belong to device-mapper"; - struct block_device *bdev; - - int r; - - BUG_ON(d->dm_dev.bdev); - - bdev = blkdev_get_by_dev(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); - - r = bd_link_disk_holder(bdev, dm_disk(md)); - if (r) { - blkdev_put(bdev, d->dm_dev.mode | FMODE_EXCL); - return r; - } - - d->dm_dev.bdev = bdev; - return 0; -} - -/* - * Close a device that we've been using. 
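The holders counter manipulated by dm_table_get()/dm_table_put() above, and the msleep() loop in dm_table_destroy() that waits for it to reach zero, can be sketched with C11 atomics. This is a simplification for illustration only: the table struct is invented, the wait is a plain spin, and the kernel's memory barriers are omitted.

#include <stdatomic.h>
#include <stdio.h>

struct table {
	atomic_int holders;   /* temporary references, as in dm_table->holders */
};

static void table_get(struct table *t) { atomic_fetch_add(&t->holders, 1); }
static void table_put(struct table *t) { atomic_fetch_sub(&t->holders, 1); }

static void table_destroy(struct table *t)
{
	/* Wait until every temporary holder has dropped its reference,
	 * mirroring the msleep(1) loop in dm_table_destroy(). */
	while (atomic_load(&t->holders) > 0)
		;
	printf("destroying table\n");
}

int main(void)
{
	struct table t;

	atomic_init(&t.holders, 0);
	table_get(&t);
	table_put(&t);
	table_destroy(&t);   /* returns immediately: holders is back to zero */
	return 0;
}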
- */ -static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) -{ - if (!d->dm_dev.bdev) - return; - - bd_unlink_disk_holder(d->dm_dev.bdev, dm_disk(md)); - blkdev_put(d->dm_dev.bdev, d->dm_dev.mode | FMODE_EXCL); - d->dm_dev.bdev = NULL; -} - -/* - * If possible, this checks an area of a destination device is invalid. - */ -static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - struct request_queue *q; - struct queue_limits *limits = data; - struct block_device *bdev = dev->bdev; - sector_t dev_size = - i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; - unsigned short logical_block_size_sectors = - limits->logical_block_size >> SECTOR_SHIFT; - char b[BDEVNAME_SIZE]; - - /* - * Some devices exist without request functions, - * such as loop devices not yet bound to backing files. - * Forbid the use of such devices. - */ - q = bdev_get_queue(bdev); - if (!q || !q->make_request_fn) { - DMWARN("%s: %s is not yet initialised: " - "start=%llu, len=%llu, dev_size=%llu", - dm_device_name(ti->table->md), bdevname(bdev, b), - (unsigned long long)start, - (unsigned long long)len, - (unsigned long long)dev_size); - return 1; - } - - if (!dev_size) - return 0; - - if ((start >= dev_size) || (start + len > dev_size)) { - DMWARN("%s: %s too small for target: " - "start=%llu, len=%llu, dev_size=%llu", - dm_device_name(ti->table->md), bdevname(bdev, b), - (unsigned long long)start, - (unsigned long long)len, - (unsigned long long)dev_size); - return 1; - } - - if (logical_block_size_sectors <= 1) - return 0; - - if (start & (logical_block_size_sectors - 1)) { - DMWARN("%s: start=%llu not aligned to h/w " - "logical block size %u of %s", - dm_device_name(ti->table->md), - (unsigned long long)start, - limits->logical_block_size, bdevname(bdev, b)); - return 1; - } - - if (len & (logical_block_size_sectors - 1)) { - DMWARN("%s: len=%llu not aligned to h/w " - "logical block size %u of %s", - dm_device_name(ti->table->md), - (unsigned long long)len, - limits->logical_block_size, bdevname(bdev, b)); - return 1; - } - - return 0; -} - -/* - * This upgrades the mode on an already open dm_dev, being - * careful to leave things as they were if we fail to reopen the - * device and not to touch the existing bdev field in case - * it is accessed concurrently inside dm_table_any_congested(). - */ -static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, - struct mapped_device *md) -{ - int r; - struct dm_dev_internal dd_new, dd_old; - - dd_new = dd_old = *dd; - - dd_new.dm_dev.mode |= new_mode; - dd_new.dm_dev.bdev = NULL; - - r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md); - if (r) - return r; - - dd->dm_dev.mode |= new_mode; - close_dev(&dd_old, md); - - return 0; -} - -/* - * Add a device to the list, or just increment the usage count if - * it's already present. 
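The alignment tests in device_area_is_invalid() above reduce to masking the start and length against the logical block size expressed in 512-byte sectors. A small sketch of that arithmetic, assuming a 4096-byte logical block size and power-of-two block sizes (which the mask trick requires):

#include <stdio.h>
#include <stdint.h>

#define SECTOR_SHIFT 9

/* Returns 1 if either the start or the length (both in 512-byte sectors)
 * is not a multiple of the device's logical block size. */
static int misaligned(uint64_t start, uint64_t len, unsigned logical_block_size)
{
	unsigned lbs_sectors = logical_block_size >> SECTOR_SHIFT;

	if (lbs_sectors <= 1)
		return 0;
	return (start & (lbs_sectors - 1)) || (len & (lbs_sectors - 1));
}

int main(void)
{
	/* 4096-byte blocks: sector offsets must be multiples of 8. */
	printf("%d\n", misaligned(8, 16, 4096));    /* 0: aligned */
	printf("%d\n", misaligned(10, 16, 4096));   /* 1: start misaligned */
	return 0;
}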
- */ -int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, - struct dm_dev **result) -{ - int r; - dev_t uninitialized_var(dev); - struct dm_dev_internal *dd; - unsigned int major, minor; - struct dm_table *t = ti->table; - char dummy; - - BUG_ON(!t); - - if (sscanf(path, "%u:%u%c", &major, &minor, &dummy) == 2) { - /* Extract the major/minor numbers */ - dev = MKDEV(major, minor); - if (MAJOR(dev) != major || MINOR(dev) != minor) - return -EOVERFLOW; - } else { - /* convert the path to a device */ - struct block_device *bdev = lookup_bdev(path); - - if (IS_ERR(bdev)) - return PTR_ERR(bdev); - dev = bdev->bd_dev; - bdput(bdev); - } - - dd = find_device(&t->devices, dev); - if (!dd) { - dd = kmalloc(sizeof(*dd), GFP_KERNEL); - if (!dd) - return -ENOMEM; - - dd->dm_dev.mode = mode; - dd->dm_dev.bdev = NULL; - - if ((r = open_dev(dd, dev, t->md))) { - kfree(dd); - return r; - } - - format_dev_t(dd->dm_dev.name, dev); - - atomic_set(&dd->count, 0); - list_add(&dd->list, &t->devices); - - } else if (dd->dm_dev.mode != (mode | dd->dm_dev.mode)) { - r = upgrade_mode(dd, mode, t->md); - if (r) - return r; - } - atomic_inc(&dd->count); - - *result = &dd->dm_dev; - return 0; -} -EXPORT_SYMBOL(dm_get_device); - -int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - struct queue_limits *limits = data; - struct block_device *bdev = dev->bdev; - struct request_queue *q = bdev_get_queue(bdev); - char b[BDEVNAME_SIZE]; - - if (unlikely(!q)) { - DMWARN("%s: Cannot set limits for nonexistent device %s", - dm_device_name(ti->table->md), bdevname(bdev, b)); - return 0; - } - - if (bdev_stack_limits(limits, bdev, start) < 0) - DMWARN("%s: adding target device %s caused an alignment inconsistency: " - "physical_block_size=%u, logical_block_size=%u, " - "alignment_offset=%u, start=%llu", - dm_device_name(ti->table->md), bdevname(bdev, b), - q->limits.physical_block_size, - q->limits.logical_block_size, - q->limits.alignment_offset, - (unsigned long long) start << SECTOR_SHIFT); - - /* - * Check if merge fn is supported. - * If not we'll force DM to use PAGE_SIZE or - * smaller I/O, just to be safe. - */ - if (dm_queue_merge_is_compulsory(q) && !ti->type->merge) - blk_limits_max_hw_sectors(limits, - (unsigned int) (PAGE_SIZE >> 9)); - return 0; -} -EXPORT_SYMBOL_GPL(dm_set_device_limits); - -/* - * Decrement a device's use count and remove it if necessary. - */ -void dm_put_device(struct dm_target *ti, struct dm_dev *d) -{ - struct dm_dev_internal *dd = container_of(d, struct dm_dev_internal, - dm_dev); - - if (atomic_dec_and_test(&dd->count)) { - close_dev(dd, ti->table->md); - list_del(&dd->list); - kfree(dd); - } -} -EXPORT_SYMBOL(dm_put_device); - -/* - * Checks to see if the target joins onto the end of the table. - */ -static int adjoin(struct dm_table *table, struct dm_target *ti) -{ - struct dm_target *prev; - - if (!table->num_targets) - return !ti->begin; - - prev = &table->targets[table->num_targets - 1]; - return (ti->begin == (prev->begin + prev->len)); -} - -/* - * Used to dynamically allocate the arg array. - */ -static char **realloc_argv(unsigned *array_size, char **old_argv) -{ - char **argv; - unsigned new_size; - - new_size = *array_size ? 
*array_size * 2 : 64; - argv = kmalloc(new_size * sizeof(*argv), GFP_KERNEL); - if (argv) { - memcpy(argv, old_argv, *array_size * sizeof(*argv)); - *array_size = new_size; - } - - kfree(old_argv); - return argv; -} - -/* - * Destructively splits up the argument list to pass to ctr. - */ -int dm_split_args(int *argc, char ***argvp, char *input) -{ - char *start, *end = input, *out, **argv = NULL; - unsigned array_size = 0; - - *argc = 0; - - if (!input) { - *argvp = NULL; - return 0; - } - - argv = realloc_argv(&array_size, argv); - if (!argv) - return -ENOMEM; - - while (1) { - /* Skip whitespace */ - start = skip_spaces(end); - - if (!*start) - break; /* success, we hit the end */ - - /* 'out' is used to remove any back-quotes */ - end = out = start; - while (*end) { - /* Everything apart from '\0' can be quoted */ - if (*end == '\\' && *(end + 1)) { - *out++ = *(end + 1); - end += 2; - continue; - } - - if (isspace(*end)) - break; /* end of token */ - - *out++ = *end++; - } - - /* have we already filled the array ? */ - if ((*argc + 1) > array_size) { - argv = realloc_argv(&array_size, argv); - if (!argv) - return -ENOMEM; - } - - /* we know this is whitespace */ - if (*end) - end++; - - /* terminate the string and put it in the array */ - *out = '\0'; - argv[*argc] = start; - (*argc)++; - } - - *argvp = argv; - return 0; -} - -/* - * Impose necessary and sufficient conditions on a devices's table such - * that any incoming bio which respects its logical_block_size can be - * processed successfully. If it falls across the boundary between - * two or more targets, the size of each piece it gets split into must - * be compatible with the logical_block_size of the target processing it. - */ -static int validate_hardware_logical_block_alignment(struct dm_table *table, - struct queue_limits *limits) -{ - /* - * This function uses arithmetic modulo the logical_block_size - * (in units of 512-byte sectors). - */ - unsigned short device_logical_block_size_sects = - limits->logical_block_size >> SECTOR_SHIFT; - - /* - * Offset of the start of the next table entry, mod logical_block_size. - */ - unsigned short next_target_start = 0; - - /* - * Given an aligned bio that extends beyond the end of a - * target, how many sectors must the next target handle? - */ - unsigned short remaining = 0; - - struct dm_target *uninitialized_var(ti); - struct queue_limits ti_limits; - unsigned i = 0; - - /* - * Check each entry in the table in turn. - */ - while (i < dm_table_get_num_targets(table)) { - ti = dm_table_get_target(table, i++); - - blk_set_stacking_limits(&ti_limits); - - /* combine all target devices' limits */ - if (ti->type->iterate_devices) - ti->type->iterate_devices(ti, dm_set_device_limits, - &ti_limits); - - /* - * If the remaining sectors fall entirely within this - * table entry are they compatible with its logical_block_size? - */ - if (remaining < ti->len && - remaining & ((ti_limits.logical_block_size >> - SECTOR_SHIFT) - 1)) - break; /* Error */ - - next_target_start = - (unsigned short) ((next_target_start + ti->len) & - (device_logical_block_size_sects - 1)); - remaining = next_target_start ? 
- device_logical_block_size_sects - next_target_start : 0; - } - - if (remaining) { - DMWARN("%s: table line %u (start sect %llu len %llu) " - "not aligned to h/w logical block size %u", - dm_device_name(table->md), i, - (unsigned long long) ti->begin, - (unsigned long long) ti->len, - limits->logical_block_size); - return -EINVAL; - } - - return 0; -} - -int dm_table_add_target(struct dm_table *t, const char *type, - sector_t start, sector_t len, char *params) -{ - int r = -EINVAL, argc; - char **argv; - struct dm_target *tgt; - - if (t->singleton) { - DMERR("%s: target type %s must appear alone in table", - dm_device_name(t->md), t->targets->type->name); - return -EINVAL; - } - - if ((r = check_space(t))) - return r; - - tgt = t->targets + t->num_targets; - memset(tgt, 0, sizeof(*tgt)); - - if (!len) { - DMERR("%s: zero-length target", dm_device_name(t->md)); - return -EINVAL; - } - - tgt->type = dm_get_target_type(type); - if (!tgt->type) { - DMERR("%s: %s: unknown target type", dm_device_name(t->md), - type); - return -EINVAL; - } - - if (dm_target_needs_singleton(tgt->type)) { - if (t->num_targets) { - DMERR("%s: target type %s must appear alone in table", - dm_device_name(t->md), type); - return -EINVAL; - } - t->singleton = 1; - } - - if (dm_target_always_writeable(tgt->type) && !(t->mode & FMODE_WRITE)) { - DMERR("%s: target type %s may not be included in read-only tables", - dm_device_name(t->md), type); - return -EINVAL; - } - - if (t->immutable_target_type) { - if (t->immutable_target_type != tgt->type) { - DMERR("%s: immutable target type %s cannot be mixed with other target types", - dm_device_name(t->md), t->immutable_target_type->name); - return -EINVAL; - } - } else if (dm_target_is_immutable(tgt->type)) { - if (t->num_targets) { - DMERR("%s: immutable target type %s cannot be mixed with other target types", - dm_device_name(t->md), tgt->type->name); - return -EINVAL; - } - t->immutable_target_type = tgt->type; - } - - tgt->table = t; - tgt->begin = start; - tgt->len = len; - tgt->error = "Unknown error"; - - /* - * Does this target adjoin the previous one ? - */ - if (!adjoin(t, tgt)) { - tgt->error = "Gap in table"; - r = -EINVAL; - goto bad; - } - - r = dm_split_args(&argc, &argv, params); - if (r) { - tgt->error = "couldn't split parameters (insufficient memory)"; - goto bad; - } - - r = tgt->type->ctr(tgt, argc, argv); - kfree(argv); - if (r) - goto bad; - - t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; - - if (!tgt->num_discard_requests && tgt->discards_supported) - DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.", - dm_device_name(t->md), type); - - return 0; - - bad: - DMERR("%s: %s: %s", dm_device_name(t->md), type, tgt->error); - dm_put_target_type(tgt->type); - return r; -} - -/* - * Target argument parsing helpers. 
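dm_table_add_target() above rejects any target whose start does not butt up against the end of the previous one ("Gap in table"). The contiguity rule is just this check over (start, len) pairs; the table entries below are made up for illustration:

#include <stdio.h>
#include <stdint.h>

struct entry { uint64_t begin, len; };   /* sectors, like dm_target begin/len */

/* Returns 1 if the first entry starts at sector 0 and every later entry
 * starts exactly where the previous one ended -- the adjoin() rule. */
static int contiguous(const struct entry *e, unsigned n)
{
	uint64_t expected = 0;

	for (unsigned i = 0; i < n; i++) {
		if (e[i].begin != expected)
			return 0;
		expected = e[i].begin + e[i].len;
	}
	return 1;
}

int main(void)
{
	struct entry ok[]  = { {0, 100}, {100, 50} };
	struct entry gap[] = { {0, 100}, {120, 50} };

	printf("%d %d\n", contiguous(ok, 2), contiguous(gap, 2));   /* 1 0 */
	return 0;
}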
- */ -static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *value, char **error, unsigned grouped) -{ - const char *arg_str = dm_shift_arg(arg_set); - char dummy; - - if (!arg_str || - (sscanf(arg_str, "%u%c", value, &dummy) != 1) || - (*value < arg->min) || - (*value > arg->max) || - (grouped && arg_set->argc < *value)) { - *error = arg->error; - return -EINVAL; - } - - return 0; -} - -int dm_read_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *value, char **error) -{ - return validate_next_arg(arg, arg_set, value, error, 0); -} -EXPORT_SYMBOL(dm_read_arg); - -int dm_read_arg_group(struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *value, char **error) -{ - return validate_next_arg(arg, arg_set, value, error, 1); -} -EXPORT_SYMBOL(dm_read_arg_group); - -const char *dm_shift_arg(struct dm_arg_set *as) -{ - char *r; - - if (as->argc) { - as->argc--; - r = *as->argv; - as->argv++; - return r; - } - - return NULL; -} -EXPORT_SYMBOL(dm_shift_arg); - -void dm_consume_args(struct dm_arg_set *as, unsigned num_args) -{ - BUG_ON(as->argc < num_args); - as->argc -= num_args; - as->argv += num_args; -} -EXPORT_SYMBOL(dm_consume_args); - -static int dm_table_set_type(struct dm_table *t) -{ - unsigned i; - unsigned bio_based = 0, request_based = 0; - struct dm_target *tgt; - struct dm_dev_internal *dd; - struct list_head *devices; - - for (i = 0; i < t->num_targets; i++) { - tgt = t->targets + i; - if (dm_target_request_based(tgt)) - request_based = 1; - else - bio_based = 1; - - if (bio_based && request_based) { - DMWARN("Inconsistent table: different target types" - " can't be mixed up"); - return -EINVAL; - } - } - - if (bio_based) { - /* We must use this table as bio-based */ - t->type = DM_TYPE_BIO_BASED; - return 0; - } - - BUG_ON(!request_based); /* No targets in this table */ - - /* Non-request-stackable devices can't be used for request-based dm */ - devices = dm_table_get_devices(t); - list_for_each_entry(dd, devices, list) { - if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev.bdev))) { - DMWARN("table load rejected: including" - " non-request-stackable devices"); - return -EINVAL; - } - } - - /* - * Request-based dm supports only tables that have a single target now. - * To support multiple targets, request splitting support is needed, - * and that needs lots of changes in the block-layer. - * (e.g. request completion process for partial completion.) 
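validate_next_arg() above uses the sscanf("%u%c") idiom to reject trailing junk before range-checking the value. The same idiom in a standalone sketch (the bounds are arbitrary):

#include <stdio.h>

/* Parse a strictly numeric unsigned argument and range-check it, as
 * validate_next_arg() does: the extra %c must NOT match, otherwise the
 * string had trailing characters. */
static int read_arg(const char *arg, unsigned min, unsigned max, unsigned *value)
{
	char dummy;

	if (!arg ||
	    sscanf(arg, "%u%c", value, &dummy) != 1 ||
	    *value < min || *value > max)
		return -1;
	return 0;
}

int main(void)
{
	unsigned v;

	printf("%d\n", read_arg("42", 1, 100, &v));    /*  0: accepted */
	printf("%d\n", read_arg("42x", 1, 100, &v));   /* -1: trailing junk */
	printf("%d\n", read_arg("500", 1, 100, &v));   /* -1: out of range */
	return 0;
}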
- */ - if (t->num_targets > 1) { - DMWARN("Request-based dm doesn't support multiple targets yet"); - return -EINVAL; - } - - t->type = DM_TYPE_REQUEST_BASED; - - return 0; -} - -unsigned dm_table_get_type(struct dm_table *t) -{ - return t->type; -} - -struct target_type *dm_table_get_immutable_target_type(struct dm_table *t) -{ - return t->immutable_target_type; -} - -bool dm_table_request_based(struct dm_table *t) -{ - return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED; -} - -int dm_table_alloc_md_mempools(struct dm_table *t) -{ - unsigned type = dm_table_get_type(t); - - if (unlikely(type == DM_TYPE_NONE)) { - DMWARN("no table type is set, can't allocate mempools"); - return -EINVAL; - } - - t->mempools = dm_alloc_md_mempools(type, t->integrity_supported); - if (!t->mempools) - return -ENOMEM; - - return 0; -} - -void dm_table_free_md_mempools(struct dm_table *t) -{ - dm_free_md_mempools(t->mempools); - t->mempools = NULL; -} - -struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t) -{ - return t->mempools; -} - -static int setup_indexes(struct dm_table *t) -{ - int i; - unsigned int total = 0; - sector_t *indexes; - - /* allocate the space for *all* the indexes */ - for (i = t->depth - 2; i >= 0; i--) { - t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE); - total += t->counts[i]; - } - - indexes = (sector_t *) dm_vcalloc(total, (unsigned long) NODE_SIZE); - if (!indexes) - return -ENOMEM; - - /* set up internal nodes, bottom-up */ - for (i = t->depth - 2; i >= 0; i--) { - t->index[i] = indexes; - indexes += (KEYS_PER_NODE * t->counts[i]); - setup_btree_index(i, t); - } - - return 0; -} - -/* - * Builds the btree to index the map. - */ -static int dm_table_build_index(struct dm_table *t) -{ - int r = 0; - unsigned int leaf_nodes; - - /* how many indexes will the btree have ? */ - leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); - t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); - - /* leaf layer has already been set up */ - t->counts[t->depth - 1] = leaf_nodes; - t->index[t->depth - 1] = t->highs; - - if (t->depth >= 2) - r = setup_indexes(t); - - return r; -} - -/* - * Get a disk whose integrity profile reflects the table's profile. - * If %match_all is true, all devices' profiles must match. - * If %match_all is false, all devices must at least have an - * allocated integrity profile; but uninitialized is ok. - * Returns NULL if integrity support was inconsistent or unavailable. - */ -static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t, - bool match_all) -{ - struct list_head *devices = dm_table_get_devices(t); - struct dm_dev_internal *dd = NULL; - struct gendisk *prev_disk = NULL, *template_disk = NULL; - - list_for_each_entry(dd, devices, list) { - template_disk = dd->dm_dev.bdev->bd_disk; - if (!blk_get_integrity(template_disk)) - goto no_integrity; - if (!match_all && !blk_integrity_is_initialized(template_disk)) - continue; /* skip uninitialized profiles */ - else if (prev_disk && - blk_integrity_compare(prev_disk, template_disk) < 0) - goto no_integrity; - prev_disk = template_disk; - } - - return template_disk; - -no_integrity: - if (prev_disk) - DMWARN("%s: integrity not set: %s and %s profile mismatch", - dm_device_name(t->md), - prev_disk->disk_name, - template_disk->disk_name); - return NULL; -} - -/* - * Register the mapped device for blk_integrity support if - * the underlying devices have an integrity profile. 
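dm_table_build_index() above sizes the lookup btree from the target count: leaf nodes hold KEYS_PER_NODE keys and the depth is one plus a ceiling log in base CHILDREN_PER_NODE. A sketch of that sizing arithmetic, assuming a 64-byte cache line and an 8-byte sector_t (so 8 keys and 9 children per node; other machines differ):

#include <stdio.h>

/* Assumed geometry: NODE_SIZE = 64 (L1 cache line), sizeof(sector_t) = 8. */
#define KEYS_PER_NODE      8
#define CHILDREN_PER_NODE  (KEYS_PER_NODE + 1)

static unsigned div_up(unsigned n, unsigned d) { return (n + d - 1) / d; }

/* Ceiling of log_base(n), computed the way int_log() does. */
static unsigned int_log(unsigned n, unsigned base)
{
	unsigned result = 0;

	while (n > 1) {
		n = div_up(n, base);
		result++;
	}
	return result;
}

int main(void)
{
	unsigned num_targets = 100;
	unsigned leaf_nodes = div_up(num_targets, KEYS_PER_NODE);      /* 13 */
	unsigned depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);   /* 3 */

	printf("leaves=%u depth=%u\n", leaf_nodes, depth);
	return 0;
}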
But all devices - * may not have matching profiles (checking all devices isn't reliable - * during table load because this table may use other DM device(s) which - * must be resumed before they will have an initialized integity profile). - * Stacked DM devices force a 2 stage integrity profile validation: - * 1 - during load, validate all initialized integrity profiles match - * 2 - during resume, validate all integrity profiles match - */ -static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device *md) -{ - struct gendisk *template_disk = NULL; - - template_disk = dm_table_get_integrity_disk(t, false); - if (!template_disk) - return 0; - - if (!blk_integrity_is_initialized(dm_disk(md))) { - t->integrity_supported = 1; - return blk_integrity_register(dm_disk(md), NULL); - } - - /* - * If DM device already has an initalized integrity - * profile the new profile should not conflict. - */ - if (blk_integrity_is_initialized(template_disk) && - blk_integrity_compare(dm_disk(md), template_disk) < 0) { - DMWARN("%s: conflict with existing integrity profile: " - "%s profile mismatch", - dm_device_name(t->md), - template_disk->disk_name); - return 1; - } - - /* Preserve existing initialized integrity profile */ - t->integrity_supported = 1; - return 0; -} - -/* - * Prepares the table for use by building the indices, - * setting the type, and allocating mempools. - */ -int dm_table_complete(struct dm_table *t) -{ - int r; - - r = dm_table_set_type(t); - if (r) { - DMERR("unable to set table type"); - return r; - } - - r = dm_table_build_index(t); - if (r) { - DMERR("unable to build btrees"); - return r; - } - - r = dm_table_prealloc_integrity(t, t->md); - if (r) { - DMERR("could not register integrity profile."); - return r; - } - - r = dm_table_alloc_md_mempools(t); - if (r) - DMERR("unable to allocate mempools"); - - return r; -} - -static DEFINE_MUTEX(_event_lock); -void dm_table_event_callback(struct dm_table *t, - void (*fn)(void *), void *context) -{ - mutex_lock(&_event_lock); - t->event_fn = fn; - t->event_context = context; - mutex_unlock(&_event_lock); -} - -void dm_table_event(struct dm_table *t) -{ - /* - * You can no longer call dm_table_event() from interrupt - * context, use a bottom half instead. - */ - BUG_ON(in_interrupt()); - - mutex_lock(&_event_lock); - if (t->event_fn) - t->event_fn(t->event_context); - mutex_unlock(&_event_lock); -} -EXPORT_SYMBOL(dm_table_event); - -sector_t dm_table_get_size(struct dm_table *t) -{ - return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; -} -EXPORT_SYMBOL(dm_table_get_size); - -struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index) -{ - if (index >= t->num_targets) - return NULL; - - return t->targets + index; -} - -/* - * Search the btree for the correct target. - * - * Caller should check returned pointer with dm_target_is_valid() - * to trap I/O beyond end of device. - */ -struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) -{ - unsigned int l, n = 0, k = 0; - sector_t *node; - - for (l = 0; l < t->depth; l++) { - n = get_child(n, k); - node = get_node(t, l, n); - - for (k = 0; k < KEYS_PER_NODE; k++) - if (node[k] >= sector) - break; - } - - return &t->targets[(KEYS_PER_NODE * n) + k]; -} - -/* - * Establish the new table's queue_limits and validate them. 
- */ -int dm_calculate_queue_limits(struct dm_table *table, - struct queue_limits *limits) -{ - struct dm_target *uninitialized_var(ti); - struct queue_limits ti_limits; - unsigned i = 0; - - blk_set_stacking_limits(limits); - - while (i < dm_table_get_num_targets(table)) { - blk_set_stacking_limits(&ti_limits); - - ti = dm_table_get_target(table, i++); - - if (!ti->type->iterate_devices) - goto combine_limits; - - /* - * Combine queue limits of all the devices this target uses. - */ - ti->type->iterate_devices(ti, dm_set_device_limits, - &ti_limits); - - /* Set I/O hints portion of queue limits */ - if (ti->type->io_hints) - ti->type->io_hints(ti, &ti_limits); - - /* - * Check each device area is consistent with the target's - * overall queue limits. - */ - if (ti->type->iterate_devices(ti, device_area_is_invalid, - &ti_limits)) - return -EINVAL; - -combine_limits: - /* - * Merge this target's queue limits into the overall limits - * for the table. - */ - if (blk_stack_limits(limits, &ti_limits, 0) < 0) - DMWARN("%s: adding target device " - "(start sect %llu len %llu) " - "caused an alignment inconsistency", - dm_device_name(table->md), - (unsigned long long) ti->begin, - (unsigned long long) ti->len); - } - - return validate_hardware_logical_block_alignment(table, limits); -} - -/* - * Set the integrity profile for this device if all devices used have - * matching profiles. We're quite deep in the resume path but still - * don't know if all devices (particularly DM devices this device - * may be stacked on) have matching profiles. Even if the profiles - * don't match we have no way to fail (to resume) at this point. - */ -static void dm_table_set_integrity(struct dm_table *t) -{ - struct gendisk *template_disk = NULL; - - if (!blk_get_integrity(dm_disk(t->md))) - return; - - template_disk = dm_table_get_integrity_disk(t, true); - if (template_disk) - blk_integrity_register(dm_disk(t->md), - blk_get_integrity(template_disk)); - else if (blk_integrity_is_initialized(dm_disk(t->md))) - DMWARN("%s: device no longer has a valid integrity profile", - dm_device_name(t->md)); - else - DMWARN("%s: unable to establish an integrity profile", - dm_device_name(t->md)); -} - -static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - unsigned flush = (*(unsigned *)data); - struct request_queue *q = bdev_get_queue(dev->bdev); - - return q && (q->flush_flags & flush); -} - -static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) -{ - struct dm_target *ti; - unsigned i = 0; - - /* - * Require at least one underlying device to support flushes. - * t->devices includes internal dm devices such as mirror logs - * so we need to use iterate_devices here, which targets - * supporting flushes must provide. - */ - while (i < dm_table_get_num_targets(t)) { - ti = dm_table_get_target(t, i++); - - if (!ti->num_flush_requests) - continue; - - if (ti->type->iterate_devices && - ti->type->iterate_devices(ti, device_flush_capable, &flush)) - return 1; - } - - return 0; -} - -static bool dm_table_discard_zeroes_data(struct dm_table *t) -{ - struct dm_target *ti; - unsigned i = 0; - - /* Ensure that all targets supports discard_zeroes_data. 
*/ - while (i < dm_table_get_num_targets(t)) { - ti = dm_table_get_target(t, i++); - - if (ti->discard_zeroes_data_unsupported) - return 0; - } - - return 1; -} - -static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - struct request_queue *q = bdev_get_queue(dev->bdev); - - return q && blk_queue_nonrot(q); -} - -static bool dm_table_is_nonrot(struct dm_table *t) -{ - struct dm_target *ti; - unsigned i = 0; - - /* Ensure that all underlying device are non-rotational. */ - while (i < dm_table_get_num_targets(t)) { - ti = dm_table_get_target(t, i++); - - if (!ti->type->iterate_devices || - !ti->type->iterate_devices(ti, device_is_nonrot, NULL)) - return 0; - } - - return 1; -} - -void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, - struct queue_limits *limits) -{ - unsigned flush = 0; - - /* - * Copy table's limits to the DM device's request_queue - */ - q->limits = *limits; - - if (!dm_table_supports_discards(t)) - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); - else - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); - - if (dm_table_supports_flush(t, REQ_FLUSH)) { - flush |= REQ_FLUSH; - if (dm_table_supports_flush(t, REQ_FUA)) - flush |= REQ_FUA; - } - blk_queue_flush(q, flush); - - if (!dm_table_discard_zeroes_data(t)) - q->limits.discard_zeroes_data = 0; - - if (dm_table_is_nonrot(t)) - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); - else - queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); - - dm_table_set_integrity(t); - - /* - * QUEUE_FLAG_STACKABLE must be set after all queue settings are - * visible to other CPUs because, once the flag is set, incoming bios - * are processed by request-based dm, which refers to the queue - * settings. - * Until the flag set, bios are passed to bio-based dm and queued to - * md->deferred where queue settings are not needed yet. - * Those bios are passed to request-based dm at the resume time. 
- */ - smp_mb(); - if (dm_table_request_based(t)) - queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q); -} - -unsigned int dm_table_get_num_targets(struct dm_table *t) -{ - return t->num_targets; -} - -struct list_head *dm_table_get_devices(struct dm_table *t) -{ - return &t->devices; -} - -fmode_t dm_table_get_mode(struct dm_table *t) -{ - return t->mode; -} -EXPORT_SYMBOL(dm_table_get_mode); - -static void suspend_targets(struct dm_table *t, unsigned postsuspend) -{ - int i = t->num_targets; - struct dm_target *ti = t->targets; - - while (i--) { - if (postsuspend) { - if (ti->type->postsuspend) - ti->type->postsuspend(ti); - } else if (ti->type->presuspend) - ti->type->presuspend(ti); - - ti++; - } -} - -void dm_table_presuspend_targets(struct dm_table *t) -{ - if (!t) - return; - - suspend_targets(t, 0); -} - -void dm_table_postsuspend_targets(struct dm_table *t) -{ - if (!t) - return; - - suspend_targets(t, 1); -} - -int dm_table_resume_targets(struct dm_table *t) -{ - int i, r = 0; - - for (i = 0; i < t->num_targets; i++) { - struct dm_target *ti = t->targets + i; - - if (!ti->type->preresume) - continue; - - r = ti->type->preresume(ti); - if (r) - return r; - } - - for (i = 0; i < t->num_targets; i++) { - struct dm_target *ti = t->targets + i; - - if (ti->type->resume) - ti->type->resume(ti); - } - - return 0; -} - -void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb) -{ - list_add(&cb->list, &t->target_callbacks); -} -EXPORT_SYMBOL_GPL(dm_table_add_target_callbacks); - -int dm_table_any_congested(struct dm_table *t, int bdi_bits) -{ - struct dm_dev_internal *dd; - struct list_head *devices = dm_table_get_devices(t); - struct dm_target_callbacks *cb; - int r = 0; - - list_for_each_entry(dd, devices, list) { - struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); - char b[BDEVNAME_SIZE]; - - if (likely(q)) - r |= bdi_congested(&q->backing_dev_info, bdi_bits); - else - DMWARN_LIMIT("%s: any_congested: nonexistent device %s", - dm_device_name(t->md), - bdevname(dd->dm_dev.bdev, b)); - } - - list_for_each_entry(cb, &t->target_callbacks, list) - if (cb->congested_fn) - r |= cb->congested_fn(cb, bdi_bits); - - return r; -} - -int dm_table_any_busy_target(struct dm_table *t) -{ - unsigned i; - struct dm_target *ti; - - for (i = 0; i < t->num_targets; i++) { - ti = t->targets + i; - if (ti->type->busy && ti->type->busy(ti)) - return 1; - } - - return 0; -} - -struct mapped_device *dm_table_get_md(struct dm_table *t) -{ - return t->md; -} -EXPORT_SYMBOL(dm_table_get_md); - -static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - struct request_queue *q = bdev_get_queue(dev->bdev); - - return q && blk_queue_discard(q); -} - -bool dm_table_supports_discards(struct dm_table *t) -{ - struct dm_target *ti; - unsigned i = 0; - - /* - * Unless any target used by the table set discards_supported, - * require at least one underlying device to support discards. - * t->devices includes internal dm devices such as mirror logs - * so we need to use iterate_devices here, which targets - * supporting discard selectively must provide. 
- */ - while (i < dm_table_get_num_targets(t)) { - ti = dm_table_get_target(t, i++); - - if (!ti->num_discard_requests) - continue; - - if (ti->discards_supported) - return 1; - - if (ti->type->iterate_devices && - ti->type->iterate_devices(ti, device_discard_capable, NULL)) - return 1; - } - - return 0; -} diff --git a/ANDROID_3.4.5/drivers/md/dm-target.c b/ANDROID_3.4.5/drivers/md/dm-target.c deleted file mode 100644 index 8da366cf..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-target.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (C) 2001 Sistina Software (UK) Limited - * - * This file is released under the GPL. - */ - -#include "dm.h" - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/kmod.h> -#include <linux/bio.h> - -#define DM_MSG_PREFIX "target" - -static LIST_HEAD(_targets); -static DECLARE_RWSEM(_lock); - -#define DM_MOD_NAME_SIZE 32 - -static inline struct target_type *__find_target_type(const char *name) -{ - struct target_type *tt; - - list_for_each_entry(tt, &_targets, list) - if (!strcmp(name, tt->name)) - return tt; - - return NULL; -} - -static struct target_type *get_target_type(const char *name) -{ - struct target_type *tt; - - down_read(&_lock); - - tt = __find_target_type(name); - if (tt && !try_module_get(tt->module)) - tt = NULL; - - up_read(&_lock); - return tt; -} - -static void load_module(const char *name) -{ - request_module("dm-%s", name); -} - -struct target_type *dm_get_target_type(const char *name) -{ - struct target_type *tt = get_target_type(name); - - if (!tt) { - load_module(name); - tt = get_target_type(name); - } - - return tt; -} - -void dm_put_target_type(struct target_type *tt) -{ - down_read(&_lock); - module_put(tt->module); - up_read(&_lock); -} - -int dm_target_iterate(void (*iter_func)(struct target_type *tt, - void *param), void *param) -{ - struct target_type *tt; - - down_read(&_lock); - list_for_each_entry(tt, &_targets, list) - iter_func(tt, param); - up_read(&_lock); - - return 0; -} - -int dm_register_target(struct target_type *tt) -{ - int rv = 0; - - down_write(&_lock); - if (__find_target_type(tt->name)) - rv = -EEXIST; - else - list_add(&tt->list, &_targets); - - up_write(&_lock); - return rv; -} - -void dm_unregister_target(struct target_type *tt) -{ - down_write(&_lock); - if (!__find_target_type(tt->name)) { - DMCRIT("Unregistering unrecognised target: %s", tt->name); - BUG(); - } - - list_del(&tt->list); - - up_write(&_lock); -} - -/* - * io-err: always fails an io, useful for bringing - * up LVs that have holes in them. 
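The register/get/unregister logic above is a name-keyed registry guarded by a reader/writer lock. A much-reduced userspace sketch of the same shape, with a pthread rwlock standing in for the kernel rwsem and no module reference counting or unregister path:

#include <pthread.h>
#include <stdio.h>
#include <string.h>

struct target_type {
	const char *name;
	struct target_type *next;
};

static struct target_type *targets;
static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;

static struct target_type *find(const char *name)
{
	for (struct target_type *tt = targets; tt; tt = tt->next)
		if (!strcmp(tt->name, name))
			return tt;
	return NULL;
}

static int register_target(struct target_type *tt)
{
	int rv = 0;

	pthread_rwlock_wrlock(&lock);
	if (find(tt->name))
		rv = -1;             /* -EEXIST in the kernel code */
	else {
		tt->next = targets;
		targets = tt;
	}
	pthread_rwlock_unlock(&lock);
	return rv;
}

static struct target_type *get_target(const char *name)
{
	pthread_rwlock_rdlock(&lock);
	struct target_type *tt = find(name);
	pthread_rwlock_unlock(&lock);
	return tt;
}

int main(void)
{
	static struct target_type error_target = { .name = "error" };

	register_target(&error_target);
	printf("found: %s\n", get_target("error") ? "yes" : "no");
	return 0;
}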
- */ -static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args) -{ - /* - * Return error for discards instead of -EOPNOTSUPP - */ - tt->num_discard_requests = 1; - - return 0; -} - -static void io_err_dtr(struct dm_target *tt) -{ - /* empty */ -} - -static int io_err_map(struct dm_target *tt, struct bio *bio, - union map_info *map_context) -{ - return -EIO; -} - -static struct target_type error_target = { - .name = "error", - .version = {1, 0, 1}, - .ctr = io_err_ctr, - .dtr = io_err_dtr, - .map = io_err_map, -}; - -int __init dm_target_init(void) -{ - return dm_register_target(&error_target); -} - -void dm_target_exit(void) -{ - dm_unregister_target(&error_target); -} - -EXPORT_SYMBOL(dm_register_target); -EXPORT_SYMBOL(dm_unregister_target); diff --git a/ANDROID_3.4.5/drivers/md/dm-thin-metadata.c b/ANDROID_3.4.5/drivers/md/dm-thin-metadata.c deleted file mode 100644 index 737d3886..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-thin-metadata.c +++ /dev/null @@ -1,1409 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#include "dm-thin-metadata.h" -#include "persistent-data/dm-btree.h" -#include "persistent-data/dm-space-map.h" -#include "persistent-data/dm-space-map-disk.h" -#include "persistent-data/dm-transaction-manager.h" - -#include <linux/list.h> -#include <linux/device-mapper.h> -#include <linux/workqueue.h> - -/*-------------------------------------------------------------------------- - * As far as the metadata goes, there is: - * - * - A superblock in block zero, taking up fewer than 512 bytes for - * atomic writes. - * - * - A space map managing the metadata blocks. - * - * - A space map managing the data blocks. - * - * - A btree mapping our internal thin dev ids onto struct disk_device_details. - * - * - A hierarchical btree, with 2 levels which effectively maps (thin - * dev id, virtual block) -> block_time. Block time is a 64-bit - * field holding the time in the low 24 bits, and block in the top 48 - * bits. - * - * BTrees consist solely of btree_nodes, that fill a block. Some are - * internal nodes, as such their values are a __le64 pointing to other - * nodes. Leaf nodes can store data of any reasonable size (ie. much - * smaller than the block size). The nodes consist of the header, - * followed by an array of keys, followed by an array of values. We have - * to binary search on the keys so they're all held together to help the - * cpu cache. - * - * Space maps have 2 btrees: - * - * - One maps a uint64_t onto a struct index_entry. Which points to a - * bitmap block, and has some details about how many free entries there - * are etc. - * - * - The bitmap blocks have a header (for the checksum). Then the rest - * of the block is pairs of bits. With the meaning being: - * - * 0 - ref count is 0 - * 1 - ref count is 1 - * 2 - ref count is 2 - * 3 - ref count is higher than 2 - * - * - If the count is higher than 2 then the ref count is entered in a - * second btree that directly maps the block_address to a uint32_t ref - * count. - * - * The space map metadata variant doesn't have a bitmaps btree. Instead - * it has one single blocks worth of index_entries. This avoids - * recursive issues with the bitmap btree needing to allocate space in - * order to insert. With a small data block size such as 64k the - * metadata support data devices that are hundreds of terrabytes. - * - * The space maps allocate space linearly from front to back. 
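The bitmap blocks described above store two reference-count bits per data block, with the value 3 meaning "count exceeds 2, consult the overflow btree". A tiny sketch of reading and writing such 2-bit entries in an ordinary byte array; the exact on-disk bit order and block header are not reproduced here.

#include <stdio.h>
#include <stdint.h>

/* Two bits per block: 0, 1, 2 are literal counts, 3 is the overflow marker. */
static unsigned get2(const uint8_t *bits, uint64_t block)
{
	return (bits[block / 4] >> ((block % 4) * 2)) & 3;
}

static void set2(uint8_t *bits, uint64_t block, unsigned count)
{
	unsigned shift = (block % 4) * 2;

	bits[block / 4] &= ~(3u << shift);
	bits[block / 4] |= (count & 3) << shift;
}

int main(void)
{
	uint8_t bits[16] = { 0 };   /* covers 64 blocks */

	set2(bits, 5, 2);
	set2(bits, 6, 3);           /* overflow marker */
	printf("block 5 -> %u, block 6 -> %u\n", get2(bits, 5), get2(bits, 6));
	return 0;
}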
Space that - * is freed in a transaction is never recycled within that transaction. - * To try and avoid fragmenting _free_ space the allocator always goes - * back and fills in gaps. - * - * All metadata io is in THIN_METADATA_BLOCK_SIZE sized/aligned chunks - * from the block manager. - *--------------------------------------------------------------------------*/ - -#define DM_MSG_PREFIX "thin metadata" - -#define THIN_SUPERBLOCK_MAGIC 27022010 -#define THIN_SUPERBLOCK_LOCATION 0 -#define THIN_VERSION 1 -#define THIN_METADATA_CACHE_SIZE 64 -#define SECTOR_TO_BLOCK_SHIFT 3 - -/* This should be plenty */ -#define SPACE_MAP_ROOT_SIZE 128 - -/* - * Little endian on-disk superblock and device details. - */ -struct thin_disk_superblock { - __le32 csum; /* Checksum of superblock except for this field. */ - __le32 flags; - __le64 blocknr; /* This block number, dm_block_t. */ - - __u8 uuid[16]; - __le64 magic; - __le32 version; - __le32 time; - - __le64 trans_id; - - /* - * Root held by userspace transactions. - */ - __le64 held_root; - - __u8 data_space_map_root[SPACE_MAP_ROOT_SIZE]; - __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; - - /* - * 2-level btree mapping (dev_id, (dev block, time)) -> data block - */ - __le64 data_mapping_root; - - /* - * Device detail root mapping dev_id -> device_details - */ - __le64 device_details_root; - - __le32 data_block_size; /* In 512-byte sectors. */ - - __le32 metadata_block_size; /* In 512-byte sectors. */ - __le64 metadata_nr_blocks; - - __le32 compat_flags; - __le32 compat_ro_flags; - __le32 incompat_flags; -} __packed; - -struct disk_device_details { - __le64 mapped_blocks; - __le64 transaction_id; /* When created. */ - __le32 creation_time; - __le32 snapshotted_time; -} __packed; - -struct dm_pool_metadata { - struct hlist_node hash; - - struct block_device *bdev; - struct dm_block_manager *bm; - struct dm_space_map *metadata_sm; - struct dm_space_map *data_sm; - struct dm_transaction_manager *tm; - struct dm_transaction_manager *nb_tm; - - /* - * Two-level btree. - * First level holds thin_dev_t. - * Second level holds mappings. - */ - struct dm_btree_info info; - - /* - * Non-blocking version of the above. - */ - struct dm_btree_info nb_info; - - /* - * Just the top level for deleting whole devices. - */ - struct dm_btree_info tl_info; - - /* - * Just the bottom level for creating new devices. - */ - struct dm_btree_info bl_info; - - /* - * Describes the device details btree. 
- */ - struct dm_btree_info details_info; - - struct rw_semaphore root_lock; - uint32_t time; - int need_commit; - dm_block_t root; - dm_block_t details_root; - struct list_head thin_devices; - uint64_t trans_id; - unsigned long flags; - sector_t data_block_size; -}; - -struct dm_thin_device { - struct list_head list; - struct dm_pool_metadata *pmd; - dm_thin_id id; - - int open_count; - int changed; - uint64_t mapped_blocks; - uint64_t transaction_id; - uint32_t creation_time; - uint32_t snapshotted_time; -}; - -/*---------------------------------------------------------------- - * superblock validator - *--------------------------------------------------------------*/ - -#define SUPERBLOCK_CSUM_XOR 160774 - -static void sb_prepare_for_write(struct dm_block_validator *v, - struct dm_block *b, - size_t block_size) -{ - struct thin_disk_superblock *disk_super = dm_block_data(b); - - disk_super->blocknr = cpu_to_le64(dm_block_location(b)); - disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags, - block_size - sizeof(__le32), - SUPERBLOCK_CSUM_XOR)); -} - -static int sb_check(struct dm_block_validator *v, - struct dm_block *b, - size_t block_size) -{ - struct thin_disk_superblock *disk_super = dm_block_data(b); - __le32 csum_le; - - if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) { - DMERR("sb_check failed: blocknr %llu: " - "wanted %llu", le64_to_cpu(disk_super->blocknr), - (unsigned long long)dm_block_location(b)); - return -ENOTBLK; - } - - if (le64_to_cpu(disk_super->magic) != THIN_SUPERBLOCK_MAGIC) { - DMERR("sb_check failed: magic %llu: " - "wanted %llu", le64_to_cpu(disk_super->magic), - (unsigned long long)THIN_SUPERBLOCK_MAGIC); - return -EILSEQ; - } - - csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags, - block_size - sizeof(__le32), - SUPERBLOCK_CSUM_XOR)); - if (csum_le != disk_super->csum) { - DMERR("sb_check failed: csum %u: wanted %u", - le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum)); - return -EILSEQ; - } - - return 0; -} - -static struct dm_block_validator sb_validator = { - .name = "superblock", - .prepare_for_write = sb_prepare_for_write, - .check = sb_check -}; - -/*---------------------------------------------------------------- - * Methods for the btree value types - *--------------------------------------------------------------*/ - -static uint64_t pack_block_time(dm_block_t b, uint32_t t) -{ - return (b << 24) | t; -} - -static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t) -{ - *b = v >> 24; - *t = v & ((1 << 24) - 1); -} - -static void data_block_inc(void *context, void *value_le) -{ - struct dm_space_map *sm = context; - __le64 v_le; - uint64_t b; - uint32_t t; - - memcpy(&v_le, value_le, sizeof(v_le)); - unpack_block_time(le64_to_cpu(v_le), &b, &t); - dm_sm_inc_block(sm, b); -} - -static void data_block_dec(void *context, void *value_le) -{ - struct dm_space_map *sm = context; - __le64 v_le; - uint64_t b; - uint32_t t; - - memcpy(&v_le, value_le, sizeof(v_le)); - unpack_block_time(le64_to_cpu(v_le), &b, &t); - dm_sm_dec_block(sm, b); -} - -static int data_block_equal(void *context, void *value1_le, void *value2_le) -{ - __le64 v1_le, v2_le; - uint64_t b1, b2; - uint32_t t; - - memcpy(&v1_le, value1_le, sizeof(v1_le)); - memcpy(&v2_le, value2_le, sizeof(v2_le)); - unpack_block_time(le64_to_cpu(v1_le), &b1, &t); - unpack_block_time(le64_to_cpu(v2_le), &b2, &t); - - return b1 == b2; -} - -static void subtree_inc(void *context, void *value) -{ - struct dm_btree_info *info = context; - __le64 root_le; - uint64_t 
root; - - memcpy(&root_le, value, sizeof(root_le)); - root = le64_to_cpu(root_le); - dm_tm_inc(info->tm, root); -} - -static void subtree_dec(void *context, void *value) -{ - struct dm_btree_info *info = context; - __le64 root_le; - uint64_t root; - - memcpy(&root_le, value, sizeof(root_le)); - root = le64_to_cpu(root_le); - if (dm_btree_del(info, root)) - DMERR("btree delete failed\n"); -} - -static int subtree_equal(void *context, void *value1_le, void *value2_le) -{ - __le64 v1_le, v2_le; - memcpy(&v1_le, value1_le, sizeof(v1_le)); - memcpy(&v2_le, value2_le, sizeof(v2_le)); - - return v1_le == v2_le; -} - -/*----------------------------------------------------------------*/ - -static int superblock_all_zeroes(struct dm_block_manager *bm, int *result) -{ - int r; - unsigned i; - struct dm_block *b; - __le64 *data_le, zero = cpu_to_le64(0); - unsigned block_size = dm_bm_block_size(bm) / sizeof(__le64); - - /* - * We can't use a validator here - it may be all zeroes. - */ - r = dm_bm_read_lock(bm, THIN_SUPERBLOCK_LOCATION, NULL, &b); - if (r) - return r; - - data_le = dm_block_data(b); - *result = 1; - for (i = 0; i < block_size; i++) { - if (data_le[i] != zero) { - *result = 0; - break; - } - } - - return dm_bm_unlock(b); -} - -static int init_pmd(struct dm_pool_metadata *pmd, - struct dm_block_manager *bm, - dm_block_t nr_blocks, int create) -{ - int r; - struct dm_space_map *sm, *data_sm; - struct dm_transaction_manager *tm; - struct dm_block *sblock; - - if (create) { - r = dm_tm_create_with_sm(bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &tm, &sm, &sblock); - if (r < 0) { - DMERR("tm_create_with_sm failed"); - return r; - } - - data_sm = dm_sm_disk_create(tm, nr_blocks); - if (IS_ERR(data_sm)) { - DMERR("sm_disk_create failed"); - dm_tm_unlock(tm, sblock); - r = PTR_ERR(data_sm); - goto bad; - } - } else { - struct thin_disk_superblock *disk_super = NULL; - size_t space_map_root_offset = - offsetof(struct thin_disk_superblock, metadata_space_map_root); - - r = dm_tm_open_with_sm(bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, space_map_root_offset, - SPACE_MAP_ROOT_SIZE, &tm, &sm, &sblock); - if (r < 0) { - DMERR("tm_open_with_sm failed"); - return r; - } - - disk_super = dm_block_data(sblock); - data_sm = dm_sm_disk_open(tm, disk_super->data_space_map_root, - sizeof(disk_super->data_space_map_root)); - if (IS_ERR(data_sm)) { - DMERR("sm_disk_open failed"); - r = PTR_ERR(data_sm); - goto bad; - } - } - - - r = dm_tm_unlock(tm, sblock); - if (r < 0) { - DMERR("couldn't unlock superblock"); - goto bad_data_sm; - } - - pmd->bm = bm; - pmd->metadata_sm = sm; - pmd->data_sm = data_sm; - pmd->tm = tm; - pmd->nb_tm = dm_tm_create_non_blocking_clone(tm); - if (!pmd->nb_tm) { - DMERR("could not create clone tm"); - r = -ENOMEM; - goto bad_data_sm; - } - - pmd->info.tm = tm; - pmd->info.levels = 2; - pmd->info.value_type.context = pmd->data_sm; - pmd->info.value_type.size = sizeof(__le64); - pmd->info.value_type.inc = data_block_inc; - pmd->info.value_type.dec = data_block_dec; - pmd->info.value_type.equal = data_block_equal; - - memcpy(&pmd->nb_info, &pmd->info, sizeof(pmd->nb_info)); - pmd->nb_info.tm = pmd->nb_tm; - - pmd->tl_info.tm = tm; - pmd->tl_info.levels = 1; - pmd->tl_info.value_type.context = &pmd->info; - pmd->tl_info.value_type.size = sizeof(__le64); - pmd->tl_info.value_type.inc = subtree_inc; - pmd->tl_info.value_type.dec = subtree_dec; - pmd->tl_info.value_type.equal = subtree_equal; - - pmd->bl_info.tm = tm; - pmd->bl_info.levels = 1; - pmd->bl_info.value_type.context = 
pmd->data_sm; - pmd->bl_info.value_type.size = sizeof(__le64); - pmd->bl_info.value_type.inc = data_block_inc; - pmd->bl_info.value_type.dec = data_block_dec; - pmd->bl_info.value_type.equal = data_block_equal; - - pmd->details_info.tm = tm; - pmd->details_info.levels = 1; - pmd->details_info.value_type.context = NULL; - pmd->details_info.value_type.size = sizeof(struct disk_device_details); - pmd->details_info.value_type.inc = NULL; - pmd->details_info.value_type.dec = NULL; - pmd->details_info.value_type.equal = NULL; - - pmd->root = 0; - - init_rwsem(&pmd->root_lock); - pmd->time = 0; - pmd->need_commit = 0; - pmd->details_root = 0; - pmd->trans_id = 0; - pmd->flags = 0; - INIT_LIST_HEAD(&pmd->thin_devices); - - return 0; - -bad_data_sm: - dm_sm_destroy(data_sm); -bad: - dm_tm_destroy(tm); - dm_sm_destroy(sm); - - return r; -} - -static int __begin_transaction(struct dm_pool_metadata *pmd) -{ - int r; - u32 features; - struct thin_disk_superblock *disk_super; - struct dm_block *sblock; - - /* - * __maybe_commit_transaction() resets these - */ - WARN_ON(pmd->need_commit); - - /* - * We re-read the superblock every time. Shouldn't need to do this - * really. - */ - r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); - if (r) - return r; - - disk_super = dm_block_data(sblock); - pmd->time = le32_to_cpu(disk_super->time); - pmd->root = le64_to_cpu(disk_super->data_mapping_root); - pmd->details_root = le64_to_cpu(disk_super->device_details_root); - pmd->trans_id = le64_to_cpu(disk_super->trans_id); - pmd->flags = le32_to_cpu(disk_super->flags); - pmd->data_block_size = le32_to_cpu(disk_super->data_block_size); - - features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP; - if (features) { - DMERR("could not access metadata due to " - "unsupported optional features (%lx).", - (unsigned long)features); - r = -EINVAL; - goto out; - } - - /* - * Check for read-only metadata to skip the following RDWR checks. - */ - if (get_disk_ro(pmd->bdev->bd_disk)) - goto out; - - features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP; - if (features) { - DMERR("could not access metadata RDWR due to " - "unsupported optional features (%lx).", - (unsigned long)features); - r = -EINVAL; - } - -out: - dm_bm_unlock(sblock); - return r; -} - -static int __write_changed_details(struct dm_pool_metadata *pmd) -{ - int r; - struct dm_thin_device *td, *tmp; - struct disk_device_details details; - uint64_t key; - - list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) { - if (!td->changed) - continue; - - key = td->id; - - details.mapped_blocks = cpu_to_le64(td->mapped_blocks); - details.transaction_id = cpu_to_le64(td->transaction_id); - details.creation_time = cpu_to_le32(td->creation_time); - details.snapshotted_time = cpu_to_le32(td->snapshotted_time); - __dm_bless_for_disk(&details); - - r = dm_btree_insert(&pmd->details_info, pmd->details_root, - &key, &details, &pmd->details_root); - if (r) - return r; - - if (td->open_count) - td->changed = 0; - else { - list_del(&td->list); - kfree(td); - } - - pmd->need_commit = 1; - } - - return 0; -} - -static int __commit_transaction(struct dm_pool_metadata *pmd) -{ - /* - * FIXME: Associated pool should be made read-only on failure. - */ - int r; - size_t metadata_len, data_len; - struct thin_disk_superblock *disk_super; - struct dm_block *sblock; - - /* - * We need to know if the thin_disk_superblock exceeds a 512-byte sector. 
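The mapping btree values handled by data_block_inc()/data_block_dec() above are packed (block, time) pairs: the timestamp lives in the low 24 bits and the data block number in the bits above it. A round-trip sketch of that encoding:

#include <stdio.h>
#include <stdint.h>

/* Pack a data block number and a 24-bit timestamp into one 64-bit value,
 * as pack_block_time()/unpack_block_time() do. */
static uint64_t pack_block_time(uint64_t b, uint32_t t)
{
	return (b << 24) | t;
}

static void unpack_block_time(uint64_t v, uint64_t *b, uint32_t *t)
{
	*b = v >> 24;
	*t = v & ((1u << 24) - 1);
}

int main(void)
{
	uint64_t b;
	uint32_t t;

	unpack_block_time(pack_block_time(123456, 42), &b, &t);
	printf("block=%llu time=%u\n", (unsigned long long)b, t);   /* 123456 42 */
	return 0;
}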
- */ - BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512); - - r = __write_changed_details(pmd); - if (r < 0) - goto out; - - if (!pmd->need_commit) - goto out; - - r = dm_sm_commit(pmd->data_sm); - if (r < 0) - goto out; - - r = dm_tm_pre_commit(pmd->tm); - if (r < 0) - goto out; - - r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); - if (r < 0) - goto out; - - r = dm_sm_root_size(pmd->data_sm, &data_len); - if (r < 0) - goto out; - - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); - if (r) - goto out; - - disk_super = dm_block_data(sblock); - disk_super->time = cpu_to_le32(pmd->time); - disk_super->data_mapping_root = cpu_to_le64(pmd->root); - disk_super->device_details_root = cpu_to_le64(pmd->details_root); - disk_super->trans_id = cpu_to_le64(pmd->trans_id); - disk_super->flags = cpu_to_le32(pmd->flags); - - r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, - metadata_len); - if (r < 0) - goto out_locked; - - r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, - data_len); - if (r < 0) - goto out_locked; - - r = dm_tm_commit(pmd->tm, sblock); - if (!r) - pmd->need_commit = 0; - -out: - return r; - -out_locked: - dm_bm_unlock(sblock); - return r; -} - -struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, - sector_t data_block_size) -{ - int r; - struct thin_disk_superblock *disk_super; - struct dm_pool_metadata *pmd; - sector_t bdev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; - struct dm_block_manager *bm; - int create; - struct dm_block *sblock; - - pmd = kmalloc(sizeof(*pmd), GFP_KERNEL); - if (!pmd) { - DMERR("could not allocate metadata struct"); - return ERR_PTR(-ENOMEM); - } - - /* - * Max hex locks: - * 3 for btree insert + - * 2 for btree lookup used within space map - */ - bm = dm_block_manager_create(bdev, THIN_METADATA_BLOCK_SIZE, - THIN_METADATA_CACHE_SIZE, 5); - if (!bm) { - DMERR("could not create block manager"); - kfree(pmd); - return ERR_PTR(-ENOMEM); - } - - r = superblock_all_zeroes(bm, &create); - if (r) { - dm_block_manager_destroy(bm); - kfree(pmd); - return ERR_PTR(r); - } - - - r = init_pmd(pmd, bm, 0, create); - if (r) { - dm_block_manager_destroy(bm); - kfree(pmd); - return ERR_PTR(r); - } - pmd->bdev = bdev; - - if (!create) { - r = __begin_transaction(pmd); - if (r < 0) - goto bad; - return pmd; - } - - /* - * Create. 
- */ - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); - if (r) - goto bad; - - if (bdev_size > THIN_METADATA_MAX_SECTORS) - bdev_size = THIN_METADATA_MAX_SECTORS; - - disk_super = dm_block_data(sblock); - disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC); - disk_super->version = cpu_to_le32(THIN_VERSION); - disk_super->time = 0; - disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); - disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); - disk_super->data_block_size = cpu_to_le32(data_block_size); - - r = dm_bm_unlock(sblock); - if (r < 0) - goto bad; - - r = dm_btree_empty(&pmd->info, &pmd->root); - if (r < 0) - goto bad; - - r = dm_btree_empty(&pmd->details_info, &pmd->details_root); - if (r < 0) { - DMERR("couldn't create devices root"); - goto bad; - } - - pmd->flags = 0; - pmd->need_commit = 1; - r = dm_pool_commit_metadata(pmd); - if (r < 0) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - goto bad; - } - - return pmd; - -bad: - if (dm_pool_metadata_close(pmd) < 0) - DMWARN("%s: dm_pool_metadata_close() failed.", __func__); - return ERR_PTR(r); -} - -int dm_pool_metadata_close(struct dm_pool_metadata *pmd) -{ - int r; - unsigned open_devices = 0; - struct dm_thin_device *td, *tmp; - - down_read(&pmd->root_lock); - list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) { - if (td->open_count) - open_devices++; - else { - list_del(&td->list); - kfree(td); - } - } - up_read(&pmd->root_lock); - - if (open_devices) { - DMERR("attempt to close pmd when %u device(s) are still open", - open_devices); - return -EBUSY; - } - - r = __commit_transaction(pmd); - if (r < 0) - DMWARN("%s: __commit_transaction() failed, error = %d", - __func__, r); - - dm_tm_destroy(pmd->tm); - dm_tm_destroy(pmd->nb_tm); - dm_block_manager_destroy(pmd->bm); - dm_sm_destroy(pmd->metadata_sm); - dm_sm_destroy(pmd->data_sm); - kfree(pmd); - - return 0; -} - -/* - * __open_device: Returns @td corresponding to device with id @dev, - * creating it if @create is set and incrementing @td->open_count. - * On failure, @td is undefined. - */ -static int __open_device(struct dm_pool_metadata *pmd, - dm_thin_id dev, int create, - struct dm_thin_device **td) -{ - int r, changed = 0; - struct dm_thin_device *td2; - uint64_t key = dev; - struct disk_device_details details_le; - - /* - * If the device is already open, return it. - */ - list_for_each_entry(td2, &pmd->thin_devices, list) - if (td2->id == dev) { - /* - * May not create an already-open device. - */ - if (create) - return -EEXIST; - - td2->open_count++; - *td = td2; - return 0; - } - - /* - * Check the device exists. - */ - r = dm_btree_lookup(&pmd->details_info, pmd->details_root, - &key, &details_le); - if (r) { - if (r != -ENODATA || !create) - return r; - - /* - * Create new device. 
- */ - changed = 1; - details_le.mapped_blocks = 0; - details_le.transaction_id = cpu_to_le64(pmd->trans_id); - details_le.creation_time = cpu_to_le32(pmd->time); - details_le.snapshotted_time = cpu_to_le32(pmd->time); - } - - *td = kmalloc(sizeof(**td), GFP_NOIO); - if (!*td) - return -ENOMEM; - - (*td)->pmd = pmd; - (*td)->id = dev; - (*td)->open_count = 1; - (*td)->changed = changed; - (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks); - (*td)->transaction_id = le64_to_cpu(details_le.transaction_id); - (*td)->creation_time = le32_to_cpu(details_le.creation_time); - (*td)->snapshotted_time = le32_to_cpu(details_le.snapshotted_time); - - list_add(&(*td)->list, &pmd->thin_devices); - - return 0; -} - -static void __close_device(struct dm_thin_device *td) -{ - --td->open_count; -} - -static int __create_thin(struct dm_pool_metadata *pmd, - dm_thin_id dev) -{ - int r; - dm_block_t dev_root; - uint64_t key = dev; - struct disk_device_details details_le; - struct dm_thin_device *td; - __le64 value; - - r = dm_btree_lookup(&pmd->details_info, pmd->details_root, - &key, &details_le); - if (!r) - return -EEXIST; - - /* - * Create an empty btree for the mappings. - */ - r = dm_btree_empty(&pmd->bl_info, &dev_root); - if (r) - return r; - - /* - * Insert it into the main mapping tree. - */ - value = cpu_to_le64(dev_root); - __dm_bless_for_disk(&value); - r = dm_btree_insert(&pmd->tl_info, pmd->root, &key, &value, &pmd->root); - if (r) { - dm_btree_del(&pmd->bl_info, dev_root); - return r; - } - - r = __open_device(pmd, dev, 1, &td); - if (r) { - dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root); - dm_btree_del(&pmd->bl_info, dev_root); - return r; - } - __close_device(td); - - return r; -} - -int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev) -{ - int r; - - down_write(&pmd->root_lock); - r = __create_thin(pmd, dev); - up_write(&pmd->root_lock); - - return r; -} - -static int __set_snapshot_details(struct dm_pool_metadata *pmd, - struct dm_thin_device *snap, - dm_thin_id origin, uint32_t time) -{ - int r; - struct dm_thin_device *td; - - r = __open_device(pmd, origin, 0, &td); - if (r) - return r; - - td->changed = 1; - td->snapshotted_time = time; - - snap->mapped_blocks = td->mapped_blocks; - snap->snapshotted_time = time; - __close_device(td); - - return 0; -} - -static int __create_snap(struct dm_pool_metadata *pmd, - dm_thin_id dev, dm_thin_id origin) -{ - int r; - dm_block_t origin_root; - uint64_t key = origin, dev_key = dev; - struct dm_thin_device *td; - struct disk_device_details details_le; - __le64 value; - - /* check this device is unused */ - r = dm_btree_lookup(&pmd->details_info, pmd->details_root, - &dev_key, &details_le); - if (!r) - return -EEXIST; - - /* find the mapping tree for the origin */ - r = dm_btree_lookup(&pmd->tl_info, pmd->root, &key, &value); - if (r) - return r; - origin_root = le64_to_cpu(value); - - /* clone the origin, an inc will do */ - dm_tm_inc(pmd->tm, origin_root); - - /* insert into the main mapping tree */ - value = cpu_to_le64(origin_root); - __dm_bless_for_disk(&value); - key = dev; - r = dm_btree_insert(&pmd->tl_info, pmd->root, &key, &value, &pmd->root); - if (r) { - dm_tm_dec(pmd->tm, origin_root); - return r; - } - - pmd->time++; - - r = __open_device(pmd, dev, 1, &td); - if (r) - goto bad; - - r = __set_snapshot_details(pmd, td, origin, pmd->time); - __close_device(td); - - if (r) - goto bad; - - return 0; - -bad: - dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root); - 
dm_btree_remove(&pmd->details_info, pmd->details_root, - &key, &pmd->details_root); - return r; -} - -int dm_pool_create_snap(struct dm_pool_metadata *pmd, - dm_thin_id dev, - dm_thin_id origin) -{ - int r; - - down_write(&pmd->root_lock); - r = __create_snap(pmd, dev, origin); - up_write(&pmd->root_lock); - - return r; -} - -static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev) -{ - int r; - uint64_t key = dev; - struct dm_thin_device *td; - - /* TODO: failure should mark the transaction invalid */ - r = __open_device(pmd, dev, 0, &td); - if (r) - return r; - - if (td->open_count > 1) { - __close_device(td); - return -EBUSY; - } - - list_del(&td->list); - kfree(td); - r = dm_btree_remove(&pmd->details_info, pmd->details_root, - &key, &pmd->details_root); - if (r) - return r; - - r = dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root); - if (r) - return r; - - pmd->need_commit = 1; - - return 0; -} - -int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, - dm_thin_id dev) -{ - int r; - - down_write(&pmd->root_lock); - r = __delete_device(pmd, dev); - up_write(&pmd->root_lock); - - return r; -} - -int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, - uint64_t current_id, - uint64_t new_id) -{ - down_write(&pmd->root_lock); - if (pmd->trans_id != current_id) { - up_write(&pmd->root_lock); - DMERR("mismatched transaction id"); - return -EINVAL; - } - - pmd->trans_id = new_id; - pmd->need_commit = 1; - up_write(&pmd->root_lock); - - return 0; -} - -int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd, - uint64_t *result) -{ - down_read(&pmd->root_lock); - *result = pmd->trans_id; - up_read(&pmd->root_lock); - - return 0; -} - -static int __get_held_metadata_root(struct dm_pool_metadata *pmd, - dm_block_t *result) -{ - int r; - struct thin_disk_superblock *disk_super; - struct dm_block *sblock; - - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); - if (r) - return r; - - disk_super = dm_block_data(sblock); - *result = le64_to_cpu(disk_super->held_root); - - return dm_bm_unlock(sblock); -} - -int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd, - dm_block_t *result) -{ - int r; - - down_read(&pmd->root_lock); - r = __get_held_metadata_root(pmd, result); - up_read(&pmd->root_lock); - - return r; -} - -int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev, - struct dm_thin_device **td) -{ - int r; - - down_write(&pmd->root_lock); - r = __open_device(pmd, dev, 0, td); - up_write(&pmd->root_lock); - - return r; -} - -int dm_pool_close_thin_device(struct dm_thin_device *td) -{ - down_write(&td->pmd->root_lock); - __close_device(td); - up_write(&td->pmd->root_lock); - - return 0; -} - -dm_thin_id dm_thin_dev_id(struct dm_thin_device *td) -{ - return td->id; -} - -static int __snapshotted_since(struct dm_thin_device *td, uint32_t time) -{ - return td->snapshotted_time > time; -} - -int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, - int can_block, struct dm_thin_lookup_result *result) -{ - int r; - uint64_t block_time = 0; - __le64 value; - struct dm_pool_metadata *pmd = td->pmd; - dm_block_t keys[2] = { td->id, block }; - - if (can_block) { - down_read(&pmd->root_lock); - r = dm_btree_lookup(&pmd->info, pmd->root, keys, &value); - if (!r) - block_time = le64_to_cpu(value); - up_read(&pmd->root_lock); - - } else if (down_read_trylock(&pmd->root_lock)) { - r = dm_btree_lookup(&pmd->nb_info, pmd->root, keys, &value); - if (!r) - block_time = 
le64_to_cpu(value); - up_read(&pmd->root_lock); - - } else - return -EWOULDBLOCK; - - if (!r) { - dm_block_t exception_block; - uint32_t exception_time; - unpack_block_time(block_time, &exception_block, - &exception_time); - result->block = exception_block; - result->shared = __snapshotted_since(td, exception_time); - } - - return r; -} - -static int __insert(struct dm_thin_device *td, dm_block_t block, - dm_block_t data_block) -{ - int r, inserted; - __le64 value; - struct dm_pool_metadata *pmd = td->pmd; - dm_block_t keys[2] = { td->id, block }; - - pmd->need_commit = 1; - value = cpu_to_le64(pack_block_time(data_block, pmd->time)); - __dm_bless_for_disk(&value); - - r = dm_btree_insert_notify(&pmd->info, pmd->root, keys, &value, - &pmd->root, &inserted); - if (r) - return r; - - if (inserted) { - td->mapped_blocks++; - td->changed = 1; - } - - return 0; -} - -int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block, - dm_block_t data_block) -{ - int r; - - down_write(&td->pmd->root_lock); - r = __insert(td, block, data_block); - up_write(&td->pmd->root_lock); - - return r; -} - -static int __remove(struct dm_thin_device *td, dm_block_t block) -{ - int r; - struct dm_pool_metadata *pmd = td->pmd; - dm_block_t keys[2] = { td->id, block }; - - r = dm_btree_remove(&pmd->info, pmd->root, keys, &pmd->root); - if (r) - return r; - - td->mapped_blocks--; - td->changed = 1; - pmd->need_commit = 1; - - return 0; -} - -int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) -{ - int r; - - down_write(&td->pmd->root_lock); - r = __remove(td, block); - up_write(&td->pmd->root_lock); - - return r; -} - -int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) -{ - int r; - - down_write(&pmd->root_lock); - - r = dm_sm_new_block(pmd->data_sm, result); - pmd->need_commit = 1; - - up_write(&pmd->root_lock); - - return r; -} - -int dm_pool_commit_metadata(struct dm_pool_metadata *pmd) -{ - int r; - - down_write(&pmd->root_lock); - - r = __commit_transaction(pmd); - if (r <= 0) - goto out; - - /* - * Open the next transaction. 
- */ - r = __begin_transaction(pmd); -out: - up_write(&pmd->root_lock); - return r; -} - -int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) -{ - int r; - - down_read(&pmd->root_lock); - r = dm_sm_get_nr_free(pmd->data_sm, result); - up_read(&pmd->root_lock); - - return r; -} - -int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, - dm_block_t *result) -{ - int r; - - down_read(&pmd->root_lock); - r = dm_sm_get_nr_free(pmd->metadata_sm, result); - up_read(&pmd->root_lock); - - return r; -} - -int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, - dm_block_t *result) -{ - int r; - - down_read(&pmd->root_lock); - r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); - up_read(&pmd->root_lock); - - return r; -} - -int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result) -{ - down_read(&pmd->root_lock); - *result = pmd->data_block_size; - up_read(&pmd->root_lock); - - return 0; -} - -int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) -{ - int r; - - down_read(&pmd->root_lock); - r = dm_sm_get_nr_blocks(pmd->data_sm, result); - up_read(&pmd->root_lock); - - return r; -} - -int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result) -{ - struct dm_pool_metadata *pmd = td->pmd; - - down_read(&pmd->root_lock); - *result = td->mapped_blocks; - up_read(&pmd->root_lock); - - return 0; -} - -static int __highest_block(struct dm_thin_device *td, dm_block_t *result) -{ - int r; - __le64 value_le; - dm_block_t thin_root; - struct dm_pool_metadata *pmd = td->pmd; - - r = dm_btree_lookup(&pmd->tl_info, pmd->root, &td->id, &value_le); - if (r) - return r; - - thin_root = le64_to_cpu(value_le); - - return dm_btree_find_highest_key(&pmd->bl_info, thin_root, result); -} - -int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, - dm_block_t *result) -{ - int r; - struct dm_pool_metadata *pmd = td->pmd; - - down_read(&pmd->root_lock); - r = __highest_block(td, result); - up_read(&pmd->root_lock); - - return r; -} - -static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) -{ - int r; - dm_block_t old_count; - - r = dm_sm_get_nr_blocks(pmd->data_sm, &old_count); - if (r) - return r; - - if (new_count == old_count) - return 0; - - if (new_count < old_count) { - DMERR("cannot reduce size of data device"); - return -EINVAL; - } - - r = dm_sm_extend(pmd->data_sm, new_count - old_count); - if (!r) - pmd->need_commit = 1; - - return r; -} - -int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) -{ - int r; - - down_write(&pmd->root_lock); - r = __resize_data_dev(pmd, new_count); - up_write(&pmd->root_lock); - - return r; -} diff --git a/ANDROID_3.4.5/drivers/md/dm-thin-metadata.h b/ANDROID_3.4.5/drivers/md/dm-thin-metadata.h deleted file mode 100644 index ed4725e6..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-thin-metadata.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (C) 2010-2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#ifndef DM_THIN_METADATA_H -#define DM_THIN_METADATA_H - -#include "persistent-data/dm-block-manager.h" - -#define THIN_METADATA_BLOCK_SIZE 4096 - -/* - * The metadata device is currently limited in size. - * - * We have one block of index, which can hold 255 index entries. Each - * index entry contains allocation info about 16k metadata blocks. 
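For reference, the limit defined just below works out to 255 index entries * 16384 metadata blocks per entry * 8 sectors per 4KB metadata block = 33,423,360 sectors, i.e. just under 16 GiB of metadata, which is why the warning threshold that follows is 16GB. A minimal standalone sketch of that arithmetic (assuming the kernel's usual 512-byte sector, SECTOR_SHIFT == 9):

#include <stdio.h>

#define SECTOR_SHIFT 9
#define THIN_METADATA_BLOCK_SIZE 4096

int main(void)
{
	// 255 index entries * 16k metadata blocks per entry * 8 sectors per block
	unsigned long long max_sectors =
		255ULL * (1 << 14) * (THIN_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT));

	printf("max sectors = %llu (~%.1f GiB)\n", max_sectors,
	       (double)(max_sectors << SECTOR_SHIFT) / (double)(1ULL << 30));
	return 0;
}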
- */ -#define THIN_METADATA_MAX_SECTORS (255 * (1 << 14) * (THIN_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT))) - -/* - * A metadata device larger than 16GB triggers a warning. - */ -#define THIN_METADATA_MAX_SECTORS_WARNING (16 * (1024 * 1024 * 1024 >> SECTOR_SHIFT)) - -/*----------------------------------------------------------------*/ - -struct dm_pool_metadata; -struct dm_thin_device; - -/* - * Device identifier - */ -typedef uint64_t dm_thin_id; - -/* - * Reopens or creates a new, empty metadata volume. - */ -struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, - sector_t data_block_size); - -int dm_pool_metadata_close(struct dm_pool_metadata *pmd); - -/* - * Compat feature flags. Any incompat flags beyond the ones - * specified below will prevent use of the thin metadata. - */ -#define THIN_FEATURE_COMPAT_SUPP 0UL -#define THIN_FEATURE_COMPAT_RO_SUPP 0UL -#define THIN_FEATURE_INCOMPAT_SUPP 0UL - -/* - * Device creation/deletion. - */ -int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev); - -/* - * An internal snapshot. - * - * You can only snapshot a quiesced origin i.e. one that is either - * suspended or not instanced at all. - */ -int dm_pool_create_snap(struct dm_pool_metadata *pmd, dm_thin_id dev, - dm_thin_id origin); - -/* - * Deletes a virtual device from the metadata. It _is_ safe to call this - * when that device is open. Operations on that device will just start - * failing. You still need to call close() on the device. - */ -int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, - dm_thin_id dev); - -/* - * Commits _all_ metadata changes: device creation, deletion, mapping - * updates. - */ -int dm_pool_commit_metadata(struct dm_pool_metadata *pmd); - -/* - * Set/get userspace transaction id. - */ -int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, - uint64_t current_id, - uint64_t new_id); - -int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd, - uint64_t *result); - -/* - * Hold/get root for userspace transaction. - */ -int dm_pool_hold_metadata_root(struct dm_pool_metadata *pmd); - -int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd, - dm_block_t *result); - -/* - * Actions on a single virtual device. - */ - -/* - * Opening the same device more than once will fail with -EBUSY. - */ -int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev, - struct dm_thin_device **td); - -int dm_pool_close_thin_device(struct dm_thin_device *td); - -dm_thin_id dm_thin_dev_id(struct dm_thin_device *td); - -struct dm_thin_lookup_result { - dm_block_t block; - int shared; -}; - -/* - * Returns: - * -EWOULDBLOCK iff @can_block is set and would block. - * -ENODATA iff that mapping is not present. - * 0 success - */ -int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, - int can_block, struct dm_thin_lookup_result *result); - -/* - * Obtain an unused block. - */ -int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result); - -/* - * Insert or remove block. - */ -int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block, - dm_block_t data_block); - -int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block); - -/* - * Queries. 
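Taken together, the calls declared above are driven in a fairly fixed order by the pool target: open the metadata, create or open a thin device, allocate and insert data blocks, then commit. The sketch below only illustrates that order using the declarations in this header; it is not code from the driver, error unwinding is trimmed, and the device id 0 and block number 0 are made up:

static int example_provision_first_block(struct block_device *metadata_bdev,
					 sector_t data_block_size)
{
	struct dm_pool_metadata *pmd;
	struct dm_thin_device *td;
	dm_block_t data_block;
	int r;

	pmd = dm_pool_metadata_open(metadata_bdev, data_block_size);
	if (IS_ERR(pmd))
		return PTR_ERR(pmd);

	r = dm_pool_create_thin(pmd, 0);		// illustrative device id
	if (!r)
		r = dm_pool_open_thin_device(pmd, 0, &td);
	if (!r) {
		r = dm_pool_alloc_data_block(pmd, &data_block);
		if (!r)
			r = dm_thin_insert_block(td, 0, data_block);
		if (!r)
			r = dm_pool_commit_metadata(pmd);	// nothing is durable before this
		dm_pool_close_thin_device(td);
	}

	dm_pool_metadata_close(pmd);
	return r;
}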
- */ -int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, - dm_block_t *highest_mapped); - -int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result); - -int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, - dm_block_t *result); - -int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, - dm_block_t *result); - -int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, - dm_block_t *result); - -int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result); - -int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); - -/* - * Returns -ENOSPC if the new size is too small and already allocated - * blocks would be lost. - */ -int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size); - -/*----------------------------------------------------------------*/ - -#endif diff --git a/ANDROID_3.4.5/drivers/md/dm-thin.c b/ANDROID_3.4.5/drivers/md/dm-thin.c deleted file mode 100644 index eb3d138f..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-thin.c +++ /dev/null @@ -1,2774 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat UK. - * - * This file is released under the GPL. - */ - -#include "dm-thin-metadata.h" - -#include <linux/device-mapper.h> -#include <linux/dm-io.h> -#include <linux/dm-kcopyd.h> -#include <linux/list.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/slab.h> - -#define DM_MSG_PREFIX "thin" - -/* - * Tunable constants - */ -#define ENDIO_HOOK_POOL_SIZE 10240 -#define DEFERRED_SET_SIZE 64 -#define MAPPING_POOL_SIZE 1024 -#define PRISON_CELLS 1024 -#define COMMIT_PERIOD HZ - -/* - * The block size of the device holding pool data must be - * between 64KB and 1GB. - */ -#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (64 * 1024 >> SECTOR_SHIFT) -#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT) - -/* - * Device id is restricted to 24 bits. - */ -#define MAX_DEV_ID ((1 << 24) - 1) - -/* - * How do we handle breaking sharing of data blocks? - * ================================================= - * - * We use a standard copy-on-write btree to store the mappings for the - * devices (note I'm talking about copy-on-write of the metadata here, not - * the data). When you take an internal snapshot you clone the root node - * of the origin btree. After this there is no concept of an origin or a - * snapshot. They are just two device trees that happen to point to the - * same data blocks. - * - * When we get a write in we decide if it's to a shared data block using - * some timestamp magic. If it is, we have to break sharing. - * - * Let's say we write to a shared block in what was the origin. The - * steps are: - * - * i) plug io further to this physical block. (see bio_prison code). - * - * ii) quiesce any read io to that shared data block. Obviously - * including all devices that share this block. (see deferred_set code) - * - * iii) copy the data block to a newly allocate block. This step can be - * missed out if the io covers the block. (schedule_copy). - * - * iv) insert the new mapping into the origin's btree - * (process_prepared_mapping). This act of inserting breaks some - * sharing of btree nodes between the two devices. Breaking sharing only - * effects the btree of that specific device. Btrees for the other - * devices that share the block never change. The btree for the origin - * device as it was after the last commit is untouched, ie. we're using - * persistent data structures in the functional programming sense. 
- * - * v) unplug io to this physical block, including the io that triggered - * the breaking of sharing. - * - * Steps (ii) and (iii) occur in parallel. - * - * The metadata _doesn't_ need to be committed before the io continues. We - * get away with this because the io is always written to a _new_ block. - * If there's a crash, then: - * - * - The origin mapping will point to the old origin block (the shared - * one). This will contain the data as it was before the io that triggered - * the breaking of sharing came in. - * - * - The snap mapping still points to the old block. As it would after - * the commit. - * - * The downside of this scheme is the timestamp magic isn't perfect, and - * will continue to think that data block in the snapshot device is shared - * even after the write to the origin has broken sharing. I suspect data - * blocks will typically be shared by many different devices, so we're - * breaking sharing n + 1 times, rather than n, where n is the number of - * devices that reference this data block. At the moment I think the - * benefits far, far outweigh the disadvantages. - */ - -/*----------------------------------------------------------------*/ - -/* - * Sometimes we can't deal with a bio straight away. We put them in prison - * where they can't cause any mischief. Bios are put in a cell identified - * by a key, multiple bios can be in the same cell. When the cell is - * subsequently unlocked the bios become available. - */ -struct bio_prison; - -struct cell_key { - int virtual; - dm_thin_id dev; - dm_block_t block; -}; - -struct cell { - struct hlist_node list; - struct bio_prison *prison; - struct cell_key key; - struct bio *holder; - struct bio_list bios; -}; - -struct bio_prison { - spinlock_t lock; - mempool_t *cell_pool; - - unsigned nr_buckets; - unsigned hash_mask; - struct hlist_head *cells; -}; - -static uint32_t calc_nr_buckets(unsigned nr_cells) -{ - uint32_t n = 128; - - nr_cells /= 4; - nr_cells = min(nr_cells, 8192u); - - while (n < nr_cells) - n <<= 1; - - return n; -} - -/* - * @nr_cells should be the number of cells you want in use _concurrently_. - * Don't confuse it with the number of distinct keys. 
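For example, the pool target below creates its prison with PRISON_CELLS (1024) cells, so calc_nr_buckets() divides that by four, caps the result at 8192 and rounds up to a power of two: n doubles from the initial 128 to 256, giving 256 hash buckets.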
- */ -static struct bio_prison *prison_create(unsigned nr_cells) -{ - unsigned i; - uint32_t nr_buckets = calc_nr_buckets(nr_cells); - size_t len = sizeof(struct bio_prison) + - (sizeof(struct hlist_head) * nr_buckets); - struct bio_prison *prison = kmalloc(len, GFP_KERNEL); - - if (!prison) - return NULL; - - spin_lock_init(&prison->lock); - prison->cell_pool = mempool_create_kmalloc_pool(nr_cells, - sizeof(struct cell)); - if (!prison->cell_pool) { - kfree(prison); - return NULL; - } - - prison->nr_buckets = nr_buckets; - prison->hash_mask = nr_buckets - 1; - prison->cells = (struct hlist_head *) (prison + 1); - for (i = 0; i < nr_buckets; i++) - INIT_HLIST_HEAD(prison->cells + i); - - return prison; -} - -static void prison_destroy(struct bio_prison *prison) -{ - mempool_destroy(prison->cell_pool); - kfree(prison); -} - -static uint32_t hash_key(struct bio_prison *prison, struct cell_key *key) -{ - const unsigned long BIG_PRIME = 4294967291UL; - uint64_t hash = key->block * BIG_PRIME; - - return (uint32_t) (hash & prison->hash_mask); -} - -static int keys_equal(struct cell_key *lhs, struct cell_key *rhs) -{ - return (lhs->virtual == rhs->virtual) && - (lhs->dev == rhs->dev) && - (lhs->block == rhs->block); -} - -static struct cell *__search_bucket(struct hlist_head *bucket, - struct cell_key *key) -{ - struct cell *cell; - struct hlist_node *tmp; - - hlist_for_each_entry(cell, tmp, bucket, list) - if (keys_equal(&cell->key, key)) - return cell; - - return NULL; -} - -/* - * This may block if a new cell needs allocating. You must ensure that - * cells will be unlocked even if the calling thread is blocked. - * - * Returns 1 if the cell was already held, 0 if @inmate is the new holder. - */ -static int bio_detain(struct bio_prison *prison, struct cell_key *key, - struct bio *inmate, struct cell **ref) -{ - int r = 1; - unsigned long flags; - uint32_t hash = hash_key(prison, key); - struct cell *cell, *cell2; - - BUG_ON(hash > prison->nr_buckets); - - spin_lock_irqsave(&prison->lock, flags); - - cell = __search_bucket(prison->cells + hash, key); - if (cell) { - bio_list_add(&cell->bios, inmate); - goto out; - } - - /* - * Allocate a new cell - */ - spin_unlock_irqrestore(&prison->lock, flags); - cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO); - spin_lock_irqsave(&prison->lock, flags); - - /* - * We've been unlocked, so we have to double check that - * nobody else has inserted this cell in the meantime. - */ - cell = __search_bucket(prison->cells + hash, key); - if (cell) { - mempool_free(cell2, prison->cell_pool); - bio_list_add(&cell->bios, inmate); - goto out; - } - - /* - * Use new cell. 
- */ - cell = cell2; - - cell->prison = prison; - memcpy(&cell->key, key, sizeof(cell->key)); - cell->holder = inmate; - bio_list_init(&cell->bios); - hlist_add_head(&cell->list, prison->cells + hash); - - r = 0; - -out: - spin_unlock_irqrestore(&prison->lock, flags); - - *ref = cell; - - return r; -} - -/* - * @inmates must have been initialised prior to this call - */ -static void __cell_release(struct cell *cell, struct bio_list *inmates) -{ - struct bio_prison *prison = cell->prison; - - hlist_del(&cell->list); - - if (inmates) { - bio_list_add(inmates, cell->holder); - bio_list_merge(inmates, &cell->bios); - } - - mempool_free(cell, prison->cell_pool); -} - -static void cell_release(struct cell *cell, struct bio_list *bios) -{ - unsigned long flags; - struct bio_prison *prison = cell->prison; - - spin_lock_irqsave(&prison->lock, flags); - __cell_release(cell, bios); - spin_unlock_irqrestore(&prison->lock, flags); -} - -/* - * There are a couple of places where we put a bio into a cell briefly - * before taking it out again. In these situations we know that no other - * bio may be in the cell. This function releases the cell, and also does - * a sanity check. - */ -static void __cell_release_singleton(struct cell *cell, struct bio *bio) -{ - BUG_ON(cell->holder != bio); - BUG_ON(!bio_list_empty(&cell->bios)); - - __cell_release(cell, NULL); -} - -static void cell_release_singleton(struct cell *cell, struct bio *bio) -{ - unsigned long flags; - struct bio_prison *prison = cell->prison; - - spin_lock_irqsave(&prison->lock, flags); - __cell_release_singleton(cell, bio); - spin_unlock_irqrestore(&prison->lock, flags); -} - -/* - * Sometimes we don't want the holder, just the additional bios. - */ -static void __cell_release_no_holder(struct cell *cell, struct bio_list *inmates) -{ - struct bio_prison *prison = cell->prison; - - hlist_del(&cell->list); - bio_list_merge(inmates, &cell->bios); - - mempool_free(cell, prison->cell_pool); -} - -static void cell_release_no_holder(struct cell *cell, struct bio_list *inmates) -{ - unsigned long flags; - struct bio_prison *prison = cell->prison; - - spin_lock_irqsave(&prison->lock, flags); - __cell_release_no_holder(cell, inmates); - spin_unlock_irqrestore(&prison->lock, flags); -} - -static void cell_error(struct cell *cell) -{ - struct bio_prison *prison = cell->prison; - struct bio_list bios; - struct bio *bio; - unsigned long flags; - - bio_list_init(&bios); - - spin_lock_irqsave(&prison->lock, flags); - __cell_release(cell, &bios); - spin_unlock_irqrestore(&prison->lock, flags); - - while ((bio = bio_list_pop(&bios))) - bio_io_error(bio); -} - -/*----------------------------------------------------------------*/ - -/* - * We use the deferred set to keep track of pending reads to shared blocks. - * We do this to ensure the new mapping caused by a write isn't performed - * until these prior reads have completed. Otherwise the insertion of the - * new mapping could free the old block that the read bios are mapped to. 
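Concretely, in the code below a read of a shared block takes an entry with ds_inc(&pool->shared_read_ds) (see process_shared_bio()), and when a write later breaks sharing, schedule_copy() calls ds_add_work() on the same set: the new mapping is marked quiesced immediately only if no such reads are outstanding, otherwise it waits on the entry's work list until the completing reads drop their counts via ds_dec() and the sweep releases it.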
- */ - -struct deferred_set; -struct deferred_entry { - struct deferred_set *ds; - unsigned count; - struct list_head work_items; -}; - -struct deferred_set { - spinlock_t lock; - unsigned current_entry; - unsigned sweeper; - struct deferred_entry entries[DEFERRED_SET_SIZE]; -}; - -static void ds_init(struct deferred_set *ds) -{ - int i; - - spin_lock_init(&ds->lock); - ds->current_entry = 0; - ds->sweeper = 0; - for (i = 0; i < DEFERRED_SET_SIZE; i++) { - ds->entries[i].ds = ds; - ds->entries[i].count = 0; - INIT_LIST_HEAD(&ds->entries[i].work_items); - } -} - -static struct deferred_entry *ds_inc(struct deferred_set *ds) -{ - unsigned long flags; - struct deferred_entry *entry; - - spin_lock_irqsave(&ds->lock, flags); - entry = ds->entries + ds->current_entry; - entry->count++; - spin_unlock_irqrestore(&ds->lock, flags); - - return entry; -} - -static unsigned ds_next(unsigned index) -{ - return (index + 1) % DEFERRED_SET_SIZE; -} - -static void __sweep(struct deferred_set *ds, struct list_head *head) -{ - while ((ds->sweeper != ds->current_entry) && - !ds->entries[ds->sweeper].count) { - list_splice_init(&ds->entries[ds->sweeper].work_items, head); - ds->sweeper = ds_next(ds->sweeper); - } - - if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count) - list_splice_init(&ds->entries[ds->sweeper].work_items, head); -} - -static void ds_dec(struct deferred_entry *entry, struct list_head *head) -{ - unsigned long flags; - - spin_lock_irqsave(&entry->ds->lock, flags); - BUG_ON(!entry->count); - --entry->count; - __sweep(entry->ds, head); - spin_unlock_irqrestore(&entry->ds->lock, flags); -} - -/* - * Returns 1 if deferred or 0 if no pending items to delay job. - */ -static int ds_add_work(struct deferred_set *ds, struct list_head *work) -{ - int r = 1; - unsigned long flags; - unsigned next_entry; - - spin_lock_irqsave(&ds->lock, flags); - if ((ds->sweeper == ds->current_entry) && - !ds->entries[ds->current_entry].count) - r = 0; - else { - list_add(work, &ds->entries[ds->current_entry].work_items); - next_entry = ds_next(ds->current_entry); - if (!ds->entries[next_entry].count) - ds->current_entry = next_entry; - } - spin_unlock_irqrestore(&ds->lock, flags); - - return r; -} - -/*----------------------------------------------------------------*/ - -/* - * Key building. - */ -static void build_data_key(struct dm_thin_device *td, - dm_block_t b, struct cell_key *key) -{ - key->virtual = 0; - key->dev = dm_thin_dev_id(td); - key->block = b; -} - -static void build_virtual_key(struct dm_thin_device *td, dm_block_t b, - struct cell_key *key) -{ - key->virtual = 1; - key->dev = dm_thin_dev_id(td); - key->block = b; -} - -/*----------------------------------------------------------------*/ - -/* - * A pool device ties together a metadata device and a data device. It - * also provides the interface for creating and destroying internal - * devices. 
- */ -struct new_mapping; - -struct pool_features { - unsigned zero_new_blocks:1; - unsigned discard_enabled:1; - unsigned discard_passdown:1; -}; - -struct pool { - struct list_head list; - struct dm_target *ti; /* Only set if a pool target is bound */ - - struct mapped_device *pool_md; - struct block_device *md_dev; - struct dm_pool_metadata *pmd; - - uint32_t sectors_per_block; - unsigned block_shift; - dm_block_t offset_mask; - dm_block_t low_water_blocks; - - struct pool_features pf; - unsigned low_water_triggered:1; /* A dm event has been sent */ - unsigned no_free_space:1; /* A -ENOSPC warning has been issued */ - - struct bio_prison *prison; - struct dm_kcopyd_client *copier; - - struct workqueue_struct *wq; - struct work_struct worker; - struct delayed_work waker; - - unsigned ref_count; - unsigned long last_commit_jiffies; - - spinlock_t lock; - struct bio_list deferred_bios; - struct bio_list deferred_flush_bios; - struct list_head prepared_mappings; - struct list_head prepared_discards; - - struct bio_list retry_on_resume_list; - - struct deferred_set shared_read_ds; - struct deferred_set all_io_ds; - - struct new_mapping *next_mapping; - mempool_t *mapping_pool; - mempool_t *endio_hook_pool; -}; - -/* - * Target context for a pool. - */ -struct pool_c { - struct dm_target *ti; - struct pool *pool; - struct dm_dev *data_dev; - struct dm_dev *metadata_dev; - struct dm_target_callbacks callbacks; - - dm_block_t low_water_blocks; - struct pool_features pf; -}; - -/* - * Target context for a thin. - */ -struct thin_c { - struct dm_dev *pool_dev; - struct dm_dev *origin_dev; - dm_thin_id dev_id; - - struct pool *pool; - struct dm_thin_device *td; -}; - -/*----------------------------------------------------------------*/ - -/* - * A global list of pools that uses a struct mapped_device as a key. 
- */ -static struct dm_thin_pool_table { - struct mutex mutex; - struct list_head pools; -} dm_thin_pool_table; - -static void pool_table_init(void) -{ - mutex_init(&dm_thin_pool_table.mutex); - INIT_LIST_HEAD(&dm_thin_pool_table.pools); -} - -static void __pool_table_insert(struct pool *pool) -{ - BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex)); - list_add(&pool->list, &dm_thin_pool_table.pools); -} - -static void __pool_table_remove(struct pool *pool) -{ - BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex)); - list_del(&pool->list); -} - -static struct pool *__pool_table_lookup(struct mapped_device *md) -{ - struct pool *pool = NULL, *tmp; - - BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex)); - - list_for_each_entry(tmp, &dm_thin_pool_table.pools, list) { - if (tmp->pool_md == md) { - pool = tmp; - break; - } - } - - return pool; -} - -static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev) -{ - struct pool *pool = NULL, *tmp; - - BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex)); - - list_for_each_entry(tmp, &dm_thin_pool_table.pools, list) { - if (tmp->md_dev == md_dev) { - pool = tmp; - break; - } - } - - return pool; -} - -/*----------------------------------------------------------------*/ - -struct endio_hook { - struct thin_c *tc; - struct deferred_entry *shared_read_entry; - struct deferred_entry *all_io_entry; - struct new_mapping *overwrite_mapping; -}; - -static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master) -{ - struct bio *bio; - struct bio_list bios; - - bio_list_init(&bios); - bio_list_merge(&bios, master); - bio_list_init(master); - - while ((bio = bio_list_pop(&bios))) { - struct endio_hook *h = dm_get_mapinfo(bio)->ptr; - if (h->tc == tc) - bio_endio(bio, DM_ENDIO_REQUEUE); - else - bio_list_add(master, bio); - } -} - -static void requeue_io(struct thin_c *tc) -{ - struct pool *pool = tc->pool; - unsigned long flags; - - spin_lock_irqsave(&pool->lock, flags); - __requeue_bio_list(tc, &pool->deferred_bios); - __requeue_bio_list(tc, &pool->retry_on_resume_list); - spin_unlock_irqrestore(&pool->lock, flags); -} - -/* - * This section of code contains the logic for processing a thin device's IO. - * Much of the code depends on pool object resources (lists, workqueues, etc) - * but most is exclusively called from the thin target rather than the thin-pool - * target. - */ - -static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio) -{ - return bio->bi_sector >> tc->pool->block_shift; -} - -static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) -{ - struct pool *pool = tc->pool; - - bio->bi_bdev = tc->pool_dev->bdev; - bio->bi_sector = (block << pool->block_shift) + - (bio->bi_sector & pool->offset_mask); -} - -static void remap_to_origin(struct thin_c *tc, struct bio *bio) -{ - bio->bi_bdev = tc->origin_dev->bdev; -} - -static void issue(struct thin_c *tc, struct bio *bio) -{ - struct pool *pool = tc->pool; - unsigned long flags; - - /* - * Batch together any FUA/FLUSH bios we find and then issue - * a single commit for them in process_deferred_bios(). 
- */ - if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { - spin_lock_irqsave(&pool->lock, flags); - bio_list_add(&pool->deferred_flush_bios, bio); - spin_unlock_irqrestore(&pool->lock, flags); - } else - generic_make_request(bio); -} - -static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio) -{ - remap_to_origin(tc, bio); - issue(tc, bio); -} - -static void remap_and_issue(struct thin_c *tc, struct bio *bio, - dm_block_t block) -{ - remap(tc, bio, block); - issue(tc, bio); -} - -/* - * wake_worker() is used when new work is queued and when pool_resume is - * ready to continue deferred IO processing. - */ -static void wake_worker(struct pool *pool) -{ - queue_work(pool->wq, &pool->worker); -} - -/*----------------------------------------------------------------*/ - -/* - * Bio endio functions. - */ -struct new_mapping { - struct list_head list; - - unsigned quiesced:1; - unsigned prepared:1; - unsigned pass_discard:1; - - struct thin_c *tc; - dm_block_t virt_block; - dm_block_t data_block; - struct cell *cell, *cell2; - int err; - - /* - * If the bio covers the whole area of a block then we can avoid - * zeroing or copying. Instead this bio is hooked. The bio will - * still be in the cell, so care has to be taken to avoid issuing - * the bio twice. - */ - struct bio *bio; - bio_end_io_t *saved_bi_end_io; -}; - -static void __maybe_add_mapping(struct new_mapping *m) -{ - struct pool *pool = m->tc->pool; - - if (m->quiesced && m->prepared) { - list_add(&m->list, &pool->prepared_mappings); - wake_worker(pool); - } -} - -static void copy_complete(int read_err, unsigned long write_err, void *context) -{ - unsigned long flags; - struct new_mapping *m = context; - struct pool *pool = m->tc->pool; - - m->err = read_err || write_err ? -EIO : 0; - - spin_lock_irqsave(&pool->lock, flags); - m->prepared = 1; - __maybe_add_mapping(m); - spin_unlock_irqrestore(&pool->lock, flags); -} - -static void overwrite_endio(struct bio *bio, int err) -{ - unsigned long flags; - struct endio_hook *h = dm_get_mapinfo(bio)->ptr; - struct new_mapping *m = h->overwrite_mapping; - struct pool *pool = m->tc->pool; - - m->err = err; - - spin_lock_irqsave(&pool->lock, flags); - m->prepared = 1; - __maybe_add_mapping(m); - spin_unlock_irqrestore(&pool->lock, flags); -} - -/*----------------------------------------------------------------*/ - -/* - * Workqueue. - */ - -/* - * Prepared mapping jobs. - */ - -/* - * This sends the bios in the cell back to the deferred_bios list. - */ -static void cell_defer(struct thin_c *tc, struct cell *cell, - dm_block_t data_block) -{ - struct pool *pool = tc->pool; - unsigned long flags; - - spin_lock_irqsave(&pool->lock, flags); - cell_release(cell, &pool->deferred_bios); - spin_unlock_irqrestore(&tc->pool->lock, flags); - - wake_worker(pool); -} - -/* - * Same as cell_defer above, except it omits one particular detainee, - * a write bio that covers the block and has already been processed. 
- */ -static void cell_defer_except(struct thin_c *tc, struct cell *cell) -{ - struct bio_list bios; - struct pool *pool = tc->pool; - unsigned long flags; - - bio_list_init(&bios); - - spin_lock_irqsave(&pool->lock, flags); - cell_release_no_holder(cell, &pool->deferred_bios); - spin_unlock_irqrestore(&pool->lock, flags); - - wake_worker(pool); -} - -static void process_prepared_mapping(struct new_mapping *m) -{ - struct thin_c *tc = m->tc; - struct bio *bio; - int r; - - bio = m->bio; - if (bio) - bio->bi_end_io = m->saved_bi_end_io; - - if (m->err) { - cell_error(m->cell); - return; - } - - /* - * Commit the prepared block into the mapping btree. - * Any I/O for this block arriving after this point will get - * remapped to it directly. - */ - r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block); - if (r) { - DMERR("dm_thin_insert_block() failed"); - cell_error(m->cell); - return; - } - - /* - * Release any bios held while the block was being provisioned. - * If we are processing a write bio that completely covers the block, - * we already processed it so can ignore it now when processing - * the bios in the cell. - */ - if (bio) { - cell_defer_except(tc, m->cell); - bio_endio(bio, 0); - } else - cell_defer(tc, m->cell, m->data_block); - - list_del(&m->list); - mempool_free(m, tc->pool->mapping_pool); -} - -static void process_prepared_discard(struct new_mapping *m) -{ - int r; - struct thin_c *tc = m->tc; - - r = dm_thin_remove_block(tc->td, m->virt_block); - if (r) - DMERR("dm_thin_remove_block() failed"); - - /* - * Pass the discard down to the underlying device? - */ - if (m->pass_discard) - remap_and_issue(tc, m->bio, m->data_block); - else - bio_endio(m->bio, 0); - - cell_defer_except(tc, m->cell); - cell_defer_except(tc, m->cell2); - mempool_free(m, tc->pool->mapping_pool); -} - -static void process_prepared(struct pool *pool, struct list_head *head, - void (*fn)(struct new_mapping *)) -{ - unsigned long flags; - struct list_head maps; - struct new_mapping *m, *tmp; - - INIT_LIST_HEAD(&maps); - spin_lock_irqsave(&pool->lock, flags); - list_splice_init(head, &maps); - spin_unlock_irqrestore(&pool->lock, flags); - - list_for_each_entry_safe(m, tmp, &maps, list) - fn(m); -} - -/* - * Deferred bio jobs. - */ -static int io_overlaps_block(struct pool *pool, struct bio *bio) -{ - return !(bio->bi_sector & pool->offset_mask) && - (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT)); - -} - -static int io_overwrites_block(struct pool *pool, struct bio *bio) -{ - return (bio_data_dir(bio) == WRITE) && - io_overlaps_block(pool, bio); -} - -static void save_and_set_endio(struct bio *bio, bio_end_io_t **save, - bio_end_io_t *fn) -{ - *save = bio->bi_end_io; - bio->bi_end_io = fn; -} - -static int ensure_next_mapping(struct pool *pool) -{ - if (pool->next_mapping) - return 0; - - pool->next_mapping = mempool_alloc(pool->mapping_pool, GFP_ATOMIC); - - return pool->next_mapping ? 
0 : -ENOMEM; -} - -static struct new_mapping *get_next_mapping(struct pool *pool) -{ - struct new_mapping *r = pool->next_mapping; - - BUG_ON(!pool->next_mapping); - - pool->next_mapping = NULL; - - return r; -} - -static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, - struct dm_dev *origin, dm_block_t data_origin, - dm_block_t data_dest, - struct cell *cell, struct bio *bio) -{ - int r; - struct pool *pool = tc->pool; - struct new_mapping *m = get_next_mapping(pool); - - INIT_LIST_HEAD(&m->list); - m->quiesced = 0; - m->prepared = 0; - m->tc = tc; - m->virt_block = virt_block; - m->data_block = data_dest; - m->cell = cell; - m->err = 0; - m->bio = NULL; - - if (!ds_add_work(&pool->shared_read_ds, &m->list)) - m->quiesced = 1; - - /* - * IO to pool_dev remaps to the pool target's data_dev. - * - * If the whole block of data is being overwritten, we can issue the - * bio immediately. Otherwise we use kcopyd to clone the data first. - */ - if (io_overwrites_block(pool, bio)) { - struct endio_hook *h = dm_get_mapinfo(bio)->ptr; - h->overwrite_mapping = m; - m->bio = bio; - save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); - remap_and_issue(tc, bio, data_dest); - } else { - struct dm_io_region from, to; - - from.bdev = origin->bdev; - from.sector = data_origin * pool->sectors_per_block; - from.count = pool->sectors_per_block; - - to.bdev = tc->pool_dev->bdev; - to.sector = data_dest * pool->sectors_per_block; - to.count = pool->sectors_per_block; - - r = dm_kcopyd_copy(pool->copier, &from, 1, &to, - 0, copy_complete, m); - if (r < 0) { - mempool_free(m, pool->mapping_pool); - DMERR("dm_kcopyd_copy() failed"); - cell_error(cell); - } - } -} - -static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block, - dm_block_t data_origin, dm_block_t data_dest, - struct cell *cell, struct bio *bio) -{ - schedule_copy(tc, virt_block, tc->pool_dev, - data_origin, data_dest, cell, bio); -} - -static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block, - dm_block_t data_dest, - struct cell *cell, struct bio *bio) -{ - schedule_copy(tc, virt_block, tc->origin_dev, - virt_block, data_dest, cell, bio); -} - -static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, - dm_block_t data_block, struct cell *cell, - struct bio *bio) -{ - struct pool *pool = tc->pool; - struct new_mapping *m = get_next_mapping(pool); - - INIT_LIST_HEAD(&m->list); - m->quiesced = 1; - m->prepared = 0; - m->tc = tc; - m->virt_block = virt_block; - m->data_block = data_block; - m->cell = cell; - m->err = 0; - m->bio = NULL; - - /* - * If the whole block of data is being overwritten or we are not - * zeroing pre-existing data, we can issue the bio immediately. - * Otherwise we use kcopyd to zero the data first. 
- */ - if (!pool->pf.zero_new_blocks) - process_prepared_mapping(m); - - else if (io_overwrites_block(pool, bio)) { - struct endio_hook *h = dm_get_mapinfo(bio)->ptr; - h->overwrite_mapping = m; - m->bio = bio; - save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); - remap_and_issue(tc, bio, data_block); - - } else { - int r; - struct dm_io_region to; - - to.bdev = tc->pool_dev->bdev; - to.sector = data_block * pool->sectors_per_block; - to.count = pool->sectors_per_block; - - r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m); - if (r < 0) { - mempool_free(m, pool->mapping_pool); - DMERR("dm_kcopyd_zero() failed"); - cell_error(cell); - } - } -} - -static int alloc_data_block(struct thin_c *tc, dm_block_t *result) -{ - int r; - dm_block_t free_blocks; - unsigned long flags; - struct pool *pool = tc->pool; - - r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); - if (r) - return r; - - if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) { - DMWARN("%s: reached low water mark, sending event.", - dm_device_name(pool->pool_md)); - spin_lock_irqsave(&pool->lock, flags); - pool->low_water_triggered = 1; - spin_unlock_irqrestore(&pool->lock, flags); - dm_table_event(pool->ti->table); - } - - if (!free_blocks) { - if (pool->no_free_space) - return -ENOSPC; - else { - /* - * Try to commit to see if that will free up some - * more space. - */ - r = dm_pool_commit_metadata(pool->pmd); - if (r) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - return r; - } - - r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); - if (r) - return r; - - /* - * If we still have no space we set a flag to avoid - * doing all this checking and return -ENOSPC. - */ - if (!free_blocks) { - DMWARN("%s: no free space available.", - dm_device_name(pool->pool_md)); - spin_lock_irqsave(&pool->lock, flags); - pool->no_free_space = 1; - spin_unlock_irqrestore(&pool->lock, flags); - return -ENOSPC; - } - } - } - - r = dm_pool_alloc_data_block(pool->pmd, result); - if (r) - return r; - - return 0; -} - -/* - * If we have run out of space, queue bios until the device is - * resumed, presumably after having been reloaded with more space. - */ -static void retry_on_resume(struct bio *bio) -{ - struct endio_hook *h = dm_get_mapinfo(bio)->ptr; - struct thin_c *tc = h->tc; - struct pool *pool = tc->pool; - unsigned long flags; - - spin_lock_irqsave(&pool->lock, flags); - bio_list_add(&pool->retry_on_resume_list, bio); - spin_unlock_irqrestore(&pool->lock, flags); -} - -static void no_space(struct cell *cell) -{ - struct bio *bio; - struct bio_list bios; - - bio_list_init(&bios); - cell_release(cell, &bios); - - while ((bio = bio_list_pop(&bios))) - retry_on_resume(bio); -} - -static void process_discard(struct thin_c *tc, struct bio *bio) -{ - int r; - unsigned long flags; - struct pool *pool = tc->pool; - struct cell *cell, *cell2; - struct cell_key key, key2; - dm_block_t block = get_bio_block(tc, bio); - struct dm_thin_lookup_result lookup_result; - struct new_mapping *m; - - build_virtual_key(tc->td, block, &key); - if (bio_detain(tc->pool->prison, &key, bio, &cell)) - return; - - r = dm_thin_find_block(tc->td, block, 1, &lookup_result); - switch (r) { - case 0: - /* - * Check nobody is fiddling with this pool block. This can - * happen if someone's in the process of breaking sharing - * on this block. 
- */ - build_data_key(tc->td, lookup_result.block, &key2); - if (bio_detain(tc->pool->prison, &key2, bio, &cell2)) { - cell_release_singleton(cell, bio); - break; - } - - if (io_overlaps_block(pool, bio)) { - /* - * IO may still be going to the destination block. We must - * quiesce before we can do the removal. - */ - m = get_next_mapping(pool); - m->tc = tc; - m->pass_discard = (!lookup_result.shared) & pool->pf.discard_passdown; - m->virt_block = block; - m->data_block = lookup_result.block; - m->cell = cell; - m->cell2 = cell2; - m->err = 0; - m->bio = bio; - - if (!ds_add_work(&pool->all_io_ds, &m->list)) { - spin_lock_irqsave(&pool->lock, flags); - list_add(&m->list, &pool->prepared_discards); - spin_unlock_irqrestore(&pool->lock, flags); - wake_worker(pool); - } - } else { - /* - * This path is hit if people are ignoring - * limits->discard_granularity. It ignores any - * part of the discard that is in a subsequent - * block. - */ - sector_t offset = bio->bi_sector - (block << pool->block_shift); - unsigned remaining = (pool->sectors_per_block - offset) << 9; - bio->bi_size = min(bio->bi_size, remaining); - - cell_release_singleton(cell, bio); - cell_release_singleton(cell2, bio); - remap_and_issue(tc, bio, lookup_result.block); - } - break; - - case -ENODATA: - /* - * It isn't provisioned, just forget it. - */ - cell_release_singleton(cell, bio); - bio_endio(bio, 0); - break; - - default: - DMERR("discard: find block unexpectedly returned %d", r); - cell_release_singleton(cell, bio); - bio_io_error(bio); - break; - } -} - -static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, - struct cell_key *key, - struct dm_thin_lookup_result *lookup_result, - struct cell *cell) -{ - int r; - dm_block_t data_block; - - r = alloc_data_block(tc, &data_block); - switch (r) { - case 0: - schedule_internal_copy(tc, block, lookup_result->block, - data_block, cell, bio); - break; - - case -ENOSPC: - no_space(cell); - break; - - default: - DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); - cell_error(cell); - break; - } -} - -static void process_shared_bio(struct thin_c *tc, struct bio *bio, - dm_block_t block, - struct dm_thin_lookup_result *lookup_result) -{ - struct cell *cell; - struct pool *pool = tc->pool; - struct cell_key key; - - /* - * If cell is already occupied, then sharing is already in the process - * of being broken so we have nothing further to do here. - */ - build_data_key(tc->td, lookup_result->block, &key); - if (bio_detain(pool->prison, &key, bio, &cell)) - return; - - if (bio_data_dir(bio) == WRITE) - break_sharing(tc, bio, block, &key, lookup_result, cell); - else { - struct endio_hook *h = dm_get_mapinfo(bio)->ptr; - - h->shared_read_entry = ds_inc(&pool->shared_read_ds); - - cell_release_singleton(cell, bio); - remap_and_issue(tc, bio, lookup_result->block); - } -} - -static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block, - struct cell *cell) -{ - int r; - dm_block_t data_block; - - /* - * Remap empty bios (flushes) immediately, without provisioning. - */ - if (!bio->bi_size) { - cell_release_singleton(cell, bio); - remap_and_issue(tc, bio, 0); - return; - } - - /* - * Fill read bios with zeroes and complete them immediately. 
- */ - if (bio_data_dir(bio) == READ) { - zero_fill_bio(bio); - cell_release_singleton(cell, bio); - bio_endio(bio, 0); - return; - } - - r = alloc_data_block(tc, &data_block); - switch (r) { - case 0: - if (tc->origin_dev) - schedule_external_copy(tc, block, data_block, cell, bio); - else - schedule_zero(tc, block, data_block, cell, bio); - break; - - case -ENOSPC: - no_space(cell); - break; - - default: - DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); - cell_error(cell); - break; - } -} - -static void process_bio(struct thin_c *tc, struct bio *bio) -{ - int r; - dm_block_t block = get_bio_block(tc, bio); - struct cell *cell; - struct cell_key key; - struct dm_thin_lookup_result lookup_result; - - /* - * If cell is already occupied, then the block is already - * being provisioned so we have nothing further to do here. - */ - build_virtual_key(tc->td, block, &key); - if (bio_detain(tc->pool->prison, &key, bio, &cell)) - return; - - r = dm_thin_find_block(tc->td, block, 1, &lookup_result); - switch (r) { - case 0: - /* - * We can release this cell now. This thread is the only - * one that puts bios into a cell, and we know there were - * no preceding bios. - */ - /* - * TODO: this will probably have to change when discard goes - * back in. - */ - cell_release_singleton(cell, bio); - - if (lookup_result.shared) - process_shared_bio(tc, bio, block, &lookup_result); - else - remap_and_issue(tc, bio, lookup_result.block); - break; - - case -ENODATA: - if (bio_data_dir(bio) == READ && tc->origin_dev) { - cell_release_singleton(cell, bio); - remap_to_origin_and_issue(tc, bio); - } else - provision_block(tc, bio, block, cell); - break; - - default: - DMERR("dm_thin_find_block() failed, error = %d", r); - cell_release_singleton(cell, bio); - bio_io_error(bio); - break; - } -} - -static int need_commit_due_to_time(struct pool *pool) -{ - return jiffies < pool->last_commit_jiffies || - jiffies > pool->last_commit_jiffies + COMMIT_PERIOD; -} - -static void process_deferred_bios(struct pool *pool) -{ - unsigned long flags; - struct bio *bio; - struct bio_list bios; - int r; - - bio_list_init(&bios); - - spin_lock_irqsave(&pool->lock, flags); - bio_list_merge(&bios, &pool->deferred_bios); - bio_list_init(&pool->deferred_bios); - spin_unlock_irqrestore(&pool->lock, flags); - - while ((bio = bio_list_pop(&bios))) { - struct endio_hook *h = dm_get_mapinfo(bio)->ptr; - struct thin_c *tc = h->tc; - - /* - * If we've got no free new_mapping structs, and processing - * this bio might require one, we pause until there are some - * prepared mappings to process. - */ - if (ensure_next_mapping(pool)) { - spin_lock_irqsave(&pool->lock, flags); - bio_list_merge(&pool->deferred_bios, &bios); - spin_unlock_irqrestore(&pool->lock, flags); - - break; - } - - if (bio->bi_rw & REQ_DISCARD) - process_discard(tc, bio); - else - process_bio(tc, bio); - } - - /* - * If there are any deferred flush bios, we must commit - * the metadata before issuing them. 
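Completing a FLUSH/FUA bio promises durability to the layer above, so any block mappings created since the last commit must reach the metadata device before these bios are allowed to finish.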
- */ - bio_list_init(&bios); - spin_lock_irqsave(&pool->lock, flags); - bio_list_merge(&bios, &pool->deferred_flush_bios); - bio_list_init(&pool->deferred_flush_bios); - spin_unlock_irqrestore(&pool->lock, flags); - - if (bio_list_empty(&bios) && !need_commit_due_to_time(pool)) - return; - - r = dm_pool_commit_metadata(pool->pmd); - if (r) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - while ((bio = bio_list_pop(&bios))) - bio_io_error(bio); - return; - } - pool->last_commit_jiffies = jiffies; - - while ((bio = bio_list_pop(&bios))) - generic_make_request(bio); -} - -static void do_worker(struct work_struct *ws) -{ - struct pool *pool = container_of(ws, struct pool, worker); - - process_prepared(pool, &pool->prepared_mappings, process_prepared_mapping); - process_prepared(pool, &pool->prepared_discards, process_prepared_discard); - process_deferred_bios(pool); -} - -/* - * We want to commit periodically so that not too much - * unwritten data builds up. - */ -static void do_waker(struct work_struct *ws) -{ - struct pool *pool = container_of(to_delayed_work(ws), struct pool, waker); - wake_worker(pool); - queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD); -} - -/*----------------------------------------------------------------*/ - -/* - * Mapping functions. - */ - -/* - * Called only while mapping a thin bio to hand it over to the workqueue. - */ -static void thin_defer_bio(struct thin_c *tc, struct bio *bio) -{ - unsigned long flags; - struct pool *pool = tc->pool; - - spin_lock_irqsave(&pool->lock, flags); - bio_list_add(&pool->deferred_bios, bio); - spin_unlock_irqrestore(&pool->lock, flags); - - wake_worker(pool); -} - -static struct endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio) -{ - struct pool *pool = tc->pool; - struct endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO); - - h->tc = tc; - h->shared_read_entry = NULL; - h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : ds_inc(&pool->all_io_ds); - h->overwrite_mapping = NULL; - - return h; -} - -/* - * Non-blocking function called from the thin target's map function. - */ -static int thin_bio_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) -{ - int r; - struct thin_c *tc = ti->private; - dm_block_t block = get_bio_block(tc, bio); - struct dm_thin_device *td = tc->td; - struct dm_thin_lookup_result result; - - map_context->ptr = thin_hook_bio(tc, bio); - if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) { - thin_defer_bio(tc, bio); - return DM_MAPIO_SUBMITTED; - } - - r = dm_thin_find_block(td, block, 0, &result); - - /* - * Note that we defer readahead too. - */ - switch (r) { - case 0: - if (unlikely(result.shared)) { - /* - * We have a race condition here between the - * result.shared value returned by the lookup and - * snapshot creation, which may cause new - * sharing. - * - * To avoid this always quiesce the origin before - * taking the snap. You want to do this anyway to - * ensure a consistent application view - * (i.e. lockfs). - * - * More distant ancestors are irrelevant. The - * shared flag will be set in their case. - */ - thin_defer_bio(tc, bio); - r = DM_MAPIO_SUBMITTED; - } else { - remap(tc, bio, result.block); - r = DM_MAPIO_REMAPPED; - } - break; - - case -ENODATA: - /* - * In future, the failed dm_thin_find_block above could - * provide the hint to load the metadata into cache. 
- */ - case -EWOULDBLOCK: - thin_defer_bio(tc, bio); - r = DM_MAPIO_SUBMITTED; - break; - } - - return r; -} - -static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits) -{ - int r; - unsigned long flags; - struct pool_c *pt = container_of(cb, struct pool_c, callbacks); - - spin_lock_irqsave(&pt->pool->lock, flags); - r = !bio_list_empty(&pt->pool->retry_on_resume_list); - spin_unlock_irqrestore(&pt->pool->lock, flags); - - if (!r) { - struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); - r = bdi_congested(&q->backing_dev_info, bdi_bits); - } - - return r; -} - -static void __requeue_bios(struct pool *pool) -{ - bio_list_merge(&pool->deferred_bios, &pool->retry_on_resume_list); - bio_list_init(&pool->retry_on_resume_list); -} - -/*---------------------------------------------------------------- - * Binding of control targets to a pool object - *--------------------------------------------------------------*/ -static int bind_control_target(struct pool *pool, struct dm_target *ti) -{ - struct pool_c *pt = ti->private; - - pool->ti = ti; - pool->low_water_blocks = pt->low_water_blocks; - pool->pf = pt->pf; - - /* - * If discard_passdown was enabled verify that the data device - * supports discards. Disable discard_passdown if not; otherwise - * -EOPNOTSUPP will be returned. - */ - if (pt->pf.discard_passdown) { - struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); - if (!q || !blk_queue_discard(q)) { - char buf[BDEVNAME_SIZE]; - DMWARN("Discard unsupported by data device (%s): Disabling discard passdown.", - bdevname(pt->data_dev->bdev, buf)); - pool->pf.discard_passdown = 0; - } - } - - return 0; -} - -static void unbind_control_target(struct pool *pool, struct dm_target *ti) -{ - if (pool->ti == ti) - pool->ti = NULL; -} - -/*---------------------------------------------------------------- - * Pool creation - *--------------------------------------------------------------*/ -/* Initialize pool features. 
*/ -static void pool_features_init(struct pool_features *pf) -{ - pf->zero_new_blocks = 1; - pf->discard_enabled = 1; - pf->discard_passdown = 1; -} - -static void __pool_destroy(struct pool *pool) -{ - __pool_table_remove(pool); - - if (dm_pool_metadata_close(pool->pmd) < 0) - DMWARN("%s: dm_pool_metadata_close() failed.", __func__); - - prison_destroy(pool->prison); - dm_kcopyd_client_destroy(pool->copier); - - if (pool->wq) - destroy_workqueue(pool->wq); - - if (pool->next_mapping) - mempool_free(pool->next_mapping, pool->mapping_pool); - mempool_destroy(pool->mapping_pool); - mempool_destroy(pool->endio_hook_pool); - kfree(pool); -} - -static struct pool *pool_create(struct mapped_device *pool_md, - struct block_device *metadata_dev, - unsigned long block_size, char **error) -{ - int r; - void *err_p; - struct pool *pool; - struct dm_pool_metadata *pmd; - - pmd = dm_pool_metadata_open(metadata_dev, block_size); - if (IS_ERR(pmd)) { - *error = "Error creating metadata object"; - return (struct pool *)pmd; - } - - pool = kmalloc(sizeof(*pool), GFP_KERNEL); - if (!pool) { - *error = "Error allocating memory for pool"; - err_p = ERR_PTR(-ENOMEM); - goto bad_pool; - } - - pool->pmd = pmd; - pool->sectors_per_block = block_size; - pool->block_shift = ffs(block_size) - 1; - pool->offset_mask = block_size - 1; - pool->low_water_blocks = 0; - pool_features_init(&pool->pf); - pool->prison = prison_create(PRISON_CELLS); - if (!pool->prison) { - *error = "Error creating pool's bio prison"; - err_p = ERR_PTR(-ENOMEM); - goto bad_prison; - } - - pool->copier = dm_kcopyd_client_create(); - if (IS_ERR(pool->copier)) { - r = PTR_ERR(pool->copier); - *error = "Error creating pool's kcopyd client"; - err_p = ERR_PTR(r); - goto bad_kcopyd_client; - } - - /* - * Create singlethreaded workqueue that will service all devices - * that use this metadata. 
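/*
 * [Editor's sketch, not part of the driver] pool_create() above derives
 * block_shift and offset_mask from the power-of-two block size. A
 * hypothetical user-space illustration of how a bio sector splits into a
 * virtual block number plus an in-block offset, and how the offset is
 * re-applied to the data block returned by the metadata lookup (all
 * numbers are made up):
 */
#include <stdio.h>
#include <strings.h>    /* ffs() */

int main(void)
{
        unsigned long sectors_per_block = 128;  /* 64KiB data blocks */
        unsigned block_shift = ffs((int)sectors_per_block) - 1;   /* 7 */
        unsigned long offset_mask = sectors_per_block - 1;

        unsigned long bi_sector = 1000;         /* incoming bio sector   */
        unsigned long virt_block = bi_sector >> block_shift;      /* 7 */
        unsigned long data_block = 42;          /* pretend lookup result */
        unsigned long remapped = (data_block << block_shift) |
                                 (bi_sector & offset_mask);       /* 5480 */

        printf("virtual block %lu -> data sector %lu\n", virt_block, remapped);
        return 0;
}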
- */ - pool->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); - if (!pool->wq) { - *error = "Error creating pool's workqueue"; - err_p = ERR_PTR(-ENOMEM); - goto bad_wq; - } - - INIT_WORK(&pool->worker, do_worker); - INIT_DELAYED_WORK(&pool->waker, do_waker); - spin_lock_init(&pool->lock); - bio_list_init(&pool->deferred_bios); - bio_list_init(&pool->deferred_flush_bios); - INIT_LIST_HEAD(&pool->prepared_mappings); - INIT_LIST_HEAD(&pool->prepared_discards); - pool->low_water_triggered = 0; - pool->no_free_space = 0; - bio_list_init(&pool->retry_on_resume_list); - ds_init(&pool->shared_read_ds); - ds_init(&pool->all_io_ds); - - pool->next_mapping = NULL; - pool->mapping_pool = - mempool_create_kmalloc_pool(MAPPING_POOL_SIZE, sizeof(struct new_mapping)); - if (!pool->mapping_pool) { - *error = "Error creating pool's mapping mempool"; - err_p = ERR_PTR(-ENOMEM); - goto bad_mapping_pool; - } - - pool->endio_hook_pool = - mempool_create_kmalloc_pool(ENDIO_HOOK_POOL_SIZE, sizeof(struct endio_hook)); - if (!pool->endio_hook_pool) { - *error = "Error creating pool's endio_hook mempool"; - err_p = ERR_PTR(-ENOMEM); - goto bad_endio_hook_pool; - } - pool->ref_count = 1; - pool->last_commit_jiffies = jiffies; - pool->pool_md = pool_md; - pool->md_dev = metadata_dev; - __pool_table_insert(pool); - - return pool; - -bad_endio_hook_pool: - mempool_destroy(pool->mapping_pool); -bad_mapping_pool: - destroy_workqueue(pool->wq); -bad_wq: - dm_kcopyd_client_destroy(pool->copier); -bad_kcopyd_client: - prison_destroy(pool->prison); -bad_prison: - kfree(pool); -bad_pool: - if (dm_pool_metadata_close(pmd)) - DMWARN("%s: dm_pool_metadata_close() failed.", __func__); - - return err_p; -} - -static void __pool_inc(struct pool *pool) -{ - BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex)); - pool->ref_count++; -} - -static void __pool_dec(struct pool *pool) -{ - BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex)); - BUG_ON(!pool->ref_count); - if (!--pool->ref_count) - __pool_destroy(pool); -} - -static struct pool *__pool_find(struct mapped_device *pool_md, - struct block_device *metadata_dev, - unsigned long block_size, char **error, - int *created) -{ - struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev); - - if (pool) { - if (pool->pool_md != pool_md) - return ERR_PTR(-EBUSY); - __pool_inc(pool); - - } else { - pool = __pool_table_lookup(pool_md); - if (pool) { - if (pool->md_dev != metadata_dev) - return ERR_PTR(-EINVAL); - __pool_inc(pool); - - } else { - pool = pool_create(pool_md, metadata_dev, block_size, error); - *created = 1; - } - } - - return pool; -} - -/*---------------------------------------------------------------- - * Pool target methods - *--------------------------------------------------------------*/ -static void pool_dtr(struct dm_target *ti) -{ - struct pool_c *pt = ti->private; - - mutex_lock(&dm_thin_pool_table.mutex); - - unbind_control_target(pt->pool, ti); - __pool_dec(pt->pool); - dm_put_device(ti, pt->metadata_dev); - dm_put_device(ti, pt->data_dev); - kfree(pt); - - mutex_unlock(&dm_thin_pool_table.mutex); -} - -static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, - struct dm_target *ti) -{ - int r; - unsigned argc; - const char *arg_name; - - static struct dm_arg _args[] = { - {0, 3, "Invalid number of pool feature arguments"}, - }; - - /* - * No feature arguments supplied. 
- */ - if (!as->argc) - return 0; - - r = dm_read_arg_group(_args, as, &argc, &ti->error); - if (r) - return -EINVAL; - - while (argc && !r) { - arg_name = dm_shift_arg(as); - argc--; - - if (!strcasecmp(arg_name, "skip_block_zeroing")) { - pf->zero_new_blocks = 0; - continue; - } else if (!strcasecmp(arg_name, "ignore_discard")) { - pf->discard_enabled = 0; - continue; - } else if (!strcasecmp(arg_name, "no_discard_passdown")) { - pf->discard_passdown = 0; - continue; - } - - ti->error = "Unrecognised pool feature requested"; - r = -EINVAL; - } - - return r; -} - -/* - * thin-pool <metadata dev> <data dev> - * <data block size (sectors)> - * <low water mark (blocks)> - * [<#feature args> [<arg>]*] - * - * Optional feature arguments are: - * skip_block_zeroing: skips the zeroing of newly-provisioned blocks. - * ignore_discard: disable discard - * no_discard_passdown: don't pass discards down to the data device - */ -static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) -{ - int r, pool_created = 0; - struct pool_c *pt; - struct pool *pool; - struct pool_features pf; - struct dm_arg_set as; - struct dm_dev *data_dev; - unsigned long block_size; - dm_block_t low_water_blocks; - struct dm_dev *metadata_dev; - sector_t metadata_dev_size; - char b[BDEVNAME_SIZE]; - - /* - * FIXME Remove validation from scope of lock. - */ - mutex_lock(&dm_thin_pool_table.mutex); - - if (argc < 4) { - ti->error = "Invalid argument count"; - r = -EINVAL; - goto out_unlock; - } - as.argc = argc; - as.argv = argv; - - r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &metadata_dev); - if (r) { - ti->error = "Error opening metadata block device"; - goto out_unlock; - } - - metadata_dev_size = i_size_read(metadata_dev->bdev->bd_inode) >> SECTOR_SHIFT; - if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING) - DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", - bdevname(metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS); - - r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev); - if (r) { - ti->error = "Error getting data device"; - goto out_metadata; - } - - if (kstrtoul(argv[2], 10, &block_size) || !block_size || - block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS || - block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS || - !is_power_of_2(block_size)) { - ti->error = "Invalid block size"; - r = -EINVAL; - goto out; - } - - if (kstrtoull(argv[3], 10, (unsigned long long *)&low_water_blocks)) { - ti->error = "Invalid low water mark"; - r = -EINVAL; - goto out; - } - - /* - * Set default pool features. - */ - pool_features_init(&pf); - - dm_consume_args(&as, 4); - r = parse_pool_features(&as, &pf, ti); - if (r) - goto out; - - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - if (!pt) { - r = -ENOMEM; - goto out; - } - - pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev, - block_size, &ti->error, &pool_created); - if (IS_ERR(pool)) { - r = PTR_ERR(pool); - goto out_free_pt; - } - - /* - * 'pool_created' reflects whether this is the first table load. - * Top level discard support is not allowed to be changed after - * initial load. This would require a pool reload to trigger thin - * device changes. 
- */ - if (!pool_created && pf.discard_enabled != pool->pf.discard_enabled) { - ti->error = "Discard support cannot be disabled once enabled"; - r = -EINVAL; - goto out_flags_changed; - } - - pt->pool = pool; - pt->ti = ti; - pt->metadata_dev = metadata_dev; - pt->data_dev = data_dev; - pt->low_water_blocks = low_water_blocks; - pt->pf = pf; - ti->num_flush_requests = 1; - /* - * Only need to enable discards if the pool should pass - * them down to the data device. The thin device's discard - * processing will cause mappings to be removed from the btree. - */ - if (pf.discard_enabled && pf.discard_passdown) { - ti->num_discard_requests = 1; - /* - * Setting 'discards_supported' circumvents the normal - * stacking of discard limits (this keeps the pool and - * thin devices' discard limits consistent). - */ - ti->discards_supported = 1; - } - ti->private = pt; - - pt->callbacks.congested_fn = pool_is_congested; - dm_table_add_target_callbacks(ti->table, &pt->callbacks); - - mutex_unlock(&dm_thin_pool_table.mutex); - - return 0; - -out_flags_changed: - __pool_dec(pool); -out_free_pt: - kfree(pt); -out: - dm_put_device(ti, data_dev); -out_metadata: - dm_put_device(ti, metadata_dev); -out_unlock: - mutex_unlock(&dm_thin_pool_table.mutex); - - return r; -} - -static int pool_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) -{ - int r; - struct pool_c *pt = ti->private; - struct pool *pool = pt->pool; - unsigned long flags; - - /* - * As this is a singleton target, ti->begin is always zero. - */ - spin_lock_irqsave(&pool->lock, flags); - bio->bi_bdev = pt->data_dev->bdev; - r = DM_MAPIO_REMAPPED; - spin_unlock_irqrestore(&pool->lock, flags); - - return r; -} - -/* - * Retrieves the number of blocks of the data device from - * the superblock and compares it to the actual device size, - * thus resizing the data device in case it has grown. - * - * This both copes with opening preallocated data devices in the ctr - * being followed by a resume - * -and- - * calling the resume method individually after userspace has - * grown the data device in reaction to a table event. - */ -static int pool_preresume(struct dm_target *ti) -{ - int r; - struct pool_c *pt = ti->private; - struct pool *pool = pt->pool; - dm_block_t data_size, sb_data_size; - - /* - * Take control of the pool object. 
- */ - r = bind_control_target(pool, ti); - if (r) - return r; - - data_size = ti->len >> pool->block_shift; - r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size); - if (r) { - DMERR("failed to retrieve data device size"); - return r; - } - - if (data_size < sb_data_size) { - DMERR("pool target too small, is %llu blocks (expected %llu)", - data_size, sb_data_size); - return -EINVAL; - - } else if (data_size > sb_data_size) { - r = dm_pool_resize_data_dev(pool->pmd, data_size); - if (r) { - DMERR("failed to resize data device"); - return r; - } - - r = dm_pool_commit_metadata(pool->pmd); - if (r) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - return r; - } - } - - return 0; -} - -static void pool_resume(struct dm_target *ti) -{ - struct pool_c *pt = ti->private; - struct pool *pool = pt->pool; - unsigned long flags; - - spin_lock_irqsave(&pool->lock, flags); - pool->low_water_triggered = 0; - pool->no_free_space = 0; - __requeue_bios(pool); - spin_unlock_irqrestore(&pool->lock, flags); - - do_waker(&pool->waker.work); -} - -static void pool_postsuspend(struct dm_target *ti) -{ - int r; - struct pool_c *pt = ti->private; - struct pool *pool = pt->pool; - - cancel_delayed_work(&pool->waker); - flush_workqueue(pool->wq); - - r = dm_pool_commit_metadata(pool->pmd); - if (r < 0) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - /* FIXME: invalidate device? error the next FUA or FLUSH bio ?*/ - } -} - -static int check_arg_count(unsigned argc, unsigned args_required) -{ - if (argc != args_required) { - DMWARN("Message received with %u arguments instead of %u.", - argc, args_required); - return -EINVAL; - } - - return 0; -} - -static int read_dev_id(char *arg, dm_thin_id *dev_id, int warning) -{ - if (!kstrtoull(arg, 10, (unsigned long long *)dev_id) && - *dev_id <= MAX_DEV_ID) - return 0; - - if (warning) - DMWARN("Message received with invalid device id: %s", arg); - - return -EINVAL; -} - -static int process_create_thin_mesg(unsigned argc, char **argv, struct pool *pool) -{ - dm_thin_id dev_id; - int r; - - r = check_arg_count(argc, 2); - if (r) - return r; - - r = read_dev_id(argv[1], &dev_id, 1); - if (r) - return r; - - r = dm_pool_create_thin(pool->pmd, dev_id); - if (r) { - DMWARN("Creation of new thinly-provisioned device with id %s failed.", - argv[1]); - return r; - } - - return 0; -} - -static int process_create_snap_mesg(unsigned argc, char **argv, struct pool *pool) -{ - dm_thin_id dev_id; - dm_thin_id origin_dev_id; - int r; - - r = check_arg_count(argc, 3); - if (r) - return r; - - r = read_dev_id(argv[1], &dev_id, 1); - if (r) - return r; - - r = read_dev_id(argv[2], &origin_dev_id, 1); - if (r) - return r; - - r = dm_pool_create_snap(pool->pmd, dev_id, origin_dev_id); - if (r) { - DMWARN("Creation of new snapshot %s of device %s failed.", - argv[1], argv[2]); - return r; - } - - return 0; -} - -static int process_delete_mesg(unsigned argc, char **argv, struct pool *pool) -{ - dm_thin_id dev_id; - int r; - - r = check_arg_count(argc, 2); - if (r) - return r; - - r = read_dev_id(argv[1], &dev_id, 1); - if (r) - return r; - - r = dm_pool_delete_thin_device(pool->pmd, dev_id); - if (r) - DMWARN("Deletion of thin device %s failed.", argv[1]); - - return r; -} - -static int process_set_transaction_id_mesg(unsigned argc, char **argv, struct pool *pool) -{ - dm_thin_id old_id, new_id; - int r; - - r = check_arg_count(argc, 3); - if (r) - return r; - - if (kstrtoull(argv[1], 10, (unsigned long long *)&old_id)) { - 
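/*
 * [Editor's sketch, not part of the driver] The message handlers in this
 * block are reached through the device-mapper target-message ioctl,
 * e.g. "dmsetup message <pool> 0 create_thin 0". A hedged user-space
 * sketch using libdevmapper (link with -ldevmapper); the pool name and
 * device id are hypothetical:
 */
#include <stdio.h>
#include <libdevmapper.h>

static int send_pool_message(const char *pool_name, const char *msg)
{
        struct dm_task *dmt = dm_task_create(DM_DEVICE_TARGET_MSG);
        int ok = 0;

        if (!dmt)
                return -1;
        if (dm_task_set_name(dmt, pool_name) &&
            dm_task_set_sector(dmt, 0) &&       /* pool is a singleton target */
            dm_task_set_message(dmt, msg))
                ok = dm_task_run(dmt);
        dm_task_destroy(dmt);
        return ok ? 0 : -1;
}

int main(void)
{
        if (send_pool_message("pool", "create_thin 0")) {
                fprintf(stderr, "sending pool message failed\n");
                return 1;
        }
        return 0;
}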
DMWARN("set_transaction_id message: Unrecognised id %s.", argv[1]); - return -EINVAL; - } - - if (kstrtoull(argv[2], 10, (unsigned long long *)&new_id)) { - DMWARN("set_transaction_id message: Unrecognised new id %s.", argv[2]); - return -EINVAL; - } - - r = dm_pool_set_metadata_transaction_id(pool->pmd, old_id, new_id); - if (r) { - DMWARN("Failed to change transaction id from %s to %s.", - argv[1], argv[2]); - return r; - } - - return 0; -} - -/* - * Messages supported: - * create_thin <dev_id> - * create_snap <dev_id> <origin_id> - * delete <dev_id> - * trim <dev_id> <new_size_in_sectors> - * set_transaction_id <current_trans_id> <new_trans_id> - */ -static int pool_message(struct dm_target *ti, unsigned argc, char **argv) -{ - int r = -EINVAL; - struct pool_c *pt = ti->private; - struct pool *pool = pt->pool; - - if (!strcasecmp(argv[0], "create_thin")) - r = process_create_thin_mesg(argc, argv, pool); - - else if (!strcasecmp(argv[0], "create_snap")) - r = process_create_snap_mesg(argc, argv, pool); - - else if (!strcasecmp(argv[0], "delete")) - r = process_delete_mesg(argc, argv, pool); - - else if (!strcasecmp(argv[0], "set_transaction_id")) - r = process_set_transaction_id_mesg(argc, argv, pool); - - else - DMWARN("Unrecognised thin pool target message received: %s", argv[0]); - - if (!r) { - r = dm_pool_commit_metadata(pool->pmd); - if (r) - DMERR("%s message: dm_pool_commit_metadata() failed, error = %d", - argv[0], r); - } - - return r; -} - -/* - * Status line is: - * <transaction id> <used metadata sectors>/<total metadata sectors> - * <used data sectors>/<total data sectors> <held metadata root> - */ -static int pool_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) -{ - int r, count; - unsigned sz = 0; - uint64_t transaction_id; - dm_block_t nr_free_blocks_data; - dm_block_t nr_free_blocks_metadata; - dm_block_t nr_blocks_data; - dm_block_t nr_blocks_metadata; - dm_block_t held_root; - char buf[BDEVNAME_SIZE]; - char buf2[BDEVNAME_SIZE]; - struct pool_c *pt = ti->private; - struct pool *pool = pt->pool; - - switch (type) { - case STATUSTYPE_INFO: - r = dm_pool_get_metadata_transaction_id(pool->pmd, - &transaction_id); - if (r) - return r; - - r = dm_pool_get_free_metadata_block_count(pool->pmd, - &nr_free_blocks_metadata); - if (r) - return r; - - r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata); - if (r) - return r; - - r = dm_pool_get_free_block_count(pool->pmd, - &nr_free_blocks_data); - if (r) - return r; - - r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data); - if (r) - return r; - - r = dm_pool_get_held_metadata_root(pool->pmd, &held_root); - if (r) - return r; - - DMEMIT("%llu %llu/%llu %llu/%llu ", - (unsigned long long)transaction_id, - (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), - (unsigned long long)nr_blocks_metadata, - (unsigned long long)(nr_blocks_data - nr_free_blocks_data), - (unsigned long long)nr_blocks_data); - - if (held_root) - DMEMIT("%llu", held_root); - else - DMEMIT("-"); - - break; - - case STATUSTYPE_TABLE: - DMEMIT("%s %s %lu %llu ", - format_dev_t(buf, pt->metadata_dev->bdev->bd_dev), - format_dev_t(buf2, pt->data_dev->bdev->bd_dev), - (unsigned long)pool->sectors_per_block, - (unsigned long long)pt->low_water_blocks); - - count = !pool->pf.zero_new_blocks + !pool->pf.discard_enabled + - !pt->pf.discard_passdown; - DMEMIT("%u ", count); - - if (!pool->pf.zero_new_blocks) - DMEMIT("skip_block_zeroing "); - - if (!pool->pf.discard_enabled) - DMEMIT("ignore_discard "); 
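/*
 * [Editor's sketch, not part of the driver] A hypothetical consumer of
 * the STATUSTYPE_INFO line emitted above, i.e.
 * "<transaction id> <used meta>/<total meta> <used data>/<total data> <held root or '-'>";
 * the sample string and the "percent used" calculation are illustrative only:
 */
#include <stdio.h>

int main(void)
{
        const char *status = "1 406/4096 1024/262144 -";
        unsigned long long trans_id, used_meta, total_meta, used_data, total_data;

        if (sscanf(status, "%llu %llu/%llu %llu/%llu",
                   &trans_id, &used_meta, &total_meta,
                   &used_data, &total_data) != 5)
                return 1;

        printf("transaction %llu, data space %.1f%% used\n",
               trans_id, 100.0 * used_data / total_data);
        return 0;
}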
- - if (!pt->pf.discard_passdown) - DMEMIT("no_discard_passdown "); - - break; - } - - return 0; -} - -static int pool_iterate_devices(struct dm_target *ti, - iterate_devices_callout_fn fn, void *data) -{ - struct pool_c *pt = ti->private; - - return fn(ti, pt->data_dev, 0, ti->len, data); -} - -static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct pool_c *pt = ti->private; - struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = pt->data_dev->bdev; - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - -static void set_discard_limits(struct pool *pool, struct queue_limits *limits) -{ - /* - * FIXME: these limits may be incompatible with the pool's data device - */ - limits->max_discard_sectors = pool->sectors_per_block; - - /* - * This is just a hint, and not enforced. We have to cope with - * bios that overlap 2 blocks. - */ - limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; - limits->discard_zeroes_data = pool->pf.zero_new_blocks; -} - -static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) -{ - struct pool_c *pt = ti->private; - struct pool *pool = pt->pool; - - blk_limits_io_min(limits, 0); - blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); - if (pool->pf.discard_enabled) - set_discard_limits(pool, limits); -} - -static struct target_type pool_target = { - .name = "thin-pool", - .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | - DM_TARGET_IMMUTABLE, - .version = {1, 1, 0}, - .module = THIS_MODULE, - .ctr = pool_ctr, - .dtr = pool_dtr, - .map = pool_map, - .postsuspend = pool_postsuspend, - .preresume = pool_preresume, - .resume = pool_resume, - .message = pool_message, - .status = pool_status, - .merge = pool_merge, - .iterate_devices = pool_iterate_devices, - .io_hints = pool_io_hints, -}; - -/*---------------------------------------------------------------- - * Thin target methods - *--------------------------------------------------------------*/ -static void thin_dtr(struct dm_target *ti) -{ - struct thin_c *tc = ti->private; - - mutex_lock(&dm_thin_pool_table.mutex); - - __pool_dec(tc->pool); - dm_pool_close_thin_device(tc->td); - dm_put_device(ti, tc->pool_dev); - if (tc->origin_dev) - dm_put_device(ti, tc->origin_dev); - kfree(tc); - - mutex_unlock(&dm_thin_pool_table.mutex); -} - -/* - * Thin target parameters: - * - * <pool_dev> <dev_id> [origin_dev] - * - * pool_dev: the path to the pool (eg, /dev/mapper/my_pool) - * dev_id: the internal device identifier - * origin_dev: a device external to the pool that should act as the origin - * - * If the pool device has discards disabled, they get disabled for the thin - * device as well. 
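/*
 * [Editor's sketch, not part of the driver] Loading a "thin" table like
 * the one documented above is normally done with dmsetup or libdevmapper.
 * A hedged libdevmapper sketch (link with -ldevmapper); the device name,
 * size and pool path are hypothetical:
 */
#include <stdio.h>
#include <libdevmapper.h>

int main(void)
{
        struct dm_task *dmt = dm_task_create(DM_DEVICE_CREATE);
        int ok = 0;

        if (!dmt)
                return 1;
        if (dm_task_set_name(dmt, "thin1") &&
            /* 1GiB device: start 0, 2097152 sectors, params "<pool_dev> <dev_id>" */
            dm_task_add_target(dmt, 0, 2097152, "thin", "/dev/mapper/pool 0"))
                ok = dm_task_run(dmt);
        dm_task_destroy(dmt);

        if (!ok)
                fprintf(stderr, "creating thin device failed\n");
        return ok ? 0 : 1;
}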
- */ -static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) -{ - int r; - struct thin_c *tc; - struct dm_dev *pool_dev, *origin_dev; - struct mapped_device *pool_md; - - mutex_lock(&dm_thin_pool_table.mutex); - - if (argc != 2 && argc != 3) { - ti->error = "Invalid argument count"; - r = -EINVAL; - goto out_unlock; - } - - tc = ti->private = kzalloc(sizeof(*tc), GFP_KERNEL); - if (!tc) { - ti->error = "Out of memory"; - r = -ENOMEM; - goto out_unlock; - } - - if (argc == 3) { - r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev); - if (r) { - ti->error = "Error opening origin device"; - goto bad_origin_dev; - } - tc->origin_dev = origin_dev; - } - - r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &pool_dev); - if (r) { - ti->error = "Error opening pool device"; - goto bad_pool_dev; - } - tc->pool_dev = pool_dev; - - if (read_dev_id(argv[1], (unsigned long long *)&tc->dev_id, 0)) { - ti->error = "Invalid device id"; - r = -EINVAL; - goto bad_common; - } - - pool_md = dm_get_md(tc->pool_dev->bdev->bd_dev); - if (!pool_md) { - ti->error = "Couldn't get pool mapped device"; - r = -EINVAL; - goto bad_common; - } - - tc->pool = __pool_table_lookup(pool_md); - if (!tc->pool) { - ti->error = "Couldn't find pool object"; - r = -EINVAL; - goto bad_pool_lookup; - } - __pool_inc(tc->pool); - - r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td); - if (r) { - ti->error = "Couldn't open thin internal device"; - goto bad_thin_open; - } - - ti->split_io = tc->pool->sectors_per_block; - ti->num_flush_requests = 1; - - /* In case the pool supports discards, pass them on. */ - if (tc->pool->pf.discard_enabled) { - ti->discards_supported = 1; - ti->num_discard_requests = 1; - } - - dm_put(pool_md); - - mutex_unlock(&dm_thin_pool_table.mutex); - - return 0; - -bad_thin_open: - __pool_dec(tc->pool); -bad_pool_lookup: - dm_put(pool_md); -bad_common: - dm_put_device(ti, tc->pool_dev); -bad_pool_dev: - if (tc->origin_dev) - dm_put_device(ti, tc->origin_dev); -bad_origin_dev: - kfree(tc); -out_unlock: - mutex_unlock(&dm_thin_pool_table.mutex); - - return r; -} - -static int thin_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) -{ - bio->bi_sector = dm_target_offset(ti, bio->bi_sector); - - return thin_bio_map(ti, bio, map_context); -} - -static int thin_endio(struct dm_target *ti, - struct bio *bio, int err, - union map_info *map_context) -{ - unsigned long flags; - struct endio_hook *h = map_context->ptr; - struct list_head work; - struct new_mapping *m, *tmp; - struct pool *pool = h->tc->pool; - - if (h->shared_read_entry) { - INIT_LIST_HEAD(&work); - ds_dec(h->shared_read_entry, &work); - - spin_lock_irqsave(&pool->lock, flags); - list_for_each_entry_safe(m, tmp, &work, list) { - list_del(&m->list); - m->quiesced = 1; - __maybe_add_mapping(m); - } - spin_unlock_irqrestore(&pool->lock, flags); - } - - if (h->all_io_entry) { - INIT_LIST_HEAD(&work); - ds_dec(h->all_io_entry, &work); - spin_lock_irqsave(&pool->lock, flags); - list_for_each_entry_safe(m, tmp, &work, list) - list_add(&m->list, &pool->prepared_discards); - spin_unlock_irqrestore(&pool->lock, flags); - } - - mempool_free(h, pool->endio_hook_pool); - - return 0; -} - -static void thin_postsuspend(struct dm_target *ti) -{ - if (dm_noflush_suspending(ti)) - requeue_io((struct thin_c *)ti->private); -} - -/* - * <nr mapped sectors> <highest mapped sector> - */ -static int thin_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) -{ - int r; - ssize_t sz = 0; 
- dm_block_t mapped, highest; - char buf[BDEVNAME_SIZE]; - struct thin_c *tc = ti->private; - - if (!tc->td) - DMEMIT("-"); - else { - switch (type) { - case STATUSTYPE_INFO: - r = dm_thin_get_mapped_count(tc->td, &mapped); - if (r) - return r; - - r = dm_thin_get_highest_mapped_block(tc->td, &highest); - if (r < 0) - return r; - - DMEMIT("%llu ", mapped * tc->pool->sectors_per_block); - if (r) - DMEMIT("%llu", ((highest + 1) * - tc->pool->sectors_per_block) - 1); - else - DMEMIT("-"); - break; - - case STATUSTYPE_TABLE: - DMEMIT("%s %lu", - format_dev_t(buf, tc->pool_dev->bdev->bd_dev), - (unsigned long) tc->dev_id); - if (tc->origin_dev) - DMEMIT(" %s", format_dev_t(buf, tc->origin_dev->bdev->bd_dev)); - break; - } - } - - return 0; -} - -static int thin_iterate_devices(struct dm_target *ti, - iterate_devices_callout_fn fn, void *data) -{ - dm_block_t blocks; - struct thin_c *tc = ti->private; - - /* - * We can't call dm_pool_get_data_dev_size() since that blocks. So - * we follow a more convoluted path through to the pool's target. - */ - if (!tc->pool->ti) - return 0; /* nothing is bound */ - - blocks = tc->pool->ti->len >> tc->pool->block_shift; - if (blocks) - return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data); - - return 0; -} - -static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) -{ - struct thin_c *tc = ti->private; - struct pool *pool = tc->pool; - - blk_limits_io_min(limits, 0); - blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); - set_discard_limits(pool, limits); -} - -static struct target_type thin_target = { - .name = "thin", - .version = {1, 1, 0}, - .module = THIS_MODULE, - .ctr = thin_ctr, - .dtr = thin_dtr, - .map = thin_map, - .end_io = thin_endio, - .postsuspend = thin_postsuspend, - .status = thin_status, - .iterate_devices = thin_iterate_devices, - .io_hints = thin_io_hints, -}; - -/*----------------------------------------------------------------*/ - -static int __init dm_thin_init(void) -{ - int r; - - pool_table_init(); - - r = dm_register_target(&thin_target); - if (r) - return r; - - r = dm_register_target(&pool_target); - if (r) - dm_unregister_target(&thin_target); - - return r; -} - -static void dm_thin_exit(void) -{ - dm_unregister_target(&thin_target); - dm_unregister_target(&pool_target); -} - -module_init(dm_thin_init); -module_exit(dm_thin_exit); - -MODULE_DESCRIPTION(DM_NAME " thin provisioning target"); -MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm-uevent.c b/ANDROID_3.4.5/drivers/md/dm-uevent.c deleted file mode 100644 index 8efe033b..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-uevent.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Device Mapper Uevent Support (dm-uevent) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * - * Copyright IBM Corporation, 2007 - * Author: Mike Anderson <andmike@linux.vnet.ibm.com> - */ -#include <linux/list.h> -#include <linux/slab.h> -#include <linux/kobject.h> -#include <linux/dm-ioctl.h> -#include <linux/export.h> - -#include "dm.h" -#include "dm-uevent.h" - -#define DM_MSG_PREFIX "uevent" - -static const struct { - enum dm_uevent_type type; - enum kobject_action action; - char *name; -} _dm_uevent_type_names[] = { - {DM_UEVENT_PATH_FAILED, KOBJ_CHANGE, "PATH_FAILED"}, - {DM_UEVENT_PATH_REINSTATED, KOBJ_CHANGE, "PATH_REINSTATED"}, -}; - -static struct kmem_cache *_dm_event_cache; - -struct dm_uevent { - struct mapped_device *md; - enum kobject_action action; - struct kobj_uevent_env ku_env; - struct list_head elist; - char name[DM_NAME_LEN]; - char uuid[DM_UUID_LEN]; -}; - -static void dm_uevent_free(struct dm_uevent *event) -{ - kmem_cache_free(_dm_event_cache, event); -} - -static struct dm_uevent *dm_uevent_alloc(struct mapped_device *md) -{ - struct dm_uevent *event; - - event = kmem_cache_zalloc(_dm_event_cache, GFP_ATOMIC); - if (!event) - return NULL; - - INIT_LIST_HEAD(&event->elist); - event->md = md; - - return event; -} - -static struct dm_uevent *dm_build_path_uevent(struct mapped_device *md, - struct dm_target *ti, - enum kobject_action action, - const char *dm_action, - const char *path, - unsigned nr_valid_paths) -{ - struct dm_uevent *event; - - event = dm_uevent_alloc(md); - if (!event) { - DMERR("%s: dm_uevent_alloc() failed", __func__); - goto err_nomem; - } - - event->action = action; - - if (add_uevent_var(&event->ku_env, "DM_TARGET=%s", ti->type->name)) { - DMERR("%s: add_uevent_var() for DM_TARGET failed", - __func__); - goto err_add; - } - - if (add_uevent_var(&event->ku_env, "DM_ACTION=%s", dm_action)) { - DMERR("%s: add_uevent_var() for DM_ACTION failed", - __func__); - goto err_add; - } - - if (add_uevent_var(&event->ku_env, "DM_SEQNUM=%u", - dm_next_uevent_seq(md))) { - DMERR("%s: add_uevent_var() for DM_SEQNUM failed", - __func__); - goto err_add; - } - - if (add_uevent_var(&event->ku_env, "DM_PATH=%s", path)) { - DMERR("%s: add_uevent_var() for DM_PATH failed", __func__); - goto err_add; - } - - if (add_uevent_var(&event->ku_env, "DM_NR_VALID_PATHS=%d", - nr_valid_paths)) { - DMERR("%s: add_uevent_var() for DM_NR_VALID_PATHS failed", - __func__); - goto err_add; - } - - return event; - -err_add: - dm_uevent_free(event); -err_nomem: - return ERR_PTR(-ENOMEM); -} - -/** - * dm_send_uevents - send uevents for given list - * - * @events: list of events to send - * @kobj: kobject generating event - * - */ -void dm_send_uevents(struct list_head *events, struct kobject *kobj) -{ - int r; - struct dm_uevent *event, *next; - - list_for_each_entry_safe(event, next, events, elist) { - list_del_init(&event->elist); - - /* - * When a device is being removed this copy fails and we - * discard these unsent events. 
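/*
 * [Editor's sketch, not part of the driver] The environment assembled in
 * this file is what a udev rule or helper program sees for a PATH_FAILED
 * or PATH_REINSTATED event. A hypothetical consumer, reading back the
 * same keys that the add_uevent_var() calls here set:
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        static const char *keys[] = {
                "DM_TARGET", "DM_ACTION", "DM_SEQNUM", "DM_PATH",
                "DM_NR_VALID_PATHS", "DM_NAME", "DM_UUID",
        };
        unsigned i;

        for (i = 0; i < sizeof(keys) / sizeof(keys[0]); i++) {
                const char *val = getenv(keys[i]);

                printf("%s=%s\n", keys[i], val ? val : "(unset)");
        }
        return 0;
}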
- */ - if (dm_copy_name_and_uuid(event->md, event->name, - event->uuid)) { - DMINFO("%s: skipping sending uevent for lost device", - __func__); - goto uevent_free; - } - - if (add_uevent_var(&event->ku_env, "DM_NAME=%s", event->name)) { - DMERR("%s: add_uevent_var() for DM_NAME failed", - __func__); - goto uevent_free; - } - - if (add_uevent_var(&event->ku_env, "DM_UUID=%s", event->uuid)) { - DMERR("%s: add_uevent_var() for DM_UUID failed", - __func__); - goto uevent_free; - } - - r = kobject_uevent_env(kobj, event->action, event->ku_env.envp); - if (r) - DMERR("%s: kobject_uevent_env failed", __func__); -uevent_free: - dm_uevent_free(event); - } -} -EXPORT_SYMBOL_GPL(dm_send_uevents); - -/** - * dm_path_uevent - called to create a new path event and queue it - * - * @event_type: path event type enum - * @ti: pointer to a dm_target - * @path: string containing pathname - * @nr_valid_paths: number of valid paths remaining - * - */ -void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti, - const char *path, unsigned nr_valid_paths) -{ - struct mapped_device *md = dm_table_get_md(ti->table); - struct dm_uevent *event; - - if (event_type >= ARRAY_SIZE(_dm_uevent_type_names)) { - DMERR("%s: Invalid event_type %d", __func__, event_type); - return; - } - - event = dm_build_path_uevent(md, ti, - _dm_uevent_type_names[event_type].action, - _dm_uevent_type_names[event_type].name, - path, nr_valid_paths); - if (IS_ERR(event)) - return; - - dm_uevent_add(md, &event->elist); -} -EXPORT_SYMBOL_GPL(dm_path_uevent); - -int dm_uevent_init(void) -{ - _dm_event_cache = KMEM_CACHE(dm_uevent, 0); - if (!_dm_event_cache) - return -ENOMEM; - - DMINFO("version 1.0.3"); - - return 0; -} - -void dm_uevent_exit(void) -{ - kmem_cache_destroy(_dm_event_cache); -} diff --git a/ANDROID_3.4.5/drivers/md/dm-uevent.h b/ANDROID_3.4.5/drivers/md/dm-uevent.h deleted file mode 100644 index 2eccc8bd..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-uevent.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Device Mapper Uevent Support - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * - * Copyright IBM Corporation, 2007 - * Author: Mike Anderson <andmike@linux.vnet.ibm.com> - */ -#ifndef DM_UEVENT_H -#define DM_UEVENT_H - -enum dm_uevent_type { - DM_UEVENT_PATH_FAILED, - DM_UEVENT_PATH_REINSTATED, -}; - -#ifdef CONFIG_DM_UEVENT - -extern int dm_uevent_init(void); -extern void dm_uevent_exit(void); -extern void dm_send_uevents(struct list_head *events, struct kobject *kobj); -extern void dm_path_uevent(enum dm_uevent_type event_type, - struct dm_target *ti, const char *path, - unsigned nr_valid_paths); - -#else - -static inline int dm_uevent_init(void) -{ - return 0; -} -static inline void dm_uevent_exit(void) -{ -} -static inline void dm_send_uevents(struct list_head *events, - struct kobject *kobj) -{ -} -static inline void dm_path_uevent(enum dm_uevent_type event_type, - struct dm_target *ti, const char *path, - unsigned nr_valid_paths) -{ -} - -#endif /* CONFIG_DM_UEVENT */ - -#endif /* DM_UEVENT_H */ diff --git a/ANDROID_3.4.5/drivers/md/dm-verity.c b/ANDROID_3.4.5/drivers/md/dm-verity.c deleted file mode 100644 index fa365d39..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-verity.c +++ /dev/null @@ -1,913 +0,0 @@ -/* - * Copyright (C) 2012 Red Hat, Inc. - * - * Author: Mikulas Patocka <mpatocka@redhat.com> - * - * Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors - * - * This file is released under the GPLv2. - * - * In the file "/sys/module/dm_verity/parameters/prefetch_cluster" you can set - * default prefetch value. Data are read in "prefetch_cluster" chunks from the - * hash device. Setting this greatly improves performance when data and hash - * are on the same disk on different partitions on devices with poor random - * access behavior. - */ - -#include "dm-bufio.h" - -#include <linux/module.h> -#include <linux/device-mapper.h> -#include <crypto/hash.h> - -#define DM_MSG_PREFIX "verity" - -#define DM_VERITY_IO_VEC_INLINE 16 -#define DM_VERITY_MEMPOOL_SIZE 4 -#define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144 - -#define DM_VERITY_MAX_LEVELS 63 - -static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; - -module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR); - -struct dm_verity { - struct dm_dev *data_dev; - struct dm_dev *hash_dev; - struct dm_target *ti; - struct dm_bufio_client *bufio; - char *alg_name; - struct crypto_shash *tfm; - u8 *root_digest; /* digest of the root block */ - u8 *salt; /* salt: its size is salt_size */ - unsigned salt_size; - sector_t data_start; /* data offset in 512-byte sectors */ - sector_t hash_start; /* hash start in blocks */ - sector_t data_blocks; /* the number of data blocks */ - sector_t hash_blocks; /* the number of hash blocks */ - unsigned char data_dev_block_bits; /* log2(data blocksize) */ - unsigned char hash_dev_block_bits; /* log2(hash blocksize) */ - unsigned char hash_per_block_bits; /* log2(hashes in hash block) */ - unsigned char levels; /* the number of tree levels */ - unsigned char version; - unsigned digest_size; /* digest size for the current hash algorithm */ - unsigned shash_descsize;/* the size of temporary space for crypto */ - int hash_failed; /* set to 1 if hash of any block failed */ - - mempool_t *io_mempool; /* mempool of struct dm_verity_io */ - mempool_t *vec_mempool; /* mempool of bio vector */ - - struct workqueue_struct *verify_wq; - - /* starting blocks for each tree level. 0 is the lowest level. 
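/*
 * [Editor's sketch, not part of the driver] How the level count and the
 * per-level hash positions fall out of hash_per_block_bits. The figures
 * (4KiB hash blocks, 32-byte digests, 2^20 data blocks) are made up; the
 * block index printed is relative to that level's starting block, so the
 * driver would still add hash_level_block[0] to it:
 */
#include <stdio.h>

int main(void)
{
        unsigned hash_per_block_bits = 7;       /* 4096 / 32 = 128 hashes per block */
        unsigned long long data_blocks = 1ULL << 20;
        unsigned levels = 0;
        unsigned long long block = 123456;

        /* same loop shape as the constructor: add levels until one block covers all */
        while ((data_blocks - 1) >> (hash_per_block_bits * levels))
                levels++;

        /* level-0 hash of one data block: which hash block, which slot inside it */
        printf("levels=%u, level-0 hash block %llu, slot %llu\n",
               levels,
               block >> hash_per_block_bits,
               block & ((1ULL << hash_per_block_bits) - 1));
        return 0;
}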
*/ - sector_t hash_level_block[DM_VERITY_MAX_LEVELS]; -}; - -struct dm_verity_io { - struct dm_verity *v; - struct bio *bio; - - /* original values of bio->bi_end_io and bio->bi_private */ - bio_end_io_t *orig_bi_end_io; - void *orig_bi_private; - - sector_t block; - unsigned n_blocks; - - /* saved bio vector */ - struct bio_vec *io_vec; - unsigned io_vec_size; - - struct work_struct work; - - /* A space for short vectors; longer vectors are allocated separately. */ - struct bio_vec io_vec_inline[DM_VERITY_IO_VEC_INLINE]; - - /* - * Three variably-size fields follow this struct: - * - * u8 hash_desc[v->shash_descsize]; - * u8 real_digest[v->digest_size]; - * u8 want_digest[v->digest_size]; - * - * To access them use: io_hash_desc(), io_real_digest() and io_want_digest(). - */ -}; - -static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io) -{ - return (struct shash_desc *)(io + 1); -} - -static u8 *io_real_digest(struct dm_verity *v, struct dm_verity_io *io) -{ - return (u8 *)(io + 1) + v->shash_descsize; -} - -static u8 *io_want_digest(struct dm_verity *v, struct dm_verity_io *io) -{ - return (u8 *)(io + 1) + v->shash_descsize + v->digest_size; -} - -/* - * Auxiliary structure appended to each dm-bufio buffer. If the value - * hash_verified is nonzero, hash of the block has been verified. - * - * The variable hash_verified is set to 0 when allocating the buffer, then - * it can be changed to 1 and it is never reset to 0 again. - * - * There is no lock around this value, a race condition can at worst cause - * that multiple processes verify the hash of the same buffer simultaneously - * and write 1 to hash_verified simultaneously. - * This condition is harmless, so we don't need locking. - */ -struct buffer_aux { - int hash_verified; -}; - -/* - * Initialize struct buffer_aux for a freshly created buffer. - */ -static void dm_bufio_alloc_callback(struct dm_buffer *buf) -{ - struct buffer_aux *aux = dm_bufio_get_aux_data(buf); - - aux->hash_verified = 0; -} - -/* - * Translate input sector number to the sector number on the target device. - */ -static sector_t verity_map_sector(struct dm_verity *v, sector_t bi_sector) -{ - return v->data_start + dm_target_offset(v->ti, bi_sector); -} - -/* - * Return hash position of a specified block at a specified tree level - * (0 is the lowest level). - * The lowest "hash_per_block_bits"-bits of the result denote hash position - * inside a hash block. The remaining bits denote location of the hash block. - */ -static sector_t verity_position_at_level(struct dm_verity *v, sector_t block, - int level) -{ - return block >> (level * v->hash_per_block_bits); -} - -static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level, - sector_t *hash_block, unsigned *offset) -{ - sector_t position = verity_position_at_level(v, block, level); - unsigned idx; - - *hash_block = v->hash_level_block[level] + (position >> v->hash_per_block_bits); - - if (!offset) - return; - - idx = position & ((1 << v->hash_per_block_bits) - 1); - if (!v->version) - *offset = idx * v->digest_size; - else - *offset = idx << (v->hash_dev_block_bits - v->hash_per_block_bits); -} - -/* - * Verify hash of a metadata block pertaining to the specified data block - * ("block" argument) at a specified level ("level" argument). - * - * On successful return, io_want_digest(v, io) contains the hash value for - * a lower tree level or for the data block (if we're at the lowest leve). 
- * - * If "skip_unverified" is true, unverified buffer is skipped and 1 is returned. - * If "skip_unverified" is false, unverified buffer is hashed and verified - * against current value of io_want_digest(v, io). - */ -static int verity_verify_level(struct dm_verity_io *io, sector_t block, - int level, bool skip_unverified) -{ - struct dm_verity *v = io->v; - struct dm_buffer *buf; - struct buffer_aux *aux; - u8 *data; - int r; - sector_t hash_block; - unsigned offset; - - verity_hash_at_level(v, block, level, &hash_block, &offset); - - data = dm_bufio_read(v->bufio, hash_block, &buf); - if (unlikely(IS_ERR(data))) - return PTR_ERR(data); - - aux = dm_bufio_get_aux_data(buf); - - if (!aux->hash_verified) { - struct shash_desc *desc; - u8 *result; - - if (skip_unverified) { - r = 1; - goto release_ret_r; - } - - desc = io_hash_desc(v, io); - desc->tfm = v->tfm; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; - r = crypto_shash_init(desc); - if (r < 0) { - DMERR("crypto_shash_init failed: %d", r); - goto release_ret_r; - } - - if (likely(v->version >= 1)) { - r = crypto_shash_update(desc, v->salt, v->salt_size); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - goto release_ret_r; - } - } - - r = crypto_shash_update(desc, data, 1 << v->hash_dev_block_bits); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - goto release_ret_r; - } - - if (!v->version) { - r = crypto_shash_update(desc, v->salt, v->salt_size); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - goto release_ret_r; - } - } - - result = io_real_digest(v, io); - r = crypto_shash_final(desc, result); - if (r < 0) { - DMERR("crypto_shash_final failed: %d", r); - goto release_ret_r; - } - if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) { - DMERR_LIMIT("metadata block %llu is corrupted", - (unsigned long long)hash_block); - v->hash_failed = 1; - r = -EIO; - goto release_ret_r; - } else - aux->hash_verified = 1; - } - - data += offset; - - memcpy(io_want_digest(v, io), data, v->digest_size); - - dm_bufio_release(buf); - return 0; - -release_ret_r: - dm_bufio_release(buf); - - return r; -} - -/* - * Verify one "dm_verity_io" structure. - */ -static int verity_verify_io(struct dm_verity_io *io) -{ - struct dm_verity *v = io->v; - unsigned b; - int i; - unsigned vector = 0, offset = 0; - - for (b = 0; b < io->n_blocks; b++) { - struct shash_desc *desc; - u8 *result; - int r; - unsigned todo; - - if (likely(v->levels)) { - /* - * First, we try to get the requested hash for - * the current block. If the hash block itself is - * verified, zero is returned. If it isn't, this - * function returns 0 and we fall back to whole - * chain verification. 
- */ - int r = verity_verify_level(io, io->block + b, 0, true); - if (likely(!r)) - goto test_block_hash; - if (r < 0) - return r; - } - - memcpy(io_want_digest(v, io), v->root_digest, v->digest_size); - - for (i = v->levels - 1; i >= 0; i--) { - int r = verity_verify_level(io, io->block + b, i, false); - if (unlikely(r)) - return r; - } - -test_block_hash: - desc = io_hash_desc(v, io); - desc->tfm = v->tfm; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; - r = crypto_shash_init(desc); - if (r < 0) { - DMERR("crypto_shash_init failed: %d", r); - return r; - } - - if (likely(v->version >= 1)) { - r = crypto_shash_update(desc, v->salt, v->salt_size); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - return r; - } - } - - todo = 1 << v->data_dev_block_bits; - do { - struct bio_vec *bv; - u8 *page; - unsigned len; - - BUG_ON(vector >= io->io_vec_size); - bv = &io->io_vec[vector]; - page = kmap_atomic(bv->bv_page); - len = bv->bv_len - offset; - if (likely(len >= todo)) - len = todo; - r = crypto_shash_update(desc, - page + bv->bv_offset + offset, len); - kunmap_atomic(page); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - return r; - } - offset += len; - if (likely(offset == bv->bv_len)) { - offset = 0; - vector++; - } - todo -= len; - } while (todo); - - if (!v->version) { - r = crypto_shash_update(desc, v->salt, v->salt_size); - if (r < 0) { - DMERR("crypto_shash_update failed: %d", r); - return r; - } - } - - result = io_real_digest(v, io); - r = crypto_shash_final(desc, result); - if (r < 0) { - DMERR("crypto_shash_final failed: %d", r); - return r; - } - if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) { - DMERR_LIMIT("data block %llu is corrupted", - (unsigned long long)(io->block + b)); - v->hash_failed = 1; - return -EIO; - } - } - BUG_ON(vector != io->io_vec_size); - BUG_ON(offset); - - return 0; -} - -/* - * End one "io" structure with a given error. - */ -static void verity_finish_io(struct dm_verity_io *io, int error) -{ - struct bio *bio = io->bio; - struct dm_verity *v = io->v; - - bio->bi_end_io = io->orig_bi_end_io; - bio->bi_private = io->orig_bi_private; - - if (io->io_vec != io->io_vec_inline) - mempool_free(io->io_vec, v->vec_mempool); - - mempool_free(io, v->io_mempool); - - bio_endio(bio, error); -} - -static void verity_work(struct work_struct *w) -{ - struct dm_verity_io *io = container_of(w, struct dm_verity_io, work); - - verity_finish_io(io, verity_verify_io(io)); -} - -static void verity_end_io(struct bio *bio, int error) -{ - struct dm_verity_io *io = bio->bi_private; - - if (error) { - verity_finish_io(io, error); - return; - } - - INIT_WORK(&io->work, verity_work); - queue_work(io->v->verify_wq, &io->work); -} - -/* - * Prefetch buffers for the specified io. - * The root buffer is not prefetched, it is assumed that it will be cached - * all the time. 
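/*
 * [Editor's sketch, not part of the driver] verity_prefetch_io() below
 * widens the level-0 prefetch range to "prefetch_cluster" boundaries
 * after rounding the cluster down to a power of two. A hypothetical
 * user-space model of that rounding and alignment (fls() is open-coded
 * here because standard user space has no such helper):
 */
#include <stdio.h>

static unsigned fls_u(unsigned x)
{
        unsigned r = 0;

        while (x) {
                x >>= 1;
                r++;
        }
        return r;
}

int main(void)
{
        unsigned cluster = 48;                  /* not a power of two        */
        unsigned long start = 1000, end = 1003; /* hash blocks wanted anyway */

        if (cluster & (cluster - 1))
                cluster = 1U << (fls_u(cluster) - 1);   /* -> 32 */

        start &= ~(unsigned long)(cluster - 1);         /* -> 992  */
        end |= cluster - 1;                             /* -> 1023 */

        printf("cluster=%u, prefetch blocks [%lu, %lu]\n", cluster, start, end);
        return 0;
}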
- */ -static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io) -{ - int i; - - for (i = v->levels - 2; i >= 0; i--) { - sector_t hash_block_start; - sector_t hash_block_end; - verity_hash_at_level(v, io->block, i, &hash_block_start, NULL); - verity_hash_at_level(v, io->block + io->n_blocks - 1, i, &hash_block_end, NULL); - if (!i) { - unsigned cluster = *(volatile unsigned *)&dm_verity_prefetch_cluster; - - cluster >>= v->data_dev_block_bits; - if (unlikely(!cluster)) - goto no_prefetch_cluster; - - if (unlikely(cluster & (cluster - 1))) - cluster = 1 << (fls(cluster) - 1); - - hash_block_start &= ~(sector_t)(cluster - 1); - hash_block_end |= cluster - 1; - if (unlikely(hash_block_end >= v->hash_blocks)) - hash_block_end = v->hash_blocks - 1; - } -no_prefetch_cluster: - dm_bufio_prefetch(v->bufio, hash_block_start, - hash_block_end - hash_block_start + 1); - } -} - -/* - * Bio map function. It allocates dm_verity_io structure and bio vector and - * fills them. Then it issues prefetches and the I/O. - */ -static int verity_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) -{ - struct dm_verity *v = ti->private; - struct dm_verity_io *io; - - bio->bi_bdev = v->data_dev->bdev; - bio->bi_sector = verity_map_sector(v, bio->bi_sector); - - if (((unsigned)bio->bi_sector | bio_sectors(bio)) & - ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) { - DMERR_LIMIT("unaligned io"); - return -EIO; - } - - if ((bio->bi_sector + bio_sectors(bio)) >> - (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) { - DMERR_LIMIT("io out of range"); - return -EIO; - } - - if (bio_data_dir(bio) == WRITE) - return -EIO; - - io = mempool_alloc(v->io_mempool, GFP_NOIO); - io->v = v; - io->bio = bio; - io->orig_bi_end_io = bio->bi_end_io; - io->orig_bi_private = bio->bi_private; - io->block = bio->bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT); - io->n_blocks = bio->bi_size >> v->data_dev_block_bits; - - bio->bi_end_io = verity_end_io; - bio->bi_private = io; - io->io_vec_size = bio->bi_vcnt - bio->bi_idx; - if (io->io_vec_size < DM_VERITY_IO_VEC_INLINE) - io->io_vec = io->io_vec_inline; - else - io->io_vec = mempool_alloc(v->vec_mempool, GFP_NOIO); - memcpy(io->io_vec, bio_iovec(bio), - io->io_vec_size * sizeof(struct bio_vec)); - - verity_prefetch_io(v, io); - - generic_make_request(bio); - - return DM_MAPIO_SUBMITTED; -} - -/* - * Status: V (valid) or C (corruption found) - */ -static int verity_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) -{ - struct dm_verity *v = ti->private; - unsigned sz = 0; - unsigned x; - - switch (type) { - case STATUSTYPE_INFO: - DMEMIT("%c", v->hash_failed ? 'C' : 'V'); - break; - case STATUSTYPE_TABLE: - DMEMIT("%u %s %s %u %u %llu %llu %s ", - v->version, - v->data_dev->name, - v->hash_dev->name, - 1 << v->data_dev_block_bits, - 1 << v->hash_dev_block_bits, - (unsigned long long)v->data_blocks, - (unsigned long long)v->hash_start, - v->alg_name - ); - for (x = 0; x < v->digest_size; x++) - DMEMIT("%02x", v->root_digest[x]); - DMEMIT(" "); - if (!v->salt_size) - DMEMIT("-"); - else - for (x = 0; x < v->salt_size; x++) - DMEMIT("%02x", v->salt[x]); - break; - } - - return 0; -} - -static int verity_ioctl(struct dm_target *ti, unsigned cmd, - unsigned long arg) -{ - struct dm_verity *v = ti->private; - int r = 0; - - if (v->data_start || - ti->len != i_size_read(v->data_dev->bdev->bd_inode) >> SECTOR_SHIFT) - r = scsi_verify_blk_ioctl(NULL, cmd); - - return r ? 
: __blkdev_driver_ioctl(v->data_dev->bdev, v->data_dev->mode, - cmd, arg); -} - -static int verity_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct dm_verity *v = ti->private; - struct request_queue *q = bdev_get_queue(v->data_dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = v->data_dev->bdev; - bvm->bi_sector = verity_map_sector(v, bvm->bi_sector); - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - -static int verity_iterate_devices(struct dm_target *ti, - iterate_devices_callout_fn fn, void *data) -{ - struct dm_verity *v = ti->private; - - return fn(ti, v->data_dev, v->data_start, ti->len, data); -} - -static void verity_io_hints(struct dm_target *ti, struct queue_limits *limits) -{ - struct dm_verity *v = ti->private; - - if (limits->logical_block_size < 1 << v->data_dev_block_bits) - limits->logical_block_size = 1 << v->data_dev_block_bits; - - if (limits->physical_block_size < 1 << v->data_dev_block_bits) - limits->physical_block_size = 1 << v->data_dev_block_bits; - - blk_limits_io_min(limits, limits->logical_block_size); -} - -static void verity_dtr(struct dm_target *ti) -{ - struct dm_verity *v = ti->private; - - if (v->verify_wq) - destroy_workqueue(v->verify_wq); - - if (v->vec_mempool) - mempool_destroy(v->vec_mempool); - - if (v->io_mempool) - mempool_destroy(v->io_mempool); - - if (v->bufio) - dm_bufio_client_destroy(v->bufio); - - kfree(v->salt); - kfree(v->root_digest); - - if (v->tfm) - crypto_free_shash(v->tfm); - - kfree(v->alg_name); - - if (v->hash_dev) - dm_put_device(ti, v->hash_dev); - - if (v->data_dev) - dm_put_device(ti, v->data_dev); - - kfree(v); -} - -/* - * Target parameters: - * <version> The current format is version 1. - * Vsn 0 is compatible with original Chromium OS releases. - * <data device> - * <hash device> - * <data block size> - * <hash block size> - * <the number of data blocks> - * <hash start block> - * <algorithm> - * <digest> - * <salt> Hex string or "-" if no salt. 
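/*
 * [Editor's sketch, not part of the driver] A table line for the target
 * documented above might look like (all values hypothetical):
 *   "1 /dev/sda1 /dev/sda2 4096 4096 262144 1 sha256 <64 hex digits> <salt or ->"
 * The constructor below validates each numeric field with the
 * sscanf("%u%c") idiom: the trailing %c only matches when junk follows
 * the number, so any return value other than 1 rejects the argument.
 * A small user-space illustration of that idiom:
 */
#include <stdio.h>

static int parse_u32(const char *s, unsigned *out)
{
        char dummy;

        return sscanf(s, "%u%c", out, &dummy) == 1 ? 0 : -1;
}

int main(void)
{
        unsigned v;

        printf("%d\n", parse_u32("4096", &v));  /*  0: clean number   */
        printf("%d\n", parse_u32("4096x", &v)); /* -1: trailing junk  */
        printf("%d\n", parse_u32("", &v));      /* -1: nothing parsed */
        return 0;
}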
- */ -static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) -{ - struct dm_verity *v; - unsigned num; - unsigned long long num_ll; - int r; - int i; - sector_t hash_position; - char dummy; - - v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL); - if (!v) { - ti->error = "Cannot allocate verity structure"; - return -ENOMEM; - } - ti->private = v; - v->ti = ti; - - if ((dm_table_get_mode(ti->table) & ~FMODE_READ)) { - ti->error = "Device must be readonly"; - r = -EINVAL; - goto bad; - } - - if (argc != 10) { - ti->error = "Invalid argument count: exactly 10 arguments required"; - r = -EINVAL; - goto bad; - } - - if (sscanf(argv[0], "%d%c", &num, &dummy) != 1 || - num < 0 || num > 1) { - ti->error = "Invalid version"; - r = -EINVAL; - goto bad; - } - v->version = num; - - r = dm_get_device(ti, argv[1], FMODE_READ, &v->data_dev); - if (r) { - ti->error = "Data device lookup failed"; - goto bad; - } - - r = dm_get_device(ti, argv[2], FMODE_READ, &v->hash_dev); - if (r) { - ti->error = "Data device lookup failed"; - goto bad; - } - - if (sscanf(argv[3], "%u%c", &num, &dummy) != 1 || - !num || (num & (num - 1)) || - num < bdev_logical_block_size(v->data_dev->bdev) || - num > PAGE_SIZE) { - ti->error = "Invalid data device block size"; - r = -EINVAL; - goto bad; - } - v->data_dev_block_bits = ffs(num) - 1; - - if (sscanf(argv[4], "%u%c", &num, &dummy) != 1 || - !num || (num & (num - 1)) || - num < bdev_logical_block_size(v->hash_dev->bdev) || - num > INT_MAX) { - ti->error = "Invalid hash device block size"; - r = -EINVAL; - goto bad; - } - v->hash_dev_block_bits = ffs(num) - 1; - - if (sscanf(argv[5], "%llu%c", &num_ll, &dummy) != 1 || - num_ll << (v->data_dev_block_bits - SECTOR_SHIFT) != - (sector_t)num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) { - ti->error = "Invalid data blocks"; - r = -EINVAL; - goto bad; - } - v->data_blocks = num_ll; - - if (ti->len > (v->data_blocks << (v->data_dev_block_bits - SECTOR_SHIFT))) { - ti->error = "Data device is too small"; - r = -EINVAL; - goto bad; - } - - if (sscanf(argv[6], "%llu%c", &num_ll, &dummy) != 1 || - num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT) != - (sector_t)num_ll << (v->hash_dev_block_bits - SECTOR_SHIFT)) { - ti->error = "Invalid hash start"; - r = -EINVAL; - goto bad; - } - v->hash_start = num_ll; - - v->alg_name = kstrdup(argv[7], GFP_KERNEL); - if (!v->alg_name) { - ti->error = "Cannot allocate algorithm name"; - r = -ENOMEM; - goto bad; - } - - v->tfm = crypto_alloc_shash(v->alg_name, 0, 0); - if (IS_ERR(v->tfm)) { - ti->error = "Cannot initialize hash function"; - r = PTR_ERR(v->tfm); - v->tfm = NULL; - goto bad; - } - v->digest_size = crypto_shash_digestsize(v->tfm); - if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) { - ti->error = "Digest size too big"; - r = -EINVAL; - goto bad; - } - v->shash_descsize = - sizeof(struct shash_desc) + crypto_shash_descsize(v->tfm); - - v->root_digest = kmalloc(v->digest_size, GFP_KERNEL); - if (!v->root_digest) { - ti->error = "Cannot allocate root digest"; - r = -ENOMEM; - goto bad; - } - if (strlen(argv[8]) != v->digest_size * 2 || - hex2bin(v->root_digest, argv[8], v->digest_size)) { - ti->error = "Invalid root digest"; - r = -EINVAL; - goto bad; - } - - if (strcmp(argv[9], "-")) { - v->salt_size = strlen(argv[9]) / 2; - v->salt = kmalloc(v->salt_size, GFP_KERNEL); - if (!v->salt) { - ti->error = "Cannot allocate salt"; - r = -ENOMEM; - goto bad; - } - if (strlen(argv[9]) != v->salt_size * 2 || - hex2bin(v->salt, argv[9], v->salt_size)) { - ti->error = "Invalid 
salt"; - r = -EINVAL; - goto bad; - } - } - - v->hash_per_block_bits = - fls((1 << v->hash_dev_block_bits) / v->digest_size) - 1; - - v->levels = 0; - if (v->data_blocks) - while (v->hash_per_block_bits * v->levels < 64 && - (unsigned long long)(v->data_blocks - 1) >> - (v->hash_per_block_bits * v->levels)) - v->levels++; - - if (v->levels > DM_VERITY_MAX_LEVELS) { - ti->error = "Too many tree levels"; - r = -E2BIG; - goto bad; - } - - hash_position = v->hash_start; - for (i = v->levels - 1; i >= 0; i--) { - sector_t s; - v->hash_level_block[i] = hash_position; - s = verity_position_at_level(v, v->data_blocks, i); - s = (s >> v->hash_per_block_bits) + - !!(s & ((1 << v->hash_per_block_bits) - 1)); - if (hash_position + s < hash_position) { - ti->error = "Hash device offset overflow"; - r = -E2BIG; - goto bad; - } - hash_position += s; - } - v->hash_blocks = hash_position; - - v->bufio = dm_bufio_client_create(v->hash_dev->bdev, - 1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux), - dm_bufio_alloc_callback, NULL); - if (IS_ERR(v->bufio)) { - ti->error = "Cannot initialize dm-bufio"; - r = PTR_ERR(v->bufio); - v->bufio = NULL; - goto bad; - } - - if (dm_bufio_get_device_size(v->bufio) < v->hash_blocks) { - ti->error = "Hash device is too small"; - r = -E2BIG; - goto bad; - } - - v->io_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE, - sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2); - if (!v->io_mempool) { - ti->error = "Cannot allocate io mempool"; - r = -ENOMEM; - goto bad; - } - - v->vec_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE, - BIO_MAX_PAGES * sizeof(struct bio_vec)); - if (!v->vec_mempool) { - ti->error = "Cannot allocate vector mempool"; - r = -ENOMEM; - goto bad; - } - - /* WQ_UNBOUND greatly improves performance when running on ramdisk */ - v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus()); - if (!v->verify_wq) { - ti->error = "Cannot allocate workqueue"; - r = -ENOMEM; - goto bad; - } - - return 0; - -bad: - verity_dtr(ti); - - return r; -} - -static struct target_type verity_target = { - .name = "verity", - .version = {1, 0, 0}, - .module = THIS_MODULE, - .ctr = verity_ctr, - .dtr = verity_dtr, - .map = verity_map, - .status = verity_status, - .ioctl = verity_ioctl, - .merge = verity_merge, - .iterate_devices = verity_iterate_devices, - .io_hints = verity_io_hints, -}; - -static int __init dm_verity_init(void) -{ - int r; - - r = dm_register_target(&verity_target); - if (r < 0) - DMERR("register failed %d", r); - - return r; -} - -static void __exit dm_verity_exit(void) -{ - dm_unregister_target(&verity_target); -} - -module_init(dm_verity_init); -module_exit(dm_verity_exit); - -MODULE_AUTHOR("Mikulas Patocka <mpatocka@redhat.com>"); -MODULE_AUTHOR("Mandeep Baines <msb@chromium.org>"); -MODULE_AUTHOR("Will Drewry <wad@chromium.org>"); -MODULE_DESCRIPTION(DM_NAME " target for transparent disk integrity checking"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm-zero.c b/ANDROID_3.4.5/drivers/md/dm-zero.c deleted file mode 100644 index cc2b3cb8..00000000 --- a/ANDROID_3.4.5/drivers/md/dm-zero.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (C) 2003 Christophe Saout <christophe@saout.de> - * - * This file is released under the GPL. 
- */ - -#include <linux/device-mapper.h> - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/bio.h> - -#define DM_MSG_PREFIX "zero" - -/* - * Construct a dummy mapping that only returns zeros - */ -static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv) -{ - if (argc != 0) { - ti->error = "No arguments required"; - return -EINVAL; - } - - /* - * Silently drop discards, avoiding -EOPNOTSUPP. - */ - ti->num_discard_requests = 1; - - return 0; -} - -/* - * Return zeros only on reads - */ -static int zero_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) -{ - switch(bio_rw(bio)) { - case READ: - zero_fill_bio(bio); - break; - case READA: - /* readahead of null bytes only wastes buffer cache */ - return -EIO; - case WRITE: - /* writes get silently dropped */ - break; - } - - bio_endio(bio, 0); - - /* accepted bio, don't make new request */ - return DM_MAPIO_SUBMITTED; -} - -static struct target_type zero_target = { - .name = "zero", - .version = {1, 0, 0}, - .module = THIS_MODULE, - .ctr = zero_ctr, - .map = zero_map, -}; - -static int __init dm_zero_init(void) -{ - int r = dm_register_target(&zero_target); - - if (r < 0) - DMERR("register failed %d", r); - - return r; -} - -static void __exit dm_zero_exit(void) -{ - dm_unregister_target(&zero_target); -} - -module_init(dm_zero_init) -module_exit(dm_zero_exit) - -MODULE_AUTHOR("Christophe Saout <christophe@saout.de>"); -MODULE_DESCRIPTION(DM_NAME " dummy target returning zeros"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm.c b/ANDROID_3.4.5/drivers/md/dm.c deleted file mode 100644 index e24143cc..00000000 --- a/ANDROID_3.4.5/drivers/md/dm.c +++ /dev/null @@ -1,2780 +0,0 @@ -/* - * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. - * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. - */ - -#include "dm.h" -#include "dm-uevent.h" - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/moduleparam.h> -#include <linux/blkpg.h> -#include <linux/bio.h> -#include <linux/mempool.h> -#include <linux/slab.h> -#include <linux/idr.h> -#include <linux/hdreg.h> -#include <linux/delay.h> - -#include <trace/events/block.h> - -#define DM_MSG_PREFIX "core" - -#ifdef CONFIG_PRINTK -/* - * ratelimit state to be used in DMXXX_LIMIT(). - */ -DEFINE_RATELIMIT_STATE(dm_ratelimit_state, - DEFAULT_RATELIMIT_INTERVAL, - DEFAULT_RATELIMIT_BURST); -EXPORT_SYMBOL(dm_ratelimit_state); -#endif - -/* - * Cookies are numeric values sent with CHANGE and REMOVE - * uevents while resuming, removing or renaming the device. - */ -#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE" -#define DM_COOKIE_LENGTH 24 - -static const char *_name = DM_NAME; - -static unsigned int major = 0; -static unsigned int _major = 0; - -static DEFINE_IDR(_minor_idr); - -static DEFINE_SPINLOCK(_minor_lock); -/* - * For bio-based dm. - * One of these is allocated per bio. - */ -struct dm_io { - struct mapped_device *md; - int error; - atomic_t io_count; - struct bio *bio; - unsigned long start_time; - spinlock_t endio_lock; -}; - -/* - * For bio-based dm. - * One of these is allocated per target within a bio. Hopefully - * this will be simplified out one day. - */ -struct dm_target_io { - struct dm_io *io; - struct dm_target *ti; - union map_info info; -}; - -/* - * For request-based dm. - * One of these is allocated per request. 
- */ -struct dm_rq_target_io { - struct mapped_device *md; - struct dm_target *ti; - struct request *orig, clone; - int error; - union map_info info; -}; - -/* - * For request-based dm. - * One of these is allocated per bio. - */ -struct dm_rq_clone_bio_info { - struct bio *orig; - struct dm_rq_target_io *tio; -}; - -union map_info *dm_get_mapinfo(struct bio *bio) -{ - if (bio && bio->bi_private) - return &((struct dm_target_io *)bio->bi_private)->info; - return NULL; -} - -union map_info *dm_get_rq_mapinfo(struct request *rq) -{ - if (rq && rq->end_io_data) - return &((struct dm_rq_target_io *)rq->end_io_data)->info; - return NULL; -} -EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); - -#define MINOR_ALLOCED ((void *)-1) - -/* - * Bits for the md->flags field. - */ -#define DMF_BLOCK_IO_FOR_SUSPEND 0 -#define DMF_SUSPENDED 1 -#define DMF_FROZEN 2 -#define DMF_FREEING 3 -#define DMF_DELETING 4 -#define DMF_NOFLUSH_SUSPENDING 5 -#define DMF_MERGE_IS_OPTIONAL 6 - -/* - * Work processed by per-device workqueue. - */ -struct mapped_device { - struct rw_semaphore io_lock; - struct mutex suspend_lock; - rwlock_t map_lock; - atomic_t holders; - atomic_t open_count; - - unsigned long flags; - - struct request_queue *queue; - unsigned type; - /* Protect queue and type against concurrent access. */ - struct mutex type_lock; - - struct target_type *immutable_target_type; - - struct gendisk *disk; - char name[16]; - - void *interface_ptr; - - /* - * A list of ios that arrived while we were suspended. - */ - atomic_t pending[2]; - wait_queue_head_t wait; - struct work_struct work; - struct bio_list deferred; - spinlock_t deferred_lock; - - /* - * Processing queue (flush) - */ - struct workqueue_struct *wq; - - /* - * The current mapping. - */ - struct dm_table *map; - - /* - * io objects are allocated from here. - */ - mempool_t *io_pool; - mempool_t *tio_pool; - - struct bio_set *bs; - - /* - * Event handling. - */ - atomic_t event_nr; - wait_queue_head_t eventq; - atomic_t uevent_seq; - struct list_head uevent_list; - spinlock_t uevent_lock; /* Protect access to uevent_list */ - - /* - * freeze/thaw support require holding onto a super block - */ - struct super_block *frozen_sb; - struct block_device *bdev; - - /* forced geometry settings */ - struct hd_geometry geometry; - - /* sysfs handle */ - struct kobject kobj; - - /* zero-length flush that will be cloned and submitted to targets */ - struct bio flush_bio; -}; - -/* - * For mempools pre-allocation at the table loading time. 
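 *
 * These pools are created while a table is loaded and handed over to the
 * mapped_device by __bind_mempools() below; whatever is left in the table
 * afterwards is released via dm_table_free_md_mempools().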
- */ -struct dm_md_mempools { - mempool_t *io_pool; - mempool_t *tio_pool; - struct bio_set *bs; -}; - -#define MIN_IOS 256 -static struct kmem_cache *_io_cache; -static struct kmem_cache *_tio_cache; -static struct kmem_cache *_rq_tio_cache; -static struct kmem_cache *_rq_bio_info_cache; - -static int __init local_init(void) -{ - int r = -ENOMEM; - - /* allocate a slab for the dm_ios */ - _io_cache = KMEM_CACHE(dm_io, 0); - if (!_io_cache) - return r; - - /* allocate a slab for the target ios */ - _tio_cache = KMEM_CACHE(dm_target_io, 0); - if (!_tio_cache) - goto out_free_io_cache; - - _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0); - if (!_rq_tio_cache) - goto out_free_tio_cache; - - _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0); - if (!_rq_bio_info_cache) - goto out_free_rq_tio_cache; - - r = dm_uevent_init(); - if (r) - goto out_free_rq_bio_info_cache; - - _major = major; - r = register_blkdev(_major, _name); - if (r < 0) - goto out_uevent_exit; - - if (!_major) - _major = r; - - return 0; - -out_uevent_exit: - dm_uevent_exit(); -out_free_rq_bio_info_cache: - kmem_cache_destroy(_rq_bio_info_cache); -out_free_rq_tio_cache: - kmem_cache_destroy(_rq_tio_cache); -out_free_tio_cache: - kmem_cache_destroy(_tio_cache); -out_free_io_cache: - kmem_cache_destroy(_io_cache); - - return r; -} - -static void local_exit(void) -{ - kmem_cache_destroy(_rq_bio_info_cache); - kmem_cache_destroy(_rq_tio_cache); - kmem_cache_destroy(_tio_cache); - kmem_cache_destroy(_io_cache); - unregister_blkdev(_major, _name); - dm_uevent_exit(); - - _major = 0; - - DMINFO("cleaned up"); -} - -static int (*_inits[])(void) __initdata = { - local_init, - dm_target_init, - dm_linear_init, - dm_stripe_init, - dm_io_init, - dm_kcopyd_init, - dm_interface_init, -}; - -static void (*_exits[])(void) = { - local_exit, - dm_target_exit, - dm_linear_exit, - dm_stripe_exit, - dm_io_exit, - dm_kcopyd_exit, - dm_interface_exit, -}; - -static int __init dm_init(void) -{ - const int count = ARRAY_SIZE(_inits); - - int r, i; - - for (i = 0; i < count; i++) { - r = _inits[i](); - if (r) - goto bad; - } - - return 0; - - bad: - while (i--) - _exits[i](); - - return r; -} - -static void __exit dm_exit(void) -{ - int i = ARRAY_SIZE(_exits); - - while (i--) - _exits[i](); - - /* - * Should be empty by this point. - */ - idr_remove_all(&_minor_idr); - idr_destroy(&_minor_idr); -} - -/* - * Block device functions - */ -int dm_deleting_md(struct mapped_device *md) -{ - return test_bit(DMF_DELETING, &md->flags); -} - -static int dm_blk_open(struct block_device *bdev, fmode_t mode) -{ - struct mapped_device *md; - - spin_lock(&_minor_lock); - - md = bdev->bd_disk->private_data; - if (!md) - goto out; - - if (test_bit(DMF_FREEING, &md->flags) || - dm_deleting_md(md)) { - md = NULL; - goto out; - } - - dm_get(md); - atomic_inc(&md->open_count); - -out: - spin_unlock(&_minor_lock); - - return md ? 0 : -ENXIO; -} - -static int dm_blk_close(struct gendisk *disk, fmode_t mode) -{ - struct mapped_device *md = disk->private_data; - - spin_lock(&_minor_lock); - - atomic_dec(&md->open_count); - dm_put(md); - - spin_unlock(&_minor_lock); - - return 0; -} - -int dm_open_count(struct mapped_device *md) -{ - return atomic_read(&md->open_count); -} - -/* - * Guarantees nothing is using the device before it's deleted. 
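 * Returns -EBUSY if the device is still open; otherwise DMF_DELETING is
 * set, which makes dm_blk_open() refuse any further opens.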
- */ -int dm_lock_for_deletion(struct mapped_device *md) -{ - int r = 0; - - spin_lock(&_minor_lock); - - if (dm_open_count(md)) - r = -EBUSY; - else - set_bit(DMF_DELETING, &md->flags); - - spin_unlock(&_minor_lock); - - return r; -} - -static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - struct mapped_device *md = bdev->bd_disk->private_data; - - return dm_get_geometry(md, geo); -} - -static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - struct mapped_device *md = bdev->bd_disk->private_data; - struct dm_table *map = dm_get_live_table(md); - struct dm_target *tgt; - int r = -ENOTTY; - - if (!map || !dm_table_get_size(map)) - goto out; - - /* We only support devices that have a single target */ - if (dm_table_get_num_targets(map) != 1) - goto out; - - tgt = dm_table_get_target(map, 0); - - if (dm_suspended_md(md)) { - r = -EAGAIN; - goto out; - } - - if (tgt->type->ioctl) - r = tgt->type->ioctl(tgt, cmd, arg); - -out: - dm_table_put(map); - - return r; -} - -static struct dm_io *alloc_io(struct mapped_device *md) -{ - return mempool_alloc(md->io_pool, GFP_NOIO); -} - -static void free_io(struct mapped_device *md, struct dm_io *io) -{ - mempool_free(io, md->io_pool); -} - -static void free_tio(struct mapped_device *md, struct dm_target_io *tio) -{ - mempool_free(tio, md->tio_pool); -} - -static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md, - gfp_t gfp_mask) -{ - return mempool_alloc(md->tio_pool, gfp_mask); -} - -static void free_rq_tio(struct dm_rq_target_io *tio) -{ - mempool_free(tio, tio->md->tio_pool); -} - -static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md) -{ - return mempool_alloc(md->io_pool, GFP_ATOMIC); -} - -static void free_bio_info(struct dm_rq_clone_bio_info *info) -{ - mempool_free(info, info->tio->md->io_pool); -} - -static int md_in_flight(struct mapped_device *md) -{ - return atomic_read(&md->pending[READ]) + - atomic_read(&md->pending[WRITE]); -} - -static void start_io_acct(struct dm_io *io) -{ - struct mapped_device *md = io->md; - int cpu; - int rw = bio_data_dir(io->bio); - - io->start_time = jiffies; - - cpu = part_stat_lock(); - part_round_stats(cpu, &dm_disk(md)->part0); - part_stat_unlock(); - atomic_set(&dm_disk(md)->part0.in_flight[rw], - atomic_inc_return(&md->pending[rw])); -} - -static void end_io_acct(struct dm_io *io) -{ - struct mapped_device *md = io->md; - struct bio *bio = io->bio; - unsigned long duration = jiffies - io->start_time; - int pending, cpu; - int rw = bio_data_dir(bio); - - cpu = part_stat_lock(); - part_round_stats(cpu, &dm_disk(md)->part0); - part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); - part_stat_unlock(); - - /* - * After this is decremented the bio must not be touched if it is - * a flush. - */ - pending = atomic_dec_return(&md->pending[rw]); - atomic_set(&dm_disk(md)->part0.in_flight[rw], pending); - pending += atomic_read(&md->pending[rw^0x1]); - - /* nudge anyone waiting on suspend queue */ - if (!pending) - wake_up(&md->wait); -} - -/* - * Add the bio to the list of deferred io. 
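 * Deferred bios are replayed by dm_wq_work() once dm_queue_flush()
 * clears DMF_BLOCK_IO_FOR_SUSPEND.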
- */ -static void queue_io(struct mapped_device *md, struct bio *bio) -{ - unsigned long flags; - - spin_lock_irqsave(&md->deferred_lock, flags); - bio_list_add(&md->deferred, bio); - spin_unlock_irqrestore(&md->deferred_lock, flags); - queue_work(md->wq, &md->work); -} - -/* - * Everyone (including functions in this file), should use this - * function to access the md->map field, and make sure they call - * dm_table_put() when finished. - */ -struct dm_table *dm_get_live_table(struct mapped_device *md) -{ - struct dm_table *t; - unsigned long flags; - - read_lock_irqsave(&md->map_lock, flags); - t = md->map; - if (t) - dm_table_get(t); - read_unlock_irqrestore(&md->map_lock, flags); - - return t; -} - -/* - * Get the geometry associated with a dm device - */ -int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo) -{ - *geo = md->geometry; - - return 0; -} - -/* - * Set the geometry of a device. - */ -int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo) -{ - sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors; - - if (geo->start > sz) { - DMWARN("Start sector is beyond the geometry limits."); - return -EINVAL; - } - - md->geometry = *geo; - - return 0; -} - -/*----------------------------------------------------------------- - * CRUD START: - * A more elegant soln is in the works that uses the queue - * merge fn, unfortunately there are a couple of changes to - * the block layer that I want to make for this. So in the - * interests of getting something for people to use I give - * you this clearly demarcated crap. - *---------------------------------------------------------------*/ - -static int __noflush_suspending(struct mapped_device *md) -{ - return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); -} - -/* - * Decrements the number of outstanding ios that a bio has been - * cloned into, completing the original io if necc. - */ -static void dec_pending(struct dm_io *io, int error) -{ - unsigned long flags; - int io_error; - struct bio *bio; - struct mapped_device *md = io->md; - - /* Push-back supersedes any I/O errors */ - if (unlikely(error)) { - spin_lock_irqsave(&io->endio_lock, flags); - if (!(io->error > 0 && __noflush_suspending(md))) - io->error = error; - spin_unlock_irqrestore(&io->endio_lock, flags); - } - - if (atomic_dec_and_test(&io->io_count)) { - if (io->error == DM_ENDIO_REQUEUE) { - /* - * Target requested pushing back the I/O. - */ - spin_lock_irqsave(&md->deferred_lock, flags); - if (__noflush_suspending(md)) - bio_list_add_head(&md->deferred, io->bio); - else - /* noflush suspend was interrupted. */ - io->error = -EIO; - spin_unlock_irqrestore(&md->deferred_lock, flags); - } - - io_error = io->error; - bio = io->bio; - end_io_acct(io); - free_io(md, io); - - if (io_error == DM_ENDIO_REQUEUE) - return; - - if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) { - /* - * Preflush done for flush with data, reissue - * without REQ_FLUSH. 
- */ - bio->bi_rw &= ~REQ_FLUSH; - queue_io(md, bio); - } else { - /* done with normal IO or empty flush */ - trace_block_bio_complete(md->queue, bio, io_error); - bio_endio(bio, io_error); - } - } -} - -static void clone_endio(struct bio *bio, int error) -{ - int r = 0; - struct dm_target_io *tio = bio->bi_private; - struct dm_io *io = tio->io; - struct mapped_device *md = tio->io->md; - dm_endio_fn endio = tio->ti->type->end_io; - - if (!bio_flagged(bio, BIO_UPTODATE) && !error) - error = -EIO; - - if (endio) { - r = endio(tio->ti, bio, error, &tio->info); - if (r < 0 || r == DM_ENDIO_REQUEUE) - /* - * error and requeue request are handled - * in dec_pending(). - */ - error = r; - else if (r == DM_ENDIO_INCOMPLETE) - /* The target will handle the io */ - return; - else if (r) { - DMWARN("unimplemented target endio return value: %d", r); - BUG(); - } - } - - /* - * Store md for cleanup instead of tio which is about to get freed. - */ - bio->bi_private = md->bs; - - free_tio(md, tio); - bio_put(bio); - dec_pending(io, error); -} - -/* - * Partial completion handling for request-based dm - */ -static void end_clone_bio(struct bio *clone, int error) -{ - struct dm_rq_clone_bio_info *info = clone->bi_private; - struct dm_rq_target_io *tio = info->tio; - struct bio *bio = info->orig; - unsigned int nr_bytes = info->orig->bi_size; - - bio_put(clone); - - if (tio->error) - /* - * An error has already been detected on the request. - * Once error occurred, just let clone->end_io() handle - * the remainder. - */ - return; - else if (error) { - /* - * Don't notice the error to the upper layer yet. - * The error handling decision is made by the target driver, - * when the request is completed. - */ - tio->error = error; - return; - } - - /* - * I/O for the bio successfully completed. - * Notice the data completion to the upper layer. - */ - - /* - * bios are processed from the head of the list. - * So the completing bio should always be rq->bio. - * If it's not, something wrong is happening. - */ - if (tio->orig->bio != bio) - DMERR("bio completion is going in the middle of the request"); - - /* - * Update the original request. - * Do not use blk_end_request() here, because it may complete - * the original request before the clone, and break the ordering. - */ - blk_update_request(tio->orig, 0, nr_bytes); -} - -/* - * Don't touch any member of the md after calling this function because - * the md may be freed in dm_put() at the end of this function. - * Or do dm_get() before calling this function and dm_put() later. - */ -static void rq_completed(struct mapped_device *md, int rw, int run_queue) -{ - atomic_dec(&md->pending[rw]); - - /* nudge anyone waiting on suspend queue */ - if (!md_in_flight(md)) - wake_up(&md->wait); - - if (run_queue) - blk_run_queue(md->queue); - - /* - * dm_put() must be at the end of this function. See the comment above - */ - dm_put(md); -} - -static void free_rq_clone(struct request *clone) -{ - struct dm_rq_target_io *tio = clone->end_io_data; - - blk_rq_unprep_clone(clone); - free_rq_tio(tio); -} - -/* - * Complete the clone and the original request. - * Must be called without queue lock. 
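 * (blk_end_request_all() and the queue run in rq_completed() take the
 * queue lock themselves, hence the restriction.)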
- */ -static void dm_end_request(struct request *clone, int error) -{ - int rw = rq_data_dir(clone); - struct dm_rq_target_io *tio = clone->end_io_data; - struct mapped_device *md = tio->md; - struct request *rq = tio->orig; - - if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { - rq->errors = clone->errors; - rq->resid_len = clone->resid_len; - - if (rq->sense) - /* - * We are using the sense buffer of the original - * request. - * So setting the length of the sense data is enough. - */ - rq->sense_len = clone->sense_len; - } - - free_rq_clone(clone); - blk_end_request_all(rq, error); - rq_completed(md, rw, true); -} - -static void dm_unprep_request(struct request *rq) -{ - struct request *clone = rq->special; - - rq->special = NULL; - rq->cmd_flags &= ~REQ_DONTPREP; - - free_rq_clone(clone); -} - -/* - * Requeue the original request of a clone. - */ -void dm_requeue_unmapped_request(struct request *clone) -{ - int rw = rq_data_dir(clone); - struct dm_rq_target_io *tio = clone->end_io_data; - struct mapped_device *md = tio->md; - struct request *rq = tio->orig; - struct request_queue *q = rq->q; - unsigned long flags; - - dm_unprep_request(rq); - - spin_lock_irqsave(q->queue_lock, flags); - blk_requeue_request(q, rq); - spin_unlock_irqrestore(q->queue_lock, flags); - - rq_completed(md, rw, 0); -} -EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request); - -static void __stop_queue(struct request_queue *q) -{ - blk_stop_queue(q); -} - -static void stop_queue(struct request_queue *q) -{ - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - __stop_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); -} - -static void __start_queue(struct request_queue *q) -{ - if (blk_queue_stopped(q)) - blk_start_queue(q); -} - -static void start_queue(struct request_queue *q) -{ - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - __start_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); -} - -static void dm_done(struct request *clone, int error, bool mapped) -{ - int r = error; - struct dm_rq_target_io *tio = clone->end_io_data; - dm_request_endio_fn rq_end_io = tio->ti->type->rq_end_io; - - if (mapped && rq_end_io) - r = rq_end_io(tio->ti, clone, error, &tio->info); - - if (r <= 0) - /* The target wants to complete the I/O */ - dm_end_request(clone, r); - else if (r == DM_ENDIO_INCOMPLETE) - /* The target will handle the I/O */ - return; - else if (r == DM_ENDIO_REQUEUE) - /* The target wants to requeue the I/O */ - dm_requeue_unmapped_request(clone); - else { - DMWARN("unimplemented target endio return value: %d", r); - BUG(); - } -} - -/* - * Request completion handler for request-based dm - */ -static void dm_softirq_done(struct request *rq) -{ - bool mapped = true; - struct request *clone = rq->completion_data; - struct dm_rq_target_io *tio = clone->end_io_data; - - if (rq->cmd_flags & REQ_FAILED) - mapped = false; - - dm_done(clone, tio->error, mapped); -} - -/* - * Complete the clone and the original request with the error status - * through softirq context. - */ -static void dm_complete_request(struct request *clone, int error) -{ - struct dm_rq_target_io *tio = clone->end_io_data; - struct request *rq = tio->orig; - - tio->error = error; - rq->completion_data = clone; - blk_complete_request(rq); -} - -/* - * Complete the not-mapped clone and the original request with the error status - * through softirq context. - * Target's rq_end_io() function isn't called. - * This may be used when the target's map_rq() function fails. 
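 *
 * Completion then happens in softirq context: dm_softirq_done() sees
 * REQ_FAILED on the original request, passes mapped=false to dm_done(),
 * and the target's rq_end_io handler is therefore skipped.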
- */ -void dm_kill_unmapped_request(struct request *clone, int error) -{ - struct dm_rq_target_io *tio = clone->end_io_data; - struct request *rq = tio->orig; - - rq->cmd_flags |= REQ_FAILED; - dm_complete_request(clone, error); -} -EXPORT_SYMBOL_GPL(dm_kill_unmapped_request); - -/* - * Called with the queue lock held - */ -static void end_clone_request(struct request *clone, int error) -{ - /* - * For just cleaning up the information of the queue in which - * the clone was dispatched. - * The clone is *NOT* freed actually here because it is alloced from - * dm own mempool and REQ_ALLOCED isn't set in clone->cmd_flags. - */ - __blk_put_request(clone->q, clone); - - /* - * Actual request completion is done in a softirq context which doesn't - * hold the queue lock. Otherwise, deadlock could occur because: - * - another request may be submitted by the upper level driver - * of the stacking during the completion - * - the submission which requires queue lock may be done - * against this queue - */ - dm_complete_request(clone, error); -} - -/* - * Return maximum size of I/O possible at the supplied sector up to the current - * target boundary. - */ -static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti) -{ - sector_t target_offset = dm_target_offset(ti, sector); - - return ti->len - target_offset; -} - -static sector_t max_io_len(sector_t sector, struct dm_target *ti) -{ - sector_t len = max_io_len_target_boundary(sector, ti); - - /* - * Does the target need to split even further ? - */ - if (ti->split_io) { - sector_t boundary; - sector_t offset = dm_target_offset(ti, sector); - boundary = ((offset + ti->split_io) & ~(ti->split_io - 1)) - - offset; - if (len > boundary) - len = boundary; - } - - return len; -} - -static void __map_bio(struct dm_target *ti, struct bio *clone, - struct dm_target_io *tio) -{ - int r; - sector_t sector; - struct mapped_device *md; - - clone->bi_end_io = clone_endio; - clone->bi_private = tio; - - /* - * Map the clone. If r == 0 we don't need to do - * anything, the target has assumed ownership of - * this io. - */ - atomic_inc(&tio->io->io_count); - sector = clone->bi_sector; - r = ti->type->map(ti, clone, &tio->info); - if (r == DM_MAPIO_REMAPPED) { - /* the bio has been remapped so dispatch it */ - - trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone, - tio->io->bio->bi_bdev->bd_dev, sector); - - generic_make_request(clone); - } else if (r < 0 || r == DM_MAPIO_REQUEUE) { - /* error the io and bail out, or requeue it if needed */ - md = tio->io->md; - dec_pending(tio->io, r); - /* - * Store bio_set for cleanup. - */ - clone->bi_end_io = NULL; - clone->bi_private = md->bs; - bio_put(clone); - free_tio(md, tio); - } else if (r) { - DMWARN("unimplemented target map return value: %d", r); - BUG(); - } -} - -struct clone_info { - struct mapped_device *md; - struct dm_table *map; - struct bio *bio; - struct dm_io *io; - sector_t sector; - sector_t sector_count; - unsigned short idx; -}; - -static void dm_bio_destructor(struct bio *bio) -{ - struct bio_set *bs = bio->bi_private; - - bio_free(bio, bs); -} - -/* - * Creates a little bio that just does part of a bvec. 
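 *
 * For example, a bvec that crosses a target boundary 4 sectors in is
 * handled by __clone_and_map() issuing a 4-sector clone through this
 * helper and then continuing with the rest of the bvec at the next
 * target.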
- */ -static struct bio *split_bvec(struct bio *bio, sector_t sector, - unsigned short idx, unsigned int offset, - unsigned int len, struct bio_set *bs) -{ - struct bio *clone; - struct bio_vec *bv = bio->bi_io_vec + idx; - - clone = bio_alloc_bioset(GFP_NOIO, 1, bs); - clone->bi_destructor = dm_bio_destructor; - *clone->bi_io_vec = *bv; - - clone->bi_sector = sector; - clone->bi_bdev = bio->bi_bdev; - clone->bi_rw = bio->bi_rw; - clone->bi_vcnt = 1; - clone->bi_size = to_bytes(len); - clone->bi_io_vec->bv_offset = offset; - clone->bi_io_vec->bv_len = clone->bi_size; - clone->bi_flags |= 1 << BIO_CLONED; - - if (bio_integrity(bio)) { - bio_integrity_clone(clone, bio, GFP_NOIO, bs); - bio_integrity_trim(clone, - bio_sector_offset(bio, idx, offset), len); - } - - return clone; -} - -/* - * Creates a bio that consists of range of complete bvecs. - */ -static struct bio *clone_bio(struct bio *bio, sector_t sector, - unsigned short idx, unsigned short bv_count, - unsigned int len, struct bio_set *bs) -{ - struct bio *clone; - - clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); - __bio_clone(clone, bio); - clone->bi_destructor = dm_bio_destructor; - clone->bi_sector = sector; - clone->bi_idx = idx; - clone->bi_vcnt = idx + bv_count; - clone->bi_size = to_bytes(len); - clone->bi_flags &= ~(1 << BIO_SEG_VALID); - - if (bio_integrity(bio)) { - bio_integrity_clone(clone, bio, GFP_NOIO, bs); - - if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) - bio_integrity_trim(clone, - bio_sector_offset(bio, idx, 0), len); - } - - return clone; -} - -static struct dm_target_io *alloc_tio(struct clone_info *ci, - struct dm_target *ti) -{ - struct dm_target_io *tio = mempool_alloc(ci->md->tio_pool, GFP_NOIO); - - tio->io = ci->io; - tio->ti = ti; - memset(&tio->info, 0, sizeof(tio->info)); - - return tio; -} - -static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, - unsigned request_nr, sector_t len) -{ - struct dm_target_io *tio = alloc_tio(ci, ti); - struct bio *clone; - - tio->info.target_request_nr = request_nr; - - /* - * Discard requests require the bio's inline iovecs be initialized. - * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush - * and discard, so no need for concern about wasted bvec allocations. - */ - clone = bio_alloc_bioset(GFP_NOIO, ci->bio->bi_max_vecs, ci->md->bs); - __bio_clone(clone, ci->bio); - clone->bi_destructor = dm_bio_destructor; - if (len) { - clone->bi_sector = ci->sector; - clone->bi_size = to_bytes(len); - } - - __map_bio(ti, clone, tio); -} - -static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti, - unsigned num_requests, sector_t len) -{ - unsigned request_nr; - - for (request_nr = 0; request_nr < num_requests; request_nr++) - __issue_target_request(ci, ti, request_nr, len); -} - -static int __clone_and_map_empty_flush(struct clone_info *ci) -{ - unsigned target_nr = 0; - struct dm_target *ti; - - BUG_ON(bio_has_data(ci->bio)); - while ((ti = dm_table_get_target(ci->map, target_nr++))) - __issue_target_requests(ci, ti, ti->num_flush_requests, 0); - - return 0; -} - -/* - * Perform all io with a single clone. 
- */ -static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) -{ - struct bio *clone, *bio = ci->bio; - struct dm_target_io *tio; - - tio = alloc_tio(ci, ti); - clone = clone_bio(bio, ci->sector, ci->idx, - bio->bi_vcnt - ci->idx, ci->sector_count, - ci->md->bs); - __map_bio(ti, clone, tio); - ci->sector_count = 0; -} - -static int __clone_and_map_discard(struct clone_info *ci) -{ - struct dm_target *ti; - sector_t len; - - do { - ti = dm_table_find_target(ci->map, ci->sector); - if (!dm_target_is_valid(ti)) - return -EIO; - - /* - * Even though the device advertised discard support, - * that does not mean every target supports it, and - * reconfiguration might also have changed that since the - * check was performed. - */ - if (!ti->num_discard_requests) - return -EOPNOTSUPP; - - len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); - - __issue_target_requests(ci, ti, ti->num_discard_requests, len); - - ci->sector += len; - } while (ci->sector_count -= len); - - return 0; -} - -static int __clone_and_map(struct clone_info *ci) -{ - struct bio *clone, *bio = ci->bio; - struct dm_target *ti; - sector_t len = 0, max; - struct dm_target_io *tio; - - if (unlikely(bio->bi_rw & REQ_DISCARD)) - return __clone_and_map_discard(ci); - - ti = dm_table_find_target(ci->map, ci->sector); - if (!dm_target_is_valid(ti)) - return -EIO; - - max = max_io_len(ci->sector, ti); - - if (ci->sector_count <= max) { - /* - * Optimise for the simple case where we can do all of - * the remaining io with a single clone. - */ - __clone_and_map_simple(ci, ti); - - } else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) { - /* - * There are some bvecs that don't span targets. - * Do as many of these as possible. - */ - int i; - sector_t remaining = max; - sector_t bv_len; - - for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) { - bv_len = to_sector(bio->bi_io_vec[i].bv_len); - - if (bv_len > remaining) - break; - - remaining -= bv_len; - len += bv_len; - } - - tio = alloc_tio(ci, ti); - clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len, - ci->md->bs); - __map_bio(ti, clone, tio); - - ci->sector += len; - ci->sector_count -= len; - ci->idx = i; - - } else { - /* - * Handle a bvec that must be split between two or more targets. - */ - struct bio_vec *bv = bio->bi_io_vec + ci->idx; - sector_t remaining = to_sector(bv->bv_len); - unsigned int offset = 0; - - do { - if (offset) { - ti = dm_table_find_target(ci->map, ci->sector); - if (!dm_target_is_valid(ti)) - return -EIO; - - max = max_io_len(ci->sector, ti); - } - - len = min(remaining, max); - - tio = alloc_tio(ci, ti); - clone = split_bvec(bio, ci->sector, ci->idx, - bv->bv_offset + offset, len, - ci->md->bs); - - __map_bio(ti, clone, tio); - - ci->sector += len; - ci->sector_count -= len; - offset += to_bytes(len); - } while (remaining -= len); - - ci->idx++; - } - - return 0; -} - -/* - * Split the bio into several clones and submit it to targets. 
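 *
 * Flush bios become one empty clone per target, discards are issued per
 * target by __clone_and_map_discard(), and everything else is handled by
 * __clone_and_map(): clone the whole remainder if it fits one target,
 * clone a run of complete bvecs, or split a bvec that straddles a
 * target boundary.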
- */ -static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) -{ - struct clone_info ci; - int error = 0; - - ci.map = dm_get_live_table(md); - if (unlikely(!ci.map)) { - bio_io_error(bio); - return; - } - - ci.md = md; - ci.io = alloc_io(md); - ci.io->error = 0; - atomic_set(&ci.io->io_count, 1); - ci.io->bio = bio; - ci.io->md = md; - spin_lock_init(&ci.io->endio_lock); - ci.sector = bio->bi_sector; - ci.idx = bio->bi_idx; - - start_io_acct(ci.io); - if (bio->bi_rw & REQ_FLUSH) { - ci.bio = &ci.md->flush_bio; - ci.sector_count = 0; - error = __clone_and_map_empty_flush(&ci); - /* dec_pending submits any data associated with flush */ - } else { - ci.bio = bio; - ci.sector_count = bio_sectors(bio); - while (ci.sector_count && !error) - error = __clone_and_map(&ci); - } - - /* drop the extra reference count */ - dec_pending(ci.io, error); - dm_table_put(ci.map); -} -/*----------------------------------------------------------------- - * CRUD END - *---------------------------------------------------------------*/ - -static int dm_merge_bvec(struct request_queue *q, - struct bvec_merge_data *bvm, - struct bio_vec *biovec) -{ - struct mapped_device *md = q->queuedata; - struct dm_table *map = dm_get_live_table(md); - struct dm_target *ti; - sector_t max_sectors; - int max_size = 0; - - if (unlikely(!map)) - goto out; - - ti = dm_table_find_target(map, bvm->bi_sector); - if (!dm_target_is_valid(ti)) - goto out_table; - - /* - * Find maximum amount of I/O that won't need splitting - */ - max_sectors = min(max_io_len(bvm->bi_sector, ti), - (sector_t) BIO_MAX_SECTORS); - max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; - if (max_size < 0) - max_size = 0; - - /* - * merge_bvec_fn() returns number of bytes - * it can accept at this offset - * max is precomputed maximal io size - */ - if (max_size && ti->type->merge) - max_size = ti->type->merge(ti, bvm, biovec, max_size); - /* - * If the target doesn't support merge method and some of the devices - * provided their merge_bvec method (we know this by looking at - * queue_max_hw_sectors), then we can't allow bios with multiple vector - * entries. So always set max_size to 0, and the code below allows - * just one page. - */ - else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) - - max_size = 0; - -out_table: - dm_table_put(map); - -out: - /* - * Always allow an entire first page - */ - if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT)) - max_size = biovec->bv_len; - - return max_size; -} - -/* - * The request function that just remaps the bio built up by - * dm_merge_bvec. 
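 *
 * dm_merge_bvec() is registered as the queue's merge_bvec_fn in
 * dm_init_md_queue(), so by the time a bio arrives here it normally
 * already fits within a single target.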
- */ -static void _dm_request(struct request_queue *q, struct bio *bio) -{ - int rw = bio_data_dir(bio); - struct mapped_device *md = q->queuedata; - int cpu; - - down_read(&md->io_lock); - - cpu = part_stat_lock(); - part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); - part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); - part_stat_unlock(); - - /* if we're suspended, we have to queue this io for later */ - if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { - up_read(&md->io_lock); - - if (bio_rw(bio) != READA) - queue_io(md, bio); - else - bio_io_error(bio); - return; - } - - __split_and_process_bio(md, bio); - up_read(&md->io_lock); - return; -} - -static int dm_request_based(struct mapped_device *md) -{ - return blk_queue_stackable(md->queue); -} - -static void dm_request(struct request_queue *q, struct bio *bio) -{ - struct mapped_device *md = q->queuedata; - - if (dm_request_based(md)) - blk_queue_bio(q, bio); - else - _dm_request(q, bio); -} - -void dm_dispatch_request(struct request *rq) -{ - int r; - - if (blk_queue_io_stat(rq->q)) - rq->cmd_flags |= REQ_IO_STAT; - - rq->start_time = jiffies; - r = blk_insert_cloned_request(rq->q, rq); - if (r) - dm_complete_request(rq, r); -} -EXPORT_SYMBOL_GPL(dm_dispatch_request); - -static void dm_rq_bio_destructor(struct bio *bio) -{ - struct dm_rq_clone_bio_info *info = bio->bi_private; - struct mapped_device *md = info->tio->md; - - free_bio_info(info); - bio_free(bio, md->bs); -} - -static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, - void *data) -{ - struct dm_rq_target_io *tio = data; - struct mapped_device *md = tio->md; - struct dm_rq_clone_bio_info *info = alloc_bio_info(md); - - if (!info) - return -ENOMEM; - - info->orig = bio_orig; - info->tio = tio; - bio->bi_end_io = end_clone_bio; - bio->bi_private = info; - bio->bi_destructor = dm_rq_bio_destructor; - - return 0; -} - -static int setup_clone(struct request *clone, struct request *rq, - struct dm_rq_target_io *tio) -{ - int r; - - r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, - dm_rq_bio_constructor, tio); - if (r) - return r; - - clone->cmd = rq->cmd; - clone->cmd_len = rq->cmd_len; - clone->sense = rq->sense; - clone->buffer = rq->buffer; - clone->end_io = end_clone_request; - clone->end_io_data = tio; - - return 0; -} - -static struct request *clone_rq(struct request *rq, struct mapped_device *md, - gfp_t gfp_mask) -{ - struct request *clone; - struct dm_rq_target_io *tio; - - tio = alloc_rq_tio(md, gfp_mask); - if (!tio) - return NULL; - - tio->md = md; - tio->ti = NULL; - tio->orig = rq; - tio->error = 0; - memset(&tio->info, 0, sizeof(tio->info)); - - clone = &tio->clone; - if (setup_clone(clone, rq, tio)) { - /* -ENOMEM */ - free_rq_tio(tio); - return NULL; - } - - return clone; -} - -/* - * Called with the queue lock held. 
- */ -static int dm_prep_fn(struct request_queue *q, struct request *rq) -{ - struct mapped_device *md = q->queuedata; - struct request *clone; - - if (unlikely(rq->special)) { - DMWARN("Already has something in rq->special."); - return BLKPREP_KILL; - } - - clone = clone_rq(rq, md, GFP_ATOMIC); - if (!clone) - return BLKPREP_DEFER; - - rq->special = clone; - rq->cmd_flags |= REQ_DONTPREP; - - return BLKPREP_OK; -} - -/* - * Returns: - * 0 : the request has been processed (not requeued) - * !0 : the request has been requeued - */ -static int map_request(struct dm_target *ti, struct request *clone, - struct mapped_device *md) -{ - int r, requeued = 0; - struct dm_rq_target_io *tio = clone->end_io_data; - - /* - * Hold the md reference here for the in-flight I/O. - * We can't rely on the reference count by device opener, - * because the device may be closed during the request completion - * when all bios are completed. - * See the comment in rq_completed() too. - */ - dm_get(md); - - tio->ti = ti; - r = ti->type->map_rq(ti, clone, &tio->info); - switch (r) { - case DM_MAPIO_SUBMITTED: - /* The target has taken the I/O to submit by itself later */ - break; - case DM_MAPIO_REMAPPED: - /* The target has remapped the I/O so dispatch it */ - trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), - blk_rq_pos(tio->orig)); - dm_dispatch_request(clone); - break; - case DM_MAPIO_REQUEUE: - /* The target wants to requeue the I/O */ - dm_requeue_unmapped_request(clone); - requeued = 1; - break; - default: - if (r > 0) { - DMWARN("unimplemented target map return value: %d", r); - BUG(); - } - - /* The target wants to complete the I/O */ - dm_kill_unmapped_request(clone, r); - break; - } - - return requeued; -} - -/* - * q->request_fn for request-based dm. - * Called with the queue lock held. - */ -static void dm_request_fn(struct request_queue *q) -{ - struct mapped_device *md = q->queuedata; - struct dm_table *map = dm_get_live_table(md); - struct dm_target *ti; - struct request *rq, *clone; - sector_t pos; - - /* - * For suspend, check blk_queue_stopped() and increment - * ->pending within a single queue_lock not to increment the - * number of in-flight I/Os after the queue is stopped in - * dm_suspend(). 
- */ - while (!blk_queue_stopped(q)) { - rq = blk_peek_request(q); - if (!rq) - goto delay_and_out; - - /* always use block 0 to find the target for flushes for now */ - pos = 0; - if (!(rq->cmd_flags & REQ_FLUSH)) - pos = blk_rq_pos(rq); - - ti = dm_table_find_target(map, pos); - BUG_ON(!dm_target_is_valid(ti)); - - if (ti->type->busy && ti->type->busy(ti)) - goto delay_and_out; - - blk_start_request(rq); - clone = rq->special; - atomic_inc(&md->pending[rq_data_dir(clone)]); - - spin_unlock(q->queue_lock); - if (map_request(ti, clone, md)) - goto requeued; - - BUG_ON(!irqs_disabled()); - spin_lock(q->queue_lock); - } - - goto out; - -requeued: - BUG_ON(!irqs_disabled()); - spin_lock(q->queue_lock); - -delay_and_out: - blk_delay_queue(q, HZ / 10); -out: - dm_table_put(map); - - return; -} - -int dm_underlying_device_busy(struct request_queue *q) -{ - return blk_lld_busy(q); -} -EXPORT_SYMBOL_GPL(dm_underlying_device_busy); - -static int dm_lld_busy(struct request_queue *q) -{ - int r; - struct mapped_device *md = q->queuedata; - struct dm_table *map = dm_get_live_table(md); - - if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) - r = 1; - else - r = dm_table_any_busy_target(map); - - dm_table_put(map); - - return r; -} - -static int dm_any_congested(void *congested_data, int bdi_bits) -{ - int r = bdi_bits; - struct mapped_device *md = congested_data; - struct dm_table *map; - - if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { - map = dm_get_live_table(md); - if (map) { - /* - * Request-based dm cares about only own queue for - * the query about congestion status of request_queue - */ - if (dm_request_based(md)) - r = md->queue->backing_dev_info.state & - bdi_bits; - else - r = dm_table_any_congested(map, bdi_bits); - - dm_table_put(map); - } - } - - return r; -} - -/*----------------------------------------------------------------- - * An IDR is used to keep track of allocated minor numbers. - *---------------------------------------------------------------*/ -static void free_minor(int minor) -{ - spin_lock(&_minor_lock); - idr_remove(&_minor_idr, minor); - spin_unlock(&_minor_lock); -} - -/* - * See if the device with a specific minor # is free. - */ -static int specific_minor(int minor) -{ - int r, m; - - if (minor >= (1 << MINORBITS)) - return -EINVAL; - - r = idr_pre_get(&_minor_idr, GFP_KERNEL); - if (!r) - return -ENOMEM; - - spin_lock(&_minor_lock); - - if (idr_find(&_minor_idr, minor)) { - r = -EBUSY; - goto out; - } - - r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m); - if (r) - goto out; - - if (m != minor) { - idr_remove(&_minor_idr, m); - r = -EBUSY; - goto out; - } - -out: - spin_unlock(&_minor_lock); - return r; -} - -static int next_free_minor(int *minor) -{ - int r, m; - - r = idr_pre_get(&_minor_idr, GFP_KERNEL); - if (!r) - return -ENOMEM; - - spin_lock(&_minor_lock); - - r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m); - if (r) - goto out; - - if (m >= (1 << MINORBITS)) { - idr_remove(&_minor_idr, m); - r = -ENOSPC; - goto out; - } - - *minor = m; - -out: - spin_unlock(&_minor_lock); - return r; -} - -static const struct block_device_operations dm_blk_dops; - -static void dm_wq_work(struct work_struct *work); - -static void dm_init_md_queue(struct mapped_device *md) -{ - /* - * Request-based dm devices cannot be stacked on top of bio-based dm - * devices. The type of this dm device has not been decided yet. - * The type is decided at the first table loading time. 
- * To prevent problematic device stacking, clear the queue flag - * for request stacking support until then. - * - * This queue is new, so no concurrency on the queue_flags. - */ - queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue); - - md->queue->queuedata = md; - md->queue->backing_dev_info.congested_fn = dm_any_congested; - md->queue->backing_dev_info.congested_data = md; - blk_queue_make_request(md->queue, dm_request); - blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); - blk_queue_merge_bvec(md->queue, dm_merge_bvec); -} - -/* - * Allocate and initialise a blank device with a given minor. - */ -static struct mapped_device *alloc_dev(int minor) -{ - int r; - struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL); - void *old_md; - - if (!md) { - DMWARN("unable to allocate device, out of memory."); - return NULL; - } - - if (!try_module_get(THIS_MODULE)) - goto bad_module_get; - - /* get a minor number for the dev */ - if (minor == DM_ANY_MINOR) - r = next_free_minor(&minor); - else - r = specific_minor(minor); - if (r < 0) - goto bad_minor; - - md->type = DM_TYPE_NONE; - init_rwsem(&md->io_lock); - mutex_init(&md->suspend_lock); - mutex_init(&md->type_lock); - spin_lock_init(&md->deferred_lock); - rwlock_init(&md->map_lock); - atomic_set(&md->holders, 1); - atomic_set(&md->open_count, 0); - atomic_set(&md->event_nr, 0); - atomic_set(&md->uevent_seq, 0); - INIT_LIST_HEAD(&md->uevent_list); - spin_lock_init(&md->uevent_lock); - - md->queue = blk_alloc_queue(GFP_KERNEL); - if (!md->queue) - goto bad_queue; - - dm_init_md_queue(md); - - md->disk = alloc_disk(1); - if (!md->disk) - goto bad_disk; - - atomic_set(&md->pending[0], 0); - atomic_set(&md->pending[1], 0); - init_waitqueue_head(&md->wait); - INIT_WORK(&md->work, dm_wq_work); - init_waitqueue_head(&md->eventq); - - md->disk->major = _major; - md->disk->first_minor = minor; - md->disk->fops = &dm_blk_dops; - md->disk->queue = md->queue; - md->disk->private_data = md; - sprintf(md->disk->disk_name, "dm-%d", minor); - add_disk(md->disk); - format_dev_t(md->name, MKDEV(_major, minor)); - - md->wq = alloc_workqueue("kdmflush", - WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); - if (!md->wq) - goto bad_thread; - - md->bdev = bdget_disk(md->disk, 0); - if (!md->bdev) - goto bad_bdev; - - bio_init(&md->flush_bio); - md->flush_bio.bi_bdev = md->bdev; - md->flush_bio.bi_rw = WRITE_FLUSH; - - /* Populate the mapping, nobody knows we exist yet */ - spin_lock(&_minor_lock); - old_md = idr_replace(&_minor_idr, md, minor); - spin_unlock(&_minor_lock); - - BUG_ON(old_md != MINOR_ALLOCED); - - return md; - -bad_bdev: - destroy_workqueue(md->wq); -bad_thread: - del_gendisk(md->disk); - put_disk(md->disk); -bad_disk: - blk_cleanup_queue(md->queue); -bad_queue: - free_minor(minor); -bad_minor: - module_put(THIS_MODULE); -bad_module_get: - kfree(md); - return NULL; -} - -static void unlock_fs(struct mapped_device *md); - -static void free_dev(struct mapped_device *md) -{ - int minor = MINOR(disk_devt(md->disk)); - - unlock_fs(md); - bdput(md->bdev); - destroy_workqueue(md->wq); - if (md->tio_pool) - mempool_destroy(md->tio_pool); - if (md->io_pool) - mempool_destroy(md->io_pool); - if (md->bs) - bioset_free(md->bs); - blk_integrity_unregister(md->disk); - del_gendisk(md->disk); - free_minor(minor); - - spin_lock(&_minor_lock); - md->disk->private_data = NULL; - spin_unlock(&_minor_lock); - - put_disk(md->disk); - blk_cleanup_queue(md->queue); - module_put(THIS_MODULE); - kfree(md); -} - -static void __bind_mempools(struct mapped_device *md, struct 
dm_table *t) -{ - struct dm_md_mempools *p; - - if (md->io_pool && md->tio_pool && md->bs) - /* the md already has necessary mempools */ - goto out; - - p = dm_table_get_md_mempools(t); - BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); - - md->io_pool = p->io_pool; - p->io_pool = NULL; - md->tio_pool = p->tio_pool; - p->tio_pool = NULL; - md->bs = p->bs; - p->bs = NULL; - -out: - /* mempool bind completed, now no need any mempools in the table */ - dm_table_free_md_mempools(t); -} - -/* - * Bind a table to the device. - */ -static void event_callback(void *context) -{ - unsigned long flags; - LIST_HEAD(uevents); - struct mapped_device *md = (struct mapped_device *) context; - - spin_lock_irqsave(&md->uevent_lock, flags); - list_splice_init(&md->uevent_list, &uevents); - spin_unlock_irqrestore(&md->uevent_lock, flags); - - dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); - - atomic_inc(&md->event_nr); - wake_up(&md->eventq); -} - -/* - * Protected by md->suspend_lock obtained by dm_swap_table(). - */ -static void __set_size(struct mapped_device *md, sector_t size) -{ - set_capacity(md->disk, size); - - i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); -} - -/* - * Return 1 if the queue has a compulsory merge_bvec_fn function. - * - * If this function returns 0, then the device is either a non-dm - * device without a merge_bvec_fn, or it is a dm device that is - * able to split any bios it receives that are too big. - */ -int dm_queue_merge_is_compulsory(struct request_queue *q) -{ - struct mapped_device *dev_md; - - if (!q->merge_bvec_fn) - return 0; - - if (q->make_request_fn == dm_request) { - dev_md = q->queuedata; - if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags)) - return 0; - } - - return 1; -} - -static int dm_device_merge_is_compulsory(struct dm_target *ti, - struct dm_dev *dev, sector_t start, - sector_t len, void *data) -{ - struct block_device *bdev = dev->bdev; - struct request_queue *q = bdev_get_queue(bdev); - - return dm_queue_merge_is_compulsory(q); -} - -/* - * Return 1 if it is acceptable to ignore merge_bvec_fn based - * on the properties of the underlying devices. - */ -static int dm_table_merge_is_optional(struct dm_table *table) -{ - unsigned i = 0; - struct dm_target *ti; - - while (i < dm_table_get_num_targets(table)) { - ti = dm_table_get_target(table, i++); - - if (ti->type->iterate_devices && - ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL)) - return 0; - } - - return 1; -} - -/* - * Returns old map, which caller must destroy. - */ -static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, - struct queue_limits *limits) -{ - struct dm_table *old_map; - struct request_queue *q = md->queue; - sector_t size; - unsigned long flags; - int merge_is_optional; - - size = dm_table_get_size(t); - - /* - * Wipe any geometry if the size of the table changed. - */ - if (size != get_capacity(md->disk)) - memset(&md->geometry, 0, sizeof(md->geometry)); - - __set_size(md, size); - - dm_table_event_callback(t, event_callback, md); - - /* - * The queue hasn't been stopped yet, if the old table type wasn't - * for request-based during suspension. So stop it to prevent - * I/O mapping before resume. - * This must be done before setting the queue restrictions, - * because request-based dm may be run just after the setting. 
- */ - if (dm_table_request_based(t) && !blk_queue_stopped(q)) - stop_queue(q); - - __bind_mempools(md, t); - - merge_is_optional = dm_table_merge_is_optional(t); - - write_lock_irqsave(&md->map_lock, flags); - old_map = md->map; - md->map = t; - md->immutable_target_type = dm_table_get_immutable_target_type(t); - - dm_table_set_restrictions(t, q, limits); - if (merge_is_optional) - set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); - else - clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); - write_unlock_irqrestore(&md->map_lock, flags); - - return old_map; -} - -/* - * Returns unbound table for the caller to free. - */ -static struct dm_table *__unbind(struct mapped_device *md) -{ - struct dm_table *map = md->map; - unsigned long flags; - - if (!map) - return NULL; - - dm_table_event_callback(map, NULL, NULL); - write_lock_irqsave(&md->map_lock, flags); - md->map = NULL; - write_unlock_irqrestore(&md->map_lock, flags); - - return map; -} - -/* - * Constructor for a new device. - */ -int dm_create(int minor, struct mapped_device **result) -{ - struct mapped_device *md; - - md = alloc_dev(minor); - if (!md) - return -ENXIO; - - dm_sysfs_init(md); - - *result = md; - return 0; -} - -/* - * Functions to manage md->type. - * All are required to hold md->type_lock. - */ -void dm_lock_md_type(struct mapped_device *md) -{ - mutex_lock(&md->type_lock); -} - -void dm_unlock_md_type(struct mapped_device *md) -{ - mutex_unlock(&md->type_lock); -} - -void dm_set_md_type(struct mapped_device *md, unsigned type) -{ - md->type = type; -} - -unsigned dm_get_md_type(struct mapped_device *md) -{ - return md->type; -} - -struct target_type *dm_get_immutable_target_type(struct mapped_device *md) -{ - return md->immutable_target_type; -} - -/* - * Fully initialize a request-based queue (->elevator, ->request_fn, etc). 
- */ -static int dm_init_request_based_queue(struct mapped_device *md) -{ - struct request_queue *q = NULL; - - if (md->queue->elevator) - return 1; - - /* Fully initialize the queue */ - q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL); - if (!q) - return 0; - - md->queue = q; - dm_init_md_queue(md); - blk_queue_softirq_done(md->queue, dm_softirq_done); - blk_queue_prep_rq(md->queue, dm_prep_fn); - blk_queue_lld_busy(md->queue, dm_lld_busy); - - elv_register_queue(md->queue); - - return 1; -} - -/* - * Setup the DM device's queue based on md's type - */ -int dm_setup_md_queue(struct mapped_device *md) -{ - if ((dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) && - !dm_init_request_based_queue(md)) { - DMWARN("Cannot initialize queue for request-based mapped device"); - return -EINVAL; - } - - return 0; -} - -static struct mapped_device *dm_find_md(dev_t dev) -{ - struct mapped_device *md; - unsigned minor = MINOR(dev); - - if (MAJOR(dev) != _major || minor >= (1 << MINORBITS)) - return NULL; - - spin_lock(&_minor_lock); - - md = idr_find(&_minor_idr, minor); - if (md && (md == MINOR_ALLOCED || - (MINOR(disk_devt(dm_disk(md))) != minor) || - dm_deleting_md(md) || - test_bit(DMF_FREEING, &md->flags))) { - md = NULL; - goto out; - } - -out: - spin_unlock(&_minor_lock); - - return md; -} - -struct mapped_device *dm_get_md(dev_t dev) -{ - struct mapped_device *md = dm_find_md(dev); - - if (md) - dm_get(md); - - return md; -} -EXPORT_SYMBOL_GPL(dm_get_md); - -void *dm_get_mdptr(struct mapped_device *md) -{ - return md->interface_ptr; -} - -void dm_set_mdptr(struct mapped_device *md, void *ptr) -{ - md->interface_ptr = ptr; -} - -void dm_get(struct mapped_device *md) -{ - atomic_inc(&md->holders); - BUG_ON(test_bit(DMF_FREEING, &md->flags)); -} - -const char *dm_device_name(struct mapped_device *md) -{ - return md->name; -} -EXPORT_SYMBOL_GPL(dm_device_name); - -static void __dm_destroy(struct mapped_device *md, bool wait) -{ - struct dm_table *map; - - might_sleep(); - - spin_lock(&_minor_lock); - map = dm_get_live_table(md); - idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); - set_bit(DMF_FREEING, &md->flags); - spin_unlock(&_minor_lock); - - if (!dm_suspended_md(md)) { - dm_table_presuspend_targets(map); - dm_table_postsuspend_targets(map); - } - - /* - * Rare, but there may be I/O requests still going to complete, - * for example. Wait for all references to disappear. - * No one should increment the reference count of the mapped_device, - * after the mapped_device state becomes DMF_FREEING. - */ - if (wait) - while (atomic_read(&md->holders)) - msleep(1); - else if (atomic_read(&md->holders)) - DMWARN("%s: Forcibly removing mapped_device still in use! 
(%d users)", - dm_device_name(md), atomic_read(&md->holders)); - - dm_sysfs_exit(md); - dm_table_put(map); - dm_table_destroy(__unbind(md)); - free_dev(md); -} - -void dm_destroy(struct mapped_device *md) -{ - __dm_destroy(md, true); -} - -void dm_destroy_immediate(struct mapped_device *md) -{ - __dm_destroy(md, false); -} - -void dm_put(struct mapped_device *md) -{ - atomic_dec(&md->holders); -} -EXPORT_SYMBOL_GPL(dm_put); - -static int dm_wait_for_completion(struct mapped_device *md, int interruptible) -{ - int r = 0; - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(&md->wait, &wait); - - while (1) { - set_current_state(interruptible); - - if (!md_in_flight(md)) - break; - - if (interruptible == TASK_INTERRUPTIBLE && - signal_pending(current)) { - r = -EINTR; - break; - } - - io_schedule(); - } - set_current_state(TASK_RUNNING); - - remove_wait_queue(&md->wait, &wait); - - return r; -} - -/* - * Process the deferred bios - */ -static void dm_wq_work(struct work_struct *work) -{ - struct mapped_device *md = container_of(work, struct mapped_device, - work); - struct bio *c; - - down_read(&md->io_lock); - - while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { - spin_lock_irq(&md->deferred_lock); - c = bio_list_pop(&md->deferred); - spin_unlock_irq(&md->deferred_lock); - - if (!c) - break; - - up_read(&md->io_lock); - - if (dm_request_based(md)) - generic_make_request(c); - else - __split_and_process_bio(md, c); - - down_read(&md->io_lock); - } - - up_read(&md->io_lock); -} - -static void dm_queue_flush(struct mapped_device *md) -{ - clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); - smp_mb__after_clear_bit(); - queue_work(md->wq, &md->work); -} - -/* - * Swap in a new table, returning the old one for the caller to destroy. - */ -struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) -{ - struct dm_table *map = ERR_PTR(-EINVAL); - struct queue_limits limits; - int r; - - mutex_lock(&md->suspend_lock); - - /* device must be suspended */ - if (!dm_suspended_md(md)) - goto out; - - r = dm_calculate_queue_limits(table, &limits); - if (r) { - map = ERR_PTR(r); - goto out; - } - - map = __bind(md, table, &limits); - -out: - mutex_unlock(&md->suspend_lock); - return map; -} - -/* - * Functions to lock and unlock any filesystem running on the - * device. - */ -static int lock_fs(struct mapped_device *md) -{ - int r; - - WARN_ON(md->frozen_sb); - - md->frozen_sb = freeze_bdev(md->bdev); - if (IS_ERR(md->frozen_sb)) { - r = PTR_ERR(md->frozen_sb); - md->frozen_sb = NULL; - return r; - } - - set_bit(DMF_FROZEN, &md->flags); - - return 0; -} - -static void unlock_fs(struct mapped_device *md) -{ - if (!test_bit(DMF_FROZEN, &md->flags)) - return; - - thaw_bdev(md->bdev, md->frozen_sb); - md->frozen_sb = NULL; - clear_bit(DMF_FROZEN, &md->flags); -} - -/* - * We need to be able to change a mapping table under a mounted - * filesystem. For example we might want to move some data in - * the background. Before the table can be swapped with - * dm_bind_table, dm_suspend must be called to flush any in - * flight bios and ensure that any further io gets deferred. - */ -/* - * Suspend mechanism in request-based dm. - * - * 1. Flush all I/Os by lock_fs() if needed. - * 2. Stop dispatching any I/O by stopping the request_queue. - * 3. Wait for all in-flight I/Os to be completed or requeued. - * - * To abort suspend, start the request_queue. 
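dm_wq_work() above drains md->deferred by popping one bio at a time under deferred_lock, processing it with the lock dropped, and stopping early once DMF_BLOCK_IO_FOR_SUSPEND is set. A minimal userspace analogue of that drain loop, using a pthread mutex and a singly linked list with invented names, might look like this:

```c
/* Sketch of the "pop under lock, process unlocked, stop on flag" loop
 * used by dm_wq_work().  All names here are illustrative only. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct work_item {
	int id;
	struct work_item *next;
};

static pthread_mutex_t deferred_lock = PTHREAD_MUTEX_INITIALIZER;
static struct work_item *deferred;      /* list of deferred items */
static bool block_for_suspend;          /* stand-in for DMF_BLOCK_IO_FOR_SUSPEND */

static void push_deferred(int id)
{
	struct work_item *w = malloc(sizeof(*w));

	if (!w)
		return;
	w->id = id;
	pthread_mutex_lock(&deferred_lock);
	w->next = deferred;
	deferred = w;
	pthread_mutex_unlock(&deferred_lock);
}

static void drain_deferred(void)
{
	while (!block_for_suspend) {
		struct work_item *w;

		pthread_mutex_lock(&deferred_lock);
		w = deferred;
		if (w)
			deferred = w->next;
		pthread_mutex_unlock(&deferred_lock);

		if (!w)
			break;              /* list empty */

		/* Process outside the lock, as dm_wq_work does. */
		printf("processing item %d\n", w->id);
		free(w);
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++)
		push_deferred(i);
	drain_deferred();
	return 0;
}
```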
- */ -int dm_suspend(struct mapped_device *md, unsigned suspend_flags) -{ - struct dm_table *map = NULL; - int r = 0; - int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0; - int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0; - - mutex_lock(&md->suspend_lock); - - if (dm_suspended_md(md)) { - r = -EINVAL; - goto out_unlock; - } - - map = dm_get_live_table(md); - - /* - * DMF_NOFLUSH_SUSPENDING must be set before presuspend. - * This flag is cleared before dm_suspend returns. - */ - if (noflush) - set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); - - /* This does not get reverted if there's an error later. */ - dm_table_presuspend_targets(map); - - /* - * Flush I/O to the device. - * Any I/O submitted after lock_fs() may not be flushed. - * noflush takes precedence over do_lockfs. - * (lock_fs() flushes I/Os and waits for them to complete.) - */ - if (!noflush && do_lockfs) { - r = lock_fs(md); - if (r) - goto out; - } - - /* - * Here we must make sure that no processes are submitting requests - * to target drivers i.e. no one may be executing - * __split_and_process_bio. This is called from dm_request and - * dm_wq_work. - * - * To get all processes out of __split_and_process_bio in dm_request, - * we take the write lock. To prevent any process from reentering - * __split_and_process_bio from dm_request and quiesce the thread - * (dm_wq_work), we set BMF_BLOCK_IO_FOR_SUSPEND and call - * flush_workqueue(md->wq). - */ - down_write(&md->io_lock); - set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); - up_write(&md->io_lock); - - /* - * Stop md->queue before flushing md->wq in case request-based - * dm defers requests to md->wq from md->queue. - */ - if (dm_request_based(md)) - stop_queue(md->queue); - - flush_workqueue(md->wq); - - /* - * At this point no more requests are entering target request routines. - * We call dm_wait_for_completion to wait for all existing requests - * to finish. - */ - r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); - - down_write(&md->io_lock); - if (noflush) - clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); - up_write(&md->io_lock); - - /* were we interrupted ? */ - if (r < 0) { - dm_queue_flush(md); - - if (dm_request_based(md)) - start_queue(md->queue); - - unlock_fs(md); - goto out; /* pushback list is already flushed, so skip flush */ - } - - /* - * If dm_wait_for_completion returned 0, the device is completely - * quiescent now. There is no request-processing activity. All new - * requests are being added to md->deferred list. - */ - - set_bit(DMF_SUSPENDED, &md->flags); - - dm_table_postsuspend_targets(map); - -out: - dm_table_put(map); - -out_unlock: - mutex_unlock(&md->suspend_lock); - return r; -} - -int dm_resume(struct mapped_device *md) -{ - int r = -EINVAL; - struct dm_table *map = NULL; - - mutex_lock(&md->suspend_lock); - if (!dm_suspended_md(md)) - goto out; - - map = dm_get_live_table(md); - if (!map || !dm_table_get_size(map)) - goto out; - - r = dm_table_resume_targets(map); - if (r) - goto out; - - dm_queue_flush(md); - - /* - * Flushing deferred I/Os must be done after targets are resumed - * so that mapping of targets can work correctly. - * Request-based dm is queueing the deferred I/Os in its request_queue. - */ - if (dm_request_based(md)) - start_queue(md->queue); - - unlock_fs(md); - - clear_bit(DMF_SUSPENDED, &md->flags); - - r = 0; -out: - dm_table_put(map); - mutex_unlock(&md->suspend_lock); - - return r; -} - -/*----------------------------------------------------------------- - * Event notification. 
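dm_suspend() above derives do_lockfs and noflush from the two suspend_flags bits declared further down in dm.h (DM_SUSPEND_LOCKFS_FLAG and DM_SUSPEND_NOFLUSH_FLAG), and noflush takes precedence over lockfs when both are set. A tiny sketch of that decode and precedence, reusing the same bit values:

```c
/* Sketch: decode suspend flags the way dm_suspend() above does.
 * The bit values are the ones declared later in dm.h. */
#include <stdio.h>

#define DM_SUSPEND_LOCKFS_FLAG  (1 << 0)
#define DM_SUSPEND_NOFLUSH_FLAG (1 << 1)

static void describe_suspend(unsigned suspend_flags)
{
	int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
	int noflush   = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;

	/* noflush takes precedence: lock_fs() is skipped entirely. */
	int will_lock_fs = !noflush && do_lockfs;

	printf("flags=%#x lockfs=%d noflush=%d -> lock_fs()=%s\n",
	       suspend_flags, do_lockfs, noflush,
	       will_lock_fs ? "called" : "skipped");
}

int main(void)
{
	describe_suspend(0);
	describe_suspend(DM_SUSPEND_LOCKFS_FLAG);
	describe_suspend(DM_SUSPEND_LOCKFS_FLAG | DM_SUSPEND_NOFLUSH_FLAG);
	return 0;
}
```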
- *---------------------------------------------------------------*/ -int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, - unsigned cookie) -{ - char udev_cookie[DM_COOKIE_LENGTH]; - char *envp[] = { udev_cookie, NULL }; - - if (!cookie) - return kobject_uevent(&disk_to_dev(md->disk)->kobj, action); - else { - snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u", - DM_COOKIE_ENV_VAR_NAME, cookie); - return kobject_uevent_env(&disk_to_dev(md->disk)->kobj, - action, envp); - } -} - -uint32_t dm_next_uevent_seq(struct mapped_device *md) -{ - return atomic_add_return(1, &md->uevent_seq); -} - -uint32_t dm_get_event_nr(struct mapped_device *md) -{ - return atomic_read(&md->event_nr); -} - -int dm_wait_event(struct mapped_device *md, int event_nr) -{ - return wait_event_interruptible(md->eventq, - (event_nr != atomic_read(&md->event_nr))); -} - -void dm_uevent_add(struct mapped_device *md, struct list_head *elist) -{ - unsigned long flags; - - spin_lock_irqsave(&md->uevent_lock, flags); - list_add(elist, &md->uevent_list); - spin_unlock_irqrestore(&md->uevent_lock, flags); -} - -/* - * The gendisk is only valid as long as you have a reference - * count on 'md'. - */ -struct gendisk *dm_disk(struct mapped_device *md) -{ - return md->disk; -} - -struct kobject *dm_kobject(struct mapped_device *md) -{ - return &md->kobj; -} - -/* - * struct mapped_device should not be exported outside of dm.c - * so use this check to verify that kobj is part of md structure - */ -struct mapped_device *dm_get_from_kobject(struct kobject *kobj) -{ - struct mapped_device *md; - - md = container_of(kobj, struct mapped_device, kobj); - if (&md->kobj != kobj) - return NULL; - - if (test_bit(DMF_FREEING, &md->flags) || - dm_deleting_md(md)) - return NULL; - - dm_get(md); - return md; -} - -int dm_suspended_md(struct mapped_device *md) -{ - return test_bit(DMF_SUSPENDED, &md->flags); -} - -int dm_suspended(struct dm_target *ti) -{ - return dm_suspended_md(dm_table_get_md(ti->table)); -} -EXPORT_SYMBOL_GPL(dm_suspended); - -int dm_noflush_suspending(struct dm_target *ti) -{ - return __noflush_suspending(dm_table_get_md(ti->table)); -} -EXPORT_SYMBOL_GPL(dm_noflush_suspending); - -struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) -{ - struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL); - unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS; - - if (!pools) - return NULL; - - pools->io_pool = (type == DM_TYPE_BIO_BASED) ? - mempool_create_slab_pool(MIN_IOS, _io_cache) : - mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache); - if (!pools->io_pool) - goto free_pools_and_out; - - pools->tio_pool = (type == DM_TYPE_BIO_BASED) ? 
- mempool_create_slab_pool(MIN_IOS, _tio_cache) : - mempool_create_slab_pool(MIN_IOS, _rq_tio_cache); - if (!pools->tio_pool) - goto free_io_pool_and_out; - - pools->bs = bioset_create(pool_size, 0); - if (!pools->bs) - goto free_tio_pool_and_out; - - if (integrity && bioset_integrity_create(pools->bs, pool_size)) - goto free_bioset_and_out; - - return pools; - -free_bioset_and_out: - bioset_free(pools->bs); - -free_tio_pool_and_out: - mempool_destroy(pools->tio_pool); - -free_io_pool_and_out: - mempool_destroy(pools->io_pool); - -free_pools_and_out: - kfree(pools); - - return NULL; -} - -void dm_free_md_mempools(struct dm_md_mempools *pools) -{ - if (!pools) - return; - - if (pools->io_pool) - mempool_destroy(pools->io_pool); - - if (pools->tio_pool) - mempool_destroy(pools->tio_pool); - - if (pools->bs) - bioset_free(pools->bs); - - kfree(pools); -} - -static const struct block_device_operations dm_blk_dops = { - .open = dm_blk_open, - .release = dm_blk_close, - .ioctl = dm_blk_ioctl, - .getgeo = dm_blk_getgeo, - .owner = THIS_MODULE -}; - -EXPORT_SYMBOL(dm_get_mapinfo); - -/* - * module hooks - */ -module_init(dm_init); -module_exit(dm_exit); - -module_param(major, uint, 0); -MODULE_PARM_DESC(major, "The major number of the device mapper"); -MODULE_DESCRIPTION(DM_NAME " driver"); -MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); -MODULE_LICENSE("GPL"); diff --git a/ANDROID_3.4.5/drivers/md/dm.h b/ANDROID_3.4.5/drivers/md/dm.h deleted file mode 100644 index b7dacd59..00000000 --- a/ANDROID_3.4.5/drivers/md/dm.h +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Internal header file for device mapper - * - * Copyright (C) 2001, 2002 Sistina Software - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This file is released under the LGPL. - */ - -#ifndef DM_INTERNAL_H -#define DM_INTERNAL_H - -#include <linux/fs.h> -#include <linux/device-mapper.h> -#include <linux/list.h> -#include <linux/blkdev.h> -#include <linux/hdreg.h> - -/* - * Suspend feature flags - */ -#define DM_SUSPEND_LOCKFS_FLAG (1 << 0) -#define DM_SUSPEND_NOFLUSH_FLAG (1 << 1) - -/* - * Type of table and mapped_device's mempool - */ -#define DM_TYPE_NONE 0 -#define DM_TYPE_BIO_BASED 1 -#define DM_TYPE_REQUEST_BASED 2 - -/* - * List of devices that a metadevice uses and should open/close. - */ -struct dm_dev_internal { - struct list_head list; - atomic_t count; - struct dm_dev dm_dev; -}; - -struct dm_table; -struct dm_md_mempools; - -/*----------------------------------------------------------------- - * Internal table functions. 
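dm_alloc_md_mempools() above sets aside slab-backed mempools and a bio_set so that the per-I/O structures can still be obtained when memory is tight. The sketch below is emphatically not the kernel mempool API; it is only a userspace free-list with a guaranteed number of preallocated elements, meant to show the reservation idea (names and sizes invented).

```c
/* Sketch: a fixed reserve of preallocated objects, loosely mirroring what
 * the io/tio mempools above guarantee.  This is not the kernel API. */
#include <stdio.h>
#include <stdlib.h>

#define MIN_IOS 16                      /* reserve size (illustrative) */

struct io_obj {
	struct io_obj *next;
	char payload[64];
};

struct simple_pool {
	struct io_obj *free_list;
};

static int pool_init(struct simple_pool *p, int nr)
{
	p->free_list = NULL;
	for (int i = 0; i < nr; i++) {
		struct io_obj *o = malloc(sizeof(*o));

		if (!o)
			return -1;
		o->next = p->free_list;
		p->free_list = o;
	}
	return 0;
}

static struct io_obj *pool_alloc(struct simple_pool *p)
{
	struct io_obj *o = p->free_list;

	if (o)
		p->free_list = o->next;     /* take from the reserve */
	return o;                           /* NULL once the reserve is empty */
}

static void pool_free(struct simple_pool *p, struct io_obj *o)
{
	o->next = p->free_list;             /* return to the reserve */
	p->free_list = o;
}

int main(void)
{
	struct simple_pool pool;
	struct io_obj *o;

	if (pool_init(&pool, MIN_IOS))
		return 1;
	o = pool_alloc(&pool);
	printf("got object: %s\n", o ? "yes" : "no");
	pool_free(&pool, o);

	while ((o = pool_alloc(&pool)))     /* tear the reserve down again */
		free(o);
	return 0;
}
```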
- *---------------------------------------------------------------*/ -void dm_table_destroy(struct dm_table *t); -void dm_table_event_callback(struct dm_table *t, - void (*fn)(void *), void *context); -struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); -struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); -int dm_calculate_queue_limits(struct dm_table *table, - struct queue_limits *limits); -void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, - struct queue_limits *limits); -struct list_head *dm_table_get_devices(struct dm_table *t); -void dm_table_presuspend_targets(struct dm_table *t); -void dm_table_postsuspend_targets(struct dm_table *t); -int dm_table_resume_targets(struct dm_table *t); -int dm_table_any_congested(struct dm_table *t, int bdi_bits); -int dm_table_any_busy_target(struct dm_table *t); -unsigned dm_table_get_type(struct dm_table *t); -struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); -bool dm_table_request_based(struct dm_table *t); -bool dm_table_supports_discards(struct dm_table *t); -int dm_table_alloc_md_mempools(struct dm_table *t); -void dm_table_free_md_mempools(struct dm_table *t); -struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); - -int dm_queue_merge_is_compulsory(struct request_queue *q); - -void dm_lock_md_type(struct mapped_device *md); -void dm_unlock_md_type(struct mapped_device *md); -void dm_set_md_type(struct mapped_device *md, unsigned type); -unsigned dm_get_md_type(struct mapped_device *md); -struct target_type *dm_get_immutable_target_type(struct mapped_device *md); - -int dm_setup_md_queue(struct mapped_device *md); - -/* - * To check the return value from dm_table_find_target(). - */ -#define dm_target_is_valid(t) ((t)->table) - -/* - * To check whether the target type is request-based or not (bio-based). - */ -#define dm_target_request_based(t) ((t)->type->map_rq != NULL) - -/*----------------------------------------------------------------- - * A registry of target types. - *---------------------------------------------------------------*/ -int dm_target_init(void); -void dm_target_exit(void); -struct target_type *dm_get_target_type(const char *name); -void dm_put_target_type(struct target_type *tt); -int dm_target_iterate(void (*iter_func)(struct target_type *tt, - void *param), void *param); - -int dm_split_args(int *argc, char ***argvp, char *input); - -/* - * Is this mapped_device being deleted? - */ -int dm_deleting_md(struct mapped_device *md); - -/* - * Is this mapped_device suspended? - */ -int dm_suspended_md(struct mapped_device *md); - -/* - * The device-mapper can be driven through one of two interfaces; - * ioctl or filesystem, depending which patch you have applied. 
- */ -int dm_interface_init(void); -void dm_interface_exit(void); - -/* - * sysfs interface - */ -int dm_sysfs_init(struct mapped_device *md); -void dm_sysfs_exit(struct mapped_device *md); -struct kobject *dm_kobject(struct mapped_device *md); -struct mapped_device *dm_get_from_kobject(struct kobject *kobj); - -/* - * Targets for linear and striped mappings - */ -int dm_linear_init(void); -void dm_linear_exit(void); - -int dm_stripe_init(void); -void dm_stripe_exit(void); - -/* - * mapped_device operations - */ -void dm_destroy(struct mapped_device *md); -void dm_destroy_immediate(struct mapped_device *md); -int dm_open_count(struct mapped_device *md); -int dm_lock_for_deletion(struct mapped_device *md); - -int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, - unsigned cookie); - -int dm_io_init(void); -void dm_io_exit(void); - -int dm_kcopyd_init(void); -void dm_kcopyd_exit(void); - -/* - * Mempool operations - */ -struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity); -void dm_free_md_mempools(struct dm_md_mempools *pools); - -#endif diff --git a/ANDROID_3.4.5/drivers/md/faulty.c b/ANDROID_3.4.5/drivers/md/faulty.c deleted file mode 100644 index 45135f69..00000000 --- a/ANDROID_3.4.5/drivers/md/faulty.c +++ /dev/null @@ -1,367 +0,0 @@ -/* - * faulty.c : Multiple Devices driver for Linux - * - * Copyright (C) 2004 Neil Brown - * - * fautly-device-simulator personality for md - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * You should have received a copy of the GNU General Public License - * (for example /usr/src/linux/COPYING); if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - - -/* - * The "faulty" personality causes some requests to fail. - * - * Possible failure modes are: - * reads fail "randomly" but succeed on retry - * writes fail "randomly" but succeed on retry - * reads for some address fail and then persist until a write - * reads for some address fail and then persist irrespective of write - * writes for some address fail and persist - * all writes fail - * - * Different modes can be active at a time, but only - * one can be set at array creation. Others can be added later. - * A mode can be one-shot or recurrent with the recurrence being - * once in every N requests. - * The bottom 5 bits of the "layout" indicate the mode. The - * remainder indicate a period, or 0 for one-shot. - * - * There is an implementation limit on the number of concurrently - * persisting-faulty blocks. When a new fault is requested that would - * exceed the limit, it is ignored. - * All current faults can be clear using a layout of "0". - * - * Requests are always sent to the device. If they are to fail, - * we clone the bio and insert a new b_end_io into the chain. 
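The header comment above explains that the faulty personality packs its configuration into the array "layout": the bottom 5 bits select the failure mode and the remaining bits give the recurrence period, with 0 meaning one-shot. That is exactly what reshape() later unpacks with ModeMask and ModeShift. A small userspace sketch of the encoding, reusing the constants defined just below:

```c
/* Sketch: encode/decode the faulty personality's "layout" word,
 * using the ModeMask/ModeShift convention defined below. */
#include <stdio.h>

#define ModeMask  0x1f
#define ModeShift 5

#define WriteTransient  0
#define ReadTransient   1
#define WritePersistent 2
#define ReadPersistent  3
#define WriteAll        4
#define ReadFixable     5

static int layout_encode(int mode, int period)
{
	return (period << ModeShift) | (mode & ModeMask);
}

static void layout_decode(int layout, int *mode, int *period)
{
	*mode = layout & ModeMask;          /* bottom 5 bits: failure mode */
	*period = layout >> ModeShift;      /* the rest: period, 0 = one-shot */
}

int main(void)
{
	int mode, period;
	int layout = layout_encode(ReadPersistent, 100); /* fail ~every 100 reads */

	layout_decode(layout, &mode, &period);
	printf("layout=%d -> mode=%d period=%d\n", layout, mode, period);
	return 0;
}
```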
- */ - -#define WriteTransient 0 -#define ReadTransient 1 -#define WritePersistent 2 -#define ReadPersistent 3 -#define WriteAll 4 /* doesn't go to device */ -#define ReadFixable 5 -#define Modes 6 - -#define ClearErrors 31 -#define ClearFaults 30 - -#define AllPersist 100 /* internal use only */ -#define NoPersist 101 - -#define ModeMask 0x1f -#define ModeShift 5 - -#define MaxFault 50 -#include <linux/blkdev.h> -#include <linux/module.h> -#include <linux/raid/md_u.h> -#include <linux/slab.h> -#include "md.h" -#include <linux/seq_file.h> - - -static void faulty_fail(struct bio *bio, int error) -{ - struct bio *b = bio->bi_private; - - b->bi_size = bio->bi_size; - b->bi_sector = bio->bi_sector; - - bio_put(bio); - - bio_io_error(b); -} - -struct faulty_conf { - int period[Modes]; - atomic_t counters[Modes]; - sector_t faults[MaxFault]; - int modes[MaxFault]; - int nfaults; - struct md_rdev *rdev; -}; - -static int check_mode(struct faulty_conf *conf, int mode) -{ - if (conf->period[mode] == 0 && - atomic_read(&conf->counters[mode]) <= 0) - return 0; /* no failure, no decrement */ - - - if (atomic_dec_and_test(&conf->counters[mode])) { - if (conf->period[mode]) - atomic_set(&conf->counters[mode], conf->period[mode]); - return 1; - } - return 0; -} - -static int check_sector(struct faulty_conf *conf, sector_t start, sector_t end, int dir) -{ - /* If we find a ReadFixable sector, we fix it ... */ - int i; - for (i=0; i<conf->nfaults; i++) - if (conf->faults[i] >= start && - conf->faults[i] < end) { - /* found it ... */ - switch (conf->modes[i] * 2 + dir) { - case WritePersistent*2+WRITE: return 1; - case ReadPersistent*2+READ: return 1; - case ReadFixable*2+READ: return 1; - case ReadFixable*2+WRITE: - conf->modes[i] = NoPersist; - return 0; - case AllPersist*2+READ: - case AllPersist*2+WRITE: return 1; - default: - return 0; - } - } - return 0; -} - -static void add_sector(struct faulty_conf *conf, sector_t start, int mode) -{ - int i; - int n = conf->nfaults; - for (i=0; i<conf->nfaults; i++) - if (conf->faults[i] == start) { - switch(mode) { - case NoPersist: conf->modes[i] = mode; return; - case WritePersistent: - if (conf->modes[i] == ReadPersistent || - conf->modes[i] == ReadFixable) - conf->modes[i] = AllPersist; - else - conf->modes[i] = WritePersistent; - return; - case ReadPersistent: - if (conf->modes[i] == WritePersistent) - conf->modes[i] = AllPersist; - else - conf->modes[i] = ReadPersistent; - return; - case ReadFixable: - if (conf->modes[i] == WritePersistent || - conf->modes[i] == ReadPersistent) - conf->modes[i] = AllPersist; - else - conf->modes[i] = ReadFixable; - return; - } - } else if (conf->modes[i] == NoPersist) - n = i; - - if (n >= MaxFault) - return; - conf->faults[n] = start; - conf->modes[n] = mode; - if (conf->nfaults == n) - conf->nfaults = n+1; -} - -static void make_request(struct mddev *mddev, struct bio *bio) -{ - struct faulty_conf *conf = mddev->private; - int failit = 0; - - if (bio_data_dir(bio) == WRITE) { - /* write request */ - if (atomic_read(&conf->counters[WriteAll])) { - /* special case - don't decrement, don't generic_make_request, - * just fail immediately - */ - bio_endio(bio, -EIO); - return; - } - - if (check_sector(conf, bio->bi_sector, bio->bi_sector+(bio->bi_size>>9), - WRITE)) - failit = 1; - if (check_mode(conf, WritePersistent)) { - add_sector(conf, bio->bi_sector, WritePersistent); - failit = 1; - } - if (check_mode(conf, WriteTransient)) - failit = 1; - } else { - /* read request */ - if (check_sector(conf, bio->bi_sector, 
bio->bi_sector + (bio->bi_size>>9), - READ)) - failit = 1; - if (check_mode(conf, ReadTransient)) - failit = 1; - if (check_mode(conf, ReadPersistent)) { - add_sector(conf, bio->bi_sector, ReadPersistent); - failit = 1; - } - if (check_mode(conf, ReadFixable)) { - add_sector(conf, bio->bi_sector, ReadFixable); - failit = 1; - } - } - if (failit) { - struct bio *b = bio_clone_mddev(bio, GFP_NOIO, mddev); - - b->bi_bdev = conf->rdev->bdev; - b->bi_private = bio; - b->bi_end_io = faulty_fail; - bio = b; - } else - bio->bi_bdev = conf->rdev->bdev; - - generic_make_request(bio); -} - -static void status(struct seq_file *seq, struct mddev *mddev) -{ - struct faulty_conf *conf = mddev->private; - int n; - - if ((n=atomic_read(&conf->counters[WriteTransient])) != 0) - seq_printf(seq, " WriteTransient=%d(%d)", - n, conf->period[WriteTransient]); - - if ((n=atomic_read(&conf->counters[ReadTransient])) != 0) - seq_printf(seq, " ReadTransient=%d(%d)", - n, conf->period[ReadTransient]); - - if ((n=atomic_read(&conf->counters[WritePersistent])) != 0) - seq_printf(seq, " WritePersistent=%d(%d)", - n, conf->period[WritePersistent]); - - if ((n=atomic_read(&conf->counters[ReadPersistent])) != 0) - seq_printf(seq, " ReadPersistent=%d(%d)", - n, conf->period[ReadPersistent]); - - - if ((n=atomic_read(&conf->counters[ReadFixable])) != 0) - seq_printf(seq, " ReadFixable=%d(%d)", - n, conf->period[ReadFixable]); - - if ((n=atomic_read(&conf->counters[WriteAll])) != 0) - seq_printf(seq, " WriteAll"); - - seq_printf(seq, " nfaults=%d", conf->nfaults); -} - - -static int reshape(struct mddev *mddev) -{ - int mode = mddev->new_layout & ModeMask; - int count = mddev->new_layout >> ModeShift; - struct faulty_conf *conf = mddev->private; - - if (mddev->new_layout < 0) - return 0; - - /* new layout */ - if (mode == ClearFaults) - conf->nfaults = 0; - else if (mode == ClearErrors) { - int i; - for (i=0 ; i < Modes ; i++) { - conf->period[i] = 0; - atomic_set(&conf->counters[i], 0); - } - } else if (mode < Modes) { - conf->period[mode] = count; - if (!count) count++; - atomic_set(&conf->counters[mode], count); - } else - return -EINVAL; - mddev->new_layout = -1; - mddev->layout = -1; /* makes sure further changes come through */ - return 0; -} - -static sector_t faulty_size(struct mddev *mddev, sector_t sectors, int raid_disks) -{ - WARN_ONCE(raid_disks, - "%s does not support generic reshape\n", __func__); - - if (sectors == 0) - return mddev->dev_sectors; - - return sectors; -} - -static int run(struct mddev *mddev) -{ - struct md_rdev *rdev; - int i; - struct faulty_conf *conf; - - if (md_check_no_bitmap(mddev)) - return -EINVAL; - - conf = kmalloc(sizeof(*conf), GFP_KERNEL); - if (!conf) - return -ENOMEM; - - for (i=0; i<Modes; i++) { - atomic_set(&conf->counters[i], 0); - conf->period[i] = 0; - } - conf->nfaults = 0; - - rdev_for_each(rdev, mddev) - conf->rdev = rdev; - - md_set_array_sectors(mddev, faulty_size(mddev, 0, 0)); - mddev->private = conf; - - reshape(mddev); - - return 0; -} - -static int stop(struct mddev *mddev) -{ - struct faulty_conf *conf = mddev->private; - - kfree(conf); - mddev->private = NULL; - return 0; -} - -static struct md_personality faulty_personality = -{ - .name = "faulty", - .level = LEVEL_FAULTY, - .owner = THIS_MODULE, - .make_request = make_request, - .run = run, - .stop = stop, - .status = status, - .check_reshape = reshape, - .size = faulty_size, -}; - -static int __init raid_init(void) -{ - return register_md_personality(&faulty_personality); -} - -static void raid_exit(void) -{ 
- unregister_md_personality(&faulty_personality); -} - -module_init(raid_init); -module_exit(raid_exit); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Fault injection personality for MD"); -MODULE_ALIAS("md-personality-10"); /* faulty */ -MODULE_ALIAS("md-faulty"); -MODULE_ALIAS("md-level--5"); diff --git a/ANDROID_3.4.5/drivers/md/linear.c b/ANDROID_3.4.5/drivers/md/linear.c deleted file mode 100644 index fa211d80..00000000 --- a/ANDROID_3.4.5/drivers/md/linear.c +++ /dev/null @@ -1,369 +0,0 @@ -/* - linear.c : Multiple Devices driver for Linux - Copyright (C) 1994-96 Marc ZYNGIER - <zyngier@ufr-info-p7.ibp.fr> or - <maz@gloups.fdn.fr> - - Linear mode management functions. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - You should have received a copy of the GNU General Public License - (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include <linux/blkdev.h> -#include <linux/raid/md_u.h> -#include <linux/seq_file.h> -#include <linux/module.h> -#include <linux/slab.h> -#include "md.h" -#include "linear.h" - -/* - * find which device holds a particular offset - */ -static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector) -{ - int lo, mid, hi; - struct linear_conf *conf; - - lo = 0; - hi = mddev->raid_disks - 1; - conf = rcu_dereference(mddev->private); - - /* - * Binary Search - */ - - while (hi > lo) { - - mid = (hi + lo) / 2; - if (sector < conf->disks[mid].end_sector) - hi = mid; - else - lo = mid + 1; - } - - return conf->disks + lo; -} - -/** - * linear_mergeable_bvec -- tell bio layer if two requests can be merged - * @q: request queue - * @bvm: properties of new bio - * @biovec: the request that could be merged to it. 
- * - * Return amount of bytes we can take at this offset - */ -static int linear_mergeable_bvec(struct request_queue *q, - struct bvec_merge_data *bvm, - struct bio_vec *biovec) -{ - struct mddev *mddev = q->queuedata; - struct dev_info *dev0; - unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9; - sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); - int maxbytes = biovec->bv_len; - struct request_queue *subq; - - rcu_read_lock(); - dev0 = which_dev(mddev, sector); - maxsectors = dev0->end_sector - sector; - subq = bdev_get_queue(dev0->rdev->bdev); - if (subq->merge_bvec_fn) { - bvm->bi_bdev = dev0->rdev->bdev; - bvm->bi_sector -= dev0->end_sector - dev0->rdev->sectors; - maxbytes = min(maxbytes, subq->merge_bvec_fn(subq, bvm, - biovec)); - } - rcu_read_unlock(); - - if (maxsectors < bio_sectors) - maxsectors = 0; - else - maxsectors -= bio_sectors; - - if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0) - return maxbytes; - - if (maxsectors > (maxbytes >> 9)) - return maxbytes; - else - return maxsectors << 9; -} - -static int linear_congested(void *data, int bits) -{ - struct mddev *mddev = data; - struct linear_conf *conf; - int i, ret = 0; - - if (mddev_congested(mddev, bits)) - return 1; - - rcu_read_lock(); - conf = rcu_dereference(mddev->private); - - for (i = 0; i < mddev->raid_disks && !ret ; i++) { - struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev); - ret |= bdi_congested(&q->backing_dev_info, bits); - } - - rcu_read_unlock(); - return ret; -} - -static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks) -{ - struct linear_conf *conf; - sector_t array_sectors; - - rcu_read_lock(); - conf = rcu_dereference(mddev->private); - WARN_ONCE(sectors || raid_disks, - "%s does not support generic reshape\n", __func__); - array_sectors = conf->array_sectors; - rcu_read_unlock(); - - return array_sectors; -} - -static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) -{ - struct linear_conf *conf; - struct md_rdev *rdev; - int i, cnt; - - conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(struct dev_info), - GFP_KERNEL); - if (!conf) - return NULL; - - cnt = 0; - conf->array_sectors = 0; - - rdev_for_each(rdev, mddev) { - int j = rdev->raid_disk; - struct dev_info *disk = conf->disks + j; - sector_t sectors; - - if (j < 0 || j >= raid_disks || disk->rdev) { - printk(KERN_ERR "md/linear:%s: disk numbering problem. Aborting!\n", - mdname(mddev)); - goto out; - } - - disk->rdev = rdev; - if (mddev->chunk_sectors) { - sectors = rdev->sectors; - sector_div(sectors, mddev->chunk_sectors); - rdev->sectors = sectors * mddev->chunk_sectors; - } - - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - - conf->array_sectors += rdev->sectors; - cnt++; - - } - if (cnt != raid_disks) { - printk(KERN_ERR "md/linear:%s: not enough drives present. Aborting!\n", - mdname(mddev)); - goto out; - } - - /* - * Here we calculate the device offsets. 
- */ - conf->disks[0].end_sector = conf->disks[0].rdev->sectors; - - for (i = 1; i < raid_disks; i++) - conf->disks[i].end_sector = - conf->disks[i-1].end_sector + - conf->disks[i].rdev->sectors; - - return conf; - -out: - kfree(conf); - return NULL; -} - -static int linear_run (struct mddev *mddev) -{ - struct linear_conf *conf; - int ret; - - if (md_check_no_bitmap(mddev)) - return -EINVAL; - conf = linear_conf(mddev, mddev->raid_disks); - - if (!conf) - return 1; - mddev->private = conf; - md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); - - blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec); - mddev->queue->backing_dev_info.congested_fn = linear_congested; - mddev->queue->backing_dev_info.congested_data = mddev; - - ret = md_integrity_register(mddev); - if (ret) { - kfree(conf); - mddev->private = NULL; - } - return ret; -} - -static int linear_add(struct mddev *mddev, struct md_rdev *rdev) -{ - /* Adding a drive to a linear array allows the array to grow. - * It is permitted if the new drive has a matching superblock - * already on it, with raid_disk equal to raid_disks. - * It is achieved by creating a new linear_private_data structure - * and swapping it in in-place of the current one. - * The current one is never freed until the array is stopped. - * This avoids races. - */ - struct linear_conf *newconf, *oldconf; - - if (rdev->saved_raid_disk != mddev->raid_disks) - return -EINVAL; - - rdev->raid_disk = rdev->saved_raid_disk; - rdev->saved_raid_disk = -1; - - newconf = linear_conf(mddev,mddev->raid_disks+1); - - if (!newconf) - return -ENOMEM; - - oldconf = rcu_dereference(mddev->private); - mddev->raid_disks++; - rcu_assign_pointer(mddev->private, newconf); - md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); - kfree_rcu(oldconf, rcu); - return 0; -} - -static int linear_stop (struct mddev *mddev) -{ - struct linear_conf *conf = mddev->private; - - /* - * We do not require rcu protection here since - * we hold reconfig_mutex for both linear_add and - * linear_stop, so they cannot race. - * We should make sure any old 'conf's are properly - * freed though. - */ - rcu_barrier(); - blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ - kfree(conf); - mddev->private = NULL; - - return 0; -} - -static void linear_make_request(struct mddev *mddev, struct bio *bio) -{ - struct dev_info *tmp_dev; - sector_t start_sector; - - if (unlikely(bio->bi_rw & REQ_FLUSH)) { - md_flush_request(mddev, bio); - return; - } - - rcu_read_lock(); - tmp_dev = which_dev(mddev, bio->bi_sector); - start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; - - - if (unlikely(bio->bi_sector >= (tmp_dev->end_sector) - || (bio->bi_sector < start_sector))) { - char b[BDEVNAME_SIZE]; - - printk(KERN_ERR - "md/linear:%s: make_request: Sector %llu out of bounds on " - "dev %s: %llu sectors, offset %llu\n", - mdname(mddev), - (unsigned long long)bio->bi_sector, - bdevname(tmp_dev->rdev->bdev, b), - (unsigned long long)tmp_dev->rdev->sectors, - (unsigned long long)start_sector); - rcu_read_unlock(); - bio_io_error(bio); - return; - } - if (unlikely(bio->bi_sector + (bio->bi_size >> 9) > - tmp_dev->end_sector)) { - /* This bio crosses a device boundary, so we have to - * split it. 
- */ - struct bio_pair *bp; - sector_t end_sector = tmp_dev->end_sector; - - rcu_read_unlock(); - - bp = bio_split(bio, end_sector - bio->bi_sector); - - linear_make_request(mddev, &bp->bio1); - linear_make_request(mddev, &bp->bio2); - bio_pair_release(bp); - return; - } - - bio->bi_bdev = tmp_dev->rdev->bdev; - bio->bi_sector = bio->bi_sector - start_sector - + tmp_dev->rdev->data_offset; - rcu_read_unlock(); - generic_make_request(bio); -} - -static void linear_status (struct seq_file *seq, struct mddev *mddev) -{ - - seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2); -} - - -static struct md_personality linear_personality = -{ - .name = "linear", - .level = LEVEL_LINEAR, - .owner = THIS_MODULE, - .make_request = linear_make_request, - .run = linear_run, - .stop = linear_stop, - .status = linear_status, - .hot_add_disk = linear_add, - .size = linear_size, -}; - -static int __init linear_init (void) -{ - return register_md_personality (&linear_personality); -} - -static void linear_exit (void) -{ - unregister_md_personality (&linear_personality); -} - - -module_init(linear_init); -module_exit(linear_exit); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Linear device concatenation personality for MD"); -MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/ -MODULE_ALIAS("md-linear"); -MODULE_ALIAS("md-level--1"); diff --git a/ANDROID_3.4.5/drivers/md/linear.h b/ANDROID_3.4.5/drivers/md/linear.h deleted file mode 100644 index b685ddd7..00000000 --- a/ANDROID_3.4.5/drivers/md/linear.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _LINEAR_H -#define _LINEAR_H - -struct dev_info { - struct md_rdev *rdev; - sector_t end_sector; -}; - -struct linear_conf -{ - struct rcu_head rcu; - sector_t array_sectors; - struct dev_info disks[0]; -}; -#endif diff --git a/ANDROID_3.4.5/drivers/md/md.c b/ANDROID_3.4.5/drivers/md/md.c deleted file mode 100644 index 2b30ffdb..00000000 --- a/ANDROID_3.4.5/drivers/md/md.c +++ /dev/null @@ -1,8342 +0,0 @@ -/* - md.c : Multiple Devices driver for Linux - Copyright (C) 1998, 1999, 2000 Ingo Molnar - - completely rewritten, based on the MD driver code from Marc Zyngier - - Changes: - - - RAID-1/RAID-5 extensions by Miguel de Icaza, Gadi Oxman, Ingo Molnar - - RAID-6 extensions by H. Peter Anvin <hpa@zytor.com> - - boot support for linear and striped mode by Harald Hoyer <HarryH@Royal.Net> - - kerneld support by Boris Tobotras <boris@xtalk.msk.su> - - kmod support by: Cyrus Durgin - - RAID0 bugfixes: Mark Anthony Lisher <markal@iname.com> - - Devfs support by Richard Gooch <rgooch@atnf.csiro.au> - - - lots of fixes and improvements to the RAID1/RAID5 and generic - RAID code (such as request based resynchronization): - - Neil Brown <neilb@cse.unsw.edu.au>. - - - persistent bitmap code - Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - You should have received a copy of the GNU General Public License - (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
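which_dev() above binary-searches conf->disks[] by cumulative end_sector, and linear_make_request() then rebases the bio with bi_sector - start_sector + data_offset (splitting it first if it crosses a device boundary). The userspace sketch below reproduces the lookup and rebase over a small table of invented device sizes; the struct loosely mirrors dev_info from linear.h, minus the rdev pointer and data_offset.

```c
/* Sketch: map a logical sector of a linear array to (device, offset)
 * the way which_dev() + linear_make_request() do.  Sizes are invented. */
#include <stdint.h>
#include <stdio.h>

struct dev_span {
	uint64_t sectors;      /* size of this member device */
	uint64_t end_sector;   /* cumulative end, filled in below */
};

static int which_dev(const struct dev_span *d, int n, uint64_t sector)
{
	int lo = 0, hi = n - 1;

	while (hi > lo) {                   /* binary search on end_sector */
		int mid = (hi + lo) / 2;

		if (sector < d[mid].end_sector)
			hi = mid;
		else
			lo = mid + 1;
	}
	return lo;
}

int main(void)
{
	struct dev_span disks[] = {
		{ .sectors = 1000 }, { .sectors = 4000 }, { .sectors = 2500 },
	};
	int n = 3;
	uint64_t cum = 0;

	for (int i = 0; i < n; i++) {       /* same cumulative layout as linear_conf() */
		cum += disks[i].sectors;
		disks[i].end_sector = cum;
	}

	uint64_t sector = 4200;
	int i = which_dev(disks, n, sector);
	uint64_t start = disks[i].end_sector - disks[i].sectors;

	printf("sector %llu -> disk %d, device-relative sector %llu\n",
	       (unsigned long long)sector, i,
	       (unsigned long long)(sector - start));
	return 0;
}
```

A request whose last sector lands beyond disks[i].end_sector would have to be split first, which is the bio_pair case handled just above.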
-*/ - -#include <linux/kthread.h> -#include <linux/blkdev.h> -#include <linux/sysctl.h> -#include <linux/seq_file.h> -#include <linux/fs.h> -#include <linux/poll.h> -#include <linux/ctype.h> -#include <linux/string.h> -#include <linux/hdreg.h> -#include <linux/proc_fs.h> -#include <linux/random.h> -#include <linux/module.h> -#include <linux/reboot.h> -#include <linux/file.h> -#include <linux/compat.h> -#include <linux/delay.h> -#include <linux/raid/md_p.h> -#include <linux/raid/md_u.h> -#include <linux/slab.h> -#include "md.h" -#include "bitmap.h" - -#ifndef MODULE -static void autostart_arrays(int part); -#endif - -/* pers_list is a list of registered personalities protected - * by pers_lock. - * pers_lock does extra service to protect accesses to - * mddev->thread when the mutex cannot be held. - */ -static LIST_HEAD(pers_list); -static DEFINE_SPINLOCK(pers_lock); - -static void md_print_devices(void); - -static DECLARE_WAIT_QUEUE_HEAD(resync_wait); -static struct workqueue_struct *md_wq; -static struct workqueue_struct *md_misc_wq; - -#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } - -/* - * Default number of read corrections we'll attempt on an rdev - * before ejecting it from the array. We divide the read error - * count by 2 for every hour elapsed between read errors. - */ -#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20 -/* - * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' - * is 1000 KB/sec, so the extra system load does not show up that much. - * Increase it if you want to have more _guaranteed_ speed. Note that - * the RAID driver will use the maximum available bandwidth if the IO - * subsystem is idle. There is also an 'absolute maximum' reconstruction - * speed limit - in case reconstruction slows down your system despite - * idle IO detection. - * - * you can change it via /proc/sys/dev/raid/speed_limit_min and _max. - * or /sys/block/mdX/md/sync_speed_{min,max} - */ - -static int sysctl_speed_limit_min = 1000; -static int sysctl_speed_limit_max = 200000; -static inline int speed_min(struct mddev *mddev) -{ - return mddev->sync_speed_min ? - mddev->sync_speed_min : sysctl_speed_limit_min; -} - -static inline int speed_max(struct mddev *mddev) -{ - return mddev->sync_speed_max ? 
- mddev->sync_speed_max : sysctl_speed_limit_max; -} - -static struct ctl_table_header *raid_table_header; - -static ctl_table raid_table[] = { - { - .procname = "speed_limit_min", - .data = &sysctl_speed_limit_min, - .maxlen = sizeof(int), - .mode = S_IRUGO|S_IWUSR, - .proc_handler = proc_dointvec, - }, - { - .procname = "speed_limit_max", - .data = &sysctl_speed_limit_max, - .maxlen = sizeof(int), - .mode = S_IRUGO|S_IWUSR, - .proc_handler = proc_dointvec, - }, - { } -}; - -static ctl_table raid_dir_table[] = { - { - .procname = "raid", - .maxlen = 0, - .mode = S_IRUGO|S_IXUGO, - .child = raid_table, - }, - { } -}; - -static ctl_table raid_root_table[] = { - { - .procname = "dev", - .maxlen = 0, - .mode = 0555, - .child = raid_dir_table, - }, - { } -}; - -static const struct block_device_operations md_fops; - -static int start_readonly; - -/* bio_clone_mddev - * like bio_clone, but with a local bio set - */ - -static void mddev_bio_destructor(struct bio *bio) -{ - struct mddev *mddev, **mddevp; - - mddevp = (void*)bio; - mddev = mddevp[-1]; - - bio_free(bio, mddev->bio_set); -} - -struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, - struct mddev *mddev) -{ - struct bio *b; - struct mddev **mddevp; - - if (!mddev || !mddev->bio_set) - return bio_alloc(gfp_mask, nr_iovecs); - - b = bio_alloc_bioset(gfp_mask, nr_iovecs, - mddev->bio_set); - if (!b) - return NULL; - mddevp = (void*)b; - mddevp[-1] = mddev; - b->bi_destructor = mddev_bio_destructor; - return b; -} -EXPORT_SYMBOL_GPL(bio_alloc_mddev); - -struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, - struct mddev *mddev) -{ - struct bio *b; - struct mddev **mddevp; - - if (!mddev || !mddev->bio_set) - return bio_clone(bio, gfp_mask); - - b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, - mddev->bio_set); - if (!b) - return NULL; - mddevp = (void*)b; - mddevp[-1] = mddev; - b->bi_destructor = mddev_bio_destructor; - __bio_clone(b, bio); - if (bio_integrity(bio)) { - int ret; - - ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set); - - if (ret < 0) { - bio_put(b); - return NULL; - } - } - - return b; -} -EXPORT_SYMBOL_GPL(bio_clone_mddev); - -void md_trim_bio(struct bio *bio, int offset, int size) -{ - /* 'bio' is a cloned bio which we need to trim to match - * the given offset and size. 
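bio_alloc_mddev()/bio_clone_mddev() above appear to rely on mddev->bio_set having been created (elsewhere in md.c, not in this hunk) with enough front padding to hide a struct mddev * immediately before the bio, which mddev_bio_destructor() then recovers with mddevp[-1]. The sketch below shows that front-pad idiom with plain malloc(); the names are invented and it is obviously not the bio_set machinery itself.

```c
/* Sketch: stash an owner pointer in front padding before an object,
 * the way bio_alloc_mddev() hides the mddev pointer before the bio. */
#include <stdio.h>
#include <stdlib.h>

struct owner { const char *name; };
struct object { int payload; };

/* Allocate an object with room for one back-pointer in front of it. */
static struct object *obj_alloc(struct owner *who)
{
	void *raw = malloc(sizeof(struct owner *) + sizeof(struct object));
	struct object *obj;
	struct owner **slot;

	if (!raw)
		return NULL;
	obj = (struct object *)((struct owner **)raw + 1);
	slot = (struct owner **)obj;
	slot[-1] = who;                     /* mirrors mddevp[-1] = mddev */
	return obj;
}

/* Recover the owner and release the original allocation. */
static void obj_free(struct object *obj)
{
	struct owner **slot = (struct owner **)obj;
	struct owner *who = slot[-1];

	printf("freeing object owned by %s\n", who->name);
	free(slot - 1);                     /* free the real start of the block */
}

int main(void)
{
	struct owner md = { .name = "md0" };
	struct object *o = obj_alloc(&md);

	if (!o)
		return 1;
	o->payload = 42;
	obj_free(o);
	return 0;
}
```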
- * This requires adjusting bi_sector, bi_size, and bi_io_vec - */ - int i; - struct bio_vec *bvec; - int sofar = 0; - - size <<= 9; - if (offset == 0 && size == bio->bi_size) - return; - - bio->bi_sector += offset; - bio->bi_size = size; - offset <<= 9; - clear_bit(BIO_SEG_VALID, &bio->bi_flags); - - while (bio->bi_idx < bio->bi_vcnt && - bio->bi_io_vec[bio->bi_idx].bv_len <= offset) { - /* remove this whole bio_vec */ - offset -= bio->bi_io_vec[bio->bi_idx].bv_len; - bio->bi_idx++; - } - if (bio->bi_idx < bio->bi_vcnt) { - bio->bi_io_vec[bio->bi_idx].bv_offset += offset; - bio->bi_io_vec[bio->bi_idx].bv_len -= offset; - } - /* avoid any complications with bi_idx being non-zero*/ - if (bio->bi_idx) { - memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx, - (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec)); - bio->bi_vcnt -= bio->bi_idx; - bio->bi_idx = 0; - } - /* Make sure vcnt and last bv are not too big */ - bio_for_each_segment(bvec, bio, i) { - if (sofar + bvec->bv_len > size) - bvec->bv_len = size - sofar; - if (bvec->bv_len == 0) { - bio->bi_vcnt = i; - break; - } - sofar += bvec->bv_len; - } -} -EXPORT_SYMBOL_GPL(md_trim_bio); - -/* - * We have a system wide 'event count' that is incremented - * on any 'interesting' event, and readers of /proc/mdstat - * can use 'poll' or 'select' to find out when the event - * count increases. - * - * Events are: - * start array, stop array, error, add device, remove device, - * start build, activate spare - */ -static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters); -static atomic_t md_event_count; -void md_new_event(struct mddev *mddev) -{ - atomic_inc(&md_event_count); - wake_up(&md_event_waiters); -} -EXPORT_SYMBOL_GPL(md_new_event); - -/* Alternate version that can be called from interrupts - * when calling sysfs_notify isn't needed. - */ -static void md_new_event_inintr(struct mddev *mddev) -{ - atomic_inc(&md_event_count); - wake_up(&md_event_waiters); -} - -/* - * Enables to iterate over all existing md arrays - * all_mddevs_lock protects this list. - */ -static LIST_HEAD(all_mddevs); -static DEFINE_SPINLOCK(all_mddevs_lock); - - -/* - * iterates through all used mddevs in the system. - * We take care to grab the all_mddevs_lock whenever navigating - * the list, and to always hold a refcount when unlocked. - * Any code which breaks out of this loop while own - * a reference to the current mddev and must mddev_put it. - */ -#define for_each_mddev(_mddev,_tmp) \ - \ - for (({ spin_lock(&all_mddevs_lock); \ - _tmp = all_mddevs.next; \ - _mddev = NULL;}); \ - ({ if (_tmp != &all_mddevs) \ - mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\ - spin_unlock(&all_mddevs_lock); \ - if (_mddev) mddev_put(_mddev); \ - _mddev = list_entry(_tmp, struct mddev, all_mddevs); \ - _tmp != &all_mddevs;}); \ - ({ spin_lock(&all_mddevs_lock); \ - _tmp = _tmp->next;}) \ - ) - - -/* Rather than calling directly into the personality make_request function, - * IO requests come here first so that we can check if the device is - * being suspended pending a reconfiguration. - * We hold a refcount over the call to ->make_request. By the time that - * call has finished, the bio has been linked into some internal structure - * and so is visible to ->quiesce(), so we don't need the refcount any more. 
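md_trim_bio() above clips a cloned bio to an (offset, size) window by advancing bi_sector, dropping whole leading bio_vecs, shaving the front of the first remaining one, then capping and compacting the tail so the vector never exceeds the requested size. The same arithmetic on a plain array of segment lengths, with invented byte values instead of sectors, looks like this:

```c
/* Sketch: trim an array of segment lengths to an (offset, size) window,
 * following the same steps as md_trim_bio().  Units are bytes here. */
#include <stdio.h>

static int trim_segments(unsigned lens[], int nsegs,
			 unsigned offset, unsigned size)
{
	int first = 0, out = 0;
	unsigned sofar = 0;

	/* Drop whole leading segments covered by the offset. */
	while (first < nsegs && lens[first] <= offset) {
		offset -= lens[first];
		first++;
	}
	/* Shave the front of the first remaining segment. */
	if (first < nsegs)
		lens[first] -= offset;

	/* Cap the tail so the total never exceeds 'size', compacting as we
	 * go (the kernel does this with a memmove when bi_idx is non-zero). */
	for (int i = first; i < nsegs; i++) {
		if (sofar + lens[i] > size)
			lens[i] = size - sofar;
		if (lens[i] == 0)
			break;
		sofar += lens[i];
		lens[out++] = lens[i];
	}
	return out;                             /* new segment count */
}

int main(void)
{
	unsigned lens[] = { 4096, 4096, 4096, 4096 };
	int n = trim_segments(lens, 4, 6144, 8192);

	printf("kept %d segments:", n);
	for (int i = 0; i < n; i++)
		printf(" %u", lens[i]);
	printf("\n");
	return 0;
}
```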
- */ -static void md_make_request(struct request_queue *q, struct bio *bio) -{ - const int rw = bio_data_dir(bio); - struct mddev *mddev = q->queuedata; - int cpu; - unsigned int sectors; - - if (mddev == NULL || mddev->pers == NULL - || !mddev->ready) { - bio_io_error(bio); - return; - } - smp_rmb(); /* Ensure implications of 'active' are visible */ - rcu_read_lock(); - if (mddev->suspended) { - DEFINE_WAIT(__wait); - for (;;) { - prepare_to_wait(&mddev->sb_wait, &__wait, - TASK_UNINTERRUPTIBLE); - if (!mddev->suspended) - break; - rcu_read_unlock(); - schedule(); - rcu_read_lock(); - } - finish_wait(&mddev->sb_wait, &__wait); - } - atomic_inc(&mddev->active_io); - rcu_read_unlock(); - - /* - * save the sectors now since our bio can - * go away inside make_request - */ - sectors = bio_sectors(bio); - mddev->pers->make_request(mddev, bio); - - cpu = part_stat_lock(); - part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); - part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors); - part_stat_unlock(); - - if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) - wake_up(&mddev->sb_wait); -} - -/* mddev_suspend makes sure no new requests are submitted - * to the device, and that any requests that have been submitted - * are completely handled. - * Once ->stop is called and completes, the module will be completely - * unused. - */ -void mddev_suspend(struct mddev *mddev) -{ - BUG_ON(mddev->suspended); - mddev->suspended = 1; - synchronize_rcu(); - wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); - mddev->pers->quiesce(mddev, 1); - - del_timer_sync(&mddev->safemode_timer); -} -EXPORT_SYMBOL_GPL(mddev_suspend); - -void mddev_resume(struct mddev *mddev) -{ - mddev->suspended = 0; - wake_up(&mddev->sb_wait); - mddev->pers->quiesce(mddev, 0); - - md_wakeup_thread(mddev->thread); - md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ -} -EXPORT_SYMBOL_GPL(mddev_resume); - -int mddev_congested(struct mddev *mddev, int bits) -{ - return mddev->suspended; -} -EXPORT_SYMBOL(mddev_congested); - -/* - * Generic flush handling for md - */ - -static void md_end_flush(struct bio *bio, int err) -{ - struct md_rdev *rdev = bio->bi_private; - struct mddev *mddev = rdev->mddev; - - rdev_dec_pending(rdev, mddev); - - if (atomic_dec_and_test(&mddev->flush_pending)) { - /* The pre-request flush has finished */ - queue_work(md_wq, &mddev->flush_work); - } - bio_put(bio); -} - -static void md_submit_flush_data(struct work_struct *ws); - -static void submit_flushes(struct work_struct *ws) -{ - struct mddev *mddev = container_of(ws, struct mddev, flush_work); - struct md_rdev *rdev; - - INIT_WORK(&mddev->flush_work, md_submit_flush_data); - atomic_set(&mddev->flush_pending, 1); - rcu_read_lock(); - rdev_for_each_rcu(rdev, mddev) - if (rdev->raid_disk >= 0 && - !test_bit(Faulty, &rdev->flags)) { - /* Take two references, one is dropped - * when request finishes, one after - * we reclaim rcu_read_lock - */ - struct bio *bi; - atomic_inc(&rdev->nr_pending); - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - bi = bio_alloc_mddev(GFP_NOIO, 0, mddev); - bi->bi_end_io = md_end_flush; - bi->bi_private = rdev; - bi->bi_bdev = rdev->bdev; - atomic_inc(&mddev->flush_pending); - submit_bio(WRITE_FLUSH, bi); - rcu_read_lock(); - rdev_dec_pending(rdev, mddev); - } - rcu_read_unlock(); - if (atomic_dec_and_test(&mddev->flush_pending)) - queue_work(md_wq, &mddev->flush_work); -} - -static void md_submit_flush_data(struct work_struct *ws) -{ - struct mddev *mddev = 
container_of(ws, struct mddev, flush_work); - struct bio *bio = mddev->flush_bio; - - if (bio->bi_size == 0) - /* an empty barrier - all done */ - bio_endio(bio, 0); - else { - bio->bi_rw &= ~REQ_FLUSH; - mddev->pers->make_request(mddev, bio); - } - - mddev->flush_bio = NULL; - wake_up(&mddev->sb_wait); -} - -void md_flush_request(struct mddev *mddev, struct bio *bio) -{ - spin_lock_irq(&mddev->write_lock); - wait_event_lock_irq(mddev->sb_wait, - !mddev->flush_bio, - mddev->write_lock, /*nothing*/); - mddev->flush_bio = bio; - spin_unlock_irq(&mddev->write_lock); - - INIT_WORK(&mddev->flush_work, submit_flushes); - queue_work(md_wq, &mddev->flush_work); -} -EXPORT_SYMBOL(md_flush_request); - -/* Support for plugging. - * This mirrors the plugging support in request_queue, but does not - * require having a whole queue or request structures. - * We allocate an md_plug_cb for each md device and each thread it gets - * plugged on. This links tot the private plug_handle structure in the - * personality data where we keep a count of the number of outstanding - * plugs so other code can see if a plug is active. - */ -struct md_plug_cb { - struct blk_plug_cb cb; - struct mddev *mddev; -}; - -static void plugger_unplug(struct blk_plug_cb *cb) -{ - struct md_plug_cb *mdcb = container_of(cb, struct md_plug_cb, cb); - if (atomic_dec_and_test(&mdcb->mddev->plug_cnt)) - md_wakeup_thread(mdcb->mddev->thread); - kfree(mdcb); -} - -/* Check that an unplug wakeup will come shortly. - * If not, wakeup the md thread immediately - */ -int mddev_check_plugged(struct mddev *mddev) -{ - struct blk_plug *plug = current->plug; - struct md_plug_cb *mdcb; - - if (!plug) - return 0; - - list_for_each_entry(mdcb, &plug->cb_list, cb.list) { - if (mdcb->cb.callback == plugger_unplug && - mdcb->mddev == mddev) { - /* Already on the list, move to top */ - if (mdcb != list_first_entry(&plug->cb_list, - struct md_plug_cb, - cb.list)) - list_move(&mdcb->cb.list, &plug->cb_list); - return 1; - } - } - /* Not currently on the callback list */ - mdcb = kmalloc(sizeof(*mdcb), GFP_ATOMIC); - if (!mdcb) - return 0; - - mdcb->mddev = mddev; - mdcb->cb.callback = plugger_unplug; - atomic_inc(&mddev->plug_cnt); - list_add(&mdcb->cb.list, &plug->cb_list); - return 1; -} -EXPORT_SYMBOL_GPL(mddev_check_plugged); - -static inline struct mddev *mddev_get(struct mddev *mddev) -{ - atomic_inc(&mddev->active); - return mddev; -} - -static void mddev_delayed_delete(struct work_struct *ws); - -static void mddev_put(struct mddev *mddev) -{ - struct bio_set *bs = NULL; - - if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) - return; - if (!mddev->raid_disks && list_empty(&mddev->disks) && - mddev->ctime == 0 && !mddev->hold_active) { - /* Array is not configured at all, and not held active, - * so destroy it */ - list_del_init(&mddev->all_mddevs); - bs = mddev->bio_set; - mddev->bio_set = NULL; - if (mddev->gendisk) { - /* We did a probe so need to clean up. Call - * queue_work inside the spinlock so that - * flush_workqueue() after mddev_find will - * succeed in waiting for the work to be done. 
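submit_flushes() above primes mddev->flush_pending with 1, bumps it once per flush bio it issues, and only lets md_end_flush()/the final decrement queue md_submit_flush_data() when the count reaches zero; the bias of 1 keeps the completion from firing while bios are still being submitted. A userspace sketch of that counting pattern with C11 atomics (device count and names invented):

```c
/* Sketch: "bias" completion counting as in submit_flushes()/md_end_flush().
 * The counter starts at 1 so completion cannot fire mid-submission. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int flush_pending;

static void flush_done(void)
{
	printf("all flushes completed, running follow-up work\n");
}

/* Called once per completed per-device flush. */
static void end_flush(void)
{
	if (atomic_fetch_sub(&flush_pending, 1) == 1)
		flush_done();               /* we dropped the last reference */
}

static void submit_flushes(int ndevices)
{
	atomic_store(&flush_pending, 1);    /* the bias */

	for (int i = 0; i < ndevices; i++) {
		atomic_fetch_add(&flush_pending, 1);
		/* In the kernel this submits a flush bio; here the "I/O"
		 * completes immediately. */
		end_flush();
	}

	/* Drop the bias; if everything already finished, complete now. */
	end_flush();
}

int main(void)
{
	submit_flushes(3);
	return 0;
}
```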
- */ - INIT_WORK(&mddev->del_work, mddev_delayed_delete); - queue_work(md_misc_wq, &mddev->del_work); - } else - kfree(mddev); - } - spin_unlock(&all_mddevs_lock); - if (bs) - bioset_free(bs); -} - -void mddev_init(struct mddev *mddev) -{ - mutex_init(&mddev->open_mutex); - mutex_init(&mddev->reconfig_mutex); - mutex_init(&mddev->bitmap_info.mutex); - INIT_LIST_HEAD(&mddev->disks); - INIT_LIST_HEAD(&mddev->all_mddevs); - init_timer(&mddev->safemode_timer); - atomic_set(&mddev->active, 1); - atomic_set(&mddev->openers, 0); - atomic_set(&mddev->active_io, 0); - atomic_set(&mddev->plug_cnt, 0); - spin_lock_init(&mddev->write_lock); - atomic_set(&mddev->flush_pending, 0); - init_waitqueue_head(&mddev->sb_wait); - init_waitqueue_head(&mddev->recovery_wait); - mddev->reshape_position = MaxSector; - mddev->resync_min = 0; - mddev->resync_max = MaxSector; - mddev->level = LEVEL_NONE; -} -EXPORT_SYMBOL_GPL(mddev_init); - -static struct mddev * mddev_find(dev_t unit) -{ - struct mddev *mddev, *new = NULL; - - if (unit && MAJOR(unit) != MD_MAJOR) - unit &= ~((1<<MdpMinorShift)-1); - - retry: - spin_lock(&all_mddevs_lock); - - if (unit) { - list_for_each_entry(mddev, &all_mddevs, all_mddevs) - if (mddev->unit == unit) { - mddev_get(mddev); - spin_unlock(&all_mddevs_lock); - kfree(new); - return mddev; - } - - if (new) { - list_add(&new->all_mddevs, &all_mddevs); - spin_unlock(&all_mddevs_lock); - new->hold_active = UNTIL_IOCTL; - return new; - } - } else if (new) { - /* find an unused unit number */ - static int next_minor = 512; - int start = next_minor; - int is_free = 0; - int dev = 0; - while (!is_free) { - dev = MKDEV(MD_MAJOR, next_minor); - next_minor++; - if (next_minor > MINORMASK) - next_minor = 0; - if (next_minor == start) { - /* Oh dear, all in use. */ - spin_unlock(&all_mddevs_lock); - kfree(new); - return NULL; - } - - is_free = 1; - list_for_each_entry(mddev, &all_mddevs, all_mddevs) - if (mddev->unit == dev) { - is_free = 0; - break; - } - } - new->unit = dev; - new->md_minor = MINOR(dev); - new->hold_active = UNTIL_STOP; - list_add(&new->all_mddevs, &all_mddevs); - spin_unlock(&all_mddevs_lock); - return new; - } - spin_unlock(&all_mddevs_lock); - - new = kzalloc(sizeof(*new), GFP_KERNEL); - if (!new) - return NULL; - - new->unit = unit; - if (MAJOR(unit) == MD_MAJOR) - new->md_minor = MINOR(unit); - else - new->md_minor = MINOR(unit) >> MdpMinorShift; - - mddev_init(new); - - goto retry; -} - -static inline int mddev_lock(struct mddev * mddev) -{ - return mutex_lock_interruptible(&mddev->reconfig_mutex); -} - -static inline int mddev_is_locked(struct mddev *mddev) -{ - return mutex_is_locked(&mddev->reconfig_mutex); -} - -static inline int mddev_trylock(struct mddev * mddev) -{ - return mutex_trylock(&mddev->reconfig_mutex); -} - -static struct attribute_group md_redundancy_group; - -static void mddev_unlock(struct mddev * mddev) -{ - if (mddev->to_remove) { - /* These cannot be removed under reconfig_mutex as - * an access to the files will try to take reconfig_mutex - * while holding the file unremovable, which leads to - * a deadlock. - * So hold set sysfs_active while the remove in happeing, - * and anything else which might set ->to_remove or my - * otherwise change the sysfs namespace will fail with - * -EBUSY if sysfs_active is still set. - * We set sysfs_active under reconfig_mutex and elsewhere - * test it under the same mutex to ensure its correct value - * is seen. 
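When mddev_find() above is asked for an unspecified unit it walks minor numbers from a static cursor, wrapping at MINORMASK and giving up once it comes back to where it started. A compact sketch of that wrap-around scan over a toy in-use set; the mask value and the in-use helper are invented for the example.

```c
/* Sketch: wrap-around scan for a free minor number, as in mddev_find().
 * The in-use check and the mask are simplified stand-ins. */
#include <stdbool.h>
#include <stdio.h>

#define TOY_MINORMASK 1023              /* small mask for the example */

static bool minor_in_use(int minor)
{
	/* Pretend minors 512..515 are already taken. */
	return minor >= 512 && minor <= 515;
}

static int find_free_minor(void)
{
	static int next_minor = 512;        /* persistent cursor, as in md.c */
	int start = next_minor;

	for (;;) {
		int candidate = next_minor++;

		if (next_minor > TOY_MINORMASK)
			next_minor = 0;
		if (next_minor == start)
			return -1;          /* wrapped around: all in use */
		if (!minor_in_use(candidate))
			return candidate;
	}
}

int main(void)
{
	printf("first free minor: %d\n", find_free_minor());
	return 0;
}
```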
- */ - struct attribute_group *to_remove = mddev->to_remove; - mddev->to_remove = NULL; - mddev->sysfs_active = 1; - mutex_unlock(&mddev->reconfig_mutex); - - if (mddev->kobj.sd) { - if (to_remove != &md_redundancy_group) - sysfs_remove_group(&mddev->kobj, to_remove); - if (mddev->pers == NULL || - mddev->pers->sync_request == NULL) { - sysfs_remove_group(&mddev->kobj, &md_redundancy_group); - if (mddev->sysfs_action) - sysfs_put(mddev->sysfs_action); - mddev->sysfs_action = NULL; - } - } - mddev->sysfs_active = 0; - } else - mutex_unlock(&mddev->reconfig_mutex); - - /* As we've dropped the mutex we need a spinlock to - * make sure the thread doesn't disappear - */ - spin_lock(&pers_lock); - md_wakeup_thread(mddev->thread); - spin_unlock(&pers_lock); -} - -static struct md_rdev * find_rdev_nr(struct mddev *mddev, int nr) -{ - struct md_rdev *rdev; - - rdev_for_each(rdev, mddev) - if (rdev->desc_nr == nr) - return rdev; - - return NULL; -} - -static struct md_rdev * find_rdev(struct mddev * mddev, dev_t dev) -{ - struct md_rdev *rdev; - - rdev_for_each(rdev, mddev) - if (rdev->bdev->bd_dev == dev) - return rdev; - - return NULL; -} - -static struct md_personality *find_pers(int level, char *clevel) -{ - struct md_personality *pers; - list_for_each_entry(pers, &pers_list, list) { - if (level != LEVEL_NONE && pers->level == level) - return pers; - if (strcmp(pers->name, clevel)==0) - return pers; - } - return NULL; -} - -/* return the offset of the super block in 512byte sectors */ -static inline sector_t calc_dev_sboffset(struct md_rdev *rdev) -{ - sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512; - return MD_NEW_SIZE_SECTORS(num_sectors); -} - -static int alloc_disk_sb(struct md_rdev * rdev) -{ - if (rdev->sb_page) - MD_BUG(); - - rdev->sb_page = alloc_page(GFP_KERNEL); - if (!rdev->sb_page) { - printk(KERN_ALERT "md: out of memory.\n"); - return -ENOMEM; - } - - return 0; -} - -static void free_disk_sb(struct md_rdev * rdev) -{ - if (rdev->sb_page) { - put_page(rdev->sb_page); - rdev->sb_loaded = 0; - rdev->sb_page = NULL; - rdev->sb_start = 0; - rdev->sectors = 0; - } - if (rdev->bb_page) { - put_page(rdev->bb_page); - rdev->bb_page = NULL; - } -} - - -static void super_written(struct bio *bio, int error) -{ - struct md_rdev *rdev = bio->bi_private; - struct mddev *mddev = rdev->mddev; - - if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) { - printk("md: super_written gets error=%d, uptodate=%d\n", - error, test_bit(BIO_UPTODATE, &bio->bi_flags)); - WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags)); - md_error(mddev, rdev); - } - - if (atomic_dec_and_test(&mddev->pending_writes)) - wake_up(&mddev->sb_wait); - bio_put(bio); -} - -void md_super_write(struct mddev *mddev, struct md_rdev *rdev, - sector_t sector, int size, struct page *page) -{ - /* write first size bytes of page to sector of rdev - * Increment mddev->pending_writes before returning - * and decrement it on completion, waking up sb_wait - * if zero is reached. - * If an error occurred, call md_error - */ - struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev); - - bio->bi_bdev = rdev->meta_bdev ? 
rdev->meta_bdev : rdev->bdev; - bio->bi_sector = sector; - bio_add_page(bio, page, size, 0); - bio->bi_private = rdev; - bio->bi_end_io = super_written; - - atomic_inc(&mddev->pending_writes); - submit_bio(WRITE_FLUSH_FUA, bio); -} - -void md_super_wait(struct mddev *mddev) -{ - /* wait for all superblock writes that were scheduled to complete */ - DEFINE_WAIT(wq); - for(;;) { - prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE); - if (atomic_read(&mddev->pending_writes)==0) - break; - schedule(); - } - finish_wait(&mddev->sb_wait, &wq); -} - -static void bi_complete(struct bio *bio, int error) -{ - complete((struct completion*)bio->bi_private); -} - -int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, - struct page *page, int rw, bool metadata_op) -{ - struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev); - struct completion event; - int ret; - - rw |= REQ_SYNC; - - bio->bi_bdev = (metadata_op && rdev->meta_bdev) ? - rdev->meta_bdev : rdev->bdev; - if (metadata_op) - bio->bi_sector = sector + rdev->sb_start; - else - bio->bi_sector = sector + rdev->data_offset; - bio_add_page(bio, page, size, 0); - init_completion(&event); - bio->bi_private = &event; - bio->bi_end_io = bi_complete; - submit_bio(rw, bio); - wait_for_completion(&event); - - ret = test_bit(BIO_UPTODATE, &bio->bi_flags); - bio_put(bio); - return ret; -} -EXPORT_SYMBOL_GPL(sync_page_io); - -static int read_disk_sb(struct md_rdev * rdev, int size) -{ - char b[BDEVNAME_SIZE]; - if (!rdev->sb_page) { - MD_BUG(); - return -EINVAL; - } - if (rdev->sb_loaded) - return 0; - - - if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true)) - goto fail; - rdev->sb_loaded = 1; - return 0; - -fail: - printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n", - bdevname(rdev->bdev,b)); - return -EINVAL; -} - -static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2) -{ - return sb1->set_uuid0 == sb2->set_uuid0 && - sb1->set_uuid1 == sb2->set_uuid1 && - sb1->set_uuid2 == sb2->set_uuid2 && - sb1->set_uuid3 == sb2->set_uuid3; -} - -static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2) -{ - int ret; - mdp_super_t *tmp1, *tmp2; - - tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL); - tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL); - - if (!tmp1 || !tmp2) { - ret = 0; - printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n"); - goto abort; - } - - *tmp1 = *sb1; - *tmp2 = *sb2; - - /* - * nr_disks is not constant - */ - tmp1->nr_disks = 0; - tmp2->nr_disks = 0; - - ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0); -abort: - kfree(tmp1); - kfree(tmp2); - return ret; -} - - -static u32 md_csum_fold(u32 csum) -{ - csum = (csum & 0xffff) + (csum >> 16); - return (csum & 0xffff) + (csum >> 16); -} - -static unsigned int calc_sb_csum(mdp_super_t * sb) -{ - u64 newcsum = 0; - u32 *sb32 = (u32*)sb; - int i; - unsigned int disk_csum, csum; - - disk_csum = sb->sb_csum; - sb->sb_csum = 0; - - for (i = 0; i < MD_SB_BYTES/4 ; i++) - newcsum += sb32[i]; - csum = (newcsum & 0xffffffff) + (newcsum>>32); - - -#ifdef CONFIG_ALPHA - /* This used to use csum_partial, which was wrong for several - * reasons including that different results are returned on - * different architectures. It isn't critical that we get exactly - * the same return value as before (we always csum_fold before - * testing, and that removes any differences). However as we - * know that csum_partial always returned a 16bit value on - * alphas, do a fold to maximise conformity to previous behaviour. 
- */ - sb->sb_csum = md_csum_fold(disk_csum); -#else - sb->sb_csum = disk_csum; -#endif - return csum; -} - - -/* - * Handle superblock details. - * We want to be able to handle multiple superblock formats - * so we have a common interface to them all, and an array of - * different handlers. - * We rely on user-space to write the initial superblock, and support - * reading and updating of superblocks. - * Interface methods are: - * int load_super(struct md_rdev *dev, struct md_rdev *refdev, int minor_version) - * loads and validates a superblock on dev. - * if refdev != NULL, compare superblocks on both devices - * Return: - * 0 - dev has a superblock that is compatible with refdev - * 1 - dev has a superblock that is compatible and newer than refdev - * so dev should be used as the refdev in future - * -EINVAL superblock incompatible or invalid - * -othererror e.g. -EIO - * - * int validate_super(struct mddev *mddev, struct md_rdev *dev) - * Verify that dev is acceptable into mddev. - * The first time, mddev->raid_disks will be 0, and data from - * dev should be merged in. Subsequent calls check that dev - * is new enough. Return 0 or -EINVAL - * - * void sync_super(struct mddev *mddev, struct md_rdev *dev) - * Update the superblock for rdev with data in mddev - * This does not write to disc. - * - */ - -struct super_type { - char *name; - struct module *owner; - int (*load_super)(struct md_rdev *rdev, struct md_rdev *refdev, - int minor_version); - int (*validate_super)(struct mddev *mddev, struct md_rdev *rdev); - void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); - unsigned long long (*rdev_size_change)(struct md_rdev *rdev, - sector_t num_sectors); -}; - -/* - * Check that the given mddev has no bitmap. - * - * This function is called from the run method of all personalities that do not - * support bitmaps. It prints an error message and returns non-zero if mddev - * has a bitmap. Otherwise, it returns 0. - * - */ -int md_check_no_bitmap(struct mddev *mddev) -{ - if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset) - return 0; - printk(KERN_ERR "%s: bitmaps are not supported for %s\n", - mdname(mddev), mddev->pers->name); - return 1; -} -EXPORT_SYMBOL(md_check_no_bitmap); - -/* - * load_super for 0.90.0 - */ -static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version) -{ - char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; - mdp_super_t *sb; - int ret; - - /* - * Calculate the position of the superblock (512byte sectors), - * it's at the end of the disk. - * - * It also happens to be a multiple of 4Kb. 
- */ - rdev->sb_start = calc_dev_sboffset(rdev); - - ret = read_disk_sb(rdev, MD_SB_BYTES); - if (ret) return ret; - - ret = -EINVAL; - - bdevname(rdev->bdev, b); - sb = page_address(rdev->sb_page); - - if (sb->md_magic != MD_SB_MAGIC) { - printk(KERN_ERR "md: invalid raid superblock magic on %s\n", - b); - goto abort; - } - - if (sb->major_version != 0 || - sb->minor_version < 90 || - sb->minor_version > 91) { - printk(KERN_WARNING "Bad version number %d.%d on %s\n", - sb->major_version, sb->minor_version, - b); - goto abort; - } - - if (sb->raid_disks <= 0) - goto abort; - - if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) { - printk(KERN_WARNING "md: invalid superblock checksum on %s\n", - b); - goto abort; - } - - rdev->preferred_minor = sb->md_minor; - rdev->data_offset = 0; - rdev->sb_size = MD_SB_BYTES; - rdev->badblocks.shift = -1; - - if (sb->level == LEVEL_MULTIPATH) - rdev->desc_nr = -1; - else - rdev->desc_nr = sb->this_disk.number; - - if (!refdev) { - ret = 1; - } else { - __u64 ev1, ev2; - mdp_super_t *refsb = page_address(refdev->sb_page); - if (!uuid_equal(refsb, sb)) { - printk(KERN_WARNING "md: %s has different UUID to %s\n", - b, bdevname(refdev->bdev,b2)); - goto abort; - } - if (!sb_equal(refsb, sb)) { - printk(KERN_WARNING "md: %s has same UUID" - " but different superblock to %s\n", - b, bdevname(refdev->bdev, b2)); - goto abort; - } - ev1 = md_event(sb); - ev2 = md_event(refsb); - if (ev1 > ev2) - ret = 1; - else - ret = 0; - } - rdev->sectors = rdev->sb_start; - /* Limit to 4TB as metadata cannot record more than that */ - if (rdev->sectors >= (2ULL << 32)) - rdev->sectors = (2ULL << 32) - 2; - - if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1) - /* "this cannot possibly happen" ... */ - ret = -EINVAL; - - abort: - return ret; -} - -/* - * validate_super for 0.90.0 - */ -static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) -{ - mdp_disk_t *desc; - mdp_super_t *sb = page_address(rdev->sb_page); - __u64 ev1 = md_event(sb); - - rdev->raid_disk = -1; - clear_bit(Faulty, &rdev->flags); - clear_bit(In_sync, &rdev->flags); - clear_bit(WriteMostly, &rdev->flags); - - if (mddev->raid_disks == 0) { - mddev->major_version = 0; - mddev->minor_version = sb->minor_version; - mddev->patch_version = sb->patch_version; - mddev->external = 0; - mddev->chunk_sectors = sb->chunk_size >> 9; - mddev->ctime = sb->ctime; - mddev->utime = sb->utime; - mddev->level = sb->level; - mddev->clevel[0] = 0; - mddev->layout = sb->layout; - mddev->raid_disks = sb->raid_disks; - mddev->dev_sectors = ((sector_t)sb->size) * 2; - mddev->events = ev1; - mddev->bitmap_info.offset = 0; - mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9; - - if (mddev->minor_version >= 91) { - mddev->reshape_position = sb->reshape_position; - mddev->delta_disks = sb->delta_disks; - mddev->new_level = sb->new_level; - mddev->new_layout = sb->new_layout; - mddev->new_chunk_sectors = sb->new_chunk >> 9; - } else { - mddev->reshape_position = MaxSector; - mddev->delta_disks = 0; - mddev->new_level = mddev->level; - mddev->new_layout = mddev->layout; - mddev->new_chunk_sectors = mddev->chunk_sectors; - } - - if (sb->state & (1<<MD_SB_CLEAN)) - mddev->recovery_cp = MaxSector; - else { - if (sb->events_hi == sb->cp_events_hi && - sb->events_lo == sb->cp_events_lo) { - mddev->recovery_cp = sb->recovery_cp; - } else - mddev->recovery_cp = 0; - } - - memcpy(mddev->uuid+0, &sb->set_uuid0, 4); - memcpy(mddev->uuid+4, &sb->set_uuid1, 4); - memcpy(mddev->uuid+8, &sb->set_uuid2, 4); 
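As an aside on the 0.90 checksum that super_90_load() verifies above: the superblock is summed as 32-bit words with the csum field zeroed, and both sides are folded to 16 bits before comparison. A minimal user-space sketch, assuming MD_SB_BYTES is 4096 as in md_p.h; the buffer and the csum word index here are stand-ins, not a real mdp_super_t:

/* Sketch of the 0.90 superblock checksum; not the kernel code itself. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MD_SB_BYTES 4096   /* assumed value of the 0.90 superblock size */

static uint32_t csum_fold(uint32_t csum)
{
	csum = (csum & 0xffff) + (csum >> 16);
	return (csum & 0xffff) + (csum >> 16);
}

/* Sum the superblock as 32-bit words, with the csum word itself zeroed. */
static uint32_t sb_csum(uint32_t *sb32, size_t csum_word_index)
{
	uint64_t newcsum = 0;
	uint32_t saved = sb32[csum_word_index];
	size_t i;

	sb32[csum_word_index] = 0;
	for (i = 0; i < MD_SB_BYTES / 4; i++)
		newcsum += sb32[i];
	sb32[csum_word_index] = saved;
	return (uint32_t)(newcsum & 0xffffffff) + (uint32_t)(newcsum >> 32);
}

int main(void)
{
	static uint32_t buf[MD_SB_BYTES / 4];

	memset(buf, 0xab, sizeof(buf));
	buf[1] = 0x12345678;   /* pretend this word is sb_csum */
	printf("folded csum: 0x%04x\n", csum_fold(sb_csum(buf, 1)));
	return 0;
}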
- memcpy(mddev->uuid+12,&sb->set_uuid3, 4); - - mddev->max_disks = MD_SB_DISKS; - - if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && - mddev->bitmap_info.file == NULL) - mddev->bitmap_info.offset = - mddev->bitmap_info.default_offset; - - } else if (mddev->pers == NULL) { - /* Insist on good event counter while assembling, except - * for spares (which don't need an event count) */ - ++ev1; - if (sb->disks[rdev->desc_nr].state & ( - (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))) - if (ev1 < mddev->events) - return -EINVAL; - } else if (mddev->bitmap) { - /* if adding to array with a bitmap, then we can accept an - * older device ... but not too old. - */ - if (ev1 < mddev->bitmap->events_cleared) - return 0; - } else { - if (ev1 < mddev->events) - /* just a hot-add of a new device, leave raid_disk at -1 */ - return 0; - } - - if (mddev->level != LEVEL_MULTIPATH) { - desc = sb->disks + rdev->desc_nr; - - if (desc->state & (1<<MD_DISK_FAULTY)) - set_bit(Faulty, &rdev->flags); - else if (desc->state & (1<<MD_DISK_SYNC) /* && - desc->raid_disk < mddev->raid_disks */) { - set_bit(In_sync, &rdev->flags); - rdev->raid_disk = desc->raid_disk; - } else if (desc->state & (1<<MD_DISK_ACTIVE)) { - /* active but not in sync implies recovery up to - * reshape position. We don't know exactly where - * that is, so set to zero for now */ - if (mddev->minor_version >= 91) { - rdev->recovery_offset = 0; - rdev->raid_disk = desc->raid_disk; - } - } - if (desc->state & (1<<MD_DISK_WRITEMOSTLY)) - set_bit(WriteMostly, &rdev->flags); - } else /* MULTIPATH are always insync */ - set_bit(In_sync, &rdev->flags); - return 0; -} - -/* - * sync_super for 0.90.0 - */ -static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev) -{ - mdp_super_t *sb; - struct md_rdev *rdev2; - int next_spare = mddev->raid_disks; - - - /* make rdev->sb match mddev data.. - * - * 1/ zero out disks - * 2/ Add info for each disk, keeping track of highest desc_nr (next_spare); - * 3/ any empty disks < next_spare become removed - * - * disks[0] gets initialised to REMOVED because - * we cannot be sure from other fields if it has - * been initialised or not. 
- */ - int i; - int active=0, working=0,failed=0,spare=0,nr_disks=0; - - rdev->sb_size = MD_SB_BYTES; - - sb = page_address(rdev->sb_page); - - memset(sb, 0, sizeof(*sb)); - - sb->md_magic = MD_SB_MAGIC; - sb->major_version = mddev->major_version; - sb->patch_version = mddev->patch_version; - sb->gvalid_words = 0; /* ignored */ - memcpy(&sb->set_uuid0, mddev->uuid+0, 4); - memcpy(&sb->set_uuid1, mddev->uuid+4, 4); - memcpy(&sb->set_uuid2, mddev->uuid+8, 4); - memcpy(&sb->set_uuid3, mddev->uuid+12,4); - - sb->ctime = mddev->ctime; - sb->level = mddev->level; - sb->size = mddev->dev_sectors / 2; - sb->raid_disks = mddev->raid_disks; - sb->md_minor = mddev->md_minor; - sb->not_persistent = 0; - sb->utime = mddev->utime; - sb->state = 0; - sb->events_hi = (mddev->events>>32); - sb->events_lo = (u32)mddev->events; - - if (mddev->reshape_position == MaxSector) - sb->minor_version = 90; - else { - sb->minor_version = 91; - sb->reshape_position = mddev->reshape_position; - sb->new_level = mddev->new_level; - sb->delta_disks = mddev->delta_disks; - sb->new_layout = mddev->new_layout; - sb->new_chunk = mddev->new_chunk_sectors << 9; - } - mddev->minor_version = sb->minor_version; - if (mddev->in_sync) - { - sb->recovery_cp = mddev->recovery_cp; - sb->cp_events_hi = (mddev->events>>32); - sb->cp_events_lo = (u32)mddev->events; - if (mddev->recovery_cp == MaxSector) - sb->state = (1<< MD_SB_CLEAN); - } else - sb->recovery_cp = 0; - - sb->layout = mddev->layout; - sb->chunk_size = mddev->chunk_sectors << 9; - - if (mddev->bitmap && mddev->bitmap_info.file == NULL) - sb->state |= (1<<MD_SB_BITMAP_PRESENT); - - sb->disks[0].state = (1<<MD_DISK_REMOVED); - rdev_for_each(rdev2, mddev) { - mdp_disk_t *d; - int desc_nr; - int is_active = test_bit(In_sync, &rdev2->flags); - - if (rdev2->raid_disk >= 0 && - sb->minor_version >= 91) - /* we have nowhere to store the recovery_offset, - * but if it is not below the reshape_position, - * we can piggy-back on that. 
- */ - is_active = 1; - if (rdev2->raid_disk < 0 || - test_bit(Faulty, &rdev2->flags)) - is_active = 0; - if (is_active) - desc_nr = rdev2->raid_disk; - else - desc_nr = next_spare++; - rdev2->desc_nr = desc_nr; - d = &sb->disks[rdev2->desc_nr]; - nr_disks++; - d->number = rdev2->desc_nr; - d->major = MAJOR(rdev2->bdev->bd_dev); - d->minor = MINOR(rdev2->bdev->bd_dev); - if (is_active) - d->raid_disk = rdev2->raid_disk; - else - d->raid_disk = rdev2->desc_nr; /* compatibility */ - if (test_bit(Faulty, &rdev2->flags)) - d->state = (1<<MD_DISK_FAULTY); - else if (is_active) { - d->state = (1<<MD_DISK_ACTIVE); - if (test_bit(In_sync, &rdev2->flags)) - d->state |= (1<<MD_DISK_SYNC); - active++; - working++; - } else { - d->state = 0; - spare++; - working++; - } - if (test_bit(WriteMostly, &rdev2->flags)) - d->state |= (1<<MD_DISK_WRITEMOSTLY); - } - /* now set the "removed" and "faulty" bits on any missing devices */ - for (i=0 ; i < mddev->raid_disks ; i++) { - mdp_disk_t *d = &sb->disks[i]; - if (d->state == 0 && d->number == 0) { - d->number = i; - d->raid_disk = i; - d->state = (1<<MD_DISK_REMOVED); - d->state |= (1<<MD_DISK_FAULTY); - failed++; - } - } - sb->nr_disks = nr_disks; - sb->active_disks = active; - sb->working_disks = working; - sb->failed_disks = failed; - sb->spare_disks = spare; - - sb->this_disk = sb->disks[rdev->desc_nr]; - sb->sb_csum = calc_sb_csum(sb); -} - -/* - * rdev_size_change for 0.90.0 - */ -static unsigned long long -super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) -{ - if (num_sectors && num_sectors < rdev->mddev->dev_sectors) - return 0; /* component must fit device */ - if (rdev->mddev->bitmap_info.offset) - return 0; /* can't move bitmap */ - rdev->sb_start = calc_dev_sboffset(rdev); - if (!num_sectors || num_sectors > rdev->sb_start) - num_sectors = rdev->sb_start; - /* Limit to 4TB as metadata cannot record more than that. - * 4TB == 2^32 KB, or 2*2^32 sectors. - */ - if (num_sectors >= (2ULL << 32)) - num_sectors = (2ULL << 32) - 2; - md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, - rdev->sb_page); - md_super_wait(rdev->mddev); - return num_sectors; -} - - -/* - * version 1 superblock - */ - -static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb) -{ - __le32 disk_csum; - u32 csum; - unsigned long long newcsum; - int size = 256 + le32_to_cpu(sb->max_dev)*2; - __le32 *isuper = (__le32*)sb; - int i; - - disk_csum = sb->sb_csum; - sb->sb_csum = 0; - newcsum = 0; - for (i=0; size>=4; size -= 4 ) - newcsum += le32_to_cpu(*isuper++); - - if (size == 2) - newcsum += le16_to_cpu(*(__le16*) isuper); - - csum = (newcsum & 0xffffffff) + (newcsum >> 32); - sb->sb_csum = disk_csum; - return cpu_to_le32(csum); -} - -static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors, - int acknowledged); -static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version) -{ - struct mdp_superblock_1 *sb; - int ret; - sector_t sb_start; - char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; - int bmask; - - /* - * Calculate the position of the superblock in 512byte sectors. - * It is always aligned to a 4K boundary and - * depeding on minor_version, it can be: - * 0: At least 8K, but less than 12K, from end of device - * 1: At start of device - * 2: 4K from start of device. 
- */ - switch(minor_version) { - case 0: - sb_start = i_size_read(rdev->bdev->bd_inode) >> 9; - sb_start -= 8*2; - sb_start &= ~(sector_t)(4*2-1); - break; - case 1: - sb_start = 0; - break; - case 2: - sb_start = 8; - break; - default: - return -EINVAL; - } - rdev->sb_start = sb_start; - - /* superblock is rarely larger than 1K, but it can be larger, - * and it is safe to read 4k, so we do that - */ - ret = read_disk_sb(rdev, 4096); - if (ret) return ret; - - - sb = page_address(rdev->sb_page); - - if (sb->magic != cpu_to_le32(MD_SB_MAGIC) || - sb->major_version != cpu_to_le32(1) || - le32_to_cpu(sb->max_dev) > (4096-256)/2 || - le64_to_cpu(sb->super_offset) != rdev->sb_start || - (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0) - return -EINVAL; - - if (calc_sb_1_csum(sb) != sb->sb_csum) { - printk("md: invalid superblock checksum on %s\n", - bdevname(rdev->bdev,b)); - return -EINVAL; - } - if (le64_to_cpu(sb->data_size) < 10) { - printk("md: data_size too small on %s\n", - bdevname(rdev->bdev,b)); - return -EINVAL; - } - - rdev->preferred_minor = 0xffff; - rdev->data_offset = le64_to_cpu(sb->data_offset); - atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read)); - - rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; - bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1; - if (rdev->sb_size & bmask) - rdev->sb_size = (rdev->sb_size | bmask) + 1; - - if (minor_version - && rdev->data_offset < sb_start + (rdev->sb_size/512)) - return -EINVAL; - - if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) - rdev->desc_nr = -1; - else - rdev->desc_nr = le32_to_cpu(sb->dev_number); - - if (!rdev->bb_page) { - rdev->bb_page = alloc_page(GFP_KERNEL); - if (!rdev->bb_page) - return -ENOMEM; - } - if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) && - rdev->badblocks.count == 0) { - /* need to load the bad block list. - * Currently we limit it to one page. 
- */ - s32 offset; - sector_t bb_sector; - u64 *bbp; - int i; - int sectors = le16_to_cpu(sb->bblog_size); - if (sectors > (PAGE_SIZE / 512)) - return -EINVAL; - offset = le32_to_cpu(sb->bblog_offset); - if (offset == 0) - return -EINVAL; - bb_sector = (long long)offset; - if (!sync_page_io(rdev, bb_sector, sectors << 9, - rdev->bb_page, READ, true)) - return -EIO; - bbp = (u64 *)page_address(rdev->bb_page); - rdev->badblocks.shift = sb->bblog_shift; - for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) { - u64 bb = le64_to_cpu(*bbp); - int count = bb & (0x3ff); - u64 sector = bb >> 10; - sector <<= sb->bblog_shift; - count <<= sb->bblog_shift; - if (bb + 1 == 0) - break; - if (md_set_badblocks(&rdev->badblocks, - sector, count, 1) == 0) - return -EINVAL; - } - } else if (sb->bblog_offset == 0) - rdev->badblocks.shift = -1; - - if (!refdev) { - ret = 1; - } else { - __u64 ev1, ev2; - struct mdp_superblock_1 *refsb = page_address(refdev->sb_page); - - if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 || - sb->level != refsb->level || - sb->layout != refsb->layout || - sb->chunksize != refsb->chunksize) { - printk(KERN_WARNING "md: %s has strangely different" - " superblock to %s\n", - bdevname(rdev->bdev,b), - bdevname(refdev->bdev,b2)); - return -EINVAL; - } - ev1 = le64_to_cpu(sb->events); - ev2 = le64_to_cpu(refsb->events); - - if (ev1 > ev2) - ret = 1; - else - ret = 0; - } - if (minor_version) - rdev->sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) - - le64_to_cpu(sb->data_offset); - else - rdev->sectors = rdev->sb_start; - if (rdev->sectors < le64_to_cpu(sb->data_size)) - return -EINVAL; - rdev->sectors = le64_to_cpu(sb->data_size); - if (le64_to_cpu(sb->size) > rdev->sectors) - return -EINVAL; - return ret; -} - -static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) -{ - struct mdp_superblock_1 *sb = page_address(rdev->sb_page); - __u64 ev1 = le64_to_cpu(sb->events); - - rdev->raid_disk = -1; - clear_bit(Faulty, &rdev->flags); - clear_bit(In_sync, &rdev->flags); - clear_bit(WriteMostly, &rdev->flags); - - if (mddev->raid_disks == 0) { - mddev->major_version = 1; - mddev->patch_version = 0; - mddev->external = 0; - mddev->chunk_sectors = le32_to_cpu(sb->chunksize); - mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1); - mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1); - mddev->level = le32_to_cpu(sb->level); - mddev->clevel[0] = 0; - mddev->layout = le32_to_cpu(sb->layout); - mddev->raid_disks = le32_to_cpu(sb->raid_disks); - mddev->dev_sectors = le64_to_cpu(sb->size); - mddev->events = ev1; - mddev->bitmap_info.offset = 0; - mddev->bitmap_info.default_offset = 1024 >> 9; - - mddev->recovery_cp = le64_to_cpu(sb->resync_offset); - memcpy(mddev->uuid, sb->set_uuid, 16); - - mddev->max_disks = (4096-256)/2; - - if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) && - mddev->bitmap_info.file == NULL ) - mddev->bitmap_info.offset = - (__s32)le32_to_cpu(sb->bitmap_offset); - - if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) { - mddev->reshape_position = le64_to_cpu(sb->reshape_position); - mddev->delta_disks = le32_to_cpu(sb->delta_disks); - mddev->new_level = le32_to_cpu(sb->new_level); - mddev->new_layout = le32_to_cpu(sb->new_layout); - mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk); - } else { - mddev->reshape_position = MaxSector; - mddev->delta_disks = 0; - mddev->new_level = mddev->level; - mddev->new_layout = mddev->layout; - mddev->new_chunk_sectors = mddev->chunk_sectors; - } - - } else if (mddev->pers == NULL) { - 
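For reference, the v1.x superblock placement that super_1_load() computes above depends only on minor_version and the device size. A small user-space sketch of those placement rules; the 1 TiB device size is just an example:

/* Sketch of v1.x superblock placement, in 512-byte sectors. */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

static long long sb_start_v1(sector_t dev_sectors, int minor_version)
{
	sector_t sb_start;

	switch (minor_version) {
	case 0:				/* 8K-12K from the end, 4K aligned */
		sb_start = dev_sectors;
		sb_start -= 8 * 2;			/* back off 8K (16 sectors) */
		sb_start &= ~(sector_t)(4 * 2 - 1);	/* round down to 4K */
		return (long long)sb_start;
	case 1:				/* at the very start of the device */
		return 0;
	case 2:				/* 4K from the start */
		return 8;
	default:
		return -1;
	}
}

int main(void)
{
	sector_t dev = 1ULL << 31;	/* a hypothetical 1 TiB device */

	for (int mv = 0; mv <= 2; mv++)
		printf("minor_version %d -> sb_start %lld\n",
		       mv, sb_start_v1(dev, mv));
	return 0;
}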
/* Insist of good event counter while assembling, except for - * spares (which don't need an event count) */ - ++ev1; - if (rdev->desc_nr >= 0 && - rdev->desc_nr < le32_to_cpu(sb->max_dev) && - le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < 0xfffe) - if (ev1 < mddev->events) - return -EINVAL; - } else if (mddev->bitmap) { - /* If adding to array with a bitmap, then we can accept an - * older device, but not too old. - */ - if (ev1 < mddev->bitmap->events_cleared) - return 0; - } else { - if (ev1 < mddev->events) - /* just a hot-add of a new device, leave raid_disk at -1 */ - return 0; - } - if (mddev->level != LEVEL_MULTIPATH) { - int role; - if (rdev->desc_nr < 0 || - rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { - role = 0xffff; - rdev->desc_nr = -1; - } else - role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); - switch(role) { - case 0xffff: /* spare */ - break; - case 0xfffe: /* faulty */ - set_bit(Faulty, &rdev->flags); - break; - default: - if ((le32_to_cpu(sb->feature_map) & - MD_FEATURE_RECOVERY_OFFSET)) - rdev->recovery_offset = le64_to_cpu(sb->recovery_offset); - else - set_bit(In_sync, &rdev->flags); - rdev->raid_disk = role; - break; - } - if (sb->devflags & WriteMostly1) - set_bit(WriteMostly, &rdev->flags); - if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT) - set_bit(Replacement, &rdev->flags); - } else /* MULTIPATH are always insync */ - set_bit(In_sync, &rdev->flags); - - return 0; -} - -static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) -{ - struct mdp_superblock_1 *sb; - struct md_rdev *rdev2; - int max_dev, i; - /* make rdev->sb match mddev and rdev data. */ - - sb = page_address(rdev->sb_page); - - sb->feature_map = 0; - sb->pad0 = 0; - sb->recovery_offset = cpu_to_le64(0); - memset(sb->pad1, 0, sizeof(sb->pad1)); - memset(sb->pad3, 0, sizeof(sb->pad3)); - - sb->utime = cpu_to_le64((__u64)mddev->utime); - sb->events = cpu_to_le64(mddev->events); - if (mddev->in_sync) - sb->resync_offset = cpu_to_le64(mddev->recovery_cp); - else - sb->resync_offset = cpu_to_le64(0); - - sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors)); - - sb->raid_disks = cpu_to_le32(mddev->raid_disks); - sb->size = cpu_to_le64(mddev->dev_sectors); - sb->chunksize = cpu_to_le32(mddev->chunk_sectors); - sb->level = cpu_to_le32(mddev->level); - sb->layout = cpu_to_le32(mddev->layout); - - if (test_bit(WriteMostly, &rdev->flags)) - sb->devflags |= WriteMostly1; - else - sb->devflags &= ~WriteMostly1; - - if (mddev->bitmap && mddev->bitmap_info.file == NULL) { - sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset); - sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); - } - - if (rdev->raid_disk >= 0 && - !test_bit(In_sync, &rdev->flags)) { - sb->feature_map |= - cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); - sb->recovery_offset = - cpu_to_le64(rdev->recovery_offset); - } - if (test_bit(Replacement, &rdev->flags)) - sb->feature_map |= - cpu_to_le32(MD_FEATURE_REPLACEMENT); - - if (mddev->reshape_position != MaxSector) { - sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); - sb->reshape_position = cpu_to_le64(mddev->reshape_position); - sb->new_layout = cpu_to_le32(mddev->new_layout); - sb->delta_disks = cpu_to_le32(mddev->delta_disks); - sb->new_level = cpu_to_le32(mddev->new_level); - sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors); - } - - if (rdev->badblocks.count == 0) - /* Nothing to do for bad blocks*/ ; - else if (sb->bblog_offset == 0) - /* Cannot record bad blocks on this device */ - md_error(mddev, rdev); - else { - struct 
badblocks *bb = &rdev->badblocks; - u64 *bbp = (u64 *)page_address(rdev->bb_page); - u64 *p = bb->page; - sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS); - if (bb->changed) { - unsigned seq; - -retry: - seq = read_seqbegin(&bb->lock); - - memset(bbp, 0xff, PAGE_SIZE); - - for (i = 0 ; i < bb->count ; i++) { - u64 internal_bb = *p++; - u64 store_bb = ((BB_OFFSET(internal_bb) << 10) - | BB_LEN(internal_bb)); - *bbp++ = cpu_to_le64(store_bb); - } - bb->changed = 0; - if (read_seqretry(&bb->lock, seq)) - goto retry; - - bb->sector = (rdev->sb_start + - (int)le32_to_cpu(sb->bblog_offset)); - bb->size = le16_to_cpu(sb->bblog_size); - } - } - - max_dev = 0; - rdev_for_each(rdev2, mddev) - if (rdev2->desc_nr+1 > max_dev) - max_dev = rdev2->desc_nr+1; - - if (max_dev > le32_to_cpu(sb->max_dev)) { - int bmask; - sb->max_dev = cpu_to_le32(max_dev); - rdev->sb_size = max_dev * 2 + 256; - bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1; - if (rdev->sb_size & bmask) - rdev->sb_size = (rdev->sb_size | bmask) + 1; - } else - max_dev = le32_to_cpu(sb->max_dev); - - for (i=0; i<max_dev;i++) - sb->dev_roles[i] = cpu_to_le16(0xfffe); - - rdev_for_each(rdev2, mddev) { - i = rdev2->desc_nr; - if (test_bit(Faulty, &rdev2->flags)) - sb->dev_roles[i] = cpu_to_le16(0xfffe); - else if (test_bit(In_sync, &rdev2->flags)) - sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); - else if (rdev2->raid_disk >= 0) - sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); - else - sb->dev_roles[i] = cpu_to_le16(0xffff); - } - - sb->sb_csum = calc_sb_1_csum(sb); -} - -static unsigned long long -super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) -{ - struct mdp_superblock_1 *sb; - sector_t max_sectors; - if (num_sectors && num_sectors < rdev->mddev->dev_sectors) - return 0; /* component must fit device */ - if (rdev->sb_start < rdev->data_offset) { - /* minor versions 1 and 2; superblock before data */ - max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9; - max_sectors -= rdev->data_offset; - if (!num_sectors || num_sectors > max_sectors) - num_sectors = max_sectors; - } else if (rdev->mddev->bitmap_info.offset) { - /* minor version 0 with bitmap we can't move */ - return 0; - } else { - /* minor version 0; superblock after data */ - sector_t sb_start; - sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2; - sb_start &= ~(sector_t)(4*2 - 1); - max_sectors = rdev->sectors + sb_start - rdev->sb_start; - if (!num_sectors || num_sectors > max_sectors) - num_sectors = max_sectors; - rdev->sb_start = sb_start; - } - sb = page_address(rdev->sb_page); - sb->data_size = cpu_to_le64(num_sectors); - sb->super_offset = rdev->sb_start; - sb->sb_csum = calc_sb_1_csum(sb); - md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, - rdev->sb_page); - md_super_wait(rdev->mddev); - return num_sectors; -} - -static struct super_type super_types[] = { - [0] = { - .name = "0.90.0", - .owner = THIS_MODULE, - .load_super = super_90_load, - .validate_super = super_90_validate, - .sync_super = super_90_sync, - .rdev_size_change = super_90_rdev_size_change, - }, - [1] = { - .name = "md-1", - .owner = THIS_MODULE, - .load_super = super_1_load, - .validate_super = super_1_validate, - .sync_super = super_1_sync, - .rdev_size_change = super_1_rdev_size_change, - }, -}; - -static void sync_super(struct mddev *mddev, struct md_rdev *rdev) -{ - if (mddev->sync_super) { - mddev->sync_super(mddev, rdev); - return; - } - - BUG_ON(mddev->major_version >= ARRAY_SIZE(super_types)); - - 
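The bad-block records that super_1_sync() writes above (and super_1_load() decodes earlier) pack a start and a length into one 64-bit word: the low 10 bits hold the length and the upper 54 bits the start, both in units of 2^bblog_shift sectors. A user-space sketch of that packing, ignoring the on-disk little-endian conversion:

/* Sketch of the v1.x bad-block record layout; byte order is omitted. */
#include <stdint.h>
#include <stdio.h>

static uint64_t bb_pack(uint64_t start, unsigned int len)
{
	return (start << 10) | (len & 0x3ff);
}

static void bb_unpack(uint64_t bb, int bblog_shift,
		      uint64_t *sector, uint64_t *count)
{
	*count = (bb & 0x3ff) << bblog_shift;
	*sector = (bb >> 10) << bblog_shift;
}

int main(void)
{
	uint64_t sector, count;
	uint64_t rec = bb_pack(123456, 8);	/* 8 units starting at 123456 */

	bb_unpack(rec, 0, &sector, &count);
	printf("start %llu, length %llu sectors\n",
	       (unsigned long long)sector, (unsigned long long)count);
	return 0;
}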
super_types[mddev->major_version].sync_super(mddev, rdev); -} - -static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2) -{ - struct md_rdev *rdev, *rdev2; - - rcu_read_lock(); - rdev_for_each_rcu(rdev, mddev1) - rdev_for_each_rcu(rdev2, mddev2) - if (rdev->bdev->bd_contains == - rdev2->bdev->bd_contains) { - rcu_read_unlock(); - return 1; - } - rcu_read_unlock(); - return 0; -} - -static LIST_HEAD(pending_raid_disks); - -/* - * Try to register data integrity profile for an mddev - * - * This is called when an array is started and after a disk has been kicked - * from the array. It only succeeds if all working and active component devices - * are integrity capable with matching profiles. - */ -int md_integrity_register(struct mddev *mddev) -{ - struct md_rdev *rdev, *reference = NULL; - - if (list_empty(&mddev->disks)) - return 0; /* nothing to do */ - if (!mddev->gendisk || blk_get_integrity(mddev->gendisk)) - return 0; /* shouldn't register, or already is */ - rdev_for_each(rdev, mddev) { - /* skip spares and non-functional disks */ - if (test_bit(Faulty, &rdev->flags)) - continue; - if (rdev->raid_disk < 0) - continue; - if (!reference) { - /* Use the first rdev as the reference */ - reference = rdev; - continue; - } - /* does this rdev's profile match the reference profile? */ - if (blk_integrity_compare(reference->bdev->bd_disk, - rdev->bdev->bd_disk) < 0) - return -EINVAL; - } - if (!reference || !bdev_get_integrity(reference->bdev)) - return 0; - /* - * All component devices are integrity capable and have matching - * profiles, register the common profile for the md device. - */ - if (blk_integrity_register(mddev->gendisk, - bdev_get_integrity(reference->bdev)) != 0) { - printk(KERN_ERR "md: failed to register integrity for %s\n", - mdname(mddev)); - return -EINVAL; - } - printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev)); - if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) { - printk(KERN_ERR "md: failed to create integrity pool for %s\n", - mdname(mddev)); - return -EINVAL; - } - return 0; -} -EXPORT_SYMBOL(md_integrity_register); - -/* Disable data integrity if non-capable/non-matching disk is being added */ -void md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev) -{ - struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); - struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk); - - if (!bi_mddev) /* nothing to do */ - return; - if (rdev->raid_disk < 0) /* skip spares */ - return; - if (bi_rdev && blk_integrity_compare(mddev->gendisk, - rdev->bdev->bd_disk) >= 0) - return; - printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev)); - blk_integrity_unregister(mddev->gendisk); -} -EXPORT_SYMBOL(md_integrity_add_rdev); - -static int bind_rdev_to_array(struct md_rdev * rdev, struct mddev * mddev) -{ - char b[BDEVNAME_SIZE]; - struct kobject *ko; - char *s; - int err; - - if (rdev->mddev) { - MD_BUG(); - return -EINVAL; - } - - /* prevent duplicates */ - if (find_rdev(mddev, rdev->bdev->bd_dev)) - return -EEXIST; - - /* make sure rdev->sectors exceeds mddev->dev_sectors */ - if (rdev->sectors && (mddev->dev_sectors == 0 || - rdev->sectors < mddev->dev_sectors)) { - if (mddev->pers) { - /* Cannot change size, so fail - * If mddev->level <= 0, then we don't care - * about aligning sizes (e.g. linear) - */ - if (mddev->level > 0) - return -ENOSPC; - } else - mddev->dev_sectors = rdev->sectors; - } - - /* Verify rdev->desc_nr is unique. 
- * If it is -1, assign a free number, else - * check number is not in use - */ - if (rdev->desc_nr < 0) { - int choice = 0; - if (mddev->pers) choice = mddev->raid_disks; - while (find_rdev_nr(mddev, choice)) - choice++; - rdev->desc_nr = choice; - } else { - if (find_rdev_nr(mddev, rdev->desc_nr)) - return -EBUSY; - } - if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) { - printk(KERN_WARNING "md: %s: array is limited to %d devices\n", - mdname(mddev), mddev->max_disks); - return -EBUSY; - } - bdevname(rdev->bdev,b); - while ( (s=strchr(b, '/')) != NULL) - *s = '!'; - - rdev->mddev = mddev; - printk(KERN_INFO "md: bind<%s>\n", b); - - if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) - goto fail; - - ko = &part_to_dev(rdev->bdev->bd_part)->kobj; - if (sysfs_create_link(&rdev->kobj, ko, "block")) - /* failure here is OK */; - rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state"); - - list_add_rcu(&rdev->same_set, &mddev->disks); - bd_link_disk_holder(rdev->bdev, mddev->gendisk); - - /* May as well allow recovery to be retried once */ - mddev->recovery_disabled++; - - return 0; - - fail: - printk(KERN_WARNING "md: failed to register dev-%s for %s\n", - b, mdname(mddev)); - return err; -} - -static void md_delayed_delete(struct work_struct *ws) -{ - struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work); - kobject_del(&rdev->kobj); - kobject_put(&rdev->kobj); -} - -static void unbind_rdev_from_array(struct md_rdev * rdev) -{ - char b[BDEVNAME_SIZE]; - if (!rdev->mddev) { - MD_BUG(); - return; - } - bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); - list_del_rcu(&rdev->same_set); - printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); - rdev->mddev = NULL; - sysfs_remove_link(&rdev->kobj, "block"); - sysfs_put(rdev->sysfs_state); - rdev->sysfs_state = NULL; - kfree(rdev->badblocks.page); - rdev->badblocks.count = 0; - rdev->badblocks.page = NULL; - /* We need to delay this, otherwise we can deadlock when - * writing to 'remove' to "dev/state". We also need - * to delay it due to rcu usage. - */ - synchronize_rcu(); - INIT_WORK(&rdev->del_work, md_delayed_delete); - kobject_get(&rdev->kobj); - queue_work(md_misc_wq, &rdev->del_work); -} - -/* - * prevent the device from being mounted, repartitioned or - * otherwise reused by a RAID array (or any other kernel - * subsystem), by bd_claiming the device. - */ -static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared) -{ - int err = 0; - struct block_device *bdev; - char b[BDEVNAME_SIZE]; - - bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - shared ? 
(struct md_rdev *)lock_rdev : rdev); - if (IS_ERR(bdev)) { - printk(KERN_ERR "md: could not open %s.\n", - __bdevname(dev, b)); - return PTR_ERR(bdev); - } - rdev->bdev = bdev; - return err; -} - -static void unlock_rdev(struct md_rdev *rdev) -{ - struct block_device *bdev = rdev->bdev; - rdev->bdev = NULL; - if (!bdev) - MD_BUG(); - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); -} - -void md_autodetect_dev(dev_t dev); - -static void export_rdev(struct md_rdev * rdev) -{ - char b[BDEVNAME_SIZE]; - printk(KERN_INFO "md: export_rdev(%s)\n", - bdevname(rdev->bdev,b)); - if (rdev->mddev) - MD_BUG(); - free_disk_sb(rdev); -#ifndef MODULE - if (test_bit(AutoDetected, &rdev->flags)) - md_autodetect_dev(rdev->bdev->bd_dev); -#endif - unlock_rdev(rdev); - kobject_put(&rdev->kobj); -} - -static void kick_rdev_from_array(struct md_rdev * rdev) -{ - unbind_rdev_from_array(rdev); - export_rdev(rdev); -} - -static void export_array(struct mddev *mddev) -{ - struct md_rdev *rdev, *tmp; - - rdev_for_each_safe(rdev, tmp, mddev) { - if (!rdev->mddev) { - MD_BUG(); - continue; - } - kick_rdev_from_array(rdev); - } - if (!list_empty(&mddev->disks)) - MD_BUG(); - mddev->raid_disks = 0; - mddev->major_version = 0; -} - -static void print_desc(mdp_disk_t *desc) -{ - printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number, - desc->major,desc->minor,desc->raid_disk,desc->state); -} - -static void print_sb_90(mdp_super_t *sb) -{ - int i; - - printk(KERN_INFO - "md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n", - sb->major_version, sb->minor_version, sb->patch_version, - sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3, - sb->ctime); - printk(KERN_INFO "md: L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n", - sb->level, sb->size, sb->nr_disks, sb->raid_disks, - sb->md_minor, sb->layout, sb->chunk_size); - printk(KERN_INFO "md: UT:%08x ST:%d AD:%d WD:%d" - " FD:%d SD:%d CSUM:%08x E:%08lx\n", - sb->utime, sb->state, sb->active_disks, sb->working_disks, - sb->failed_disks, sb->spare_disks, - sb->sb_csum, (unsigned long)sb->events_lo); - - printk(KERN_INFO); - for (i = 0; i < MD_SB_DISKS; i++) { - mdp_disk_t *desc; - - desc = sb->disks + i; - if (desc->number || desc->major || desc->minor || - desc->raid_disk || (desc->state && (desc->state != 4))) { - printk(" D %2d: ", i); - print_desc(desc); - } - } - printk(KERN_INFO "md: THIS: "); - print_desc(&sb->this_disk); -} - -static void print_sb_1(struct mdp_superblock_1 *sb) -{ - __u8 *uuid; - - uuid = sb->set_uuid; - printk(KERN_INFO - "md: SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n" - "md: Name: \"%s\" CT:%llu\n", - le32_to_cpu(sb->major_version), - le32_to_cpu(sb->feature_map), - uuid, - sb->set_name, - (unsigned long long)le64_to_cpu(sb->ctime) - & MD_SUPERBLOCK_1_TIME_SEC_MASK); - - uuid = sb->device_uuid; - printk(KERN_INFO - "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu" - " RO:%llu\n" - "md: Dev:%08x UUID: %pU\n" - "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n" - "md: (MaxDev:%u) \n", - le32_to_cpu(sb->level), - (unsigned long long)le64_to_cpu(sb->size), - le32_to_cpu(sb->raid_disks), - le32_to_cpu(sb->layout), - le32_to_cpu(sb->chunksize), - (unsigned long long)le64_to_cpu(sb->data_offset), - (unsigned long long)le64_to_cpu(sb->data_size), - (unsigned long long)le64_to_cpu(sb->super_offset), - (unsigned long long)le64_to_cpu(sb->recovery_offset), - le32_to_cpu(sb->dev_number), - uuid, - sb->devflags, - (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK, - (unsigned long 
long)le64_to_cpu(sb->events), - (unsigned long long)le64_to_cpu(sb->resync_offset), - le32_to_cpu(sb->sb_csum), - le32_to_cpu(sb->max_dev) - ); -} - -static void print_rdev(struct md_rdev *rdev, int major_version) -{ - char b[BDEVNAME_SIZE]; - printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n", - bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors, - test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags), - rdev->desc_nr); - if (rdev->sb_loaded) { - printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version); - switch (major_version) { - case 0: - print_sb_90(page_address(rdev->sb_page)); - break; - case 1: - print_sb_1(page_address(rdev->sb_page)); - break; - } - } else - printk(KERN_INFO "md: no rdev superblock!\n"); -} - -static void md_print_devices(void) -{ - struct list_head *tmp; - struct md_rdev *rdev; - struct mddev *mddev; - char b[BDEVNAME_SIZE]; - - printk("\n"); - printk("md: **********************************\n"); - printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n"); - printk("md: **********************************\n"); - for_each_mddev(mddev, tmp) { - - if (mddev->bitmap) - bitmap_print_sb(mddev->bitmap); - else - printk("%s: ", mdname(mddev)); - rdev_for_each(rdev, mddev) - printk("<%s>", bdevname(rdev->bdev,b)); - printk("\n"); - - rdev_for_each(rdev, mddev) - print_rdev(rdev, mddev->major_version); - } - printk("md: **********************************\n"); - printk("\n"); -} - - -static void sync_sbs(struct mddev * mddev, int nospares) -{ - /* Update each superblock (in-memory image), but - * if we are allowed to, skip spares which already - * have the right event counter, or have one earlier - * (which would mean they aren't being marked as dirty - * with the rest of the array) - */ - struct md_rdev *rdev; - rdev_for_each(rdev, mddev) { - if (rdev->sb_events == mddev->events || - (nospares && - rdev->raid_disk < 0 && - rdev->sb_events+1 == mddev->events)) { - /* Don't update this superblock */ - rdev->sb_loaded = 2; - } else { - sync_super(mddev, rdev); - rdev->sb_loaded = 1; - } - } -} - -static void md_update_sb(struct mddev * mddev, int force_change) -{ - struct md_rdev *rdev; - int sync_req; - int nospares = 0; - int any_badblocks_changed = 0; - -repeat: - /* First make sure individual recovery_offsets are correct */ - rdev_for_each(rdev, mddev) { - if (rdev->raid_disk >= 0 && - mddev->delta_disks >= 0 && - !test_bit(In_sync, &rdev->flags) && - mddev->curr_resync_completed > rdev->recovery_offset) - rdev->recovery_offset = mddev->curr_resync_completed; - - } - if (!mddev->persistent) { - clear_bit(MD_CHANGE_CLEAN, &mddev->flags); - clear_bit(MD_CHANGE_DEVS, &mddev->flags); - if (!mddev->external) { - clear_bit(MD_CHANGE_PENDING, &mddev->flags); - rdev_for_each(rdev, mddev) { - if (rdev->badblocks.changed) { - rdev->badblocks.changed = 0; - md_ack_all_badblocks(&rdev->badblocks); - md_error(mddev, rdev); - } - clear_bit(Blocked, &rdev->flags); - clear_bit(BlockedBadBlocks, &rdev->flags); - wake_up(&rdev->blocked_wait); - } - } - wake_up(&mddev->sb_wait); - return; - } - - spin_lock_irq(&mddev->write_lock); - - mddev->utime = get_seconds(); - - if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags)) - force_change = 1; - if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags)) - /* just a clean<-> dirty transition, possibly leave spares alone, - * though if events isn't the right even/odd, we will have to do - * spares after all - */ - nospares = 1; - if (force_change) - nospares = 0; - if (mddev->degraded) - /* If the array is degraded, 
then skipping spares is both - * dangerous and fairly pointless. - * Dangerous because a device that was removed from the array - * might have a event_count that still looks up-to-date, - * so it can be re-added without a resync. - * Pointless because if there are any spares to skip, - * then a recovery will happen and soon that array won't - * be degraded any more and the spare can go back to sleep then. - */ - nospares = 0; - - sync_req = mddev->in_sync; - - /* If this is just a dirty<->clean transition, and the array is clean - * and 'events' is odd, we can roll back to the previous clean state */ - if (nospares - && (mddev->in_sync && mddev->recovery_cp == MaxSector) - && mddev->can_decrease_events - && mddev->events != 1) { - mddev->events--; - mddev->can_decrease_events = 0; - } else { - /* otherwise we have to go forward and ... */ - mddev->events ++; - mddev->can_decrease_events = nospares; - } - - if (!mddev->events) { - /* - * oops, this 64-bit counter should never wrap. - * Either we are in around ~1 trillion A.C., assuming - * 1 reboot per second, or we have a bug: - */ - MD_BUG(); - mddev->events --; - } - - rdev_for_each(rdev, mddev) { - if (rdev->badblocks.changed) - any_badblocks_changed++; - if (test_bit(Faulty, &rdev->flags)) - set_bit(FaultRecorded, &rdev->flags); - } - - sync_sbs(mddev, nospares); - spin_unlock_irq(&mddev->write_lock); - - pr_debug("md: updating %s RAID superblock on device (in sync %d)\n", - mdname(mddev), mddev->in_sync); - - bitmap_update_sb(mddev->bitmap); - rdev_for_each(rdev, mddev) { - char b[BDEVNAME_SIZE]; - - if (rdev->sb_loaded != 1) - continue; /* no noise on spare devices */ - - if (!test_bit(Faulty, &rdev->flags) && - rdev->saved_raid_disk == -1) { - md_super_write(mddev,rdev, - rdev->sb_start, rdev->sb_size, - rdev->sb_page); - pr_debug("md: (write) %s's sb offset: %llu\n", - bdevname(rdev->bdev, b), - (unsigned long long)rdev->sb_start); - rdev->sb_events = mddev->events; - if (rdev->badblocks.size) { - md_super_write(mddev, rdev, - rdev->badblocks.sector, - rdev->badblocks.size << 9, - rdev->bb_page); - rdev->badblocks.size = 0; - } - - } else if (test_bit(Faulty, &rdev->flags)) - pr_debug("md: %s (skipping faulty)\n", - bdevname(rdev->bdev, b)); - else - pr_debug("(skipping incremental s/r "); - - if (mddev->level == LEVEL_MULTIPATH) - /* only need to write one superblock... */ - break; - } - md_super_wait(mddev); - /* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */ - - spin_lock_irq(&mddev->write_lock); - if (mddev->in_sync != sync_req || - test_bit(MD_CHANGE_DEVS, &mddev->flags)) { - /* have to write it out again */ - spin_unlock_irq(&mddev->write_lock); - goto repeat; - } - clear_bit(MD_CHANGE_PENDING, &mddev->flags); - spin_unlock_irq(&mddev->write_lock); - wake_up(&mddev->sb_wait); - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) - sysfs_notify(&mddev->kobj, NULL, "sync_completed"); - - rdev_for_each(rdev, mddev) { - if (test_and_clear_bit(FaultRecorded, &rdev->flags)) - clear_bit(Blocked, &rdev->flags); - - if (any_badblocks_changed) - md_ack_all_badblocks(&rdev->badblocks); - clear_bit(BlockedBadBlocks, &rdev->flags); - wake_up(&rdev->blocked_wait); - } -} - -/* words written to sysfs files may, or may not, be \n terminated. - * We want to accept with case. For this we use cmd_match. - */ -static int cmd_match(const char *cmd, const char *str) -{ - /* See if cmd, written into a sysfs file, matches - * str. 
They must either be the same, or cmd can - * have a trailing newline - */ - while (*cmd && *str && *cmd == *str) { - cmd++; - str++; - } - if (*cmd == '\n') - cmd++; - if (*str || *cmd) - return 0; - return 1; -} - -struct rdev_sysfs_entry { - struct attribute attr; - ssize_t (*show)(struct md_rdev *, char *); - ssize_t (*store)(struct md_rdev *, const char *, size_t); -}; - -static ssize_t -state_show(struct md_rdev *rdev, char *page) -{ - char *sep = ""; - size_t len = 0; - - if (test_bit(Faulty, &rdev->flags) || - rdev->badblocks.unacked_exist) { - len+= sprintf(page+len, "%sfaulty",sep); - sep = ","; - } - if (test_bit(In_sync, &rdev->flags)) { - len += sprintf(page+len, "%sin_sync",sep); - sep = ","; - } - if (test_bit(WriteMostly, &rdev->flags)) { - len += sprintf(page+len, "%swrite_mostly",sep); - sep = ","; - } - if (test_bit(Blocked, &rdev->flags) || - (rdev->badblocks.unacked_exist - && !test_bit(Faulty, &rdev->flags))) { - len += sprintf(page+len, "%sblocked", sep); - sep = ","; - } - if (!test_bit(Faulty, &rdev->flags) && - !test_bit(In_sync, &rdev->flags)) { - len += sprintf(page+len, "%sspare", sep); - sep = ","; - } - if (test_bit(WriteErrorSeen, &rdev->flags)) { - len += sprintf(page+len, "%swrite_error", sep); - sep = ","; - } - if (test_bit(WantReplacement, &rdev->flags)) { - len += sprintf(page+len, "%swant_replacement", sep); - sep = ","; - } - if (test_bit(Replacement, &rdev->flags)) { - len += sprintf(page+len, "%sreplacement", sep); - sep = ","; - } - - return len+sprintf(page+len, "\n"); -} - -static ssize_t -state_store(struct md_rdev *rdev, const char *buf, size_t len) -{ - /* can write - * faulty - simulates an error - * remove - disconnects the device - * writemostly - sets write_mostly - * -writemostly - clears write_mostly - * blocked - sets the Blocked flags - * -blocked - clears the Blocked and possibly simulates an error - * insync - sets Insync providing device isn't active - * write_error - sets WriteErrorSeen - * -write_error - clears WriteErrorSeen - */ - int err = -EINVAL; - if (cmd_match(buf, "faulty") && rdev->mddev->pers) { - md_error(rdev->mddev, rdev); - if (test_bit(Faulty, &rdev->flags)) - err = 0; - else - err = -EBUSY; - } else if (cmd_match(buf, "remove")) { - if (rdev->raid_disk >= 0) - err = -EBUSY; - else { - struct mddev *mddev = rdev->mddev; - kick_rdev_from_array(rdev); - if (mddev->pers) - md_update_sb(mddev, 1); - md_new_event(mddev); - err = 0; - } - } else if (cmd_match(buf, "writemostly")) { - set_bit(WriteMostly, &rdev->flags); - err = 0; - } else if (cmd_match(buf, "-writemostly")) { - clear_bit(WriteMostly, &rdev->flags); - err = 0; - } else if (cmd_match(buf, "blocked")) { - set_bit(Blocked, &rdev->flags); - err = 0; - } else if (cmd_match(buf, "-blocked")) { - if (!test_bit(Faulty, &rdev->flags) && - rdev->badblocks.unacked_exist) { - /* metadata handler doesn't understand badblocks, - * so we need to fail the device - */ - md_error(rdev->mddev, rdev); - } - clear_bit(Blocked, &rdev->flags); - clear_bit(BlockedBadBlocks, &rdev->flags); - wake_up(&rdev->blocked_wait); - set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); - md_wakeup_thread(rdev->mddev->thread); - - err = 0; - } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) { - set_bit(In_sync, &rdev->flags); - err = 0; - } else if (cmd_match(buf, "write_error")) { - set_bit(WriteErrorSeen, &rdev->flags); - err = 0; - } else if (cmd_match(buf, "-write_error")) { - clear_bit(WriteErrorSeen, &rdev->flags); - err = 0; - } else if (cmd_match(buf, "want_replacement")) 
{ - /* Any non-spare device that is not a replacement can - * become want_replacement at any time, but we then need to - * check if recovery is needed. - */ - if (rdev->raid_disk >= 0 && - !test_bit(Replacement, &rdev->flags)) - set_bit(WantReplacement, &rdev->flags); - set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); - md_wakeup_thread(rdev->mddev->thread); - err = 0; - } else if (cmd_match(buf, "-want_replacement")) { - /* Clearing 'want_replacement' is always allowed. - * Once replacements starts it is too late though. - */ - err = 0; - clear_bit(WantReplacement, &rdev->flags); - } else if (cmd_match(buf, "replacement")) { - /* Can only set a device as a replacement when array has not - * yet been started. Once running, replacement is automatic - * from spares, or by assigning 'slot'. - */ - if (rdev->mddev->pers) - err = -EBUSY; - else { - set_bit(Replacement, &rdev->flags); - err = 0; - } - } else if (cmd_match(buf, "-replacement")) { - /* Similarly, can only clear Replacement before start */ - if (rdev->mddev->pers) - err = -EBUSY; - else { - clear_bit(Replacement, &rdev->flags); - err = 0; - } - } - if (!err) - sysfs_notify_dirent_safe(rdev->sysfs_state); - return err ? err : len; -} -static struct rdev_sysfs_entry rdev_state = -__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store); - -static ssize_t -errors_show(struct md_rdev *rdev, char *page) -{ - return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors)); -} - -static ssize_t -errors_store(struct md_rdev *rdev, const char *buf, size_t len) -{ - char *e; - unsigned long n = simple_strtoul(buf, &e, 10); - if (*buf && (*e == 0 || *e == '\n')) { - atomic_set(&rdev->corrected_errors, n); - return len; - } - return -EINVAL; -} -static struct rdev_sysfs_entry rdev_errors = -__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store); - -static ssize_t -slot_show(struct md_rdev *rdev, char *page) -{ - if (rdev->raid_disk < 0) - return sprintf(page, "none\n"); - else - return sprintf(page, "%d\n", rdev->raid_disk); -} - -static ssize_t -slot_store(struct md_rdev *rdev, const char *buf, size_t len) -{ - char *e; - int err; - int slot = simple_strtoul(buf, &e, 10); - if (strncmp(buf, "none", 4)==0) - slot = -1; - else if (e==buf || (*e && *e!= '\n')) - return -EINVAL; - if (rdev->mddev->pers && slot == -1) { - /* Setting 'slot' on an active array requires also - * updating the 'rd%d' link, and communicating - * with the personality with ->hot_*_disk. - * For now we only support removing - * failed/spare devices. This normally happens automatically, - * but not when the metadata is externally managed. - */ - if (rdev->raid_disk == -1) - return -EEXIST; - /* personality does all needed checks */ - if (rdev->mddev->pers->hot_remove_disk == NULL) - return -EINVAL; - err = rdev->mddev->pers-> - hot_remove_disk(rdev->mddev, rdev); - if (err) - return err; - sysfs_unlink_rdev(rdev->mddev, rdev); - rdev->raid_disk = -1; - set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); - md_wakeup_thread(rdev->mddev->thread); - } else if (rdev->mddev->pers) { - /* Activating a spare .. or possibly reactivating - * if we ever get bitmaps working here. 
- */ - - if (rdev->raid_disk != -1) - return -EBUSY; - - if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery)) - return -EBUSY; - - if (rdev->mddev->pers->hot_add_disk == NULL) - return -EINVAL; - - if (slot >= rdev->mddev->raid_disks && - slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks) - return -ENOSPC; - - rdev->raid_disk = slot; - if (test_bit(In_sync, &rdev->flags)) - rdev->saved_raid_disk = slot; - else - rdev->saved_raid_disk = -1; - clear_bit(In_sync, &rdev->flags); - err = rdev->mddev->pers-> - hot_add_disk(rdev->mddev, rdev); - if (err) { - rdev->raid_disk = -1; - return err; - } else - sysfs_notify_dirent_safe(rdev->sysfs_state); - if (sysfs_link_rdev(rdev->mddev, rdev)) - /* failure here is OK */; - /* don't wakeup anyone, leave that to userspace. */ - } else { - if (slot >= rdev->mddev->raid_disks && - slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks) - return -ENOSPC; - rdev->raid_disk = slot; - /* assume it is working */ - clear_bit(Faulty, &rdev->flags); - clear_bit(WriteMostly, &rdev->flags); - set_bit(In_sync, &rdev->flags); - sysfs_notify_dirent_safe(rdev->sysfs_state); - } - return len; -} - - -static struct rdev_sysfs_entry rdev_slot = -__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store); - -static ssize_t -offset_show(struct md_rdev *rdev, char *page) -{ - return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset); -} - -static ssize_t -offset_store(struct md_rdev *rdev, const char *buf, size_t len) -{ - char *e; - unsigned long long offset = simple_strtoull(buf, &e, 10); - if (e==buf || (*e && *e != '\n')) - return -EINVAL; - if (rdev->mddev->pers && rdev->raid_disk >= 0) - return -EBUSY; - if (rdev->sectors && rdev->mddev->external) - /* Must set offset before size, so overlap checks - * can be sane */ - return -EBUSY; - rdev->data_offset = offset; - return len; -} - -static struct rdev_sysfs_entry rdev_offset = -__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store); - -static ssize_t -rdev_size_show(struct md_rdev *rdev, char *page) -{ - return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2); -} - -static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2) -{ - /* check if two start/length pairs overlap */ - if (s1+l1 <= s2) - return 0; - if (s2+l2 <= s1) - return 0; - return 1; -} - -static int strict_blocks_to_sectors(const char *buf, sector_t *sectors) -{ - unsigned long long blocks; - sector_t new; - - if (strict_strtoull(buf, 10, &blocks) < 0) - return -EINVAL; - - if (blocks & 1ULL << (8 * sizeof(blocks) - 1)) - return -EINVAL; /* sector conversion overflow */ - - new = blocks * 2; - if (new != blocks * 2) - return -EINVAL; /* unsigned long long to sector_t overflow */ - - *sectors = new; - return 0; -} - -static ssize_t -rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len) -{ - struct mddev *my_mddev = rdev->mddev; - sector_t oldsectors = rdev->sectors; - sector_t sectors; - - if (strict_blocks_to_sectors(buf, §ors) < 0) - return -EINVAL; - if (my_mddev->pers && rdev->raid_disk >= 0) { - if (my_mddev->persistent) { - sectors = super_types[my_mddev->major_version]. 
- rdev_size_change(rdev, sectors); - if (!sectors) - return -EBUSY; - } else if (!sectors) - sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) - - rdev->data_offset; - } - if (sectors < my_mddev->dev_sectors) - return -EINVAL; /* component must fit device */ - - rdev->sectors = sectors; - if (sectors > oldsectors && my_mddev->external) { - /* need to check that all other rdevs with the same ->bdev - * do not overlap. We need to unlock the mddev to avoid - * a deadlock. We have already changed rdev->sectors, and if - * we have to change it back, we will have the lock again. - */ - struct mddev *mddev; - int overlap = 0; - struct list_head *tmp; - - mddev_unlock(my_mddev); - for_each_mddev(mddev, tmp) { - struct md_rdev *rdev2; - - mddev_lock(mddev); - rdev_for_each(rdev2, mddev) - if (rdev->bdev == rdev2->bdev && - rdev != rdev2 && - overlaps(rdev->data_offset, rdev->sectors, - rdev2->data_offset, - rdev2->sectors)) { - overlap = 1; - break; - } - mddev_unlock(mddev); - if (overlap) { - mddev_put(mddev); - break; - } - } - mddev_lock(my_mddev); - if (overlap) { - /* Someone else could have slipped in a size - * change here, but doing so is just silly. - * We put oldsectors back because we *know* it is - * safe, and trust userspace not to race with - * itself - */ - rdev->sectors = oldsectors; - return -EBUSY; - } - } - return len; -} - -static struct rdev_sysfs_entry rdev_size = -__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store); - - -static ssize_t recovery_start_show(struct md_rdev *rdev, char *page) -{ - unsigned long long recovery_start = rdev->recovery_offset; - - if (test_bit(In_sync, &rdev->flags) || - recovery_start == MaxSector) - return sprintf(page, "none\n"); - - return sprintf(page, "%llu\n", recovery_start); -} - -static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len) -{ - unsigned long long recovery_start; - - if (cmd_match(buf, "none")) - recovery_start = MaxSector; - else if (strict_strtoull(buf, 10, &recovery_start)) - return -EINVAL; - - if (rdev->mddev->pers && - rdev->raid_disk >= 0) - return -EBUSY; - - rdev->recovery_offset = recovery_start; - if (recovery_start == MaxSector) - set_bit(In_sync, &rdev->flags); - else - clear_bit(In_sync, &rdev->flags); - return len; -} - -static struct rdev_sysfs_entry rdev_recovery_start = -__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store); - - -static ssize_t -badblocks_show(struct badblocks *bb, char *page, int unack); -static ssize_t -badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack); - -static ssize_t bb_show(struct md_rdev *rdev, char *page) -{ - return badblocks_show(&rdev->badblocks, page, 0); -} -static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len) -{ - int rv = badblocks_store(&rdev->badblocks, page, len, 0); - /* Maybe that ack was all we needed */ - if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags)) - wake_up(&rdev->blocked_wait); - return rv; -} -static struct rdev_sysfs_entry rdev_bad_blocks = -__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store); - - -static ssize_t ubb_show(struct md_rdev *rdev, char *page) -{ - return badblocks_show(&rdev->badblocks, page, 1); -} -static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len) -{ - return badblocks_store(&rdev->badblocks, page, len, 1); -} -static struct rdev_sysfs_entry rdev_unack_bad_blocks = -__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store); - -static struct attribute *rdev_default_attrs[] = { - 
&rdev_state.attr, - &rdev_errors.attr, - &rdev_slot.attr, - &rdev_offset.attr, - &rdev_size.attr, - &rdev_recovery_start.attr, - &rdev_bad_blocks.attr, - &rdev_unack_bad_blocks.attr, - NULL, -}; -static ssize_t -rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page) -{ - struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr); - struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj); - struct mddev *mddev = rdev->mddev; - ssize_t rv; - - if (!entry->show) - return -EIO; - - rv = mddev ? mddev_lock(mddev) : -EBUSY; - if (!rv) { - if (rdev->mddev == NULL) - rv = -EBUSY; - else - rv = entry->show(rdev, page); - mddev_unlock(mddev); - } - return rv; -} - -static ssize_t -rdev_attr_store(struct kobject *kobj, struct attribute *attr, - const char *page, size_t length) -{ - struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr); - struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj); - ssize_t rv; - struct mddev *mddev = rdev->mddev; - - if (!entry->store) - return -EIO; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - rv = mddev ? mddev_lock(mddev): -EBUSY; - if (!rv) { - if (rdev->mddev == NULL) - rv = -EBUSY; - else - rv = entry->store(rdev, page, length); - mddev_unlock(mddev); - } - return rv; -} - -static void rdev_free(struct kobject *ko) -{ - struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj); - kfree(rdev); -} -static const struct sysfs_ops rdev_sysfs_ops = { - .show = rdev_attr_show, - .store = rdev_attr_store, -}; -static struct kobj_type rdev_ktype = { - .release = rdev_free, - .sysfs_ops = &rdev_sysfs_ops, - .default_attrs = rdev_default_attrs, -}; - -int md_rdev_init(struct md_rdev *rdev) -{ - rdev->desc_nr = -1; - rdev->saved_raid_disk = -1; - rdev->raid_disk = -1; - rdev->flags = 0; - rdev->data_offset = 0; - rdev->sb_events = 0; - rdev->last_read_error.tv_sec = 0; - rdev->last_read_error.tv_nsec = 0; - rdev->sb_loaded = 0; - rdev->bb_page = NULL; - atomic_set(&rdev->nr_pending, 0); - atomic_set(&rdev->read_errors, 0); - atomic_set(&rdev->corrected_errors, 0); - - INIT_LIST_HEAD(&rdev->same_set); - init_waitqueue_head(&rdev->blocked_wait); - - /* Add space to store bad block list. - * This reserves the space even on arrays where it cannot - * be used - I wonder if that matters - */ - rdev->badblocks.count = 0; - rdev->badblocks.shift = 0; - rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL); - seqlock_init(&rdev->badblocks.lock); - if (rdev->badblocks.page == NULL) - return -ENOMEM; - - return 0; -} -EXPORT_SYMBOL_GPL(md_rdev_init); -/* - * Import a device. If 'super_format' >= 0, then sanity check the superblock - * - * mark the device faulty if: - * - * - the device is nonexistent (zero size) - * - the device has no valid superblock - * - * a faulty rdev _never_ has rdev->sb set. 
- */ -static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor) -{ - char b[BDEVNAME_SIZE]; - int err; - struct md_rdev *rdev; - sector_t size; - - rdev = kzalloc(sizeof(*rdev), GFP_KERNEL); - if (!rdev) { - printk(KERN_ERR "md: could not alloc mem for new device!\n"); - return ERR_PTR(-ENOMEM); - } - - err = md_rdev_init(rdev); - if (err) - goto abort_free; - err = alloc_disk_sb(rdev); - if (err) - goto abort_free; - - err = lock_rdev(rdev, newdev, super_format == -2); - if (err) - goto abort_free; - - kobject_init(&rdev->kobj, &rdev_ktype); - - size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS; - if (!size) { - printk(KERN_WARNING - "md: %s has zero or unknown size, marking faulty!\n", - bdevname(rdev->bdev,b)); - err = -EINVAL; - goto abort_free; - } - - if (super_format >= 0) { - err = super_types[super_format]. - load_super(rdev, NULL, super_minor); - if (err == -EINVAL) { - printk(KERN_WARNING - "md: %s does not have a valid v%d.%d " - "superblock, not importing!\n", - bdevname(rdev->bdev,b), - super_format, super_minor); - goto abort_free; - } - if (err < 0) { - printk(KERN_WARNING - "md: could not read %s's sb, not importing!\n", - bdevname(rdev->bdev,b)); - goto abort_free; - } - } - if (super_format == -1) - /* hot-add for 0.90, or non-persistent: so no badblocks */ - rdev->badblocks.shift = -1; - - return rdev; - -abort_free: - if (rdev->bdev) - unlock_rdev(rdev); - free_disk_sb(rdev); - kfree(rdev->badblocks.page); - kfree(rdev); - return ERR_PTR(err); -} - -/* - * Check a full RAID array for plausibility - */ - - -static void analyze_sbs(struct mddev * mddev) -{ - int i; - struct md_rdev *rdev, *freshest, *tmp; - char b[BDEVNAME_SIZE]; - - freshest = NULL; - rdev_for_each_safe(rdev, tmp, mddev) - switch (super_types[mddev->major_version]. - load_super(rdev, freshest, mddev->minor_version)) { - case 1: - freshest = rdev; - break; - case 0: - break; - default: - printk( KERN_ERR \ - "md: fatal superblock inconsistency in %s" - " -- removing from array\n", - bdevname(rdev->bdev,b)); - kick_rdev_from_array(rdev); - } - - - super_types[mddev->major_version]. - validate_super(mddev, freshest); - - i = 0; - rdev_for_each_safe(rdev, tmp, mddev) { - if (mddev->max_disks && - (rdev->desc_nr >= mddev->max_disks || - i > mddev->max_disks)) { - printk(KERN_WARNING - "md: %s: %s: only %d devices permitted\n", - mdname(mddev), bdevname(rdev->bdev, b), - mddev->max_disks); - kick_rdev_from_array(rdev); - continue; - } - if (rdev != freshest) - if (super_types[mddev->major_version]. - validate_super(mddev, rdev)) { - printk(KERN_WARNING "md: kicking non-fresh %s" - " from array!\n", - bdevname(rdev->bdev,b)); - kick_rdev_from_array(rdev); - continue; - } - if (mddev->level == LEVEL_MULTIPATH) { - rdev->desc_nr = i++; - rdev->raid_disk = rdev->desc_nr; - set_bit(In_sync, &rdev->flags); - } else if (rdev->raid_disk >= (mddev->raid_disks - min(0, mddev->delta_disks))) { - rdev->raid_disk = -1; - clear_bit(In_sync, &rdev->flags); - } - } -} - -/* Read a fixed-point number. - * Numbers in sysfs attributes should be in "standard" units where - * possible, so time should be in seconds. - * However we internally use a a much smaller unit such as - * milliseconds or jiffies. - * This function takes a decimal number with a possible fractional - * component, and produces an integer which is the result of - * multiplying that number by 10^'scale'. - * all without any floating-point arithmetic. 
- */ -int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale) -{ - unsigned long result = 0; - long decimals = -1; - while (isdigit(*cp) || (*cp == '.' && decimals < 0)) { - if (*cp == '.') - decimals = 0; - else if (decimals < scale) { - unsigned int value; - value = *cp - '0'; - result = result * 10 + value; - if (decimals >= 0) - decimals++; - } - cp++; - } - if (*cp == '\n') - cp++; - if (*cp) - return -EINVAL; - if (decimals < 0) - decimals = 0; - while (decimals < scale) { - result *= 10; - decimals ++; - } - *res = result; - return 0; -} - - -static void md_safemode_timeout(unsigned long data); - -static ssize_t -safe_delay_show(struct mddev *mddev, char *page) -{ - int msec = (mddev->safemode_delay*1000)/HZ; - return sprintf(page, "%d.%03d\n", msec/1000, msec%1000); -} -static ssize_t -safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len) -{ - unsigned long msec; - - if (strict_strtoul_scaled(cbuf, &msec, 3) < 0) - return -EINVAL; - if (msec == 0) - mddev->safemode_delay = 0; - else { - unsigned long old_delay = mddev->safemode_delay; - mddev->safemode_delay = (msec*HZ)/1000; - if (mddev->safemode_delay == 0) - mddev->safemode_delay = 1; - if (mddev->safemode_delay < old_delay) - md_safemode_timeout((unsigned long)mddev); - } - return len; -} -static struct md_sysfs_entry md_safe_delay = -__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store); - -static ssize_t -level_show(struct mddev *mddev, char *page) -{ - struct md_personality *p = mddev->pers; - if (p) - return sprintf(page, "%s\n", p->name); - else if (mddev->clevel[0]) - return sprintf(page, "%s\n", mddev->clevel); - else if (mddev->level != LEVEL_NONE) - return sprintf(page, "%d\n", mddev->level); - else - return 0; -} - -static ssize_t -level_store(struct mddev *mddev, const char *buf, size_t len) -{ - char clevel[16]; - ssize_t rv = len; - struct md_personality *pers; - long level; - void *priv; - struct md_rdev *rdev; - - if (mddev->pers == NULL) { - if (len == 0) - return 0; - if (len >= sizeof(mddev->clevel)) - return -ENOSPC; - strncpy(mddev->clevel, buf, len); - if (mddev->clevel[len-1] == '\n') - len--; - mddev->clevel[len] = 0; - mddev->level = LEVEL_NONE; - return rv; - } - - /* request to change the personality. Need to ensure: - * - array is not engaged in resync/recovery/reshape - * - old personality can be suspended - * - new personality will access other array. - */ - - if (mddev->sync_thread || - mddev->reshape_position != MaxSector || - mddev->sysfs_active) - return -EBUSY; - - if (!mddev->pers->quiesce) { - printk(KERN_WARNING "md: %s: %s does not support online personality change\n", - mdname(mddev), mddev->pers->name); - return -EINVAL; - } - - /* Now find the new personality */ - if (len == 0 || len >= sizeof(clevel)) - return -EINVAL; - strncpy(clevel, buf, len); - if (clevel[len-1] == '\n') - len--; - clevel[len] = 0; - if (strict_strtol(clevel, 10, &level)) - level = LEVEL_NONE; - - if (request_module("md-%s", clevel) != 0) - request_module("md-level-%s", clevel); - spin_lock(&pers_lock); - pers = find_pers(level, clevel); - if (!pers || !try_module_get(pers->owner)) { - spin_unlock(&pers_lock); - printk(KERN_WARNING "md: personality %s not loaded\n", clevel); - return -EINVAL; - } - spin_unlock(&pers_lock); - - if (pers == mddev->pers) { - /* Nothing to do! 
*/ - module_put(pers->owner); - return rv; - } - if (!pers->takeover) { - module_put(pers->owner); - printk(KERN_WARNING "md: %s: %s does not support personality takeover\n", - mdname(mddev), clevel); - return -EINVAL; - } - - rdev_for_each(rdev, mddev) - rdev->new_raid_disk = rdev->raid_disk; - - /* ->takeover must set new_* and/or delta_disks - * if it succeeds, and may set them when it fails. - */ - priv = pers->takeover(mddev); - if (IS_ERR(priv)) { - mddev->new_level = mddev->level; - mddev->new_layout = mddev->layout; - mddev->new_chunk_sectors = mddev->chunk_sectors; - mddev->raid_disks -= mddev->delta_disks; - mddev->delta_disks = 0; - module_put(pers->owner); - printk(KERN_WARNING "md: %s: %s would not accept array\n", - mdname(mddev), clevel); - return PTR_ERR(priv); - } - - /* Looks like we have a winner */ - mddev_suspend(mddev); - mddev->pers->stop(mddev); - - if (mddev->pers->sync_request == NULL && - pers->sync_request != NULL) { - /* need to add the md_redundancy_group */ - if (sysfs_create_group(&mddev->kobj, &md_redundancy_group)) - printk(KERN_WARNING - "md: cannot register extra attributes for %s\n", - mdname(mddev)); - mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action"); - } - if (mddev->pers->sync_request != NULL && - pers->sync_request == NULL) { - /* need to remove the md_redundancy_group */ - if (mddev->to_remove == NULL) - mddev->to_remove = &md_redundancy_group; - } - - if (mddev->pers->sync_request == NULL && - mddev->external) { - /* We are converting from a no-redundancy array - * to a redundancy array and metadata is managed - * externally so we need to be sure that writes - * won't block due to a need to transition - * clean->dirty - * until external management is started. - */ - mddev->in_sync = 0; - mddev->safemode_delay = 0; - mddev->safemode = 0; - } - - rdev_for_each(rdev, mddev) { - if (rdev->raid_disk < 0) - continue; - if (rdev->new_raid_disk >= mddev->raid_disks) - rdev->new_raid_disk = -1; - if (rdev->new_raid_disk == rdev->raid_disk) - continue; - sysfs_unlink_rdev(mddev, rdev); - } - rdev_for_each(rdev, mddev) { - if (rdev->raid_disk < 0) - continue; - if (rdev->new_raid_disk == rdev->raid_disk) - continue; - rdev->raid_disk = rdev->new_raid_disk; - if (rdev->raid_disk < 0) - clear_bit(In_sync, &rdev->flags); - else { - if (sysfs_link_rdev(mddev, rdev)) - printk(KERN_WARNING "md: cannot register rd%d" - " for %s after level change\n", - rdev->raid_disk, mdname(mddev)); - } - } - - module_put(mddev->pers->owner); - mddev->pers = pers; - mddev->private = priv; - strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); - mddev->level = mddev->new_level; - mddev->layout = mddev->new_layout; - mddev->chunk_sectors = mddev->new_chunk_sectors; - mddev->delta_disks = 0; - mddev->degraded = 0; - if (mddev->pers->sync_request == NULL) { - /* this is now an array without redundancy, so - * it must always be in_sync - */ - mddev->in_sync = 1; - del_timer_sync(&mddev->safemode_timer); - } - pers->run(mddev); - mddev_resume(mddev); - set_bit(MD_CHANGE_DEVS, &mddev->flags); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); - sysfs_notify(&mddev->kobj, NULL, "level"); - md_new_event(mddev); - return rv; -} - -static struct md_sysfs_entry md_level = -__ATTR(level, S_IRUGO|S_IWUSR, level_show, level_store); - - -static ssize_t -layout_show(struct mddev *mddev, char *page) -{ - /* just a number, not meaningful for all levels */ - if (mddev->reshape_position != MaxSector && - mddev->layout != 
mddev->new_layout) - return sprintf(page, "%d (%d)\n", - mddev->new_layout, mddev->layout); - return sprintf(page, "%d\n", mddev->layout); -} - -static ssize_t -layout_store(struct mddev *mddev, const char *buf, size_t len) -{ - char *e; - unsigned long n = simple_strtoul(buf, &e, 10); - - if (!*buf || (*e && *e != '\n')) - return -EINVAL; - - if (mddev->pers) { - int err; - if (mddev->pers->check_reshape == NULL) - return -EBUSY; - mddev->new_layout = n; - err = mddev->pers->check_reshape(mddev); - if (err) { - mddev->new_layout = mddev->layout; - return err; - } - } else { - mddev->new_layout = n; - if (mddev->reshape_position == MaxSector) - mddev->layout = n; - } - return len; -} -static struct md_sysfs_entry md_layout = -__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store); - - -static ssize_t -raid_disks_show(struct mddev *mddev, char *page) -{ - if (mddev->raid_disks == 0) - return 0; - if (mddev->reshape_position != MaxSector && - mddev->delta_disks != 0) - return sprintf(page, "%d (%d)\n", mddev->raid_disks, - mddev->raid_disks - mddev->delta_disks); - return sprintf(page, "%d\n", mddev->raid_disks); -} - -static int update_raid_disks(struct mddev *mddev, int raid_disks); - -static ssize_t -raid_disks_store(struct mddev *mddev, const char *buf, size_t len) -{ - char *e; - int rv = 0; - unsigned long n = simple_strtoul(buf, &e, 10); - - if (!*buf || (*e && *e != '\n')) - return -EINVAL; - - if (mddev->pers) - rv = update_raid_disks(mddev, n); - else if (mddev->reshape_position != MaxSector) { - int olddisks = mddev->raid_disks - mddev->delta_disks; - mddev->delta_disks = n - olddisks; - mddev->raid_disks = n; - } else - mddev->raid_disks = n; - return rv ? rv : len; -} -static struct md_sysfs_entry md_raid_disks = -__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store); - -static ssize_t -chunk_size_show(struct mddev *mddev, char *page) -{ - if (mddev->reshape_position != MaxSector && - mddev->chunk_sectors != mddev->new_chunk_sectors) - return sprintf(page, "%d (%d)\n", - mddev->new_chunk_sectors << 9, - mddev->chunk_sectors << 9); - return sprintf(page, "%d\n", mddev->chunk_sectors << 9); -} - -static ssize_t -chunk_size_store(struct mddev *mddev, const char *buf, size_t len) -{ - char *e; - unsigned long n = simple_strtoul(buf, &e, 10); - - if (!*buf || (*e && *e != '\n')) - return -EINVAL; - - if (mddev->pers) { - int err; - if (mddev->pers->check_reshape == NULL) - return -EBUSY; - mddev->new_chunk_sectors = n >> 9; - err = mddev->pers->check_reshape(mddev); - if (err) { - mddev->new_chunk_sectors = mddev->chunk_sectors; - return err; - } - } else { - mddev->new_chunk_sectors = n >> 9; - if (mddev->reshape_position == MaxSector) - mddev->chunk_sectors = n >> 9; - } - return len; -} -static struct md_sysfs_entry md_chunk_size = -__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store); - -static ssize_t -resync_start_show(struct mddev *mddev, char *page) -{ - if (mddev->recovery_cp == MaxSector) - return sprintf(page, "none\n"); - return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp); -} - -static ssize_t -resync_start_store(struct mddev *mddev, const char *buf, size_t len) -{ - char *e; - unsigned long long n = simple_strtoull(buf, &e, 10); - - if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) - return -EBUSY; - if (cmd_match(buf, "none")) - n = MaxSector; - else if (!*buf || (*e && *e != '\n')) - return -EINVAL; - - mddev->recovery_cp = n; - return len; -} -static struct md_sysfs_entry md_resync_start = 
-__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store); - -/* - * The array state can be: - * - * clear - * No devices, no size, no level - * Equivalent to STOP_ARRAY ioctl - * inactive - * May have some settings, but array is not active - * all IO results in error - * When written, doesn't tear down array, but just stops it - * suspended (not supported yet) - * All IO requests will block. The array can be reconfigured. - * Writing this, if accepted, will block until array is quiescent - * readonly - * no resync can happen. no superblocks get written. - * write requests fail - * read-auto - * like readonly, but behaves like 'clean' on a write request. - * - * clean - no pending writes, but otherwise active. - * When written to inactive array, starts without resync - * If a write request arrives then - * if metadata is known, mark 'dirty' and switch to 'active'. - * if not known, block and switch to write-pending - * If written to an active array that has pending writes, then fails. - * active - * fully active: IO and resync can be happening. - * When written to inactive array, starts with resync - * - * write-pending - * clean, but writes are blocked waiting for 'active' to be written. - * - * active-idle - * like active, but no writes have been seen for a while (100msec). - * - */ -enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active, - write_pending, active_idle, bad_word}; -static char *array_states[] = { - "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active", - "write-pending", "active-idle", NULL }; - -static int match_word(const char *word, char **list) -{ - int n; - for (n=0; list[n]; n++) - if (cmd_match(word, list[n])) - break; - return n; -} - -static ssize_t -array_state_show(struct mddev *mddev, char *page) -{ - enum array_state st = inactive; - - if (mddev->pers) - switch(mddev->ro) { - case 1: - st = readonly; - break; - case 2: - st = read_auto; - break; - case 0: - if (mddev->in_sync) - st = clean; - else if (test_bit(MD_CHANGE_PENDING, &mddev->flags)) - st = write_pending; - else if (mddev->safemode) - st = active_idle; - else - st = active; - } - else { - if (list_empty(&mddev->disks) && - mddev->raid_disks == 0 && - mddev->dev_sectors == 0) - st = clear; - else - st = inactive; - } - return sprintf(page, "%s\n", array_states[st]); -} - -static int do_md_stop(struct mddev * mddev, int ro, int is_open); -static int md_set_readonly(struct mddev * mddev, int is_open); -static int do_md_run(struct mddev * mddev); -static int restart_array(struct mddev *mddev); - -static ssize_t -array_state_store(struct mddev *mddev, const char *buf, size_t len) -{ - int err = -EINVAL; - enum array_state st = match_word(buf, array_states); - switch(st) { - case bad_word: - break; - case clear: - /* stopping an active array */ - if (atomic_read(&mddev->openers) > 0) - return -EBUSY; - err = do_md_stop(mddev, 0, 0); - break; - case inactive: - /* stopping an active array */ - if (mddev->pers) { - if (atomic_read(&mddev->openers) > 0) - return -EBUSY; - err = do_md_stop(mddev, 2, 0); - } else - err = 0; /* already inactive */ - break; - case suspended: - break; /* not supported yet */ - case readonly: - if (mddev->pers) - err = md_set_readonly(mddev, 0); - else { - mddev->ro = 1; - set_disk_ro(mddev->gendisk, 1); - err = do_md_run(mddev); - } - break; - case read_auto: - if (mddev->pers) { - if (mddev->ro == 0) - err = md_set_readonly(mddev, 0); - else if (mddev->ro == 1) - err = restart_array(mddev); - if (err == 0) { - 
mddev->ro = 2; - set_disk_ro(mddev->gendisk, 0); - } - } else { - mddev->ro = 2; - err = do_md_run(mddev); - } - break; - case clean: - if (mddev->pers) { - restart_array(mddev); - spin_lock_irq(&mddev->write_lock); - if (atomic_read(&mddev->writes_pending) == 0) { - if (mddev->in_sync == 0) { - mddev->in_sync = 1; - if (mddev->safemode == 1) - mddev->safemode = 0; - set_bit(MD_CHANGE_CLEAN, &mddev->flags); - } - err = 0; - } else - err = -EBUSY; - spin_unlock_irq(&mddev->write_lock); - } else - err = -EINVAL; - break; - case active: - if (mddev->pers) { - restart_array(mddev); - clear_bit(MD_CHANGE_PENDING, &mddev->flags); - wake_up(&mddev->sb_wait); - err = 0; - } else { - mddev->ro = 0; - set_disk_ro(mddev->gendisk, 0); - err = do_md_run(mddev); - } - break; - case write_pending: - case active_idle: - /* these cannot be set */ - break; - } - if (err) - return err; - else { - if (mddev->hold_active == UNTIL_IOCTL) - mddev->hold_active = 0; - sysfs_notify_dirent_safe(mddev->sysfs_state); - return len; - } -} -static struct md_sysfs_entry md_array_state = -__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); - -static ssize_t -max_corrected_read_errors_show(struct mddev *mddev, char *page) { - return sprintf(page, "%d\n", - atomic_read(&mddev->max_corr_read_errors)); -} - -static ssize_t -max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len) -{ - char *e; - unsigned long n = simple_strtoul(buf, &e, 10); - - if (*buf && (*e == 0 || *e == '\n')) { - atomic_set(&mddev->max_corr_read_errors, n); - return len; - } - return -EINVAL; -} - -static struct md_sysfs_entry max_corr_read_errors = -__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show, - max_corrected_read_errors_store); - -static ssize_t -null_show(struct mddev *mddev, char *page) -{ - return -EINVAL; -} - -static ssize_t -new_dev_store(struct mddev *mddev, const char *buf, size_t len) -{ - /* buf must be %d:%d\n? giving major and minor numbers */ - /* The new device is added to the array. - * If the array has a persistent superblock, we read the - * superblock to initialise info and check validity. - * Otherwise, only checking done is that in bind_rdev_to_array, - * which mainly checks size. - */ - char *e; - int major = simple_strtoul(buf, &e, 10); - int minor; - dev_t dev; - struct md_rdev *rdev; - int err; - - if (!*buf || *e != ':' || !e[1] || e[1] == '\n') - return -EINVAL; - minor = simple_strtoul(e+1, &e, 10); - if (*e && *e != '\n') - return -EINVAL; - dev = MKDEV(major, minor); - if (major != MAJOR(dev) || - minor != MINOR(dev)) - return -EOVERFLOW; - - - if (mddev->persistent) { - rdev = md_import_device(dev, mddev->major_version, - mddev->minor_version); - if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) { - struct md_rdev *rdev0 - = list_entry(mddev->disks.next, - struct md_rdev, same_set); - err = super_types[mddev->major_version] - .load_super(rdev, rdev0, mddev->minor_version); - if (err < 0) - goto out; - } - } else if (mddev->external) - rdev = md_import_device(dev, -2, -1); - else - rdev = md_import_device(dev, -1, -1); - - if (IS_ERR(rdev)) - return PTR_ERR(rdev); - err = bind_rdev_to_array(rdev, mddev); - out: - if (err) - export_rdev(rdev); - return err ? 
err : len; -} - -static struct md_sysfs_entry md_new_device = -__ATTR(new_dev, S_IWUSR, null_show, new_dev_store); - -static ssize_t -bitmap_store(struct mddev *mddev, const char *buf, size_t len) -{ - char *end; - unsigned long chunk, end_chunk; - - if (!mddev->bitmap) - goto out; - /* buf should be <chunk> <chunk> ... or <chunk>-<chunk> ... (range) */ - while (*buf) { - chunk = end_chunk = simple_strtoul(buf, &end, 0); - if (buf == end) break; - if (*end == '-') { /* range */ - buf = end + 1; - end_chunk = simple_strtoul(buf, &end, 0); - if (buf == end) break; - } - if (*end && !isspace(*end)) break; - bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk); - buf = skip_spaces(end); - } - bitmap_unplug(mddev->bitmap); /* flush the bits to disk */ -out: - return len; -} - -static struct md_sysfs_entry md_bitmap = -__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store); - -static ssize_t -size_show(struct mddev *mddev, char *page) -{ - return sprintf(page, "%llu\n", - (unsigned long long)mddev->dev_sectors / 2); -} - -static int update_size(struct mddev *mddev, sector_t num_sectors); - -static ssize_t -size_store(struct mddev *mddev, const char *buf, size_t len) -{ - /* If array is inactive, we can reduce the component size, but - * not increase it (except from 0). - * If array is active, we can try an on-line resize - */ - sector_t sectors; - int err = strict_blocks_to_sectors(buf, &sectors); - - if (err < 0) - return err; - if (mddev->pers) { - err = update_size(mddev, sectors); - md_update_sb(mddev, 1); - } else { - if (mddev->dev_sectors == 0 || - mddev->dev_sectors > sectors) - mddev->dev_sectors = sectors; - else - err = -ENOSPC; - } - return err ? err : len; -} - -static struct md_sysfs_entry md_size = -__ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store); - - -/* Metadata version. - * This is one of - * 'none' for arrays with no metadata (good luck...) - * 'external' for arrays with externally managed metadata, - * or N.M for internally known formats - */ -static ssize_t -metadata_show(struct mddev *mddev, char *page) -{ - if (mddev->persistent) - return sprintf(page, "%d.%d\n", - mddev->major_version, mddev->minor_version); - else if (mddev->external) - return sprintf(page, "external:%s\n", mddev->metadata_type); - else - return sprintf(page, "none\n"); -} - -static ssize_t -metadata_store(struct mddev *mddev, const char *buf, size_t len) -{ - int major, minor; - char *e; - /* Changing the details of 'external' metadata is - * always permitted. Otherwise there must be - * no devices attached to the array. 
- */ - if (mddev->external && strncmp(buf, "external:", 9) == 0) - ; - else if (!list_empty(&mddev->disks)) - return -EBUSY; - - if (cmd_match(buf, "none")) { - mddev->persistent = 0; - mddev->external = 0; - mddev->major_version = 0; - mddev->minor_version = 90; - return len; - } - if (strncmp(buf, "external:", 9) == 0) { - size_t namelen = len-9; - if (namelen >= sizeof(mddev->metadata_type)) - namelen = sizeof(mddev->metadata_type)-1; - strncpy(mddev->metadata_type, buf+9, namelen); - mddev->metadata_type[namelen] = 0; - if (namelen && mddev->metadata_type[namelen-1] == '\n') - mddev->metadata_type[--namelen] = 0; - mddev->persistent = 0; - mddev->external = 1; - mddev->major_version = 0; - mddev->minor_version = 90; - return len; - } - major = simple_strtoul(buf, &e, 10); - if (e==buf || *e != '.') - return -EINVAL; - buf = e+1; - minor = simple_strtoul(buf, &e, 10); - if (e==buf || (*e && *e != '\n') ) - return -EINVAL; - if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL) - return -ENOENT; - mddev->major_version = major; - mddev->minor_version = minor; - mddev->persistent = 1; - mddev->external = 0; - return len; -} - -static struct md_sysfs_entry md_metadata = -__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store); - -static ssize_t -action_show(struct mddev *mddev, char *page) -{ - char *type = "idle"; - if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) - type = "frozen"; - else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || - (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) { - if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) - type = "reshape"; - else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { - if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) - type = "resync"; - else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) - type = "check"; - else - type = "repair"; - } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) - type = "recover"; - } - return sprintf(page, "%s\n", type); -} - -static void reap_sync_thread(struct mddev *mddev); - -static ssize_t -action_store(struct mddev *mddev, const char *page, size_t len) -{ - if (!mddev->pers || !mddev->pers->sync_request) - return -EINVAL; - - if (cmd_match(page, "frozen")) - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - else - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - - if (cmd_match(page, "idle") || cmd_match(page, "frozen")) { - if (mddev->sync_thread) { - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - reap_sync_thread(mddev); - } - } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || - test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) - return -EBUSY; - else if (cmd_match(page, "resync")) - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - else if (cmd_match(page, "recover")) { - set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - } else if (cmd_match(page, "reshape")) { - int err; - if (mddev->pers->start_reshape == NULL) - return -EINVAL; - err = mddev->pers->start_reshape(mddev); - if (err) - return err; - sysfs_notify(&mddev->kobj, NULL, "degraded"); - } else { - if (cmd_match(page, "check")) - set_bit(MD_RECOVERY_CHECK, &mddev->recovery); - else if (!cmd_match(page, "repair")) - return -EINVAL; - set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); - set_bit(MD_RECOVERY_SYNC, &mddev->recovery); - } - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); - sysfs_notify_dirent_safe(mddev->sysfs_action); - return len; -} - -static ssize_t 
-mismatch_cnt_show(struct mddev *mddev, char *page) -{ - return sprintf(page, "%llu\n", - (unsigned long long) mddev->resync_mismatches); -} - -static struct md_sysfs_entry md_scan_mode = -__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store); - - -static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt); - -static ssize_t -sync_min_show(struct mddev *mddev, char *page) -{ - return sprintf(page, "%d (%s)\n", speed_min(mddev), - mddev->sync_speed_min ? "local": "system"); -} - -static ssize_t -sync_min_store(struct mddev *mddev, const char *buf, size_t len) -{ - int min; - char *e; - if (strncmp(buf, "system", 6)==0) { - mddev->sync_speed_min = 0; - return len; - } - min = simple_strtoul(buf, &e, 10); - if (buf == e || (*e && *e != '\n') || min <= 0) - return -EINVAL; - mddev->sync_speed_min = min; - return len; -} - -static struct md_sysfs_entry md_sync_min = -__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store); - -static ssize_t -sync_max_show(struct mddev *mddev, char *page) -{ - return sprintf(page, "%d (%s)\n", speed_max(mddev), - mddev->sync_speed_max ? "local": "system"); -} - -static ssize_t -sync_max_store(struct mddev *mddev, const char *buf, size_t len) -{ - int max; - char *e; - if (strncmp(buf, "system", 6)==0) { - mddev->sync_speed_max = 0; - return len; - } - max = simple_strtoul(buf, &e, 10); - if (buf == e || (*e && *e != '\n') || max <= 0) - return -EINVAL; - mddev->sync_speed_max = max; - return len; -} - -static struct md_sysfs_entry md_sync_max = -__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store); - -static ssize_t -degraded_show(struct mddev *mddev, char *page) -{ - return sprintf(page, "%d\n", mddev->degraded); -} -static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded); - -static ssize_t -sync_force_parallel_show(struct mddev *mddev, char *page) -{ - return sprintf(page, "%d\n", mddev->parallel_resync); -} - -static ssize_t -sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len) -{ - long n; - - if (strict_strtol(buf, 10, &n)) - return -EINVAL; - - if (n != 0 && n != 1) - return -EINVAL; - - mddev->parallel_resync = n; - - if (mddev->sync_thread) - wake_up(&resync_wait); - - return len; -} - -/* force parallel resync, even with shared block devices */ -static struct md_sysfs_entry md_sync_force_parallel = -__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR, - sync_force_parallel_show, sync_force_parallel_store); - -static ssize_t -sync_speed_show(struct mddev *mddev, char *page) -{ - unsigned long resync, dt, db; - if (mddev->curr_resync == 0) - return sprintf(page, "none\n"); - resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active); - dt = (jiffies - mddev->resync_mark) / HZ; - if (!dt) dt++; - db = resync - mddev->resync_mark_cnt; - return sprintf(page, "%lu\n", db/dt/2); /* K/sec */ -} - -static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed); - -static ssize_t -sync_completed_show(struct mddev *mddev, char *page) -{ - unsigned long long max_sectors, resync; - - if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) - return sprintf(page, "none\n"); - - if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) - max_sectors = mddev->resync_max_sectors; - else - max_sectors = mddev->dev_sectors; - - resync = mddev->curr_resync_completed; - return sprintf(page, "%llu / %llu\n", resync, max_sectors); -} - -static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); - -static ssize_t -min_sync_show(struct mddev *mddev, char *page) -{ - return 
sprintf(page, "%llu\n", - (unsigned long long)mddev->resync_min); -} -static ssize_t -min_sync_store(struct mddev *mddev, const char *buf, size_t len) -{ - unsigned long long min; - if (strict_strtoull(buf, 10, &min)) - return -EINVAL; - if (min > mddev->resync_max) - return -EINVAL; - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) - return -EBUSY; - - /* Must be a multiple of chunk_size */ - if (mddev->chunk_sectors) { - sector_t temp = min; - if (sector_div(temp, mddev->chunk_sectors)) - return -EINVAL; - } - mddev->resync_min = min; - - return len; -} - -static struct md_sysfs_entry md_min_sync = -__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store); - -static ssize_t -max_sync_show(struct mddev *mddev, char *page) -{ - if (mddev->resync_max == MaxSector) - return sprintf(page, "max\n"); - else - return sprintf(page, "%llu\n", - (unsigned long long)mddev->resync_max); -} -static ssize_t -max_sync_store(struct mddev *mddev, const char *buf, size_t len) -{ - if (strncmp(buf, "max", 3) == 0) - mddev->resync_max = MaxSector; - else { - unsigned long long max; - if (strict_strtoull(buf, 10, &max)) - return -EINVAL; - if (max < mddev->resync_min) - return -EINVAL; - if (max < mddev->resync_max && - mddev->ro == 0 && - test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) - return -EBUSY; - - /* Must be a multiple of chunk_size */ - if (mddev->chunk_sectors) { - sector_t temp = max; - if (sector_div(temp, mddev->chunk_sectors)) - return -EINVAL; - } - mddev->resync_max = max; - } - wake_up(&mddev->recovery_wait); - return len; -} - -static struct md_sysfs_entry md_max_sync = -__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store); - -static ssize_t -suspend_lo_show(struct mddev *mddev, char *page) -{ - return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo); -} - -static ssize_t -suspend_lo_store(struct mddev *mddev, const char *buf, size_t len) -{ - char *e; - unsigned long long new = simple_strtoull(buf, &e, 10); - unsigned long long old = mddev->suspend_lo; - - if (mddev->pers == NULL || - mddev->pers->quiesce == NULL) - return -EINVAL; - if (buf == e || (*e && *e != '\n')) - return -EINVAL; - - mddev->suspend_lo = new; - if (new >= old) - /* Shrinking suspended region */ - mddev->pers->quiesce(mddev, 2); - else { - /* Expanding suspended region - need to wait */ - mddev->pers->quiesce(mddev, 1); - mddev->pers->quiesce(mddev, 0); - } - return len; -} -static struct md_sysfs_entry md_suspend_lo = -__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store); - - -static ssize_t -suspend_hi_show(struct mddev *mddev, char *page) -{ - return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi); -} - -static ssize_t -suspend_hi_store(struct mddev *mddev, const char *buf, size_t len) -{ - char *e; - unsigned long long new = simple_strtoull(buf, &e, 10); - unsigned long long old = mddev->suspend_hi; - - if (mddev->pers == NULL || - mddev->pers->quiesce == NULL) - return -EINVAL; - if (buf == e || (*e && *e != '\n')) - return -EINVAL; - - mddev->suspend_hi = new; - if (new <= old) - /* Shrinking suspended region */ - mddev->pers->quiesce(mddev, 2); - else { - /* Expanding suspended region - need to wait */ - mddev->pers->quiesce(mddev, 1); - mddev->pers->quiesce(mddev, 0); - } - return len; -} -static struct md_sysfs_entry md_suspend_hi = -__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); - -static ssize_t -reshape_position_show(struct mddev *mddev, char *page) -{ - if (mddev->reshape_position != MaxSector) - return 
sprintf(page, "%llu\n", - (unsigned long long)mddev->reshape_position); - strcpy(page, "none\n"); - return 5; -} - -static ssize_t -reshape_position_store(struct mddev *mddev, const char *buf, size_t len) -{ - char *e; - unsigned long long new = simple_strtoull(buf, &e, 10); - if (mddev->pers) - return -EBUSY; - if (buf == e || (*e && *e != '\n')) - return -EINVAL; - mddev->reshape_position = new; - mddev->delta_disks = 0; - mddev->new_level = mddev->level; - mddev->new_layout = mddev->layout; - mddev->new_chunk_sectors = mddev->chunk_sectors; - return len; -} - -static struct md_sysfs_entry md_reshape_position = -__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show, - reshape_position_store); - -static ssize_t -array_size_show(struct mddev *mddev, char *page) -{ - if (mddev->external_size) - return sprintf(page, "%llu\n", - (unsigned long long)mddev->array_sectors/2); - else - return sprintf(page, "default\n"); -} - -static ssize_t -array_size_store(struct mddev *mddev, const char *buf, size_t len) -{ - sector_t sectors; - - if (strncmp(buf, "default", 7) == 0) { - if (mddev->pers) - sectors = mddev->pers->size(mddev, 0, 0); - else - sectors = mddev->array_sectors; - - mddev->external_size = 0; - } else { - if (strict_blocks_to_sectors(buf, &sectors) < 0) - return -EINVAL; - if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors) - return -E2BIG; - - mddev->external_size = 1; - } - - mddev->array_sectors = sectors; - if (mddev->pers) { - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); - } - return len; -} - -static struct md_sysfs_entry md_array_size = -__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show, - array_size_store); - -static struct attribute *md_default_attrs[] = { - &md_level.attr, - &md_layout.attr, - &md_raid_disks.attr, - &md_chunk_size.attr, - &md_size.attr, - &md_resync_start.attr, - &md_metadata.attr, - &md_new_device.attr, - &md_safe_delay.attr, - &md_array_state.attr, - &md_reshape_position.attr, - &md_array_size.attr, - &max_corr_read_errors.attr, - NULL, -}; - -static struct attribute *md_redundancy_attrs[] = { - &md_scan_mode.attr, - &md_mismatches.attr, - &md_sync_min.attr, - &md_sync_max.attr, - &md_sync_speed.attr, - &md_sync_force_parallel.attr, - &md_sync_completed.attr, - &md_min_sync.attr, - &md_max_sync.attr, - &md_suspend_lo.attr, - &md_suspend_hi.attr, - &md_bitmap.attr, - &md_degraded.attr, - NULL, -}; -static struct attribute_group md_redundancy_group = { - .name = NULL, - .attrs = md_redundancy_attrs, -}; - - -static ssize_t -md_attr_show(struct kobject *kobj, struct attribute *attr, char *page) -{ - struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr); - struct mddev *mddev = container_of(kobj, struct mddev, kobj); - ssize_t rv; - - if (!entry->show) - return -EIO; - spin_lock(&all_mddevs_lock); - if (list_empty(&mddev->all_mddevs)) { - spin_unlock(&all_mddevs_lock); - return -EBUSY; - } - mddev_get(mddev); - spin_unlock(&all_mddevs_lock); - - rv = mddev_lock(mddev); - if (!rv) { - rv = entry->show(mddev, page); - mddev_unlock(mddev); - } - mddev_put(mddev); - return rv; -} - -static ssize_t -md_attr_store(struct kobject *kobj, struct attribute *attr, - const char *page, size_t length) -{ - struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr); - struct mddev *mddev = container_of(kobj, struct mddev, kobj); - ssize_t rv; - - if (!entry->store) - return -EIO; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - spin_lock(&all_mddevs_lock); - if 
(list_empty(&mddev->all_mddevs)) { - spin_unlock(&all_mddevs_lock); - return -EBUSY; - } - mddev_get(mddev); - spin_unlock(&all_mddevs_lock); - rv = mddev_lock(mddev); - if (!rv) { - rv = entry->store(mddev, page, length); - mddev_unlock(mddev); - } - mddev_put(mddev); - return rv; -} - -static void md_free(struct kobject *ko) -{ - struct mddev *mddev = container_of(ko, struct mddev, kobj); - - if (mddev->sysfs_state) - sysfs_put(mddev->sysfs_state); - - if (mddev->gendisk) { - del_gendisk(mddev->gendisk); - put_disk(mddev->gendisk); - } - if (mddev->queue) - blk_cleanup_queue(mddev->queue); - - kfree(mddev); -} - -static const struct sysfs_ops md_sysfs_ops = { - .show = md_attr_show, - .store = md_attr_store, -}; -static struct kobj_type md_ktype = { - .release = md_free, - .sysfs_ops = &md_sysfs_ops, - .default_attrs = md_default_attrs, -}; - -int mdp_major = 0; - -static void mddev_delayed_delete(struct work_struct *ws) -{ - struct mddev *mddev = container_of(ws, struct mddev, del_work); - - sysfs_remove_group(&mddev->kobj, &md_bitmap_group); - kobject_del(&mddev->kobj); - kobject_put(&mddev->kobj); -} - -static int md_alloc(dev_t dev, char *name) -{ - static DEFINE_MUTEX(disks_mutex); - struct mddev *mddev = mddev_find(dev); - struct gendisk *disk; - int partitioned; - int shift; - int unit; - int error; - - if (!mddev) - return -ENODEV; - - partitioned = (MAJOR(mddev->unit) != MD_MAJOR); - shift = partitioned ? MdpMinorShift : 0; - unit = MINOR(mddev->unit) >> shift; - - /* wait for any previous instance of this device to be - * completely removed (mddev_delayed_delete). - */ - flush_workqueue(md_misc_wq); - - mutex_lock(&disks_mutex); - error = -EEXIST; - if (mddev->gendisk) - goto abort; - - if (name) { - /* Need to ensure that 'name' is not a duplicate. - */ - struct mddev *mddev2; - spin_lock(&all_mddevs_lock); - - list_for_each_entry(mddev2, &all_mddevs, all_mddevs) - if (mddev2->gendisk && - strcmp(mddev2->gendisk->disk_name, name) == 0) { - spin_unlock(&all_mddevs_lock); - goto abort; - } - spin_unlock(&all_mddevs_lock); - } - - error = -ENOMEM; - mddev->queue = blk_alloc_queue(GFP_KERNEL); - if (!mddev->queue) - goto abort; - mddev->queue->queuedata = mddev; - - blk_queue_make_request(mddev->queue, md_make_request); - blk_set_stacking_limits(&mddev->queue->limits); - - disk = alloc_disk(1 << shift); - if (!disk) { - blk_cleanup_queue(mddev->queue); - mddev->queue = NULL; - goto abort; - } - disk->major = MAJOR(mddev->unit); - disk->first_minor = unit << shift; - if (name) - strcpy(disk->disk_name, name); - else if (partitioned) - sprintf(disk->disk_name, "md_d%d", unit); - else - sprintf(disk->disk_name, "md%d", unit); - disk->fops = &md_fops; - disk->private_data = mddev; - disk->queue = mddev->queue; - blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA); - /* Allow extended partitions. This makes the - * 'mdp' device redundant, but we can't really - * remove it now. 
- */ - disk->flags |= GENHD_FL_EXT_DEVT; - mddev->gendisk = disk; - /* As soon as we call add_disk(), another thread could get - * through to md_open, so make sure it doesn't get too far - */ - mutex_lock(&mddev->open_mutex); - add_disk(disk); - - error = kobject_init_and_add(&mddev->kobj, &md_ktype, - &disk_to_dev(disk)->kobj, "%s", "md"); - if (error) { - /* This isn't possible, but as kobject_init_and_add is marked - * __must_check, we must do something with the result - */ - printk(KERN_WARNING "md: cannot register %s/md - name in use\n", - disk->disk_name); - error = 0; - } - if (mddev->kobj.sd && - sysfs_create_group(&mddev->kobj, &md_bitmap_group)) - printk(KERN_DEBUG "pointless warning\n"); - mutex_unlock(&mddev->open_mutex); - abort: - mutex_unlock(&disks_mutex); - if (!error && mddev->kobj.sd) { - kobject_uevent(&mddev->kobj, KOBJ_ADD); - mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state"); - } - mddev_put(mddev); - return error; -} - -static struct kobject *md_probe(dev_t dev, int *part, void *data) -{ - md_alloc(dev, NULL); - return NULL; -} - -static int add_named_array(const char *val, struct kernel_param *kp) -{ - /* val must be "md_*" where * is not all digits. - * We allocate an array with a large free minor number, and - * set the name to val. val must not already be an active name. - */ - int len = strlen(val); - char buf[DISK_NAME_LEN]; - - while (len && val[len-1] == '\n') - len--; - if (len >= DISK_NAME_LEN) - return -E2BIG; - strlcpy(buf, val, len+1); - if (strncmp(buf, "md_", 3) != 0) - return -EINVAL; - return md_alloc(0, buf); -} - -static void md_safemode_timeout(unsigned long data) -{ - struct mddev *mddev = (struct mddev *) data; - - if (!atomic_read(&mddev->writes_pending)) { - mddev->safemode = 1; - if (mddev->external) - sysfs_notify_dirent_safe(mddev->sysfs_state); - } - md_wakeup_thread(mddev->thread); -} - -static int start_dirty_degraded; - -int md_run(struct mddev *mddev) -{ - int err; - struct md_rdev *rdev; - struct md_personality *pers; - - if (list_empty(&mddev->disks)) - /* cannot run an array with no devices.. */ - return -EINVAL; - - if (mddev->pers) - return -EBUSY; - /* Cannot run until previous stop completes properly */ - if (mddev->sysfs_active) - return -EBUSY; - - /* - * Analyze all RAID superblock(s) - */ - if (!mddev->raid_disks) { - if (!mddev->persistent) - return -EINVAL; - analyze_sbs(mddev); - } - - if (mddev->level != LEVEL_NONE) - request_module("md-level-%d", mddev->level); - else if (mddev->clevel[0]) - request_module("md-%s", mddev->clevel); - - /* - * Drop all container device buffers, from now on - * the only valid external interface is through the md - * device. - */ - rdev_for_each(rdev, mddev) { - if (test_bit(Faulty, &rdev->flags)) - continue; - sync_blockdev(rdev->bdev); - invalidate_bdev(rdev->bdev); - - /* perform some consistency tests on the device. - * We don't want the data to overlap the metadata, - * Internal Bitmap issues have been handled elsewhere. 
- */ - if (rdev->meta_bdev) { - /* Nothing to check */; - } else if (rdev->data_offset < rdev->sb_start) { - if (mddev->dev_sectors && - rdev->data_offset + mddev->dev_sectors - > rdev->sb_start) { - printk("md: %s: data overlaps metadata\n", - mdname(mddev)); - return -EINVAL; - } - } else { - if (rdev->sb_start + rdev->sb_size/512 - > rdev->data_offset) { - printk("md: %s: metadata overlaps data\n", - mdname(mddev)); - return -EINVAL; - } - } - sysfs_notify_dirent_safe(rdev->sysfs_state); - } - - if (mddev->bio_set == NULL) - mddev->bio_set = bioset_create(BIO_POOL_SIZE, - sizeof(struct mddev *)); - - spin_lock(&pers_lock); - pers = find_pers(mddev->level, mddev->clevel); - if (!pers || !try_module_get(pers->owner)) { - spin_unlock(&pers_lock); - if (mddev->level != LEVEL_NONE) - printk(KERN_WARNING "md: personality for level %d is not loaded!\n", - mddev->level); - else - printk(KERN_WARNING "md: personality for level %s is not loaded!\n", - mddev->clevel); - return -EINVAL; - } - mddev->pers = pers; - spin_unlock(&pers_lock); - if (mddev->level != pers->level) { - mddev->level = pers->level; - mddev->new_level = pers->level; - } - strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); - - if (mddev->reshape_position != MaxSector && - pers->start_reshape == NULL) { - /* This personality cannot handle reshaping... */ - mddev->pers = NULL; - module_put(pers->owner); - return -EINVAL; - } - - if (pers->sync_request) { - /* Warn if this is a potentially silly - * configuration. - */ - char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; - struct md_rdev *rdev2; - int warned = 0; - - rdev_for_each(rdev, mddev) - rdev_for_each(rdev2, mddev) { - if (rdev < rdev2 && - rdev->bdev->bd_contains == - rdev2->bdev->bd_contains) { - printk(KERN_WARNING - "%s: WARNING: %s appears to be" - " on the same physical disk as" - " %s.\n", - mdname(mddev), - bdevname(rdev->bdev,b), - bdevname(rdev2->bdev,b2)); - warned = 1; - } - } - - if (warned) - printk(KERN_WARNING - "True protection against single-disk" - " failure might be compromised.\n"); - } - - mddev->recovery = 0; - /* may be over-ridden by personality */ - mddev->resync_max_sectors = mddev->dev_sectors; - - mddev->ok_start_degraded = start_dirty_degraded; - - if (start_readonly && mddev->ro == 0) - mddev->ro = 2; /* read-only, but switch on first write */ - - err = mddev->pers->run(mddev); - if (err) - printk(KERN_ERR "md: pers->run() failed ...\n"); - else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) { - WARN_ONCE(!mddev->external_size, "%s: default size too small," - " but 'external_size' not in effect?\n", __func__); - printk(KERN_ERR - "md: invalid array_size %llu > default size %llu\n", - (unsigned long long)mddev->array_sectors / 2, - (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2); - err = -EINVAL; - mddev->pers->stop(mddev); - } - if (err == 0 && mddev->pers->sync_request) { - err = bitmap_create(mddev); - if (err) { - printk(KERN_ERR "%s: failed to create bitmap (%d)\n", - mdname(mddev), err); - mddev->pers->stop(mddev); - } - } - if (err) { - module_put(mddev->pers->owner); - mddev->pers = NULL; - bitmap_destroy(mddev); - return err; - } - if (mddev->pers->sync_request) { - if (mddev->kobj.sd && - sysfs_create_group(&mddev->kobj, &md_redundancy_group)) - printk(KERN_WARNING - "md: cannot register extra attributes for %s\n", - mdname(mddev)); - mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action"); - } else if (mddev->ro == 2) /* auto-readonly not meaningful */ - mddev->ro = 0; - - 
atomic_set(&mddev->writes_pending,0); - atomic_set(&mddev->max_corr_read_errors, - MD_DEFAULT_MAX_CORRECTED_READ_ERRORS); - mddev->safemode = 0; - mddev->safemode_timer.function = md_safemode_timeout; - mddev->safemode_timer.data = (unsigned long) mddev; - mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ - mddev->in_sync = 1; - smp_wmb(); - mddev->ready = 1; - rdev_for_each(rdev, mddev) - if (rdev->raid_disk >= 0) - if (sysfs_link_rdev(mddev, rdev)) - /* failure here is OK */; - - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - - if (mddev->flags) - md_update_sb(mddev, 0); - - md_new_event(mddev); - sysfs_notify_dirent_safe(mddev->sysfs_state); - sysfs_notify_dirent_safe(mddev->sysfs_action); - sysfs_notify(&mddev->kobj, NULL, "degraded"); - return 0; -} -EXPORT_SYMBOL_GPL(md_run); - -static int do_md_run(struct mddev *mddev) -{ - int err; - - err = md_run(mddev); - if (err) - goto out; - err = bitmap_load(mddev); - if (err) { - bitmap_destroy(mddev); - goto out; - } - - md_wakeup_thread(mddev->thread); - md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ - - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); - mddev->changed = 1; - kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); -out: - return err; -} - -static int restart_array(struct mddev *mddev) -{ - struct gendisk *disk = mddev->gendisk; - - /* Complain if it has no devices */ - if (list_empty(&mddev->disks)) - return -ENXIO; - if (!mddev->pers) - return -EINVAL; - if (!mddev->ro) - return -EBUSY; - mddev->safemode = 0; - mddev->ro = 0; - set_disk_ro(disk, 0); - printk(KERN_INFO "md: %s switched to read-write mode.\n", - mdname(mddev)); - /* Kick recovery or resync if necessary */ - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); - md_wakeup_thread(mddev->sync_thread); - sysfs_notify_dirent_safe(mddev->sysfs_state); - return 0; -} - -/* similar to deny_write_access, but accounts for our holding a reference - * to the file ourselves */ -static int deny_bitmap_write_access(struct file * file) -{ - struct inode *inode = file->f_mapping->host; - - spin_lock(&inode->i_lock); - if (atomic_read(&inode->i_writecount) > 1) { - spin_unlock(&inode->i_lock); - return -ETXTBSY; - } - atomic_set(&inode->i_writecount, -1); - spin_unlock(&inode->i_lock); - - return 0; -} - -void restore_bitmap_write_access(struct file *file) -{ - struct inode *inode = file->f_mapping->host; - - spin_lock(&inode->i_lock); - atomic_set(&inode->i_writecount, 1); - spin_unlock(&inode->i_lock); -} - -static void md_clean(struct mddev *mddev) -{ - mddev->array_sectors = 0; - mddev->external_size = 0; - mddev->dev_sectors = 0; - mddev->raid_disks = 0; - mddev->recovery_cp = 0; - mddev->resync_min = 0; - mddev->resync_max = MaxSector; - mddev->reshape_position = MaxSector; - mddev->external = 0; - mddev->persistent = 0; - mddev->level = LEVEL_NONE; - mddev->clevel[0] = 0; - mddev->flags = 0; - mddev->ro = 0; - mddev->metadata_type[0] = 0; - mddev->chunk_sectors = 0; - mddev->ctime = mddev->utime = 0; - mddev->layout = 0; - mddev->max_disks = 0; - mddev->events = 0; - mddev->can_decrease_events = 0; - mddev->delta_disks = 0; - mddev->new_level = LEVEL_NONE; - mddev->new_layout = 0; - mddev->new_chunk_sectors = 0; - mddev->curr_resync = 0; - mddev->resync_mismatches = 0; - mddev->suspend_lo = mddev->suspend_hi = 0; - mddev->sync_speed_min = mddev->sync_speed_max = 0; - mddev->recovery = 0; - mddev->in_sync = 0; - mddev->changed = 0; - mddev->degraded = 
0; - mddev->safemode = 0; - mddev->merge_check_needed = 0; - mddev->bitmap_info.offset = 0; - mddev->bitmap_info.default_offset = 0; - mddev->bitmap_info.chunksize = 0; - mddev->bitmap_info.daemon_sleep = 0; - mddev->bitmap_info.max_write_behind = 0; -} - -static void __md_stop_writes(struct mddev *mddev) -{ - if (mddev->sync_thread) { - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - reap_sync_thread(mddev); - } - - del_timer_sync(&mddev->safemode_timer); - - bitmap_flush(mddev); - md_super_wait(mddev); - - if (!mddev->in_sync || mddev->flags) { - /* mark array as shutdown cleanly */ - mddev->in_sync = 1; - md_update_sb(mddev, 1); - } -} - -void md_stop_writes(struct mddev *mddev) -{ - mddev_lock(mddev); - __md_stop_writes(mddev); - mddev_unlock(mddev); -} -EXPORT_SYMBOL_GPL(md_stop_writes); - -void md_stop(struct mddev *mddev) -{ - mddev->ready = 0; - mddev->pers->stop(mddev); - if (mddev->pers->sync_request && mddev->to_remove == NULL) - mddev->to_remove = &md_redundancy_group; - module_put(mddev->pers->owner); - mddev->pers = NULL; - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); -} -EXPORT_SYMBOL_GPL(md_stop); - -static int md_set_readonly(struct mddev *mddev, int is_open) -{ - int err = 0; - mutex_lock(&mddev->open_mutex); - if (atomic_read(&mddev->openers) > is_open) { - printk("md: %s still in use.\n",mdname(mddev)); - err = -EBUSY; - goto out; - } - if (mddev->pers) { - __md_stop_writes(mddev); - - err = -ENXIO; - if (mddev->ro==1) - goto out; - mddev->ro = 1; - set_disk_ro(mddev->gendisk, 1); - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - sysfs_notify_dirent_safe(mddev->sysfs_state); - err = 0; - } -out: - mutex_unlock(&mddev->open_mutex); - return err; -} - -/* mode: - * 0 - completely stop and dis-assemble array - * 2 - stop but do not disassemble array - */ -static int do_md_stop(struct mddev * mddev, int mode, int is_open) -{ - struct gendisk *disk = mddev->gendisk; - struct md_rdev *rdev; - - mutex_lock(&mddev->open_mutex); - if (atomic_read(&mddev->openers) > is_open || - mddev->sysfs_active) { - printk("md: %s still in use.\n",mdname(mddev)); - mutex_unlock(&mddev->open_mutex); - return -EBUSY; - } - - if (mddev->pers) { - if (mddev->ro) - set_disk_ro(disk, 0); - - __md_stop_writes(mddev); - md_stop(mddev); - mddev->queue->merge_bvec_fn = NULL; - mddev->queue->backing_dev_info.congested_fn = NULL; - - /* tell userspace to handle 'inactive' */ - sysfs_notify_dirent_safe(mddev->sysfs_state); - - rdev_for_each(rdev, mddev) - if (rdev->raid_disk >= 0) - sysfs_unlink_rdev(mddev, rdev); - - set_capacity(disk, 0); - mutex_unlock(&mddev->open_mutex); - mddev->changed = 1; - revalidate_disk(disk); - - if (mddev->ro) - mddev->ro = 0; - } else - mutex_unlock(&mddev->open_mutex); - /* - * Free resources if final stop - */ - if (mode == 0) { - printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); - - bitmap_destroy(mddev); - if (mddev->bitmap_info.file) { - restore_bitmap_write_access(mddev->bitmap_info.file); - fput(mddev->bitmap_info.file); - mddev->bitmap_info.file = NULL; - } - mddev->bitmap_info.offset = 0; - - export_array(mddev); - - md_clean(mddev); - kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); - if (mddev->hold_active == UNTIL_STOP) - mddev->hold_active = 0; - } - blk_integrity_unregister(disk); - md_new_event(mddev); - sysfs_notify_dirent_safe(mddev->sysfs_state); - return 0; -} - -#ifndef MODULE -static void autorun_array(struct mddev *mddev) -{ - struct md_rdev *rdev; - int err; - - if 
(list_empty(&mddev->disks)) - return; - - printk(KERN_INFO "md: running: "); - - rdev_for_each(rdev, mddev) { - char b[BDEVNAME_SIZE]; - printk("<%s>", bdevname(rdev->bdev,b)); - } - printk("\n"); - - err = do_md_run(mddev); - if (err) { - printk(KERN_WARNING "md: do_md_run() returned %d\n", err); - do_md_stop(mddev, 0, 0); - } -} - -/* - * lets try to run arrays based on all disks that have arrived - * until now. (those are in pending_raid_disks) - * - * the method: pick the first pending disk, collect all disks with - * the same UUID, remove all from the pending list and put them into - * the 'same_array' list. Then order this list based on superblock - * update time (freshest comes first), kick out 'old' disks and - * compare superblocks. If everything's fine then run it. - * - * If "unit" is allocated, then bump its reference count - */ -static void autorun_devices(int part) -{ - struct md_rdev *rdev0, *rdev, *tmp; - struct mddev *mddev; - char b[BDEVNAME_SIZE]; - - printk(KERN_INFO "md: autorun ...\n"); - while (!list_empty(&pending_raid_disks)) { - int unit; - dev_t dev; - LIST_HEAD(candidates); - rdev0 = list_entry(pending_raid_disks.next, - struct md_rdev, same_set); - - printk(KERN_INFO "md: considering %s ...\n", - bdevname(rdev0->bdev,b)); - INIT_LIST_HEAD(&candidates); - rdev_for_each_list(rdev, tmp, &pending_raid_disks) - if (super_90_load(rdev, rdev0, 0) >= 0) { - printk(KERN_INFO "md: adding %s ...\n", - bdevname(rdev->bdev,b)); - list_move(&rdev->same_set, &candidates); - } - /* - * now we have a set of devices, with all of them having - * mostly sane superblocks. It's time to allocate the - * mddev. - */ - if (part) { - dev = MKDEV(mdp_major, - rdev0->preferred_minor << MdpMinorShift); - unit = MINOR(dev) >> MdpMinorShift; - } else { - dev = MKDEV(MD_MAJOR, rdev0->preferred_minor); - unit = MINOR(dev); - } - if (rdev0->preferred_minor != unit) { - printk(KERN_INFO "md: unit number in %s is bad: %d\n", - bdevname(rdev0->bdev, b), rdev0->preferred_minor); - break; - } - - md_probe(dev, NULL, NULL); - mddev = mddev_find(dev); - if (!mddev || !mddev->gendisk) { - if (mddev) - mddev_put(mddev); - printk(KERN_ERR - "md: cannot allocate memory for md drive.\n"); - break; - } - if (mddev_lock(mddev)) - printk(KERN_WARNING "md: %s locked, cannot run\n", - mdname(mddev)); - else if (mddev->raid_disks || mddev->major_version - || !list_empty(&mddev->disks)) { - printk(KERN_WARNING - "md: %s already running, cannot run %s\n", - mdname(mddev), bdevname(rdev0->bdev,b)); - mddev_unlock(mddev); - } else { - printk(KERN_INFO "md: created %s\n", mdname(mddev)); - mddev->persistent = 1; - rdev_for_each_list(rdev, tmp, &candidates) { - list_del_init(&rdev->same_set); - if (bind_rdev_to_array(rdev, mddev)) - export_rdev(rdev); - } - autorun_array(mddev); - mddev_unlock(mddev); - } - /* on success, candidates will be empty, on error - * it won't... - */ - rdev_for_each_list(rdev, tmp, &candidates) { - list_del_init(&rdev->same_set); - export_rdev(rdev); - } - mddev_put(mddev); - } - printk(KERN_INFO "md: ... 
autorun DONE.\n"); -} -#endif /* !MODULE */ - -static int get_version(void __user * arg) -{ - mdu_version_t ver; - - ver.major = MD_MAJOR_VERSION; - ver.minor = MD_MINOR_VERSION; - ver.patchlevel = MD_PATCHLEVEL_VERSION; - - if (copy_to_user(arg, &ver, sizeof(ver))) - return -EFAULT; - - return 0; -} - -static int get_array_info(struct mddev * mddev, void __user * arg) -{ - mdu_array_info_t info; - int nr,working,insync,failed,spare; - struct md_rdev *rdev; - - nr=working=insync=failed=spare=0; - rdev_for_each(rdev, mddev) { - nr++; - if (test_bit(Faulty, &rdev->flags)) - failed++; - else { - working++; - if (test_bit(In_sync, &rdev->flags)) - insync++; - else - spare++; - } - } - - info.major_version = mddev->major_version; - info.minor_version = mddev->minor_version; - info.patch_version = MD_PATCHLEVEL_VERSION; - info.ctime = mddev->ctime; - info.level = mddev->level; - info.size = mddev->dev_sectors / 2; - if (info.size != mddev->dev_sectors / 2) /* overflow */ - info.size = -1; - info.nr_disks = nr; - info.raid_disks = mddev->raid_disks; - info.md_minor = mddev->md_minor; - info.not_persistent= !mddev->persistent; - - info.utime = mddev->utime; - info.state = 0; - if (mddev->in_sync) - info.state = (1<<MD_SB_CLEAN); - if (mddev->bitmap && mddev->bitmap_info.offset) - info.state = (1<<MD_SB_BITMAP_PRESENT); - info.active_disks = insync; - info.working_disks = working; - info.failed_disks = failed; - info.spare_disks = spare; - - info.layout = mddev->layout; - info.chunk_size = mddev->chunk_sectors << 9; - - if (copy_to_user(arg, &info, sizeof(info))) - return -EFAULT; - - return 0; -} - -static int get_bitmap_file(struct mddev * mddev, void __user * arg) -{ - mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */ - char *ptr, *buf = NULL; - int err = -ENOMEM; - - if (md_allow_write(mddev)) - file = kmalloc(sizeof(*file), GFP_NOIO); - else - file = kmalloc(sizeof(*file), GFP_KERNEL); - - if (!file) - goto out; - - /* bitmap disabled, zero the first byte and copy out */ - if (!mddev->bitmap || !mddev->bitmap->file) { - file->pathname[0] = '\0'; - goto copy_out; - } - - buf = kmalloc(sizeof(file->pathname), GFP_KERNEL); - if (!buf) - goto out; - - ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname)); - if (IS_ERR(ptr)) - goto out; - - strcpy(file->pathname, ptr); - -copy_out: - err = 0; - if (copy_to_user(arg, file, sizeof(*file))) - err = -EFAULT; -out: - kfree(buf); - kfree(file); - return err; -} - -static int get_disk_info(struct mddev * mddev, void __user * arg) -{ - mdu_disk_info_t info; - struct md_rdev *rdev; - - if (copy_from_user(&info, arg, sizeof(info))) - return -EFAULT; - - rdev = find_rdev_nr(mddev, info.number); - if (rdev) { - info.major = MAJOR(rdev->bdev->bd_dev); - info.minor = MINOR(rdev->bdev->bd_dev); - info.raid_disk = rdev->raid_disk; - info.state = 0; - if (test_bit(Faulty, &rdev->flags)) - info.state |= (1<<MD_DISK_FAULTY); - else if (test_bit(In_sync, &rdev->flags)) { - info.state |= (1<<MD_DISK_ACTIVE); - info.state |= (1<<MD_DISK_SYNC); - } - if (test_bit(WriteMostly, &rdev->flags)) - info.state |= (1<<MD_DISK_WRITEMOSTLY); - } else { - info.major = info.minor = 0; - info.raid_disk = -1; - info.state = (1<<MD_DISK_REMOVED); - } - - if (copy_to_user(arg, &info, sizeof(info))) - return -EFAULT; - - return 0; -} - -static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info) -{ - char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; - struct md_rdev *rdev; - dev_t dev = MKDEV(info->major,info->minor); - - if (info->major != MAJOR(dev) 
|| info->minor != MINOR(dev)) - return -EOVERFLOW; - - if (!mddev->raid_disks) { - int err; - /* expecting a device which has a superblock */ - rdev = md_import_device(dev, mddev->major_version, mddev->minor_version); - if (IS_ERR(rdev)) { - printk(KERN_WARNING - "md: md_import_device returned %ld\n", - PTR_ERR(rdev)); - return PTR_ERR(rdev); - } - if (!list_empty(&mddev->disks)) { - struct md_rdev *rdev0 - = list_entry(mddev->disks.next, - struct md_rdev, same_set); - err = super_types[mddev->major_version] - .load_super(rdev, rdev0, mddev->minor_version); - if (err < 0) { - printk(KERN_WARNING - "md: %s has different UUID to %s\n", - bdevname(rdev->bdev,b), - bdevname(rdev0->bdev,b2)); - export_rdev(rdev); - return -EINVAL; - } - } - err = bind_rdev_to_array(rdev, mddev); - if (err) - export_rdev(rdev); - return err; - } - - /* - * add_new_disk can be used once the array is assembled - * to add "hot spares". They must already have a superblock - * written - */ - if (mddev->pers) { - int err; - if (!mddev->pers->hot_add_disk) { - printk(KERN_WARNING - "%s: personality does not support diskops!\n", - mdname(mddev)); - return -EINVAL; - } - if (mddev->persistent) - rdev = md_import_device(dev, mddev->major_version, - mddev->minor_version); - else - rdev = md_import_device(dev, -1, -1); - if (IS_ERR(rdev)) { - printk(KERN_WARNING - "md: md_import_device returned %ld\n", - PTR_ERR(rdev)); - return PTR_ERR(rdev); - } - /* set saved_raid_disk if appropriate */ - if (!mddev->persistent) { - if (info->state & (1<<MD_DISK_SYNC) && - info->raid_disk < mddev->raid_disks) { - rdev->raid_disk = info->raid_disk; - set_bit(In_sync, &rdev->flags); - } else - rdev->raid_disk = -1; - } else - super_types[mddev->major_version]. - validate_super(mddev, rdev); - if ((info->state & (1<<MD_DISK_SYNC)) && - (!test_bit(In_sync, &rdev->flags) || - rdev->raid_disk != info->raid_disk)) { - /* This was a hot-add request, but events doesn't - * match, so reject it. - */ - export_rdev(rdev); - return -EINVAL; - } - - if (test_bit(In_sync, &rdev->flags)) - rdev->saved_raid_disk = rdev->raid_disk; - else - rdev->saved_raid_disk = -1; - - clear_bit(In_sync, &rdev->flags); /* just to be sure */ - if (info->state & (1<<MD_DISK_WRITEMOSTLY)) - set_bit(WriteMostly, &rdev->flags); - else - clear_bit(WriteMostly, &rdev->flags); - - rdev->raid_disk = -1; - err = bind_rdev_to_array(rdev, mddev); - if (!err && !mddev->pers->hot_remove_disk) { - /* If there is hot_add_disk but no hot_remove_disk - * then added disks for geometry changes, - * and should be added immediately. - */ - super_types[mddev->major_version]. 
- validate_super(mddev, rdev); - err = mddev->pers->hot_add_disk(mddev, rdev); - if (err) - unbind_rdev_from_array(rdev); - } - if (err) - export_rdev(rdev); - else - sysfs_notify_dirent_safe(rdev->sysfs_state); - - md_update_sb(mddev, 1); - if (mddev->degraded) - set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - if (!err) - md_new_event(mddev); - md_wakeup_thread(mddev->thread); - return err; - } - - /* otherwise, add_new_disk is only allowed - * for major_version==0 superblocks - */ - if (mddev->major_version != 0) { - printk(KERN_WARNING "%s: ADD_NEW_DISK not supported\n", - mdname(mddev)); - return -EINVAL; - } - - if (!(info->state & (1<<MD_DISK_FAULTY))) { - int err; - rdev = md_import_device(dev, -1, 0); - if (IS_ERR(rdev)) { - printk(KERN_WARNING - "md: error, md_import_device() returned %ld\n", - PTR_ERR(rdev)); - return PTR_ERR(rdev); - } - rdev->desc_nr = info->number; - if (info->raid_disk < mddev->raid_disks) - rdev->raid_disk = info->raid_disk; - else - rdev->raid_disk = -1; - - if (rdev->raid_disk < mddev->raid_disks) - if (info->state & (1<<MD_DISK_SYNC)) - set_bit(In_sync, &rdev->flags); - - if (info->state & (1<<MD_DISK_WRITEMOSTLY)) - set_bit(WriteMostly, &rdev->flags); - - if (!mddev->persistent) { - printk(KERN_INFO "md: nonpersistent superblock ...\n"); - rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512; - } else - rdev->sb_start = calc_dev_sboffset(rdev); - rdev->sectors = rdev->sb_start; - - err = bind_rdev_to_array(rdev, mddev); - if (err) { - export_rdev(rdev); - return err; - } - } - - return 0; -} - -static int hot_remove_disk(struct mddev * mddev, dev_t dev) -{ - char b[BDEVNAME_SIZE]; - struct md_rdev *rdev; - - rdev = find_rdev(mddev, dev); - if (!rdev) - return -ENXIO; - - if (rdev->raid_disk >= 0) - goto busy; - - kick_rdev_from_array(rdev); - md_update_sb(mddev, 1); - md_new_event(mddev); - - return 0; -busy: - printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n", - bdevname(rdev->bdev,b), mdname(mddev)); - return -EBUSY; -} - -static int hot_add_disk(struct mddev * mddev, dev_t dev) -{ - char b[BDEVNAME_SIZE]; - int err; - struct md_rdev *rdev; - - if (!mddev->pers) - return -ENODEV; - - if (mddev->major_version != 0) { - printk(KERN_WARNING "%s: HOT_ADD may only be used with" - " version-0 superblocks.\n", - mdname(mddev)); - return -EINVAL; - } - if (!mddev->pers->hot_add_disk) { - printk(KERN_WARNING - "%s: personality does not support diskops!\n", - mdname(mddev)); - return -EINVAL; - } - - rdev = md_import_device(dev, -1, 0); - if (IS_ERR(rdev)) { - printk(KERN_WARNING - "md: error, md_import_device() returned %ld\n", - PTR_ERR(rdev)); - return -EINVAL; - } - - if (mddev->persistent) - rdev->sb_start = calc_dev_sboffset(rdev); - else - rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512; - - rdev->sectors = rdev->sb_start; - - if (test_bit(Faulty, &rdev->flags)) { - printk(KERN_WARNING - "md: can not hot-add faulty %s disk to %s!\n", - bdevname(rdev->bdev,b), mdname(mddev)); - err = -EINVAL; - goto abort_export; - } - clear_bit(In_sync, &rdev->flags); - rdev->desc_nr = -1; - rdev->saved_raid_disk = -1; - err = bind_rdev_to_array(rdev, mddev); - if (err) - goto abort_export; - - /* - * The rest should better be atomic, we can have disk failures - * noticed in interrupt contexts ... - */ - - rdev->raid_disk = -1; - - md_update_sb(mddev, 1); - - /* - * Kick recovery, maybe this spare has to be added to the - * array immediately. 
- */ - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); - md_new_event(mddev); - return 0; - -abort_export: - export_rdev(rdev); - return err; -} - -static int set_bitmap_file(struct mddev *mddev, int fd) -{ - int err; - - if (mddev->pers) { - if (!mddev->pers->quiesce) - return -EBUSY; - if (mddev->recovery || mddev->sync_thread) - return -EBUSY; - /* we should be able to change the bitmap.. */ - } - - - if (fd >= 0) { - if (mddev->bitmap) - return -EEXIST; /* cannot add when bitmap is present */ - mddev->bitmap_info.file = fget(fd); - - if (mddev->bitmap_info.file == NULL) { - printk(KERN_ERR "%s: error: failed to get bitmap file\n", - mdname(mddev)); - return -EBADF; - } - - err = deny_bitmap_write_access(mddev->bitmap_info.file); - if (err) { - printk(KERN_ERR "%s: error: bitmap file is already in use\n", - mdname(mddev)); - fput(mddev->bitmap_info.file); - mddev->bitmap_info.file = NULL; - return err; - } - mddev->bitmap_info.offset = 0; /* file overrides offset */ - } else if (mddev->bitmap == NULL) - return -ENOENT; /* cannot remove what isn't there */ - err = 0; - if (mddev->pers) { - mddev->pers->quiesce(mddev, 1); - if (fd >= 0) { - err = bitmap_create(mddev); - if (!err) - err = bitmap_load(mddev); - } - if (fd < 0 || err) { - bitmap_destroy(mddev); - fd = -1; /* make sure to put the file */ - } - mddev->pers->quiesce(mddev, 0); - } - if (fd < 0) { - if (mddev->bitmap_info.file) { - restore_bitmap_write_access(mddev->bitmap_info.file); - fput(mddev->bitmap_info.file); - } - mddev->bitmap_info.file = NULL; - } - - return err; -} - -/* - * set_array_info is used two different ways - * The original usage is when creating a new array. - * In this usage, raid_disks is > 0 and it together with - * level, size, not_persistent,layout,chunksize determine the - * shape of the array. - * This will always create an array with a type-0.90.0 superblock. - * The newer usage is when assembling an array. - * In this case raid_disks will be 0, and the major_version field is - * use to determine which style super-blocks are to be found on the devices. - * The minor and patch _version numbers are also kept incase the - * super_block handler wishes to interpret them. - */ -static int set_array_info(struct mddev * mddev, mdu_array_info_t *info) -{ - - if (info->raid_disks == 0) { - /* just setting version number for superblock loading */ - if (info->major_version < 0 || - info->major_version >= ARRAY_SIZE(super_types) || - super_types[info->major_version].name == NULL) { - /* maybe try to auto-load a module? */ - printk(KERN_INFO - "md: superblock version %d not known\n", - info->major_version); - return -EINVAL; - } - mddev->major_version = info->major_version; - mddev->minor_version = info->minor_version; - mddev->patch_version = info->patch_version; - mddev->persistent = !info->not_persistent; - /* ensure mddev_put doesn't delete this now that there - * is some minimal configuration. - */ - mddev->ctime = get_seconds(); - return 0; - } - mddev->major_version = MD_MAJOR_VERSION; - mddev->minor_version = MD_MINOR_VERSION; - mddev->patch_version = MD_PATCHLEVEL_VERSION; - mddev->ctime = get_seconds(); - - mddev->level = info->level; - mddev->clevel[0] = 0; - mddev->dev_sectors = 2 * (sector_t)info->size; - mddev->raid_disks = info->raid_disks; - /* don't set md_minor, it is determined by which /dev/md* was - * openned - */ - if (info->state & (1<<MD_SB_CLEAN)) - mddev->recovery_cp = MaxSector; - else - mddev->recovery_cp = 0; - mddev->persistent = ! 
info->not_persistent; - mddev->external = 0; - - mddev->layout = info->layout; - mddev->chunk_sectors = info->chunk_size >> 9; - - mddev->max_disks = MD_SB_DISKS; - - if (mddev->persistent) - mddev->flags = 0; - set_bit(MD_CHANGE_DEVS, &mddev->flags); - - mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9; - mddev->bitmap_info.offset = 0; - - mddev->reshape_position = MaxSector; - - /* - * Generate a 128 bit UUID - */ - get_random_bytes(mddev->uuid, 16); - - mddev->new_level = mddev->level; - mddev->new_chunk_sectors = mddev->chunk_sectors; - mddev->new_layout = mddev->layout; - mddev->delta_disks = 0; - - return 0; -} - -void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors) -{ - WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__); - - if (mddev->external_size) - return; - - mddev->array_sectors = array_sectors; -} -EXPORT_SYMBOL(md_set_array_sectors); - -static int update_size(struct mddev *mddev, sector_t num_sectors) -{ - struct md_rdev *rdev; - int rv; - int fit = (num_sectors == 0); - - if (mddev->pers->resize == NULL) - return -EINVAL; - /* The "num_sectors" is the number of sectors of each device that - * is used. This can only make sense for arrays with redundancy. - * linear and raid0 always use whatever space is available. We can only - * consider changing this number if no resync or reconstruction is - * happening, and if the new size is acceptable. It must fit before the - * sb_start or, if that is <data_offset, it must fit before the size - * of each device. If num_sectors is zero, we find the largest size - * that fits. - */ - if (mddev->sync_thread) - return -EBUSY; - if (mddev->bitmap) - /* Sorry, cannot grow a bitmap yet, just remove it, - * grow, and re-add. - */ - return -EBUSY; - rdev_for_each(rdev, mddev) { - sector_t avail = rdev->sectors; - - if (fit && (num_sectors == 0 || num_sectors > avail)) - num_sectors = avail; - if (avail < num_sectors) - return -ENOSPC; - } - rv = mddev->pers->resize(mddev, num_sectors); - if (!rv) - revalidate_disk(mddev->gendisk); - return rv; -} - -static int update_raid_disks(struct mddev *mddev, int raid_disks) -{ - int rv; - /* change the number of raid disks */ - if (mddev->pers->check_reshape == NULL) - return -EINVAL; - if (raid_disks <= 0 || - (mddev->max_disks && raid_disks >= mddev->max_disks)) - return -EINVAL; - if (mddev->sync_thread || mddev->reshape_position != MaxSector) - return -EBUSY; - mddev->delta_disks = raid_disks - mddev->raid_disks; - - rv = mddev->pers->check_reshape(mddev); - if (rv < 0) - mddev->delta_disks = 0; - return rv; -} - - -/* - * update_array_info is used to change the configuration of an - * on-line array. - * The version, ctime,level,size,raid_disks,not_persistent, layout,chunk_size - * fields in the info are checked against the array. - * Any differences that cannot be handled will cause an error. - * Normally, only one change can be managed at a time. 
- */ -static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) -{ - int rv = 0; - int cnt = 0; - int state = 0; - - /* calculate expected state,ignoring low bits */ - if (mddev->bitmap && mddev->bitmap_info.offset) - state |= (1 << MD_SB_BITMAP_PRESENT); - - if (mddev->major_version != info->major_version || - mddev->minor_version != info->minor_version || -/* mddev->patch_version != info->patch_version || */ - mddev->ctime != info->ctime || - mddev->level != info->level || -/* mddev->layout != info->layout || */ - !mddev->persistent != info->not_persistent|| - mddev->chunk_sectors != info->chunk_size >> 9 || - /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */ - ((state^info->state) & 0xfffffe00) - ) - return -EINVAL; - /* Check there is only one change */ - if (info->size >= 0 && mddev->dev_sectors / 2 != info->size) - cnt++; - if (mddev->raid_disks != info->raid_disks) - cnt++; - if (mddev->layout != info->layout) - cnt++; - if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) - cnt++; - if (cnt == 0) - return 0; - if (cnt > 1) - return -EINVAL; - - if (mddev->layout != info->layout) { - /* Change layout - * we don't need to do anything at the md level, the - * personality will take care of it all. - */ - if (mddev->pers->check_reshape == NULL) - return -EINVAL; - else { - mddev->new_layout = info->layout; - rv = mddev->pers->check_reshape(mddev); - if (rv) - mddev->new_layout = mddev->layout; - return rv; - } - } - if (info->size >= 0 && mddev->dev_sectors / 2 != info->size) - rv = update_size(mddev, (sector_t)info->size * 2); - - if (mddev->raid_disks != info->raid_disks) - rv = update_raid_disks(mddev, info->raid_disks); - - if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) { - if (mddev->pers->quiesce == NULL) - return -EINVAL; - if (mddev->recovery || mddev->sync_thread) - return -EBUSY; - if (info->state & (1<<MD_SB_BITMAP_PRESENT)) { - /* add the bitmap */ - if (mddev->bitmap) - return -EEXIST; - if (mddev->bitmap_info.default_offset == 0) - return -EINVAL; - mddev->bitmap_info.offset = - mddev->bitmap_info.default_offset; - mddev->pers->quiesce(mddev, 1); - rv = bitmap_create(mddev); - if (!rv) - rv = bitmap_load(mddev); - if (rv) - bitmap_destroy(mddev); - mddev->pers->quiesce(mddev, 0); - } else { - /* remove the bitmap */ - if (!mddev->bitmap) - return -ENOENT; - if (mddev->bitmap->file) - return -EINVAL; - mddev->pers->quiesce(mddev, 1); - bitmap_destroy(mddev); - mddev->pers->quiesce(mddev, 0); - mddev->bitmap_info.offset = 0; - } - } - md_update_sb(mddev, 1); - return rv; -} - -static int set_disk_faulty(struct mddev *mddev, dev_t dev) -{ - struct md_rdev *rdev; - - if (mddev->pers == NULL) - return -ENODEV; - - rdev = find_rdev(mddev, dev); - if (!rdev) - return -ENODEV; - - md_error(mddev, rdev); - if (!test_bit(Faulty, &rdev->flags)) - return -EBUSY; - return 0; -} - -/* - * We have a problem here : there is no easy way to give a CHS - * virtual geometry. We currently pretend that we have a 2 heads - * 4 sectors (with a BIG number of cylinders...). This drives - * dosfs just mad... 
;-) - */ -static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - struct mddev *mddev = bdev->bd_disk->private_data; - - geo->heads = 2; - geo->sectors = 4; - geo->cylinders = mddev->array_sectors / 8; - return 0; -} - -static int md_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - int err = 0; - void __user *argp = (void __user *)arg; - struct mddev *mddev = NULL; - int ro; - - switch (cmd) { - case RAID_VERSION: - case GET_ARRAY_INFO: - case GET_DISK_INFO: - break; - default: - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - } - - /* - * Commands dealing with the RAID driver but not any - * particular array: - */ - switch (cmd) - { - case RAID_VERSION: - err = get_version(argp); - goto done; - - case PRINT_RAID_DEBUG: - err = 0; - md_print_devices(); - goto done; - -#ifndef MODULE - case RAID_AUTORUN: - err = 0; - autostart_arrays(arg); - goto done; -#endif - default:; - } - - /* - * Commands creating/starting a new array: - */ - - mddev = bdev->bd_disk->private_data; - - if (!mddev) { - BUG(); - goto abort; - } - - err = mddev_lock(mddev); - if (err) { - printk(KERN_INFO - "md: ioctl lock interrupted, reason %d, cmd %d\n", - err, cmd); - goto abort; - } - - switch (cmd) - { - case SET_ARRAY_INFO: - { - mdu_array_info_t info; - if (!arg) - memset(&info, 0, sizeof(info)); - else if (copy_from_user(&info, argp, sizeof(info))) { - err = -EFAULT; - goto abort_unlock; - } - if (mddev->pers) { - err = update_array_info(mddev, &info); - if (err) { - printk(KERN_WARNING "md: couldn't update" - " array info. %d\n", err); - goto abort_unlock; - } - goto done_unlock; - } - if (!list_empty(&mddev->disks)) { - printk(KERN_WARNING - "md: array %s already has disks!\n", - mdname(mddev)); - err = -EBUSY; - goto abort_unlock; - } - if (mddev->raid_disks) { - printk(KERN_WARNING - "md: array %s already initialised!\n", - mdname(mddev)); - err = -EBUSY; - goto abort_unlock; - } - err = set_array_info(mddev, &info); - if (err) { - printk(KERN_WARNING "md: couldn't set" - " array info. %d\n", err); - goto abort_unlock; - } - } - goto done_unlock; - - default:; - } - - /* - * Commands querying/configuring an existing array: - */ - /* if we are not initialised yet, only ADD_NEW_DISK, STOP_ARRAY, - * RUN_ARRAY, and GET_ and SET_BITMAP_FILE are allowed */ - if ((!mddev->raid_disks && !mddev->external) - && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY - && cmd != RUN_ARRAY && cmd != SET_BITMAP_FILE - && cmd != GET_BITMAP_FILE) { - err = -ENODEV; - goto abort_unlock; - } - - /* - * Commands even a read-only array can execute: - */ - switch (cmd) - { - case GET_ARRAY_INFO: - err = get_array_info(mddev, argp); - goto done_unlock; - - case GET_BITMAP_FILE: - err = get_bitmap_file(mddev, argp); - goto done_unlock; - - case GET_DISK_INFO: - err = get_disk_info(mddev, argp); - goto done_unlock; - - case RESTART_ARRAY_RW: - err = restart_array(mddev); - goto done_unlock; - - case STOP_ARRAY: - err = do_md_stop(mddev, 0, 1); - goto done_unlock; - - case STOP_ARRAY_RO: - err = md_set_readonly(mddev, 1); - goto done_unlock; - - case BLKROSET: - if (get_user(ro, (int __user *)(arg))) { - err = -EFAULT; - goto done_unlock; - } - err = -EINVAL; - - /* if the bdev is going readonly the value of mddev->ro - * does not matter, no writes are coming - */ - if (ro) - goto done_unlock; - - /* are we are already prepared for writes? 
*/ - if (mddev->ro != 1) - goto done_unlock; - - /* transitioning to readauto need only happen for - * arrays that call md_write_start - */ - if (mddev->pers) { - err = restart_array(mddev); - if (err == 0) { - mddev->ro = 2; - set_disk_ro(mddev->gendisk, 0); - } - } - goto done_unlock; - } - - /* - * The remaining ioctls are changing the state of the - * superblock, so we do not allow them on read-only arrays. - * However non-MD ioctls (e.g. get-size) will still come through - * here and hit the 'default' below, so only disallow - * 'md' ioctls, and switch to rw mode if started auto-readonly. - */ - if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) { - if (mddev->ro == 2) { - mddev->ro = 0; - sysfs_notify_dirent_safe(mddev->sysfs_state); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); - } else { - err = -EROFS; - goto abort_unlock; - } - } - - switch (cmd) - { - case ADD_NEW_DISK: - { - mdu_disk_info_t info; - if (copy_from_user(&info, argp, sizeof(info))) - err = -EFAULT; - else - err = add_new_disk(mddev, &info); - goto done_unlock; - } - - case HOT_REMOVE_DISK: - err = hot_remove_disk(mddev, new_decode_dev(arg)); - goto done_unlock; - - case HOT_ADD_DISK: - err = hot_add_disk(mddev, new_decode_dev(arg)); - goto done_unlock; - - case SET_DISK_FAULTY: - err = set_disk_faulty(mddev, new_decode_dev(arg)); - goto done_unlock; - - case RUN_ARRAY: - err = do_md_run(mddev); - goto done_unlock; - - case SET_BITMAP_FILE: - err = set_bitmap_file(mddev, (int)arg); - goto done_unlock; - - default: - err = -EINVAL; - goto abort_unlock; - } - -done_unlock: -abort_unlock: - if (mddev->hold_active == UNTIL_IOCTL && - err != -EINVAL) - mddev->hold_active = 0; - mddev_unlock(mddev); - - return err; -done: - if (err) - MD_BUG(); -abort: - return err; -} -#ifdef CONFIG_COMPAT -static int md_compat_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - case HOT_REMOVE_DISK: - case HOT_ADD_DISK: - case SET_DISK_FAULTY: - case SET_BITMAP_FILE: - /* These take in integer arg, do not convert */ - break; - default: - arg = (unsigned long)compat_ptr(arg); - break; - } - - return md_ioctl(bdev, mode, cmd, arg); -} -#endif /* CONFIG_COMPAT */ - -static int md_open(struct block_device *bdev, fmode_t mode) -{ - /* - * Succeed if we can lock the mddev, which confirms that - * it isn't being stopped right now. - */ - struct mddev *mddev = mddev_find(bdev->bd_dev); - int err; - - if (mddev->gendisk != bdev->bd_disk) { - /* we are racing with mddev_put which is discarding this - * bd_disk. 
- */ - mddev_put(mddev); - /* Wait until bdev->bd_disk is definitely gone */ - flush_workqueue(md_misc_wq); - /* Then retry the open from the top */ - return -ERESTARTSYS; - } - BUG_ON(mddev != bdev->bd_disk->private_data); - - if ((err = mutex_lock_interruptible(&mddev->open_mutex))) - goto out; - - err = 0; - atomic_inc(&mddev->openers); - mutex_unlock(&mddev->open_mutex); - - check_disk_change(bdev); - out: - return err; -} - -static int md_release(struct gendisk *disk, fmode_t mode) -{ - struct mddev *mddev = disk->private_data; - - BUG_ON(!mddev); - atomic_dec(&mddev->openers); - mddev_put(mddev); - - return 0; -} - -static int md_media_changed(struct gendisk *disk) -{ - struct mddev *mddev = disk->private_data; - - return mddev->changed; -} - -static int md_revalidate(struct gendisk *disk) -{ - struct mddev *mddev = disk->private_data; - - mddev->changed = 0; - return 0; -} -static const struct block_device_operations md_fops = -{ - .owner = THIS_MODULE, - .open = md_open, - .release = md_release, - .ioctl = md_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = md_compat_ioctl, -#endif - .getgeo = md_getgeo, - .media_changed = md_media_changed, - .revalidate_disk= md_revalidate, -}; - -static int md_thread(void * arg) -{ - struct md_thread *thread = arg; - - /* - * md_thread is a 'system-thread', it's priority should be very - * high. We avoid resource deadlocks individually in each - * raid personality. (RAID5 does preallocation) We also use RR and - * the very same RT priority as kswapd, thus we will never get - * into a priority inversion deadlock. - * - * we definitely have to have equal or higher priority than - * bdflush, otherwise bdflush will deadlock if there are too - * many dirty RAID5 blocks. - */ - - allow_signal(SIGKILL); - while (!kthread_should_stop()) { - - /* We need to wait INTERRUPTIBLE so that - * we don't add to the load-average. 
- * That means we need to be sure no signals are - * pending - */ - if (signal_pending(current)) - flush_signals(current); - - wait_event_interruptible_timeout - (thread->wqueue, - test_bit(THREAD_WAKEUP, &thread->flags) - || kthread_should_stop(), - thread->timeout); - - clear_bit(THREAD_WAKEUP, &thread->flags); - if (!kthread_should_stop()) - thread->run(thread->mddev); - } - - return 0; -} - -void md_wakeup_thread(struct md_thread *thread) -{ - if (thread) { - pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm); - set_bit(THREAD_WAKEUP, &thread->flags); - wake_up(&thread->wqueue); - } -} - -struct md_thread *md_register_thread(void (*run) (struct mddev *), struct mddev *mddev, - const char *name) -{ - struct md_thread *thread; - - thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL); - if (!thread) - return NULL; - - init_waitqueue_head(&thread->wqueue); - - thread->run = run; - thread->mddev = mddev; - thread->timeout = MAX_SCHEDULE_TIMEOUT; - thread->tsk = kthread_run(md_thread, thread, - "%s_%s", - mdname(thread->mddev), - name ?: mddev->pers->name); - if (IS_ERR(thread->tsk)) { - kfree(thread); - return NULL; - } - return thread; -} - -void md_unregister_thread(struct md_thread **threadp) -{ - struct md_thread *thread = *threadp; - if (!thread) - return; - pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk)); - /* Locking ensures that mddev_unlock does not wake_up a - * non-existent thread - */ - spin_lock(&pers_lock); - *threadp = NULL; - spin_unlock(&pers_lock); - - kthread_stop(thread->tsk); - kfree(thread); -} - -void md_error(struct mddev *mddev, struct md_rdev *rdev) -{ - if (!mddev) { - MD_BUG(); - return; - } - - if (!rdev || test_bit(Faulty, &rdev->flags)) - return; - - if (!mddev->pers || !mddev->pers->error_handler) - return; - mddev->pers->error_handler(mddev,rdev); - if (mddev->degraded) - set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); - sysfs_notify_dirent_safe(rdev->sysfs_state); - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); - if (mddev->event_work.func) - queue_work(md_misc_wq, &mddev->event_work); - md_new_event_inintr(mddev); -} - -/* seq_file implementation /proc/mdstat */ - -static void status_unused(struct seq_file *seq) -{ - int i = 0; - struct md_rdev *rdev; - - seq_printf(seq, "unused devices: "); - - list_for_each_entry(rdev, &pending_raid_disks, same_set) { - char b[BDEVNAME_SIZE]; - i++; - seq_printf(seq, "%s ", - bdevname(rdev->bdev,b)); - } - if (!i) - seq_printf(seq, "<none>"); - - seq_printf(seq, "\n"); -} - - -static void status_resync(struct seq_file *seq, struct mddev * mddev) -{ - sector_t max_sectors, resync, res; - unsigned long dt, db; - sector_t rt; - int scale; - unsigned int per_milli; - - resync = mddev->curr_resync - atomic_read(&mddev->recovery_active); - - if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) - max_sectors = mddev->resync_max_sectors; - else - max_sectors = mddev->dev_sectors; - - /* - * Should not happen. - */ - if (!max_sectors) { - MD_BUG(); - return; - } - /* Pick 'scale' such that (resync>>scale)*1000 will fit - * in a sector_t, and (max_sectors>>scale) will fit in a - * u32, as those are the requirements for sector_div. 
- * Thus 'scale' must be at least 10 - */ - scale = 10; - if (sizeof(sector_t) > sizeof(unsigned long)) { - while ( max_sectors/2 > (1ULL<<(scale+32))) - scale++; - } - res = (resync>>scale)*1000; - sector_div(res, (u32)((max_sectors>>scale)+1)); - - per_milli = res; - { - int i, x = per_milli/50, y = 20-x; - seq_printf(seq, "["); - for (i = 0; i < x; i++) - seq_printf(seq, "="); - seq_printf(seq, ">"); - for (i = 0; i < y; i++) - seq_printf(seq, "."); - seq_printf(seq, "] "); - } - seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)", - (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)? - "reshape" : - (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)? - "check" : - (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ? - "resync" : "recovery"))), - per_milli/10, per_milli % 10, - (unsigned long long) resync/2, - (unsigned long long) max_sectors/2); - - /* - * dt: time from mark until now - * db: blocks written from mark until now - * rt: remaining time - * - * rt is a sector_t, so could be 32bit or 64bit. - * So we divide before multiply in case it is 32bit and close - * to the limit. - * We scale the divisor (db) by 32 to avoid losing precision - * near the end of resync when the number of remaining sectors - * is close to 'db'. - * We then divide rt by 32 after multiplying by db to compensate. - * The '+1' avoids division by zero if db is very small. - */ - dt = ((jiffies - mddev->resync_mark) / HZ); - if (!dt) dt++; - db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active)) - - mddev->resync_mark_cnt; - - rt = max_sectors - resync; /* number of remaining sectors */ - sector_div(rt, db/32+1); - rt *= dt; - rt >>= 5; - - seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60, - ((unsigned long)rt % 60)/6); - - seq_printf(seq, " speed=%ldK/sec", db/2/dt); -} - -static void *md_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct list_head *tmp; - loff_t l = *pos; - struct mddev *mddev; - - if (l >= 0x10000) - return NULL; - if (!l--) - /* header */ - return (void*)1; - - spin_lock(&all_mddevs_lock); - list_for_each(tmp,&all_mddevs) - if (!l--) { - mddev = list_entry(tmp, struct mddev, all_mddevs); - mddev_get(mddev); - spin_unlock(&all_mddevs_lock); - return mddev; - } - spin_unlock(&all_mddevs_lock); - if (!l--) - return (void*)2;/* tail */ - return NULL; -} - -static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct list_head *tmp; - struct mddev *next_mddev, *mddev = v; - - ++*pos; - if (v == (void*)2) - return NULL; - - spin_lock(&all_mddevs_lock); - if (v == (void*)1) - tmp = all_mddevs.next; - else - tmp = mddev->all_mddevs.next; - if (tmp != &all_mddevs) - next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs)); - else { - next_mddev = (void*)2; - *pos = 0x10000; - } - spin_unlock(&all_mddevs_lock); - - if (v != (void*)1) - mddev_put(mddev); - return next_mddev; - -} - -static void md_seq_stop(struct seq_file *seq, void *v) -{ - struct mddev *mddev = v; - - if (mddev && v != (void*)1 && v != (void*)2) - mddev_put(mddev); -} - -static int md_seq_show(struct seq_file *seq, void *v) -{ - struct mddev *mddev = v; - sector_t sectors; - struct md_rdev *rdev; - - if (v == (void*)1) { - struct md_personality *pers; - seq_printf(seq, "Personalities : "); - spin_lock(&pers_lock); - list_for_each_entry(pers, &pers_list, list) - seq_printf(seq, "[%s] ", pers->name); - - spin_unlock(&pers_lock); - seq_printf(seq, "\n"); - seq->poll_event = atomic_read(&md_event_count); - return 0; - } - if (v == (void*)2) { - status_unused(seq); - return 0; - } - - if 
(mddev_lock(mddev) < 0) - return -EINTR; - - if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) { - seq_printf(seq, "%s : %sactive", mdname(mddev), - mddev->pers ? "" : "in"); - if (mddev->pers) { - if (mddev->ro==1) - seq_printf(seq, " (read-only)"); - if (mddev->ro==2) - seq_printf(seq, " (auto-read-only)"); - seq_printf(seq, " %s", mddev->pers->name); - } - - sectors = 0; - rdev_for_each(rdev, mddev) { - char b[BDEVNAME_SIZE]; - seq_printf(seq, " %s[%d]", - bdevname(rdev->bdev,b), rdev->desc_nr); - if (test_bit(WriteMostly, &rdev->flags)) - seq_printf(seq, "(W)"); - if (test_bit(Faulty, &rdev->flags)) { - seq_printf(seq, "(F)"); - continue; - } - if (rdev->raid_disk < 0) - seq_printf(seq, "(S)"); /* spare */ - if (test_bit(Replacement, &rdev->flags)) - seq_printf(seq, "(R)"); - sectors += rdev->sectors; - } - - if (!list_empty(&mddev->disks)) { - if (mddev->pers) - seq_printf(seq, "\n %llu blocks", - (unsigned long long) - mddev->array_sectors / 2); - else - seq_printf(seq, "\n %llu blocks", - (unsigned long long)sectors / 2); - } - if (mddev->persistent) { - if (mddev->major_version != 0 || - mddev->minor_version != 90) { - seq_printf(seq," super %d.%d", - mddev->major_version, - mddev->minor_version); - } - } else if (mddev->external) - seq_printf(seq, " super external:%s", - mddev->metadata_type); - else - seq_printf(seq, " super non-persistent"); - - if (mddev->pers) { - mddev->pers->status(seq, mddev); - seq_printf(seq, "\n "); - if (mddev->pers->sync_request) { - if (mddev->curr_resync > 2) { - status_resync(seq, mddev); - seq_printf(seq, "\n "); - } else if (mddev->curr_resync == 1 || mddev->curr_resync == 2) - seq_printf(seq, "\tresync=DELAYED\n "); - else if (mddev->recovery_cp < MaxSector) - seq_printf(seq, "\tresync=PENDING\n "); - } - } else - seq_printf(seq, "\n "); - - bitmap_status(seq, mddev->bitmap); - - seq_printf(seq, "\n"); - } - mddev_unlock(mddev); - - return 0; -} - -static const struct seq_operations md_seq_ops = { - .start = md_seq_start, - .next = md_seq_next, - .stop = md_seq_stop, - .show = md_seq_show, -}; - -static int md_seq_open(struct inode *inode, struct file *file) -{ - struct seq_file *seq; - int error; - - error = seq_open(file, &md_seq_ops); - if (error) - return error; - - seq = file->private_data; - seq->poll_event = atomic_read(&md_event_count); - return error; -} - -static unsigned int mdstat_poll(struct file *filp, poll_table *wait) -{ - struct seq_file *seq = filp->private_data; - int mask; - - poll_wait(filp, &md_event_waiters, wait); - - /* always allow read */ - mask = POLLIN | POLLRDNORM; - - if (seq->poll_event != atomic_read(&md_event_count)) - mask |= POLLERR | POLLPRI; - return mask; -} - -static const struct file_operations md_seq_fops = { - .owner = THIS_MODULE, - .open = md_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, - .poll = mdstat_poll, -}; - -int register_md_personality(struct md_personality *p) -{ - spin_lock(&pers_lock); - list_add_tail(&p->list, &pers_list); - printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level); - spin_unlock(&pers_lock); - return 0; -} - -int unregister_md_personality(struct md_personality *p) -{ - printk(KERN_INFO "md: %s personality unregistered\n", p->name); - spin_lock(&pers_lock); - list_del_init(&p->list); - spin_unlock(&pers_lock); - return 0; -} - -static int is_mddev_idle(struct mddev *mddev, int init) -{ - struct md_rdev * rdev; - int idle; - int curr_events; - - idle = 1; - rcu_read_lock(); - 
rdev_for_each_rcu(rdev, mddev) { - struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; - curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + - (int)part_stat_read(&disk->part0, sectors[1]) - - atomic_read(&disk->sync_io); - /* sync IO will cause sync_io to increase before the disk_stats - * as sync_io is counted when a request starts, and - * disk_stats is counted when it completes. - * So resync activity will cause curr_events to be smaller than - * when there was no such activity. - * non-sync IO will cause disk_stat to increase without - * increasing sync_io so curr_events will (eventually) - * be larger than it was before. Once it becomes - * substantially larger, the test below will cause - * the array to appear non-idle, and resync will slow - * down. - * If there is a lot of outstanding resync activity when - * we set last_event to curr_events, then all that activity - * completing might cause the array to appear non-idle - * and resync will be slowed down even though there might - * not have been non-resync activity. This will only - * happen once though. 'last_events' will soon reflect - * the state where there is little or no outstanding - * resync requests, and further resync activity will - * always make curr_events less than last_events. - * - */ - if (init || curr_events - rdev->last_events > 64) { - rdev->last_events = curr_events; - idle = 0; - } - } - rcu_read_unlock(); - return idle; -} - -void md_done_sync(struct mddev *mddev, int blocks, int ok) -{ - /* another "blocks" (512byte) blocks have been synced */ - atomic_sub(blocks, &mddev->recovery_active); - wake_up(&mddev->recovery_wait); - if (!ok) { - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - md_wakeup_thread(mddev->thread); - // stop recovery, signal do_sync .... - } -} - - -/* md_write_start(mddev, bi) - * If we need to update some array metadata (e.g. 'active' flag - * in superblock) before writing, schedule a superblock update - * and wait for it to complete. - */ -void md_write_start(struct mddev *mddev, struct bio *bi) -{ - int did_change = 0; - if (bio_data_dir(bi) != WRITE) - return; - - BUG_ON(mddev->ro == 1); - if (mddev->ro == 2) { - /* need to switch to read/write */ - mddev->ro = 0; - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); - md_wakeup_thread(mddev->sync_thread); - did_change = 1; - } - atomic_inc(&mddev->writes_pending); - if (mddev->safemode == 1) - mddev->safemode = 0; - if (mddev->in_sync) { - spin_lock_irq(&mddev->write_lock); - if (mddev->in_sync) { - mddev->in_sync = 0; - set_bit(MD_CHANGE_CLEAN, &mddev->flags); - set_bit(MD_CHANGE_PENDING, &mddev->flags); - md_wakeup_thread(mddev->thread); - did_change = 1; - } - spin_unlock_irq(&mddev->write_lock); - } - if (did_change) - sysfs_notify_dirent_safe(mddev->sysfs_state); - wait_event(mddev->sb_wait, - !test_bit(MD_CHANGE_PENDING, &mddev->flags)); -} - -void md_write_end(struct mddev *mddev) -{ - if (atomic_dec_and_test(&mddev->writes_pending)) { - if (mddev->safemode == 2) - md_wakeup_thread(mddev->thread); - else if (mddev->safemode_delay) - mod_timer(&mddev->safemode_timer, jiffies + mddev->safemode_delay); - } -} - -/* md_allow_write(mddev) - * Calling this ensures that the array is marked 'active' so that writes - * may proceed without blocking. It is important to call this before - * attempting a GFP_KERNEL allocation while holding the mddev lock. - * Must be called with mddev_lock held. 
- * - * In the ->external case MD_CHANGE_CLEAN can not be cleared until mddev->lock - * is dropped, so return -EAGAIN after notifying userspace. - */ -int md_allow_write(struct mddev *mddev) -{ - if (!mddev->pers) - return 0; - if (mddev->ro) - return 0; - if (!mddev->pers->sync_request) - return 0; - - spin_lock_irq(&mddev->write_lock); - if (mddev->in_sync) { - mddev->in_sync = 0; - set_bit(MD_CHANGE_CLEAN, &mddev->flags); - set_bit(MD_CHANGE_PENDING, &mddev->flags); - if (mddev->safemode_delay && - mddev->safemode == 0) - mddev->safemode = 1; - spin_unlock_irq(&mddev->write_lock); - md_update_sb(mddev, 0); - sysfs_notify_dirent_safe(mddev->sysfs_state); - } else - spin_unlock_irq(&mddev->write_lock); - - if (test_bit(MD_CHANGE_PENDING, &mddev->flags)) - return -EAGAIN; - else - return 0; -} -EXPORT_SYMBOL_GPL(md_allow_write); - -#define SYNC_MARKS 10 -#define SYNC_MARK_STEP (3*HZ) -void md_do_sync(struct mddev *mddev) -{ - struct mddev *mddev2; - unsigned int currspeed = 0, - window; - sector_t max_sectors,j, io_sectors; - unsigned long mark[SYNC_MARKS]; - sector_t mark_cnt[SYNC_MARKS]; - int last_mark,m; - struct list_head *tmp; - sector_t last_check; - int skipped = 0; - struct md_rdev *rdev; - char *desc; - - /* just incase thread restarts... */ - if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) - return; - if (mddev->ro) /* never try to sync a read-only array */ - return; - - if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { - if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) - desc = "data-check"; - else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) - desc = "requested-resync"; - else - desc = "resync"; - } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) - desc = "reshape"; - else - desc = "recovery"; - - /* we overload curr_resync somewhat here. - * 0 == not engaged in resync at all - * 2 == checking that there is no conflict with another sync - * 1 == like 2, but have yielded to allow conflicting resync to - * commense - * other == active in resync - this many blocks - * - * Before starting a resync we must have set curr_resync to - * 2, and then checked that every "conflicting" array has curr_resync - * less than ours. When we find one that is the same or higher - * we wait on resync_wait. To avoid deadlock, we reduce curr_resync - * to 1 if we choose to yield (based arbitrarily on address of mddev structure). - * This will mean we have to start checking from the beginning again. 
- * - */ - - do { - mddev->curr_resync = 2; - - try_again: - if (kthread_should_stop()) - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - - if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) - goto skip; - for_each_mddev(mddev2, tmp) { - if (mddev2 == mddev) - continue; - if (!mddev->parallel_resync - && mddev2->curr_resync - && match_mddev_units(mddev, mddev2)) { - DEFINE_WAIT(wq); - if (mddev < mddev2 && mddev->curr_resync == 2) { - /* arbitrarily yield */ - mddev->curr_resync = 1; - wake_up(&resync_wait); - } - if (mddev > mddev2 && mddev->curr_resync == 1) - /* no need to wait here, we can wait the next - * time 'round when curr_resync == 2 - */ - continue; - /* We need to wait 'interruptible' so as not to - * contribute to the load average, and not to - * be caught by 'softlockup' - */ - prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE); - if (!kthread_should_stop() && - mddev2->curr_resync >= mddev->curr_resync) { - printk(KERN_INFO "md: delaying %s of %s" - " until %s has finished (they" - " share one or more physical units)\n", - desc, mdname(mddev), mdname(mddev2)); - mddev_put(mddev2); - if (signal_pending(current)) - flush_signals(current); - schedule(); - finish_wait(&resync_wait, &wq); - goto try_again; - } - finish_wait(&resync_wait, &wq); - } - } - } while (mddev->curr_resync < 2); - - j = 0; - if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { - /* resync follows the size requested by the personality, - * which defaults to physical size, but can be virtual size - */ - max_sectors = mddev->resync_max_sectors; - mddev->resync_mismatches = 0; - /* we don't use the checkpoint if there's a bitmap */ - if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) - j = mddev->resync_min; - else if (!mddev->bitmap) - j = mddev->recovery_cp; - - } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) - max_sectors = mddev->dev_sectors; - else { - /* recovery follows the physical size of devices */ - max_sectors = mddev->dev_sectors; - j = MaxSector; - rcu_read_lock(); - rdev_for_each_rcu(rdev, mddev) - if (rdev->raid_disk >= 0 && - !test_bit(Faulty, &rdev->flags) && - !test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < j) - j = rdev->recovery_offset; - rcu_read_unlock(); - } - - printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev)); - printk(KERN_INFO "md: minimum _guaranteed_ speed:" - " %d KB/sec/disk.\n", speed_min(mddev)); - printk(KERN_INFO "md: using maximum available idle IO bandwidth " - "(but not more than %d KB/sec) for %s.\n", - speed_max(mddev), desc); - - is_mddev_idle(mddev, 1); /* this initializes IO event counters */ - - io_sectors = 0; - for (m = 0; m < SYNC_MARKS; m++) { - mark[m] = jiffies; - mark_cnt[m] = io_sectors; - } - last_mark = 0; - mddev->resync_mark = mark[last_mark]; - mddev->resync_mark_cnt = mark_cnt[last_mark]; - - /* - * Tune reconstruction: - */ - window = 32*(PAGE_SIZE/512); - printk(KERN_INFO "md: using %dk window, over a total of %lluk.\n", - window/2, (unsigned long long)max_sectors/2); - - atomic_set(&mddev->recovery_active, 0); - last_check = 0; - - if (j>2) { - printk(KERN_INFO - "md: resuming %s of %s from checkpoint.\n", - desc, mdname(mddev)); - mddev->curr_resync = j; - } - mddev->curr_resync_completed = j; - - while (j < max_sectors) { - sector_t sectors; - - skipped = 0; - - if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && - ((mddev->curr_resync > mddev->curr_resync_completed && - (mddev->curr_resync - mddev->curr_resync_completed) - > (max_sectors >> 4)) || - (j - mddev->curr_resync_completed)*2 - >= 
mddev->resync_max - mddev->curr_resync_completed - )) { - /* time to update curr_resync_completed */ - wait_event(mddev->recovery_wait, - atomic_read(&mddev->recovery_active) == 0); - mddev->curr_resync_completed = j; - set_bit(MD_CHANGE_CLEAN, &mddev->flags); - sysfs_notify(&mddev->kobj, NULL, "sync_completed"); - } - - while (j >= mddev->resync_max && !kthread_should_stop()) { - /* As this condition is controlled by user-space, - * we can block indefinitely, so use '_interruptible' - * to avoid triggering warnings. - */ - flush_signals(current); /* just in case */ - wait_event_interruptible(mddev->recovery_wait, - mddev->resync_max > j - || kthread_should_stop()); - } - - if (kthread_should_stop()) - goto interrupted; - - sectors = mddev->pers->sync_request(mddev, j, &skipped, - currspeed < speed_min(mddev)); - if (sectors == 0) { - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - goto out; - } - - if (!skipped) { /* actual IO requested */ - io_sectors += sectors; - atomic_add(sectors, &mddev->recovery_active); - } - - if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) - break; - - j += sectors; - if (j>1) mddev->curr_resync = j; - mddev->curr_mark_cnt = io_sectors; - if (last_check == 0) - /* this is the earliest that rebuild will be - * visible in /proc/mdstat - */ - md_new_event(mddev); - - if (last_check + window > io_sectors || j == max_sectors) - continue; - - last_check = io_sectors; - repeat: - if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) { - /* step marks */ - int next = (last_mark+1) % SYNC_MARKS; - - mddev->resync_mark = mark[next]; - mddev->resync_mark_cnt = mark_cnt[next]; - mark[next] = jiffies; - mark_cnt[next] = io_sectors - atomic_read(&mddev->recovery_active); - last_mark = next; - } - - - if (kthread_should_stop()) - goto interrupted; - - - /* - * this loop exits only if either when we are slower than - * the 'hard' speed limit, or the system was IO-idle for - * a jiffy. - * the system might be non-idle CPU-wise, but we only care - * about not overloading the IO subsystem. 
(things like an - * e2fsck being done on the RAID array should execute fast) - */ - cond_resched(); - - currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 - /((jiffies-mddev->resync_mark)/HZ +1) +1; - - if (currspeed > speed_min(mddev)) { - if ((currspeed > speed_max(mddev)) || - !is_mddev_idle(mddev, 0)) { - msleep(500); - goto repeat; - } - } - } - printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc); - /* - * this also signals 'finished resyncing' to md_stop - */ - out: - wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); - - /* tell personality that we are finished */ - mddev->pers->sync_request(mddev, max_sectors, &skipped, 1); - - if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && - mddev->curr_resync > 2) { - if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { - if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { - if (mddev->curr_resync >= mddev->recovery_cp) { - printk(KERN_INFO - "md: checkpointing %s of %s.\n", - desc, mdname(mddev)); - mddev->recovery_cp = - mddev->curr_resync_completed; - } - } else - mddev->recovery_cp = MaxSector; - } else { - if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) - mddev->curr_resync = MaxSector; - rcu_read_lock(); - rdev_for_each_rcu(rdev, mddev) - if (rdev->raid_disk >= 0 && - mddev->delta_disks >= 0 && - !test_bit(Faulty, &rdev->flags) && - !test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < mddev->curr_resync) - rdev->recovery_offset = mddev->curr_resync; - rcu_read_unlock(); - } - } - skip: - set_bit(MD_CHANGE_DEVS, &mddev->flags); - - if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { - /* We completed so min/max setting can be forgotten if used. */ - if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) - mddev->resync_min = 0; - mddev->resync_max = MaxSector; - } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) - mddev->resync_min = mddev->curr_resync_completed; - mddev->curr_resync = 0; - wake_up(&resync_wait); - set_bit(MD_RECOVERY_DONE, &mddev->recovery); - md_wakeup_thread(mddev->thread); - return; - - interrupted: - /* - * got a signal, exit. - */ - printk(KERN_INFO - "md: md_do_sync() got signal ... exiting\n"); - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - goto out; - -} -EXPORT_SYMBOL_GPL(md_do_sync); - -static int remove_and_add_spares(struct mddev *mddev) -{ - struct md_rdev *rdev; - int spares = 0; - int removed = 0; - - mddev->curr_resync_completed = 0; - - rdev_for_each(rdev, mddev) - if (rdev->raid_disk >= 0 && - !test_bit(Blocked, &rdev->flags) && - (test_bit(Faulty, &rdev->flags) || - ! 
test_bit(In_sync, &rdev->flags)) && - atomic_read(&rdev->nr_pending)==0) { - if (mddev->pers->hot_remove_disk( - mddev, rdev) == 0) { - sysfs_unlink_rdev(mddev, rdev); - rdev->raid_disk = -1; - removed++; - } - } - if (removed) - sysfs_notify(&mddev->kobj, NULL, - "degraded"); - - - rdev_for_each(rdev, mddev) { - if (rdev->raid_disk >= 0 && - !test_bit(In_sync, &rdev->flags) && - !test_bit(Faulty, &rdev->flags)) - spares++; - if (rdev->raid_disk < 0 - && !test_bit(Faulty, &rdev->flags)) { - rdev->recovery_offset = 0; - if (mddev->pers-> - hot_add_disk(mddev, rdev) == 0) { - if (sysfs_link_rdev(mddev, rdev)) - /* failure here is OK */; - spares++; - md_new_event(mddev); - set_bit(MD_CHANGE_DEVS, &mddev->flags); - } - } - } - return spares; -} - -static void reap_sync_thread(struct mddev *mddev) -{ - struct md_rdev *rdev; - - /* resync has finished, collect result */ - md_unregister_thread(&mddev->sync_thread); - if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && - !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { - /* success...*/ - /* activate any spares */ - if (mddev->pers->spare_active(mddev)) - sysfs_notify(&mddev->kobj, NULL, - "degraded"); - } - if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && - mddev->pers->finish_reshape) - mddev->pers->finish_reshape(mddev); - - /* If array is no-longer degraded, then any saved_raid_disk - * information must be scrapped. Also if any device is now - * In_sync we must scrape the saved_raid_disk for that device - * do the superblock for an incrementally recovered device - * written out. - */ - rdev_for_each(rdev, mddev) - if (!mddev->degraded || - test_bit(In_sync, &rdev->flags)) - rdev->saved_raid_disk = -1; - - md_update_sb(mddev, 1); - clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); - clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); - clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); - /* flag recovery needed just to double check */ - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - sysfs_notify_dirent_safe(mddev->sysfs_action); - md_new_event(mddev); - if (mddev->event_work.func) - queue_work(md_misc_wq, &mddev->event_work); -} - -/* - * This routine is regularly called by all per-raid-array threads to - * deal with generic issues like resync and super-block update. - * Raid personalities that don't have a thread (linear/raid0) do not - * need this as they never do any recovery or update the superblock. - * - * It does not do any resync itself, but rather "forks" off other threads - * to do that as needed. - * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in - * "->recovery" and create a thread at ->sync_thread. - * When the thread finishes it sets MD_RECOVERY_DONE - * and wakeups up this thread which will reap the thread and finish up. - * This thread also removes any faulty devices (with nr_pending == 0). - * - * The overall approach is: - * 1/ if the superblock needs updating, update it. - * 2/ If a recovery thread is running, don't do anything else. - * 3/ If recovery has finished, clean up, possibly marking spares active. - * 4/ If there are any faulty devices, remove them. - * 5/ If array is degraded, try to add spares devices - * 6/ If array has spares or is not in-sync, start a resync thread. 
- */ -void md_check_recovery(struct mddev *mddev) -{ - if (mddev->suspended) - return; - - if (mddev->bitmap) - bitmap_daemon_work(mddev); - - if (signal_pending(current)) { - if (mddev->pers->sync_request && !mddev->external) { - printk(KERN_INFO "md: %s in immediate safe mode\n", - mdname(mddev)); - mddev->safemode = 2; - } - flush_signals(current); - } - - if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) - return; - if ( ! ( - (mddev->flags & ~ (1<<MD_CHANGE_PENDING)) || - test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || - test_bit(MD_RECOVERY_DONE, &mddev->recovery) || - (mddev->external == 0 && mddev->safemode == 1) || - (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending) - && !mddev->in_sync && mddev->recovery_cp == MaxSector) - )) - return; - - if (mddev_trylock(mddev)) { - int spares = 0; - - if (mddev->ro) { - /* Only thing we do on a ro array is remove - * failed devices. - */ - struct md_rdev *rdev; - rdev_for_each(rdev, mddev) - if (rdev->raid_disk >= 0 && - !test_bit(Blocked, &rdev->flags) && - test_bit(Faulty, &rdev->flags) && - atomic_read(&rdev->nr_pending)==0) { - if (mddev->pers->hot_remove_disk( - mddev, rdev) == 0) { - sysfs_unlink_rdev(mddev, rdev); - rdev->raid_disk = -1; - } - } - clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - goto unlock; - } - - if (!mddev->external) { - int did_change = 0; - spin_lock_irq(&mddev->write_lock); - if (mddev->safemode && - !atomic_read(&mddev->writes_pending) && - !mddev->in_sync && - mddev->recovery_cp == MaxSector) { - mddev->in_sync = 1; - did_change = 1; - set_bit(MD_CHANGE_CLEAN, &mddev->flags); - } - if (mddev->safemode == 1) - mddev->safemode = 0; - spin_unlock_irq(&mddev->write_lock); - if (did_change) - sysfs_notify_dirent_safe(mddev->sysfs_state); - } - - if (mddev->flags) - md_update_sb(mddev, 0); - - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && - !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { - /* resync/recovery still happening */ - clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - goto unlock; - } - if (mddev->sync_thread) { - reap_sync_thread(mddev); - goto unlock; - } - /* Set RUNNING before clearing NEEDED to avoid - * any transients in the value of "sync_action". - */ - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - /* Clear some bits that don't mean anything, but - * might be left set - */ - clear_bit(MD_RECOVERY_INTR, &mddev->recovery); - clear_bit(MD_RECOVERY_DONE, &mddev->recovery); - - if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || - test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) - goto unlock; - /* no recovery is running. - * remove any failed drives, then - * add spares if possible. - * Spare are also removed and re-added, to allow - * the personality to fail the re-add. - */ - - if (mddev->reshape_position != MaxSector) { - if (mddev->pers->check_reshape == NULL || - mddev->pers->check_reshape(mddev) != 0) - /* Cannot proceed */ - goto unlock; - set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); - } else if ((spares = remove_and_add_spares(mddev))) { - clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); - clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); - clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); - set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); - } else if (mddev->recovery_cp < MaxSector) { - set_bit(MD_RECOVERY_SYNC, &mddev->recovery); - clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); - } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) - /* nothing to be done ... 
*/ - goto unlock; - - if (mddev->pers->sync_request) { - if (spares && mddev->bitmap && ! mddev->bitmap->file) { - /* We are adding a device or devices to an array - * which has the bitmap stored on all devices. - * So make sure all bitmap pages get written - */ - bitmap_write_all(mddev->bitmap); - } - mddev->sync_thread = md_register_thread(md_do_sync, - mddev, - "resync"); - if (!mddev->sync_thread) { - printk(KERN_ERR "%s: could not start resync" - " thread...\n", - mdname(mddev)); - /* leave the spares where they are, it shouldn't hurt */ - clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); - clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); - clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); - } else - md_wakeup_thread(mddev->sync_thread); - sysfs_notify_dirent_safe(mddev->sysfs_action); - md_new_event(mddev); - } - unlock: - if (!mddev->sync_thread) { - clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - if (test_and_clear_bit(MD_RECOVERY_RECOVER, - &mddev->recovery)) - if (mddev->sysfs_action) - sysfs_notify_dirent_safe(mddev->sysfs_action); - } - mddev_unlock(mddev); - } -} - -void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev) -{ - sysfs_notify_dirent_safe(rdev->sysfs_state); - wait_event_timeout(rdev->blocked_wait, - !test_bit(Blocked, &rdev->flags) && - !test_bit(BlockedBadBlocks, &rdev->flags), - msecs_to_jiffies(5000)); - rdev_dec_pending(rdev, mddev); -} -EXPORT_SYMBOL(md_wait_for_blocked_rdev); - - -/* Bad block management. - * We can record which blocks on each device are 'bad' and so just - * fail those blocks, or that stripe, rather than the whole device. - * Entries in the bad-block table are 64bits wide. This comprises: - * Length of bad-range, in sectors: 0-511 for lengths 1-512 - * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes) - * A 'shift' can be set so that larger blocks are tracked and - * consequently larger devices can be covered. - * 'Acknowledged' flag - 1 bit. - the most significant bit. - * - * Locking of the bad-block table uses a seqlock so md_is_badblock - * might need to retry if it is very unlucky. - * We will sometimes want to check for bad blocks in a bi_end_io function, - * so we use the write_seqlock_irq variant. - * - * When looking for a bad block we specify a range and want to - * know if any block in the range is bad. So we binary-search - * to the last range that starts at-or-before the given endpoint, - * (or "before the sector after the target range") - * then see if it ends after the given start. - * We return - * 0 if there are no known bad blocks in the range - * 1 if there are known bad block which are all acknowledged - * -1 if there are bad blocks which have not yet been acknowledged in metadata. - * plus the start/length of the first bad section we overlap. - */ -int md_is_badblock(struct badblocks *bb, sector_t s, int sectors, - sector_t *first_bad, int *bad_sectors) -{ - int hi; - int lo = 0; - u64 *p = bb->page; - int rv = 0; - sector_t target = s + sectors; - unsigned seq; - - if (bb->shift > 0) { - /* round the start down, and the end up */ - s >>= bb->shift; - target += (1<<bb->shift) - 1; - target >>= bb->shift; - sectors = target - s; - } - /* 'target' is now the first block after the bad range */ - -retry: - seq = read_seqbegin(&bb->lock); - - hi = bb->count; - - /* Binary search between lo and hi for 'target' - * i.e. 
for the last range that starts before 'target' - */ - /* INVARIANT: ranges before 'lo' and at-or-after 'hi' - * are known not to be the last range before target. - * VARIANT: hi-lo is the number of possible - * ranges, and decreases until it reaches 1 - */ - while (hi - lo > 1) { - int mid = (lo + hi) / 2; - sector_t a = BB_OFFSET(p[mid]); - if (a < target) - /* This could still be the one, earlier ranges - * could not. */ - lo = mid; - else - /* This and later ranges are definitely out. */ - hi = mid; - } - /* 'lo' might be the last that started before target, but 'hi' isn't */ - if (hi > lo) { - /* need to check all range that end after 's' to see if - * any are unacknowledged. - */ - while (lo >= 0 && - BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) { - if (BB_OFFSET(p[lo]) < target) { - /* starts before the end, and finishes after - * the start, so they must overlap - */ - if (rv != -1 && BB_ACK(p[lo])) - rv = 1; - else - rv = -1; - *first_bad = BB_OFFSET(p[lo]); - *bad_sectors = BB_LEN(p[lo]); - } - lo--; - } - } - - if (read_seqretry(&bb->lock, seq)) - goto retry; - - return rv; -} -EXPORT_SYMBOL_GPL(md_is_badblock); - -/* - * Add a range of bad blocks to the table. - * This might extend the table, or might contract it - * if two adjacent ranges can be merged. - * We binary-search to find the 'insertion' point, then - * decide how best to handle it. - */ -static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors, - int acknowledged) -{ - u64 *p; - int lo, hi; - int rv = 1; - - if (bb->shift < 0) - /* badblocks are disabled */ - return 0; - - if (bb->shift) { - /* round the start down, and the end up */ - sector_t next = s + sectors; - s >>= bb->shift; - next += (1<<bb->shift) - 1; - next >>= bb->shift; - sectors = next - s; - } - - write_seqlock_irq(&bb->lock); - - p = bb->page; - lo = 0; - hi = bb->count; - /* Find the last range that starts at-or-before 's' */ - while (hi - lo > 1) { - int mid = (lo + hi) / 2; - sector_t a = BB_OFFSET(p[mid]); - if (a <= s) - lo = mid; - else - hi = mid; - } - if (hi > lo && BB_OFFSET(p[lo]) > s) - hi = lo; - - if (hi > lo) { - /* we found a range that might merge with the start - * of our new range - */ - sector_t a = BB_OFFSET(p[lo]); - sector_t e = a + BB_LEN(p[lo]); - int ack = BB_ACK(p[lo]); - if (e >= s) { - /* Yes, we can merge with a previous range */ - if (s == a && s + sectors >= e) - /* new range covers old */ - ack = acknowledged; - else - ack = ack && acknowledged; - - if (e < s + sectors) - e = s + sectors; - if (e - a <= BB_MAX_LEN) { - p[lo] = BB_MAKE(a, e-a, ack); - s = e; - } else { - /* does not all fit in one range, - * make p[lo] maximal - */ - if (BB_LEN(p[lo]) != BB_MAX_LEN) - p[lo] = BB_MAKE(a, BB_MAX_LEN, ack); - s = a + BB_MAX_LEN; - } - sectors = e - s; - } - } - if (sectors && hi < bb->count) { - /* 'hi' points to the first range that starts after 's'. 
- * Maybe we can merge with the start of that range */ - sector_t a = BB_OFFSET(p[hi]); - sector_t e = a + BB_LEN(p[hi]); - int ack = BB_ACK(p[hi]); - if (a <= s + sectors) { - /* merging is possible */ - if (e <= s + sectors) { - /* full overlap */ - e = s + sectors; - ack = acknowledged; - } else - ack = ack && acknowledged; - - a = s; - if (e - a <= BB_MAX_LEN) { - p[hi] = BB_MAKE(a, e-a, ack); - s = e; - } else { - p[hi] = BB_MAKE(a, BB_MAX_LEN, ack); - s = a + BB_MAX_LEN; - } - sectors = e - s; - lo = hi; - hi++; - } - } - if (sectors == 0 && hi < bb->count) { - /* we might be able to combine lo and hi */ - /* Note: 's' is at the end of 'lo' */ - sector_t a = BB_OFFSET(p[hi]); - int lolen = BB_LEN(p[lo]); - int hilen = BB_LEN(p[hi]); - int newlen = lolen + hilen - (s - a); - if (s >= a && newlen < BB_MAX_LEN) { - /* yes, we can combine them */ - int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]); - p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack); - memmove(p + hi, p + hi + 1, - (bb->count - hi - 1) * 8); - bb->count--; - } - } - while (sectors) { - /* didn't merge (it all). - * Need to add a range just before 'hi' */ - if (bb->count >= MD_MAX_BADBLOCKS) { - /* No room for more */ - rv = 0; - break; - } else { - int this_sectors = sectors; - memmove(p + hi + 1, p + hi, - (bb->count - hi) * 8); - bb->count++; - - if (this_sectors > BB_MAX_LEN) - this_sectors = BB_MAX_LEN; - p[hi] = BB_MAKE(s, this_sectors, acknowledged); - sectors -= this_sectors; - s += this_sectors; - } - } - - bb->changed = 1; - if (!acknowledged) - bb->unacked_exist = 1; - write_sequnlock_irq(&bb->lock); - - return rv; -} - -int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, - int acknowledged) -{ - int rv = md_set_badblocks(&rdev->badblocks, - s + rdev->data_offset, sectors, acknowledged); - if (rv) { - /* Make sure they get written out promptly */ - sysfs_notify_dirent_safe(rdev->sysfs_state); - set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags); - md_wakeup_thread(rdev->mddev->thread); - } - return rv; -} -EXPORT_SYMBOL_GPL(rdev_set_badblocks); - -/* - * Remove a range of bad blocks from the table. - * This may involve extending the table if we spilt a region, - * but it must not fail. So if the table becomes full, we just - * drop the remove request. - */ -static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors) -{ - u64 *p; - int lo, hi; - sector_t target = s + sectors; - int rv = 0; - - if (bb->shift > 0) { - /* When clearing we round the start up and the end down. - * This should not matter as the shift should align with - * the block size and no rounding should ever be needed. - * However it is better the think a block is bad when it - * isn't than to think a block is not bad when it is. - */ - s += (1<<bb->shift) - 1; - s >>= bb->shift; - target >>= bb->shift; - sectors = target - s; - } - - write_seqlock_irq(&bb->lock); - - p = bb->page; - lo = 0; - hi = bb->count; - /* Find the last range that starts before 'target' */ - while (hi - lo > 1) { - int mid = (lo + hi) / 2; - sector_t a = BB_OFFSET(p[mid]); - if (a < target) - lo = mid; - else - hi = mid; - } - if (hi > lo) { - /* p[lo] is the last range that could overlap the - * current range. Earlier ranges could also overlap, - * but only this one can overlap the end of the range. 
- */ - if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) { - /* Partial overlap, leave the tail of this range */ - int ack = BB_ACK(p[lo]); - sector_t a = BB_OFFSET(p[lo]); - sector_t end = a + BB_LEN(p[lo]); - - if (a < s) { - /* we need to split this range */ - if (bb->count >= MD_MAX_BADBLOCKS) { - rv = 0; - goto out; - } - memmove(p+lo+1, p+lo, (bb->count - lo) * 8); - bb->count++; - p[lo] = BB_MAKE(a, s-a, ack); - lo++; - } - p[lo] = BB_MAKE(target, end - target, ack); - /* there is no longer an overlap */ - hi = lo; - lo--; - } - while (lo >= 0 && - BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) { - /* This range does overlap */ - if (BB_OFFSET(p[lo]) < s) { - /* Keep the early parts of this range. */ - int ack = BB_ACK(p[lo]); - sector_t start = BB_OFFSET(p[lo]); - p[lo] = BB_MAKE(start, s - start, ack); - /* now low doesn't overlap, so.. */ - break; - } - lo--; - } - /* 'lo' is strictly before, 'hi' is strictly after, - * anything between needs to be discarded - */ - if (hi - lo > 1) { - memmove(p+lo+1, p+hi, (bb->count - hi) * 8); - bb->count -= (hi - lo - 1); - } - } - - bb->changed = 1; -out: - write_sequnlock_irq(&bb->lock); - return rv; -} - -int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors) -{ - return md_clear_badblocks(&rdev->badblocks, - s + rdev->data_offset, - sectors); -} -EXPORT_SYMBOL_GPL(rdev_clear_badblocks); - -/* - * Acknowledge all bad blocks in a list. - * This only succeeds if ->changed is clear. It is used by - * in-kernel metadata updates - */ -void md_ack_all_badblocks(struct badblocks *bb) -{ - if (bb->page == NULL || bb->changed) - /* no point even trying */ - return; - write_seqlock_irq(&bb->lock); - - if (bb->changed == 0 && bb->unacked_exist) { - u64 *p = bb->page; - int i; - for (i = 0; i < bb->count ; i++) { - if (!BB_ACK(p[i])) { - sector_t start = BB_OFFSET(p[i]); - int len = BB_LEN(p[i]); - p[i] = BB_MAKE(start, len, 1); - } - } - bb->unacked_exist = 0; - } - write_sequnlock_irq(&bb->lock); -} -EXPORT_SYMBOL_GPL(md_ack_all_badblocks); - -/* sysfs access to bad-blocks list. - * We present two files. - * 'bad-blocks' lists sector numbers and lengths of ranges that - * are recorded as bad. The list is truncated to fit within - * the one-page limit of sysfs. - * Writing "sector length" to this file adds an acknowledged - * bad block list. - * 'unacknowledged-bad-blocks' lists bad blocks that have not yet - * been acknowledged. Writing to this file adds bad blocks - * without acknowledging them. This is largely for testing. - */ - -static ssize_t -badblocks_show(struct badblocks *bb, char *page, int unack) -{ - size_t len; - int i; - u64 *p = bb->page; - unsigned seq; - - if (bb->shift < 0) - return 0; - -retry: - seq = read_seqbegin(&bb->lock); - - len = 0; - i = 0; - - while (len < PAGE_SIZE && i < bb->count) { - sector_t s = BB_OFFSET(p[i]); - unsigned int length = BB_LEN(p[i]); - int ack = BB_ACK(p[i]); - i++; - - if (unack && ack) - continue; - - len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n", - (unsigned long long)s << bb->shift, - length << bb->shift); - } - if (unack && len == 0) - bb->unacked_exist = 0; - - if (read_seqretry(&bb->lock, seq)) - goto retry; - - return len; -} - -#define DO_DEBUG 1 - -static ssize_t -badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack) -{ - unsigned long long sector; - int length; - char newline; -#ifdef DO_DEBUG - /* Allow clearing via sysfs *only* for testing/debugging. 
- * Normally only a successful write may clear a badblock - */ - int clear = 0; - if (page[0] == '-') { - clear = 1; - page++; - } -#endif /* DO_DEBUG */ - - switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) { - case 3: - if (newline != '\n') - return -EINVAL; - case 2: - if (length <= 0) - return -EINVAL; - break; - default: - return -EINVAL; - } - -#ifdef DO_DEBUG - if (clear) { - md_clear_badblocks(bb, sector, length); - return len; - } -#endif /* DO_DEBUG */ - if (md_set_badblocks(bb, sector, length, !unack)) - return len; - else - return -ENOSPC; -} - -static int md_notify_reboot(struct notifier_block *this, - unsigned long code, void *x) -{ - struct list_head *tmp; - struct mddev *mddev; - int need_delay = 0; - - for_each_mddev(mddev, tmp) { - if (mddev_trylock(mddev)) { - if (mddev->pers) - __md_stop_writes(mddev); - mddev->safemode = 2; - mddev_unlock(mddev); - } - need_delay = 1; - } - /* - * certain more exotic SCSI devices are known to be - * volatile wrt too early system reboots. While the - * right place to handle this issue is the given - * driver, we do want to have a safe RAID driver ... - */ - if (need_delay) - mdelay(1000*1); - - return NOTIFY_DONE; -} - -static struct notifier_block md_notifier = { - .notifier_call = md_notify_reboot, - .next = NULL, - .priority = INT_MAX, /* before any real devices */ -}; - -static void md_geninit(void) -{ - pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t)); - - proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops); -} - -static int __init md_init(void) -{ - int ret = -ENOMEM; - - md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0); - if (!md_wq) - goto err_wq; - - md_misc_wq = alloc_workqueue("md_misc", 0, 0); - if (!md_misc_wq) - goto err_misc_wq; - - if ((ret = register_blkdev(MD_MAJOR, "md")) < 0) - goto err_md; - - if ((ret = register_blkdev(0, "mdp")) < 0) - goto err_mdp; - mdp_major = ret; - - blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE, - md_probe, NULL, NULL); - blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE, - md_probe, NULL, NULL); - - register_reboot_notifier(&md_notifier); - raid_table_header = register_sysctl_table(raid_root_table); - - md_geninit(); - return 0; - -err_mdp: - unregister_blkdev(MD_MAJOR, "md"); -err_md: - destroy_workqueue(md_misc_wq); -err_misc_wq: - destroy_workqueue(md_wq); -err_wq: - return ret; -} - -#ifndef MODULE - -/* - * Searches all registered partitions for autorun RAID arrays - * at boot time.
- */ - -static LIST_HEAD(all_detected_devices); -struct detected_devices_node { - struct list_head list; - dev_t dev; -}; - -void md_autodetect_dev(dev_t dev) -{ - struct detected_devices_node *node_detected_dev; - - node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL); - if (node_detected_dev) { - node_detected_dev->dev = dev; - list_add_tail(&node_detected_dev->list, &all_detected_devices); - } else { - printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed" - ", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev)); - } -} - - -static void autostart_arrays(int part) -{ - struct md_rdev *rdev; - struct detected_devices_node *node_detected_dev; - dev_t dev; - int i_scanned, i_passed; - - i_scanned = 0; - i_passed = 0; - - printk(KERN_INFO "md: Autodetecting RAID arrays.\n"); - - while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) { - i_scanned++; - node_detected_dev = list_entry(all_detected_devices.next, - struct detected_devices_node, list); - list_del(&node_detected_dev->list); - dev = node_detected_dev->dev; - kfree(node_detected_dev); - rdev = md_import_device(dev,0, 90); - if (IS_ERR(rdev)) - continue; - - if (test_bit(Faulty, &rdev->flags)) { - MD_BUG(); - continue; - } - set_bit(AutoDetected, &rdev->flags); - list_add(&rdev->same_set, &pending_raid_disks); - i_passed++; - } - - printk(KERN_INFO "md: Scanned %d and added %d devices.\n", - i_scanned, i_passed); - - autorun_devices(part); -} - -#endif /* !MODULE */ - -static __exit void md_exit(void) -{ - struct mddev *mddev; - struct list_head *tmp; - - blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS); - blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS); - - unregister_blkdev(MD_MAJOR,"md"); - unregister_blkdev(mdp_major, "mdp"); - unregister_reboot_notifier(&md_notifier); - unregister_sysctl_table(raid_table_header); - remove_proc_entry("mdstat", NULL); - for_each_mddev(mddev, tmp) { - export_array(mddev); - mddev->hold_active = 0; - } - destroy_workqueue(md_misc_wq); - destroy_workqueue(md_wq); -} - -subsys_initcall(md_init); -module_exit(md_exit) - -static int get_ro(char *buffer, struct kernel_param *kp) -{ - return sprintf(buffer, "%d", start_readonly); -} -static int set_ro(const char *val, struct kernel_param *kp) -{ - char *e; - int num = simple_strtoul(val, &e, 10); - if (*val && (*e == '\0' || *e == '\n')) { - start_readonly = num; - return 0; - } - return -EINVAL; -} - -module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); -module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); - -module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR); - -EXPORT_SYMBOL(register_md_personality); -EXPORT_SYMBOL(unregister_md_personality); -EXPORT_SYMBOL(md_error); -EXPORT_SYMBOL(md_done_sync); -EXPORT_SYMBOL(md_write_start); -EXPORT_SYMBOL(md_write_end); -EXPORT_SYMBOL(md_register_thread); -EXPORT_SYMBOL(md_unregister_thread); -EXPORT_SYMBOL(md_wakeup_thread); -EXPORT_SYMBOL(md_check_recovery); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("MD RAID framework"); -MODULE_ALIAS("md"); -MODULE_ALIAS_BLOCKDEV_MAJOR(MD_MAJOR); diff --git a/ANDROID_3.4.5/drivers/md/md.h b/ANDROID_3.4.5/drivers/md/md.h deleted file mode 100644 index 1c2063cc..00000000 --- a/ANDROID_3.4.5/drivers/md/md.h +++ /dev/null @@ -1,627 +0,0 @@ -/* - md.h : kernel internal structure of the Linux MD driver - Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free 
Software Foundation; either version 2, or (at your option) - any later version. - - You should have received a copy of the GNU General Public License - (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _MD_MD_H -#define _MD_MD_H - -#include <linux/blkdev.h> -#include <linux/kobject.h> -#include <linux/list.h> -#include <linux/mm.h> -#include <linux/mutex.h> -#include <linux/timer.h> -#include <linux/wait.h> -#include <linux/workqueue.h> - -#define MaxSector (~(sector_t)0) - -/* Bad block numbers are stored sorted in a single page. - * 64bits is used for each block or extent. - * 54 bits are sector number, 9 bits are extent size, - * 1 bit is an 'acknowledged' flag. - */ -#define MD_MAX_BADBLOCKS (PAGE_SIZE/8) - -/* - * MD's 'extended' device - */ -struct md_rdev { - struct list_head same_set; /* RAID devices within the same set */ - - sector_t sectors; /* Device size (in 512bytes sectors) */ - struct mddev *mddev; /* RAID array if running */ - int last_events; /* IO event timestamp */ - - /* - * If meta_bdev is non-NULL, it means that a separate device is - * being used to store the metadata (superblock/bitmap) which - * would otherwise be contained on the same device as the data (bdev). - */ - struct block_device *meta_bdev; - struct block_device *bdev; /* block device handle */ - - struct page *sb_page, *bb_page; - int sb_loaded; - __u64 sb_events; - sector_t data_offset; /* start of data in array */ - sector_t sb_start; /* offset of the super block (in 512byte sectors) */ - int sb_size; /* bytes in the superblock */ - int preferred_minor; /* autorun support */ - - struct kobject kobj; - - /* A device can be in one of three states based on two flags: - * Not working: faulty==1 in_sync==0 - * Fully working: faulty==0 in_sync==1 - * Working, but not - * in sync with array - * faulty==0 in_sync==0 - * - * It can never have faulty==1, in_sync==1 - * This reduces the burden of testing multiple flags in many cases - */ - - unsigned long flags; /* bit set of 'enum flag_bits' bits. */ - wait_queue_head_t blocked_wait; - - int desc_nr; /* descriptor index in the superblock */ - int raid_disk; /* role of device in array */ - int new_raid_disk; /* role that the device will have in - * the array after a level-change completes. - */ - int saved_raid_disk; /* role that device used to have in the - * array and could again if we did a partial - * resync from the bitmap - */ - sector_t recovery_offset;/* If this device has been partially - * recovered, this is where we were - * up to. - */ - - atomic_t nr_pending; /* number of pending requests. - * only maintained for arrays that - * support hot removal - */ - atomic_t read_errors; /* number of consecutive read errors that - * we have tried to ignore. - */ - struct timespec last_read_error; /* monotonic time since our - * last read error - */ - atomic_t corrected_errors; /* number of corrected read errors, - * for reporting to userspace and storing - * in superblock. - */ - struct work_struct del_work; /* used for delayed sysfs removal */ - - struct sysfs_dirent *sysfs_state; /* handle for 'state' - * sysfs entry */ - - struct badblocks { - int count; /* count of bad blocks */ - int unacked_exist; /* there probably are unacknowledged - * bad blocks. 
This is only cleared - * when a read discovers none - */ - int shift; /* shift from sectors to block size - * a -ve shift means badblocks are - * disabled.*/ - u64 *page; /* badblock list */ - int changed; - seqlock_t lock; - - sector_t sector; - sector_t size; /* in sectors */ - } badblocks; -}; -enum flag_bits { - Faulty, /* device is known to have a fault */ - In_sync, /* device is in_sync with rest of array */ - Unmerged, /* device is being added to array and should - * be considerred for bvec_merge_fn but not - * yet for actual IO - */ - WriteMostly, /* Avoid reading if at all possible */ - AutoDetected, /* added by auto-detect */ - Blocked, /* An error occurred but has not yet - * been acknowledged by the metadata - * handler, so don't allow writes - * until it is cleared */ - WriteErrorSeen, /* A write error has been seen on this - * device - */ - FaultRecorded, /* Intermediate state for clearing - * Blocked. The Fault is/will-be - * recorded in the metadata, but that - * metadata hasn't been stored safely - * on disk yet. - */ - BlockedBadBlocks, /* A writer is blocked because they - * found an unacknowledged bad-block. - * This can safely be cleared at any - * time, and the writer will re-check. - * It may be set at any time, and at - * worst the writer will timeout and - * re-check. So setting it as - * accurately as possible is good, but - * not absolutely critical. - */ - WantReplacement, /* This device is a candidate to be - * hot-replaced, either because it has - * reported some faults, or because - * of explicit request. - */ - Replacement, /* This device is a replacement for - * a want_replacement device with same - * raid_disk number. - */ -}; - -#define BB_LEN_MASK (0x00000000000001FFULL) -#define BB_OFFSET_MASK (0x7FFFFFFFFFFFFE00ULL) -#define BB_ACK_MASK (0x8000000000000000ULL) -#define BB_MAX_LEN 512 -#define BB_OFFSET(x) (((x) & BB_OFFSET_MASK) >> 9) -#define BB_LEN(x) (((x) & BB_LEN_MASK) + 1) -#define BB_ACK(x) (!!((x) & BB_ACK_MASK)) -#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63)) - -extern int md_is_badblock(struct badblocks *bb, sector_t s, int sectors, - sector_t *first_bad, int *bad_sectors); -static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors, - sector_t *first_bad, int *bad_sectors) -{ - if (unlikely(rdev->badblocks.count)) { - int rv = md_is_badblock(&rdev->badblocks, rdev->data_offset + s, - sectors, - first_bad, bad_sectors); - if (rv) - *first_bad -= rdev->data_offset; - return rv; - } - return 0; -} -extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, - int acknowledged); -extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors); -extern void md_ack_all_badblocks(struct badblocks *bb); - -struct mddev { - void *private; - struct md_personality *pers; - dev_t unit; - int md_minor; - struct list_head disks; - unsigned long flags; -#define MD_CHANGE_DEVS 0 /* Some device status has changed */ -#define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */ -#define MD_CHANGE_PENDING 2 /* switch from 'clean' to 'active' in progress */ -#define MD_ARRAY_FIRST_USE 3 /* First use of array, needs initialization */ - - int suspended; - atomic_t active_io; - int ro; - int sysfs_active; /* set when sysfs deletes - * are happening, so run/ - * takeover/stop are not safe - */ - int ready; /* See when safe to pass - * IO requests down */ - struct gendisk *gendisk; - - struct kobject kobj; - int hold_active; -#define UNTIL_IOCTL 1 -#define UNTIL_STOP 2 - - /* Superblock 
information */ - int major_version, - minor_version, - patch_version; - int persistent; - int external; /* metadata is - * managed externally */ - char metadata_type[17]; /* externally set*/ - int chunk_sectors; - time_t ctime, utime; - int level, layout; - char clevel[16]; - int raid_disks; - int max_disks; - sector_t dev_sectors; /* used size of - * component devices */ - sector_t array_sectors; /* exported array size */ - int external_size; /* size managed - * externally */ - __u64 events; - /* If the last 'event' was simply a clean->dirty transition, and - * we didn't write it to the spares, then it is safe and simple - * to just decrement the event count on a dirty->clean transition. - * So we record that possibility here. - */ - int can_decrease_events; - - char uuid[16]; - - /* If the array is being reshaped, we need to record the - * new shape and an indication of where we are up to. - * This is written to the superblock. - * If reshape_position is MaxSector, then no reshape is happening (yet). - */ - sector_t reshape_position; - int delta_disks, new_level, new_layout; - int new_chunk_sectors; - - atomic_t plug_cnt; /* If device is expecting - * more bios soon. - */ - struct md_thread *thread; /* management thread */ - struct md_thread *sync_thread; /* doing resync or reconstruct */ - sector_t curr_resync; /* last block scheduled */ - /* As resync requests can complete out of order, we cannot easily track - * how much resync has been completed. So we occasionally pause until - * everything completes, then set curr_resync_completed to curr_resync. - * As such it may be well behind the real resync mark, but it is a value - * we are certain of. - */ - sector_t curr_resync_completed; - unsigned long resync_mark; /* a recent timestamp */ - sector_t resync_mark_cnt;/* blocks written at resync_mark */ - sector_t curr_mark_cnt; /* blocks scheduled now */ - - sector_t resync_max_sectors; /* may be set by personality */ - - sector_t resync_mismatches; /* count of sectors where - * parity/replica mismatch found - */ - - /* allow user-space to request suspension of IO to regions of the array */ - sector_t suspend_lo; - sector_t suspend_hi; - /* if zero, use the system-wide default */ - int sync_speed_min; - int sync_speed_max; - - /* resync even though the same disks are shared among md-devices */ - int parallel_resync; - - int ok_start_degraded; - /* recovery/resync flags - * NEEDED: we might need to start a resync/recover - * RUNNING: a thread is running, or about to be started - * SYNC: actually doing a resync, not a recovery - * RECOVER: doing recovery, or need to try it. - * INTR: resync needs to be aborted for some reason - * DONE: thread is done and is waiting to be reaped - * REQUEST: user-space has requested a sync (used with SYNC) - * CHECK: user-space request for check-only, no repair - * RESHAPE: A reshape is happening - * - * If neither SYNC or RESHAPE are set, then it is a recovery. - */ -#define MD_RECOVERY_RUNNING 0 -#define MD_RECOVERY_SYNC 1 -#define MD_RECOVERY_RECOVER 2 -#define MD_RECOVERY_INTR 3 -#define MD_RECOVERY_DONE 4 -#define MD_RECOVERY_NEEDED 5 -#define MD_RECOVERY_REQUESTED 6 -#define MD_RECOVERY_CHECK 7 -#define MD_RECOVERY_RESHAPE 8 -#define MD_RECOVERY_FROZEN 9 - - unsigned long recovery; - /* If a RAID personality determines that recovery (of a particular - * device) will fail due to a read error on the source device, it - * takes a copy of this number and does not attempt recovery again - * until this number changes. 
- */ - int recovery_disabled; - - int in_sync; /* know to not need resync */ - /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so - * that we are never stopping an array while it is open. - * 'reconfig_mutex' protects all other reconfiguration. - * These locks are separate due to conflicting interactions - * with bdev->bd_mutex. - * Lock ordering is: - * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk - * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open - */ - struct mutex open_mutex; - struct mutex reconfig_mutex; - atomic_t active; /* general refcount */ - atomic_t openers; /* number of active opens */ - - int changed; /* True if we might need to - * reread partition info */ - int degraded; /* whether md should consider - * adding a spare - */ - int merge_check_needed; /* at least one - * member device - * has a - * merge_bvec_fn */ - - atomic_t recovery_active; /* blocks scheduled, but not written */ - wait_queue_head_t recovery_wait; - sector_t recovery_cp; - sector_t resync_min; /* user requested sync - * starts here */ - sector_t resync_max; /* resync should pause - * when it gets here */ - - struct sysfs_dirent *sysfs_state; /* handle for 'array_state' - * file in sysfs. - */ - struct sysfs_dirent *sysfs_action; /* handle for 'sync_action' */ - - struct work_struct del_work; /* used for delayed sysfs removal */ - - spinlock_t write_lock; - wait_queue_head_t sb_wait; /* for waiting on superblock updates */ - atomic_t pending_writes; /* number of active superblock writes */ - - unsigned int safemode; /* if set, update "clean" superblock - * when no writes pending. - */ - unsigned int safemode_delay; - struct timer_list safemode_timer; - atomic_t writes_pending; - struct request_queue *queue; /* for plugging ... */ - - struct bitmap *bitmap; /* the bitmap for the device */ - struct { - struct file *file; /* the bitmap file */ - loff_t offset; /* offset from superblock of - * start of bitmap. May be - * negative, but not '0' - * For external metadata, offset - * from start of device. - */ - loff_t default_offset; /* this is the offset to use when - * hot-adding a bitmap. It should - * eventually be settable by sysfs. - */ - struct mutex mutex; - unsigned long chunksize; - unsigned long daemon_sleep; /* how many jiffies between updates? */ - unsigned long max_write_behind; /* write-behind mode */ - int external; - } bitmap_info; - - atomic_t max_corr_read_errors; /* max read retries */ - struct list_head all_mddevs; - - struct attribute_group *to_remove; - - struct bio_set *bio_set; - - /* Generic flush handling. - * The last to finish preflush schedules a worker to submit - * the rest of the request (without the REQ_FLUSH flag). 
- */ - struct bio *flush_bio; - atomic_t flush_pending; - struct work_struct flush_work; - struct work_struct event_work; /* used by dm to report failure event */ - void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); -}; - - -static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev) -{ - int faulty = test_bit(Faulty, &rdev->flags); - if (atomic_dec_and_test(&rdev->nr_pending) && faulty) - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); -} - -static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors) -{ - atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io); -} - -struct md_personality -{ - char *name; - int level; - struct list_head list; - struct module *owner; - void (*make_request)(struct mddev *mddev, struct bio *bio); - int (*run)(struct mddev *mddev); - int (*stop)(struct mddev *mddev); - void (*status)(struct seq_file *seq, struct mddev *mddev); - /* error_handler must set ->faulty and clear ->in_sync - * if appropriate, and should abort recovery if needed - */ - void (*error_handler)(struct mddev *mddev, struct md_rdev *rdev); - int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev); - int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev); - int (*spare_active) (struct mddev *mddev); - sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped, int go_faster); - int (*resize) (struct mddev *mddev, sector_t sectors); - sector_t (*size) (struct mddev *mddev, sector_t sectors, int raid_disks); - int (*check_reshape) (struct mddev *mddev); - int (*start_reshape) (struct mddev *mddev); - void (*finish_reshape) (struct mddev *mddev); - /* quiesce moves between quiescence states - * 0 - fully active - * 1 - no new requests allowed - * others - reserved - */ - void (*quiesce) (struct mddev *mddev, int state); - /* takeover is used to transition an array from one - * personality to another. The new personality must be able - * to handle the data in the current layout. - * e.g. 2drive raid1 -> 2drive raid5 - * ndrive raid5 -> degraded n+1drive raid6 with special layout - * If the takeover succeeds, a new 'private' structure is returned. - * This needs to be installed and then ->run used to activate the - * array. - */ - void *(*takeover) (struct mddev *mddev); -}; - - -struct md_sysfs_entry { - struct attribute attr; - ssize_t (*show)(struct mddev *, char *); - ssize_t (*store)(struct mddev *, const char *, size_t); -}; -extern struct attribute_group md_bitmap_group; - -static inline struct sysfs_dirent *sysfs_get_dirent_safe(struct sysfs_dirent *sd, char *name) -{ - if (sd) - return sysfs_get_dirent(sd, NULL, name); - return sd; -} -static inline void sysfs_notify_dirent_safe(struct sysfs_dirent *sd) -{ - if (sd) - sysfs_notify_dirent(sd); -} - -static inline char * mdname (struct mddev * mddev) -{ - return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; -} - -static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev) -{ - char nm[20]; - if (!test_bit(Replacement, &rdev->flags)) { - sprintf(nm, "rd%d", rdev->raid_disk); - return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); - } else - return 0; -} - -static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev) -{ - char nm[20]; - if (!test_bit(Replacement, &rdev->flags)) { - sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); - } -} - -/* - * iterates through some rdev ringlist. It's safe to remove the - * current 'rdev'. Dont touch 'tmp' though. 
- */ -#define rdev_for_each_list(rdev, tmp, head) \ - list_for_each_entry_safe(rdev, tmp, head, same_set) - -/* - * iterates through the 'same array disks' ringlist - */ -#define rdev_for_each(rdev, mddev) \ - list_for_each_entry(rdev, &((mddev)->disks), same_set) - -#define rdev_for_each_safe(rdev, tmp, mddev) \ - list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set) - -#define rdev_for_each_rcu(rdev, mddev) \ - list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set) - -struct md_thread { - void (*run) (struct mddev *mddev); - struct mddev *mddev; - wait_queue_head_t wqueue; - unsigned long flags; - struct task_struct *tsk; - unsigned long timeout; -}; - -#define THREAD_WAKEUP 0 - -#define __wait_event_lock_irq(wq, condition, lock, cmd) \ -do { \ - wait_queue_t __wait; \ - init_waitqueue_entry(&__wait, current); \ - \ - add_wait_queue(&wq, &__wait); \ - for (;;) { \ - set_current_state(TASK_UNINTERRUPTIBLE); \ - if (condition) \ - break; \ - spin_unlock_irq(&lock); \ - cmd; \ - schedule(); \ - spin_lock_irq(&lock); \ - } \ - current->state = TASK_RUNNING; \ - remove_wait_queue(&wq, &__wait); \ -} while (0) - -#define wait_event_lock_irq(wq, condition, lock, cmd) \ -do { \ - if (condition) \ - break; \ - __wait_event_lock_irq(wq, condition, lock, cmd); \ -} while (0) - -static inline void safe_put_page(struct page *p) -{ - if (p) put_page(p); -} - -extern int register_md_personality(struct md_personality *p); -extern int unregister_md_personality(struct md_personality *p); -extern struct md_thread *md_register_thread( - void (*run)(struct mddev *mddev), - struct mddev *mddev, - const char *name); -extern void md_unregister_thread(struct md_thread **threadp); -extern void md_wakeup_thread(struct md_thread *thread); -extern void md_check_recovery(struct mddev *mddev); -extern void md_write_start(struct mddev *mddev, struct bio *bi); -extern void md_write_end(struct mddev *mddev); -extern void md_done_sync(struct mddev *mddev, int blocks, int ok); -extern void md_error(struct mddev *mddev, struct md_rdev *rdev); - -extern int mddev_congested(struct mddev *mddev, int bits); -extern void md_flush_request(struct mddev *mddev, struct bio *bio); -extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev, - sector_t sector, int size, struct page *page); -extern void md_super_wait(struct mddev *mddev); -extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, - struct page *page, int rw, bool metadata_op); -extern void md_do_sync(struct mddev *mddev); -extern void md_new_event(struct mddev *mddev); -extern int md_allow_write(struct mddev *mddev); -extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev); -extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors); -extern int md_check_no_bitmap(struct mddev *mddev); -extern int md_integrity_register(struct mddev *mddev); -extern void md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev); -extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); -extern void restore_bitmap_write_access(struct file *file); - -extern void mddev_init(struct mddev *mddev); -extern int md_run(struct mddev *mddev); -extern void md_stop(struct mddev *mddev); -extern void md_stop_writes(struct mddev *mddev); -extern int md_rdev_init(struct md_rdev *rdev); - -extern void mddev_suspend(struct mddev *mddev); -extern void mddev_resume(struct mddev *mddev); -extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, - struct mddev *mddev); -extern struct 
bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, - struct mddev *mddev); -extern int mddev_check_plugged(struct mddev *mddev); -extern void md_trim_bio(struct bio *bio, int offset, int size); -#endif /* _MD_MD_H */ diff --git a/ANDROID_3.4.5/drivers/md/multipath.c b/ANDROID_3.4.5/drivers/md/multipath.c deleted file mode 100644 index 9339e67f..00000000 --- a/ANDROID_3.4.5/drivers/md/multipath.c +++ /dev/null @@ -1,557 +0,0 @@ -/* - * multipath.c : Multiple Devices driver for Linux - * - * Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat - * - * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman - * - * MULTIPATH management functions. - * - * derived from raid1.c. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * You should have received a copy of the GNU General Public License - * (for example /usr/src/linux/COPYING); if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/blkdev.h> -#include <linux/module.h> -#include <linux/raid/md_u.h> -#include <linux/seq_file.h> -#include <linux/slab.h> -#include "md.h" -#include "multipath.h" - -#define MAX_WORK_PER_DISK 128 - -#define NR_RESERVED_BUFS 32 - - -static int multipath_map (struct mpconf *conf) -{ - int i, disks = conf->raid_disks; - - /* - * Later we do read balancing on the read side - * now we use the first available disk. - */ - - rcu_read_lock(); - for (i = 0; i < disks; i++) { - struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev); - if (rdev && test_bit(In_sync, &rdev->flags)) { - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - return i; - } - } - rcu_read_unlock(); - - printk(KERN_ERR "multipath_map(): no more operational IO paths?\n"); - return (-1); -} - -static void multipath_reschedule_retry (struct multipath_bh *mp_bh) -{ - unsigned long flags; - struct mddev *mddev = mp_bh->mddev; - struct mpconf *conf = mddev->private; - - spin_lock_irqsave(&conf->device_lock, flags); - list_add(&mp_bh->retry_list, &conf->retry_list); - spin_unlock_irqrestore(&conf->device_lock, flags); - md_wakeup_thread(mddev->thread); -} - - -/* - * multipath_end_bh_io() is called when we have finished servicing a multipathed - * operation and are ready to return a success/failure code to the buffer - * cache layer. 
- */ -static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err) -{ - struct bio *bio = mp_bh->master_bio; - struct mpconf *conf = mp_bh->mddev->private; - - bio_endio(bio, err); - mempool_free(mp_bh, conf->pool); -} - -static void multipath_end_request(struct bio *bio, int error) -{ - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct multipath_bh *mp_bh = bio->bi_private; - struct mpconf *conf = mp_bh->mddev->private; - struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev; - - if (uptodate) - multipath_end_bh_io(mp_bh, 0); - else if (!(bio->bi_rw & REQ_RAHEAD)) { - /* - * oops, IO error: - */ - char b[BDEVNAME_SIZE]; - md_error (mp_bh->mddev, rdev); - printk(KERN_ERR "multipath: %s: rescheduling sector %llu\n", - bdevname(rdev->bdev,b), - (unsigned long long)bio->bi_sector); - multipath_reschedule_retry(mp_bh); - } else - multipath_end_bh_io(mp_bh, error); - rdev_dec_pending(rdev, conf->mddev); -} - -static void multipath_make_request(struct mddev *mddev, struct bio * bio) -{ - struct mpconf *conf = mddev->private; - struct multipath_bh * mp_bh; - struct multipath_info *multipath; - - if (unlikely(bio->bi_rw & REQ_FLUSH)) { - md_flush_request(mddev, bio); - return; - } - - mp_bh = mempool_alloc(conf->pool, GFP_NOIO); - - mp_bh->master_bio = bio; - mp_bh->mddev = mddev; - - mp_bh->path = multipath_map(conf); - if (mp_bh->path < 0) { - bio_endio(bio, -EIO); - mempool_free(mp_bh, conf->pool); - return; - } - multipath = conf->multipaths + mp_bh->path; - - mp_bh->bio = *bio; - mp_bh->bio.bi_sector += multipath->rdev->data_offset; - mp_bh->bio.bi_bdev = multipath->rdev->bdev; - mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT; - mp_bh->bio.bi_end_io = multipath_end_request; - mp_bh->bio.bi_private = mp_bh; - generic_make_request(&mp_bh->bio); - return; -} - -static void multipath_status (struct seq_file *seq, struct mddev *mddev) -{ - struct mpconf *conf = mddev->private; - int i; - - seq_printf (seq, " [%d/%d] [", conf->raid_disks, - conf->raid_disks - mddev->degraded); - for (i = 0; i < conf->raid_disks; i++) - seq_printf (seq, "%s", - conf->multipaths[i].rdev && - test_bit(In_sync, &conf->multipaths[i].rdev->flags) ? "U" : "_"); - seq_printf (seq, "]"); -} - -static int multipath_congested(void *data, int bits) -{ - struct mddev *mddev = data; - struct mpconf *conf = mddev->private; - int i, ret = 0; - - if (mddev_congested(mddev, bits)) - return 1; - - rcu_read_lock(); - for (i = 0; i < mddev->raid_disks ; i++) { - struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev); - if (rdev && !test_bit(Faulty, &rdev->flags)) { - struct request_queue *q = bdev_get_queue(rdev->bdev); - - ret |= bdi_congested(&q->backing_dev_info, bits); - /* Just like multipath_map, we just check the - * first available device - */ - break; - } - } - rcu_read_unlock(); - return ret; -} - -/* - * Careful, this can execute in IRQ contexts as well! - */ -static void multipath_error (struct mddev *mddev, struct md_rdev *rdev) -{ - struct mpconf *conf = mddev->private; - char b[BDEVNAME_SIZE]; - - if (conf->raid_disks - mddev->degraded <= 1) { - /* - * Uh oh, we can do nothing if this is our last path, but - * first check if this is a queued request for a device - * which has just failed. - */ - printk(KERN_ALERT - "multipath: only one IO path left and IO error.\n"); - /* leave it active... 
it's all we have */ - return; - } - /* - * Mark disk as unusable - */ - if (test_and_clear_bit(In_sync, &rdev->flags)) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded++; - spin_unlock_irqrestore(&conf->device_lock, flags); - } - set_bit(Faulty, &rdev->flags); - set_bit(MD_CHANGE_DEVS, &mddev->flags); - printk(KERN_ALERT "multipath: IO failure on %s," - " disabling IO path.\n" - "multipath: Operation continuing" - " on %d IO paths.\n", - bdevname(rdev->bdev, b), - conf->raid_disks - mddev->degraded); -} - -static void print_multipath_conf (struct mpconf *conf) -{ - int i; - struct multipath_info *tmp; - - printk("MULTIPATH conf printout:\n"); - if (!conf) { - printk("(conf==NULL)\n"); - return; - } - printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded, - conf->raid_disks); - - for (i = 0; i < conf->raid_disks; i++) { - char b[BDEVNAME_SIZE]; - tmp = conf->multipaths + i; - if (tmp->rdev) - printk(" disk%d, o:%d, dev:%s\n", - i,!test_bit(Faulty, &tmp->rdev->flags), - bdevname(tmp->rdev->bdev,b)); - } -} - - -static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev) -{ - struct mpconf *conf = mddev->private; - struct request_queue *q; - int err = -EEXIST; - int path; - struct multipath_info *p; - int first = 0; - int last = mddev->raid_disks - 1; - - if (rdev->raid_disk >= 0) - first = last = rdev->raid_disk; - - print_multipath_conf(conf); - - for (path = first; path <= last; path++) - if ((p=conf->multipaths+path)->rdev == NULL) { - q = rdev->bdev->bd_disk->queue; - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - - /* as we don't honour merge_bvec_fn, we must never risk - * violating it, so limit ->max_segments to one, lying - * within a single page. - * (Note: it is very unlikely that a device with - * merge_bvec_fn will be involved in multipath.) - */ - if (q->merge_bvec_fn) { - blk_queue_max_segments(mddev->queue, 1); - blk_queue_segment_boundary(mddev->queue, - PAGE_CACHE_SIZE - 1); - } - - spin_lock_irq(&conf->device_lock); - mddev->degraded--; - rdev->raid_disk = path; - set_bit(In_sync, &rdev->flags); - spin_unlock_irq(&conf->device_lock); - rcu_assign_pointer(p->rdev, rdev); - err = 0; - md_integrity_add_rdev(rdev, mddev); - break; - } - - print_multipath_conf(conf); - - return err; -} - -static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev) -{ - struct mpconf *conf = mddev->private; - int err = 0; - int number = rdev->raid_disk; - struct multipath_info *p = conf->multipaths + number; - - print_multipath_conf(conf); - - if (rdev == p->rdev) { - if (test_bit(In_sync, &rdev->flags) || - atomic_read(&rdev->nr_pending)) { - printk(KERN_ERR "hot-remove-disk, slot %d is identified" - " but is still operational!\n", number); - err = -EBUSY; - goto abort; - } - p->rdev = NULL; - synchronize_rcu(); - if (atomic_read(&rdev->nr_pending)) { - /* lost the race, try later */ - err = -EBUSY; - p->rdev = rdev; - goto abort; - } - err = md_integrity_register(mddev); - } -abort: - - print_multipath_conf(conf); - return err; -} - - - -/* - * This is a kernel thread which: - * - * 1. Retries failed read operations on working multipaths. - * 2. Updates the raid superblock when problems encounter. - * 3. Performs writes following reads for array syncronising. 
- */ - -static void multipathd (struct mddev *mddev) -{ - struct multipath_bh *mp_bh; - struct bio *bio; - unsigned long flags; - struct mpconf *conf = mddev->private; - struct list_head *head = &conf->retry_list; - - md_check_recovery(mddev); - for (;;) { - char b[BDEVNAME_SIZE]; - spin_lock_irqsave(&conf->device_lock, flags); - if (list_empty(head)) - break; - mp_bh = list_entry(head->prev, struct multipath_bh, retry_list); - list_del(head->prev); - spin_unlock_irqrestore(&conf->device_lock, flags); - - bio = &mp_bh->bio; - bio->bi_sector = mp_bh->master_bio->bi_sector; - - if ((mp_bh->path = multipath_map (conf))<0) { - printk(KERN_ALERT "multipath: %s: unrecoverable IO read" - " error for block %llu\n", - bdevname(bio->bi_bdev,b), - (unsigned long long)bio->bi_sector); - multipath_end_bh_io(mp_bh, -EIO); - } else { - printk(KERN_ERR "multipath: %s: redirecting sector %llu" - " to another IO path\n", - bdevname(bio->bi_bdev,b), - (unsigned long long)bio->bi_sector); - *bio = *(mp_bh->master_bio); - bio->bi_sector += conf->multipaths[mp_bh->path].rdev->data_offset; - bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev; - bio->bi_rw |= REQ_FAILFAST_TRANSPORT; - bio->bi_end_io = multipath_end_request; - bio->bi_private = mp_bh; - generic_make_request(bio); - } - } - spin_unlock_irqrestore(&conf->device_lock, flags); -} - -static sector_t multipath_size(struct mddev *mddev, sector_t sectors, int raid_disks) -{ - WARN_ONCE(sectors || raid_disks, - "%s does not support generic reshape\n", __func__); - - return mddev->dev_sectors; -} - -static int multipath_run (struct mddev *mddev) -{ - struct mpconf *conf; - int disk_idx; - struct multipath_info *disk; - struct md_rdev *rdev; - int working_disks; - - if (md_check_no_bitmap(mddev)) - return -EINVAL; - - if (mddev->level != LEVEL_MULTIPATH) { - printk("multipath: %s: raid level not set to multipath IO (%d)\n", - mdname(mddev), mddev->level); - goto out; - } - /* - * copy the already verified devices into our private MULTIPATH - * bookkeeping area. 
[whatever we allocate in multipath_run(), - * should be freed in multipath_stop()] - */ - - conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL); - mddev->private = conf; - if (!conf) { - printk(KERN_ERR - "multipath: couldn't allocate memory for %s\n", - mdname(mddev)); - goto out; - } - - conf->multipaths = kzalloc(sizeof(struct multipath_info)*mddev->raid_disks, - GFP_KERNEL); - if (!conf->multipaths) { - printk(KERN_ERR - "multipath: couldn't allocate memory for %s\n", - mdname(mddev)); - goto out_free_conf; - } - - working_disks = 0; - rdev_for_each(rdev, mddev) { - disk_idx = rdev->raid_disk; - if (disk_idx < 0 || - disk_idx >= mddev->raid_disks) - continue; - - disk = conf->multipaths + disk_idx; - disk->rdev = rdev; - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - - /* as we don't honour merge_bvec_fn, we must never risk - * violating it, not that we ever expect a device with - * a merge_bvec_fn to be involved in multipath */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { - blk_queue_max_segments(mddev->queue, 1); - blk_queue_segment_boundary(mddev->queue, - PAGE_CACHE_SIZE - 1); - } - - if (!test_bit(Faulty, &rdev->flags)) - working_disks++; - } - - conf->raid_disks = mddev->raid_disks; - conf->mddev = mddev; - spin_lock_init(&conf->device_lock); - INIT_LIST_HEAD(&conf->retry_list); - - if (!working_disks) { - printk(KERN_ERR "multipath: no operational IO paths for %s\n", - mdname(mddev)); - goto out_free_conf; - } - mddev->degraded = conf->raid_disks - working_disks; - - conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS, - sizeof(struct multipath_bh)); - if (conf->pool == NULL) { - printk(KERN_ERR - "multipath: couldn't allocate memory for %s\n", - mdname(mddev)); - goto out_free_conf; - } - - { - mddev->thread = md_register_thread(multipathd, mddev, NULL); - if (!mddev->thread) { - printk(KERN_ERR "multipath: couldn't allocate thread" - " for %s\n", mdname(mddev)); - goto out_free_conf; - } - } - - printk(KERN_INFO - "multipath: array %s active with %d out of %d IO paths\n", - mdname(mddev), conf->raid_disks - mddev->degraded, - mddev->raid_disks); - /* - * Ok, everything is just fine now - */ - md_set_array_sectors(mddev, multipath_size(mddev, 0, 0)); - - mddev->queue->backing_dev_info.congested_fn = multipath_congested; - mddev->queue->backing_dev_info.congested_data = mddev; - - if (md_integrity_register(mddev)) - goto out_free_conf; - - return 0; - -out_free_conf: - if (conf->pool) - mempool_destroy(conf->pool); - kfree(conf->multipaths); - kfree(conf); - mddev->private = NULL; -out: - return -EIO; -} - - -static int multipath_stop (struct mddev *mddev) -{ - struct mpconf *conf = mddev->private; - - md_unregister_thread(&mddev->thread); - blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ - mempool_destroy(conf->pool); - kfree(conf->multipaths); - kfree(conf); - mddev->private = NULL; - return 0; -} - -static struct md_personality multipath_personality = -{ - .name = "multipath", - .level = LEVEL_MULTIPATH, - .owner = THIS_MODULE, - .make_request = multipath_make_request, - .run = multipath_run, - .stop = multipath_stop, - .status = multipath_status, - .error_handler = multipath_error, - .hot_add_disk = multipath_add_disk, - .hot_remove_disk= multipath_remove_disk, - .size = multipath_size, -}; - -static int __init multipath_init (void) -{ - return register_md_personality (&multipath_personality); -} - -static void __exit multipath_exit (void) -{ - unregister_md_personality (&multipath_personality); -} - 
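As an aside on the bad-block bookkeeping removed earlier in this diff (md_is_badblock()/md_set_badblocks() in md.c and the BB_* macros in md.h): each table entry is a single 64-bit word packing a 54-bit start sector, a 9-bit length field encoding 1-512 sectors, and an 'acknowledged' flag in the most significant bit. The stand-alone user-space program below is only an illustrative sketch and is not part of the deleted sources; the BB_* macro bodies are copied from the md.h hunk above, while main() and the sample sector values are hypothetical.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

/* Copied from the md.h hunk above: one bad range per 64-bit word. */
#define BB_LEN_MASK    (0x00000000000001FFULL)
#define BB_OFFSET_MASK (0x7FFFFFFFFFFFFE00ULL)
#define BB_ACK_MASK    (0x8000000000000000ULL)
#define BB_MAX_LEN     512
#define BB_OFFSET(x)   (((x) & BB_OFFSET_MASK) >> 9)
#define BB_LEN(x)      (((x) & BB_LEN_MASK) + 1)
#define BB_ACK(x)      (!!((x) & BB_ACK_MASK))
#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63))

int main(void)
{
    /* Hypothetical example: an acknowledged 512-sector bad range
     * starting at sector 123456 (any start below 2^54 fits). */
    u64 entry = BB_MAKE((u64)123456, 512, 1);

    /* The three fields round-trip exactly. */
    assert(BB_OFFSET(entry) == 123456);
    assert(BB_LEN(entry) == 512);
    assert(BB_ACK(entry) == 1);

    printf("start=%llu len=%llu ack=%d\n",
           (unsigned long long)BB_OFFSET(entry),
           (unsigned long long)BB_LEN(entry),
           BB_ACK(entry));
    return 0;
}

Packing a whole range into one word keeps the table to a single page (MD_MAX_BADBLOCKS is PAGE_SIZE/8 entries, per md.h) and lets md_is_badblock() binary-search it under the seqlock described in md.c above.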
-module_init(multipath_init); -module_exit(multipath_exit); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("simple multi-path personality for MD"); -MODULE_ALIAS("md-personality-7"); /* MULTIPATH */ -MODULE_ALIAS("md-multipath"); -MODULE_ALIAS("md-level--4"); diff --git a/ANDROID_3.4.5/drivers/md/multipath.h b/ANDROID_3.4.5/drivers/md/multipath.h deleted file mode 100644 index 717c60f6..00000000 --- a/ANDROID_3.4.5/drivers/md/multipath.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef _MULTIPATH_H -#define _MULTIPATH_H - -struct multipath_info { - struct md_rdev *rdev; -}; - -struct mpconf { - struct mddev *mddev; - struct multipath_info *multipaths; - int raid_disks; - spinlock_t device_lock; - struct list_head retry_list; - - mempool_t *pool; -}; - -/* - * this is our 'private' 'collective' MULTIPATH buffer head. - * it contains information about what kind of IO operations were started - * for this MULTIPATH operation, and about their status: - */ - -struct multipath_bh { - struct mddev *mddev; - struct bio *master_bio; - struct bio bio; - int path; - struct list_head retry_list; -}; -#endif diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/Kconfig b/ANDROID_3.4.5/drivers/md/persistent-data/Kconfig deleted file mode 100644 index ceb35905..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/Kconfig +++ /dev/null @@ -1,8 +0,0 @@ -config DM_PERSISTENT_DATA - tristate - depends on BLK_DEV_DM && EXPERIMENTAL - select LIBCRC32C - select DM_BUFIO - ---help--- - Library providing immutable on-disk data structure support for - device-mapper targets such as the thin provisioning target. diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/Makefile b/ANDROID_3.4.5/drivers/md/persistent-data/Makefile deleted file mode 100644 index cfa95f66..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o -dm-persistent-data-objs := \ - dm-block-manager.o \ - dm-space-map-checker.o \ - dm-space-map-common.o \ - dm-space-map-disk.o \ - dm-space-map-metadata.o \ - dm-transaction-manager.o \ - dm-btree.o \ - dm-btree-remove.o \ - dm-btree-spine.o diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-block-manager.c b/ANDROID_3.4.5/drivers/md/persistent-data/dm-block-manager.c deleted file mode 100644 index 0317ecdc..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-block-manager.c +++ /dev/null @@ -1,620 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ -#include "dm-block-manager.h" -#include "dm-persistent-data-internal.h" -#include "../dm-bufio.h" - -#include <linux/crc32c.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/rwsem.h> -#include <linux/device-mapper.h> -#include <linux/stacktrace.h> - -#define DM_MSG_PREFIX "block manager" - -/*----------------------------------------------------------------*/ - -/* - * This is a read/write semaphore with a couple of differences. - * - * i) There is a restriction on the number of concurrent read locks that - * may be held at once. This is just an implementation detail. - * - * ii) Recursive locking attempts are detected and return EINVAL. A stack - * trace is also emitted for the previous lock aquisition. - * - * iii) Priority is given to write locks. 
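[Editor's note, added for clarity and derived from the implementation below: the lock state is encoded in ->count, where 0 means unlocked, a positive value is the number of active readers (bounded by MAX_HOLDERS), and -1 marks a single writer. Blocked tasks sit on ->waiters; write waiters are queued at the head of that list, which is how writers get priority over readers that arrive later.]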
- */ -#define MAX_HOLDERS 4 -#define MAX_STACK 10 - -typedef unsigned long stack_entries[MAX_STACK]; - -struct block_lock { - spinlock_t lock; - __s32 count; - struct list_head waiters; - struct task_struct *holders[MAX_HOLDERS]; - -#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING - struct stack_trace traces[MAX_HOLDERS]; - stack_entries entries[MAX_HOLDERS]; -#endif -}; - -struct waiter { - struct list_head list; - struct task_struct *task; - int wants_write; -}; - -static unsigned __find_holder(struct block_lock *lock, - struct task_struct *task) -{ - unsigned i; - - for (i = 0; i < MAX_HOLDERS; i++) - if (lock->holders[i] == task) - break; - - BUG_ON(i == MAX_HOLDERS); - return i; -} - -/* call this *after* you increment lock->count */ -static void __add_holder(struct block_lock *lock, struct task_struct *task) -{ - unsigned h = __find_holder(lock, NULL); -#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING - struct stack_trace *t; -#endif - - get_task_struct(task); - lock->holders[h] = task; - -#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING - t = lock->traces + h; - t->nr_entries = 0; - t->max_entries = MAX_STACK; - t->entries = lock->entries[h]; - t->skip = 2; - save_stack_trace(t); -#endif -} - -/* call this *before* you decrement lock->count */ -static void __del_holder(struct block_lock *lock, struct task_struct *task) -{ - unsigned h = __find_holder(lock, task); - lock->holders[h] = NULL; - put_task_struct(task); -} - -static int __check_holder(struct block_lock *lock) -{ - unsigned i; -#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING - static struct stack_trace t; - static stack_entries entries; -#endif - - for (i = 0; i < MAX_HOLDERS; i++) { - if (lock->holders[i] == current) { - DMERR("recursive lock detected in pool metadata"); -#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING - DMERR("previously held here:"); - print_stack_trace(lock->traces + i, 4); - - DMERR("subsequent aquisition attempted here:"); - t.nr_entries = 0; - t.max_entries = MAX_STACK; - t.entries = entries; - t.skip = 3; - save_stack_trace(&t); - print_stack_trace(&t, 4); -#endif - return -EINVAL; - } - } - - return 0; -} - -static void __wait(struct waiter *w) -{ - for (;;) { - set_task_state(current, TASK_UNINTERRUPTIBLE); - - if (!w->task) - break; - - schedule(); - } - - set_task_state(current, TASK_RUNNING); -} - -static void __wake_waiter(struct waiter *w) -{ - struct task_struct *task; - - list_del(&w->list); - task = w->task; - smp_mb(); - w->task = NULL; - wake_up_process(task); -} - -/* - * We either wake a few readers or a single writer. 
- */ -static void __wake_many(struct block_lock *lock) -{ - struct waiter *w, *tmp; - - BUG_ON(lock->count < 0); - list_for_each_entry_safe(w, tmp, &lock->waiters, list) { - if (lock->count >= MAX_HOLDERS) - return; - - if (w->wants_write) { - if (lock->count > 0) - return; /* still read locked */ - - lock->count = -1; - __add_holder(lock, w->task); - __wake_waiter(w); - return; - } - - lock->count++; - __add_holder(lock, w->task); - __wake_waiter(w); - } -} - -static void bl_init(struct block_lock *lock) -{ - int i; - - spin_lock_init(&lock->lock); - lock->count = 0; - INIT_LIST_HEAD(&lock->waiters); - for (i = 0; i < MAX_HOLDERS; i++) - lock->holders[i] = NULL; -} - -static int __available_for_read(struct block_lock *lock) -{ - return lock->count >= 0 && - lock->count < MAX_HOLDERS && - list_empty(&lock->waiters); -} - -static int bl_down_read(struct block_lock *lock) -{ - int r; - struct waiter w; - - spin_lock(&lock->lock); - r = __check_holder(lock); - if (r) { - spin_unlock(&lock->lock); - return r; - } - - if (__available_for_read(lock)) { - lock->count++; - __add_holder(lock, current); - spin_unlock(&lock->lock); - return 0; - } - - get_task_struct(current); - - w.task = current; - w.wants_write = 0; - list_add_tail(&w.list, &lock->waiters); - spin_unlock(&lock->lock); - - __wait(&w); - put_task_struct(current); - return 0; -} - -static int bl_down_read_nonblock(struct block_lock *lock) -{ - int r; - - spin_lock(&lock->lock); - r = __check_holder(lock); - if (r) - goto out; - - if (__available_for_read(lock)) { - lock->count++; - __add_holder(lock, current); - r = 0; - } else - r = -EWOULDBLOCK; - -out: - spin_unlock(&lock->lock); - return r; -} - -static void bl_up_read(struct block_lock *lock) -{ - spin_lock(&lock->lock); - BUG_ON(lock->count <= 0); - __del_holder(lock, current); - --lock->count; - if (!list_empty(&lock->waiters)) - __wake_many(lock); - spin_unlock(&lock->lock); -} - -static int bl_down_write(struct block_lock *lock) -{ - int r; - struct waiter w; - - spin_lock(&lock->lock); - r = __check_holder(lock); - if (r) { - spin_unlock(&lock->lock); - return r; - } - - if (lock->count == 0 && list_empty(&lock->waiters)) { - lock->count = -1; - __add_holder(lock, current); - spin_unlock(&lock->lock); - return 0; - } - - get_task_struct(current); - w.task = current; - w.wants_write = 1; - - /* - * Writers given priority. We know there's only one mutator in the - * system, so ignoring the ordering reversal. - */ - list_add(&w.list, &lock->waiters); - spin_unlock(&lock->lock); - - __wait(&w); - put_task_struct(current); - - return 0; -} - -static void bl_up_write(struct block_lock *lock) -{ - spin_lock(&lock->lock); - __del_holder(lock, current); - lock->count = 0; - if (!list_empty(&lock->waiters)) - __wake_many(lock); - spin_unlock(&lock->lock); -} - -static void report_recursive_bug(dm_block_t b, int r) -{ - if (r == -EINVAL) - DMERR("recursive acquisition of block %llu requested.", - (unsigned long long) b); -} - -/*----------------------------------------------------------------*/ - -/* - * Block manager is currently implemented using dm-bufio. struct - * dm_block_manager and struct dm_block map directly onto a couple of - * structs in the bufio interface. I want to retain the freedom to move - * away from bufio in the future. So these structs are just cast within - * this .c file, rather than making it through to the public interface. 
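[Editor's note: concretely, dm_block_manager_create() below returns the dm_bufio_client pointer cast to struct dm_block_manager *, and to_bufio()/to_buffer() cast back at each call site, so the public types stay opaque and the bufio dependency never leaks into the interface.]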
- */ -static struct dm_buffer *to_buffer(struct dm_block *b) -{ - return (struct dm_buffer *) b; -} - -static struct dm_bufio_client *to_bufio(struct dm_block_manager *bm) -{ - return (struct dm_bufio_client *) bm; -} - -dm_block_t dm_block_location(struct dm_block *b) -{ - return dm_bufio_get_block_number(to_buffer(b)); -} -EXPORT_SYMBOL_GPL(dm_block_location); - -void *dm_block_data(struct dm_block *b) -{ - return dm_bufio_get_block_data(to_buffer(b)); -} -EXPORT_SYMBOL_GPL(dm_block_data); - -struct buffer_aux { - struct dm_block_validator *validator; - struct block_lock lock; - int write_locked; -}; - -static void dm_block_manager_alloc_callback(struct dm_buffer *buf) -{ - struct buffer_aux *aux = dm_bufio_get_aux_data(buf); - aux->validator = NULL; - bl_init(&aux->lock); -} - -static void dm_block_manager_write_callback(struct dm_buffer *buf) -{ - struct buffer_aux *aux = dm_bufio_get_aux_data(buf); - if (aux->validator) { - aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf, - dm_bufio_get_block_size(dm_bufio_get_client(buf))); - } -} - -/*---------------------------------------------------------------- - * Public interface - *--------------------------------------------------------------*/ -struct dm_block_manager *dm_block_manager_create(struct block_device *bdev, - unsigned block_size, - unsigned cache_size, - unsigned max_held_per_thread) -{ - return (struct dm_block_manager *) - dm_bufio_client_create(bdev, block_size, max_held_per_thread, - sizeof(struct buffer_aux), - dm_block_manager_alloc_callback, - dm_block_manager_write_callback); -} -EXPORT_SYMBOL_GPL(dm_block_manager_create); - -void dm_block_manager_destroy(struct dm_block_manager *bm) -{ - return dm_bufio_client_destroy(to_bufio(bm)); -} -EXPORT_SYMBOL_GPL(dm_block_manager_destroy); - -unsigned dm_bm_block_size(struct dm_block_manager *bm) -{ - return dm_bufio_get_block_size(to_bufio(bm)); -} -EXPORT_SYMBOL_GPL(dm_bm_block_size); - -dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm) -{ - return dm_bufio_get_device_size(to_bufio(bm)); -} - -static int dm_bm_validate_buffer(struct dm_block_manager *bm, - struct dm_buffer *buf, - struct buffer_aux *aux, - struct dm_block_validator *v) -{ - if (unlikely(!aux->validator)) { - int r; - if (!v) - return 0; - r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(to_bufio(bm))); - if (unlikely(r)) - return r; - aux->validator = v; - } else { - if (unlikely(aux->validator != v)) { - DMERR("validator mismatch (old=%s vs new=%s) for block %llu", - aux->validator->name, v ? 
v->name : "NULL", - (unsigned long long) - dm_bufio_get_block_number(buf)); - return -EINVAL; - } - } - - return 0; -} -int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, - struct dm_block_validator *v, - struct dm_block **result) -{ - struct buffer_aux *aux; - void *p; - int r; - - p = dm_bufio_read(to_bufio(bm), b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) - return PTR_ERR(p); - - aux = dm_bufio_get_aux_data(to_buffer(*result)); - r = bl_down_read(&aux->lock); - if (unlikely(r)) { - dm_bufio_release(to_buffer(*result)); - report_recursive_bug(b, r); - return r; - } - - aux->write_locked = 0; - - r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v); - if (unlikely(r)) { - bl_up_read(&aux->lock); - dm_bufio_release(to_buffer(*result)); - return r; - } - - return 0; -} -EXPORT_SYMBOL_GPL(dm_bm_read_lock); - -int dm_bm_write_lock(struct dm_block_manager *bm, - dm_block_t b, struct dm_block_validator *v, - struct dm_block **result) -{ - struct buffer_aux *aux; - void *p; - int r; - - p = dm_bufio_read(to_bufio(bm), b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) - return PTR_ERR(p); - - aux = dm_bufio_get_aux_data(to_buffer(*result)); - r = bl_down_write(&aux->lock); - if (r) { - dm_bufio_release(to_buffer(*result)); - report_recursive_bug(b, r); - return r; - } - - aux->write_locked = 1; - - r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v); - if (unlikely(r)) { - bl_up_write(&aux->lock); - dm_bufio_release(to_buffer(*result)); - return r; - } - - return 0; -} -EXPORT_SYMBOL_GPL(dm_bm_write_lock); - -int dm_bm_read_try_lock(struct dm_block_manager *bm, - dm_block_t b, struct dm_block_validator *v, - struct dm_block **result) -{ - struct buffer_aux *aux; - void *p; - int r; - - p = dm_bufio_get(to_bufio(bm), b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) - return PTR_ERR(p); - if (unlikely(!p)) - return -EWOULDBLOCK; - - aux = dm_bufio_get_aux_data(to_buffer(*result)); - r = bl_down_read_nonblock(&aux->lock); - if (r < 0) { - dm_bufio_release(to_buffer(*result)); - report_recursive_bug(b, r); - return r; - } - aux->write_locked = 0; - - r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v); - if (unlikely(r)) { - bl_up_read(&aux->lock); - dm_bufio_release(to_buffer(*result)); - return r; - } - - return 0; -} - -int dm_bm_write_lock_zero(struct dm_block_manager *bm, - dm_block_t b, struct dm_block_validator *v, - struct dm_block **result) -{ - int r; - struct buffer_aux *aux; - void *p; - - p = dm_bufio_new(to_bufio(bm), b, (struct dm_buffer **) result); - if (unlikely(IS_ERR(p))) - return PTR_ERR(p); - - memset(p, 0, dm_bm_block_size(bm)); - - aux = dm_bufio_get_aux_data(to_buffer(*result)); - r = bl_down_write(&aux->lock); - if (r) { - dm_bufio_release(to_buffer(*result)); - return r; - } - - aux->write_locked = 1; - aux->validator = v; - - return 0; -} - -int dm_bm_unlock(struct dm_block *b) -{ - struct buffer_aux *aux; - aux = dm_bufio_get_aux_data(to_buffer(b)); - - if (aux->write_locked) { - dm_bufio_mark_buffer_dirty(to_buffer(b)); - bl_up_write(&aux->lock); - } else - bl_up_read(&aux->lock); - - dm_bufio_release(to_buffer(b)); - - return 0; -} -EXPORT_SYMBOL_GPL(dm_bm_unlock); - -int dm_bm_unlock_move(struct dm_block *b, dm_block_t n) -{ - struct buffer_aux *aux; - - aux = dm_bufio_get_aux_data(to_buffer(b)); - - if (aux->write_locked) { - dm_bufio_mark_buffer_dirty(to_buffer(b)); - bl_up_write(&aux->lock); - } else - bl_up_read(&aux->lock); - - dm_bufio_release_move(to_buffer(b), n); - return 0; -} - -int 
dm_bm_flush_and_unlock(struct dm_block_manager *bm, - struct dm_block *superblock) -{ - int r; - - r = dm_bufio_write_dirty_buffers(to_bufio(bm)); - if (unlikely(r)) - return r; - r = dm_bufio_issue_flush(to_bufio(bm)); - if (unlikely(r)) - return r; - - dm_bm_unlock(superblock); - - r = dm_bufio_write_dirty_buffers(to_bufio(bm)); - if (unlikely(r)) - return r; - r = dm_bufio_issue_flush(to_bufio(bm)); - if (unlikely(r)) - return r; - - return 0; -} - -u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) -{ - return crc32c(~(u32) 0, data, len) ^ init_xor; -} -EXPORT_SYMBOL_GPL(dm_bm_checksum); - -/*----------------------------------------------------------------*/ - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); -MODULE_DESCRIPTION("Immutable metadata library for dm"); - -/*----------------------------------------------------------------*/ diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-block-manager.h b/ANDROID_3.4.5/drivers/md/persistent-data/dm-block-manager.h deleted file mode 100644 index 924833d2..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-block-manager.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#ifndef _LINUX_DM_BLOCK_MANAGER_H -#define _LINUX_DM_BLOCK_MANAGER_H - -#include <linux/types.h> -#include <linux/blkdev.h> - -/*----------------------------------------------------------------*/ - -/* - * Block number. - */ -typedef uint64_t dm_block_t; -struct dm_block; - -dm_block_t dm_block_location(struct dm_block *b); -void *dm_block_data(struct dm_block *b); - -/*----------------------------------------------------------------*/ - -/* - * @name should be a unique identifier for the block manager, no longer - * than 32 chars. - * - * @max_held_per_thread should be the maximum number of locks, read or - * write, that an individual thread holds at any one time. - */ -struct dm_block_manager; -struct dm_block_manager *dm_block_manager_create( - struct block_device *bdev, unsigned block_size, - unsigned cache_size, unsigned max_held_per_thread); -void dm_block_manager_destroy(struct dm_block_manager *bm); - -unsigned dm_bm_block_size(struct dm_block_manager *bm); -dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm); - -/*----------------------------------------------------------------*/ - -/* - * The validator allows the caller to verify newly-read data and modify - * the data just before writing, e.g. to calculate checksums. It's - * important to be consistent with your use of validators. The only time - * you can change validators is if you call dm_bm_write_lock_zero. - */ -struct dm_block_validator { - const char *name; - void (*prepare_for_write)(struct dm_block_validator *v, struct dm_block *b, size_t block_size); - - /* - * Return 0 if the checksum is valid or < 0 on error. - */ - int (*check)(struct dm_block_validator *v, struct dm_block *b, size_t block_size); -}; - -/*----------------------------------------------------------------*/ - -/* - * You can have multiple concurrent readers or a single writer holding a - * block lock. - */ - -/* - * dm_bm_lock() locks a block and returns through @result a pointer to - * memory that holds a copy of that block. If you have write-locked the - * block then any changes you make to memory pointed to by @result will be - * written back to the disk sometime after dm_bm_unlock is called. 
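A minimal usage sketch of the read-lock idiom (editor's addition; bm, block_nr, my_validator and process() are placeholders, not names from this header):

	struct dm_block *blk;
	int r;

	r = dm_bm_read_lock(bm, block_nr, &my_validator, &blk);
	if (r < 0)
		return r;		/* lock or validation failed */

	process(dm_block_data(blk));	/* data is stable while the lock is held */

	dm_bm_unlock(blk);		/* releases the lock and the buffer */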
- */ -int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, - struct dm_block_validator *v, - struct dm_block **result); - -int dm_bm_write_lock(struct dm_block_manager *bm, dm_block_t b, - struct dm_block_validator *v, - struct dm_block **result); - -/* - * The *_try_lock variants return -EWOULDBLOCK if the block isn't - * available immediately. - */ -int dm_bm_read_try_lock(struct dm_block_manager *bm, dm_block_t b, - struct dm_block_validator *v, - struct dm_block **result); - -/* - * Use dm_bm_write_lock_zero() when you know you're going to - * overwrite the block completely. It saves a disk read. - */ -int dm_bm_write_lock_zero(struct dm_block_manager *bm, dm_block_t b, - struct dm_block_validator *v, - struct dm_block **result); - -int dm_bm_unlock(struct dm_block *b); - -/* - * An optimisation; we often want to copy a block's contents to a new - * block. eg, as part of the shadowing operation. It's far better for - * bufio to do this move behind the scenes than hold 2 locks and memcpy the - * data. - */ -int dm_bm_unlock_move(struct dm_block *b, dm_block_t n); - -/* - * It's a common idiom to have a superblock that should be committed last. - * - * @superblock should be write-locked on entry. It will be unlocked during - * this function. All dirty blocks are guaranteed to be written and flushed - * before the superblock. - * - * This method always blocks. - */ -int dm_bm_flush_and_unlock(struct dm_block_manager *bm, - struct dm_block *superblock); - -u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor); - -/*----------------------------------------------------------------*/ - -#endif /* _LINUX_DM_BLOCK_MANAGER_H */ diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-btree-internal.h b/ANDROID_3.4.5/drivers/md/persistent-data/dm-btree-internal.h deleted file mode 100644 index 5709bfea..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-btree-internal.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#ifndef DM_BTREE_INTERNAL_H -#define DM_BTREE_INTERNAL_H - -#include "dm-btree.h" - -/*----------------------------------------------------------------*/ - -/* - * We'll need 2 accessor functions for n->csum and n->blocknr - * to support dm-btree-spine.c in that case. - */ - -enum node_flags { - INTERNAL_NODE = 1, - LEAF_NODE = 1 << 1 -}; - -/* - * Every btree node begins with this structure. Make sure it's a multiple - * of 8-bytes in size, otherwise the 64bit keys will be mis-aligned. - */ -struct node_header { - __le32 csum; - __le32 flags; - __le64 blocknr; /* Block this node is supposed to live in. */ - - __le32 nr_entries; - __le32 max_entries; - __le32 value_size; - __le32 padding; -} __packed; - -struct node { - struct node_header header; - __le64 keys[0]; -} __packed; - - -void inc_children(struct dm_transaction_manager *tm, struct node *n, - struct dm_btree_value_type *vt); - -int new_block(struct dm_btree_info *info, struct dm_block **result); -int unlock_block(struct dm_btree_info *info, struct dm_block *b); - -/* - * Spines keep track of the rolling locks. There are 2 variants, read-only - * and one that uses shadowing. These are separate structs to allow the - * type checker to spot misuse, for example accidentally calling read_lock - * on a shadow spine. 
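[Editor's note: in practice a spine holds at most two locked blocks at a time, the current node and its parent; each ro_step()/shadow_step() locks the next child and releases the grandparent, so a descent never accumulates more than two block locks regardless of tree depth.]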
- */ -struct ro_spine { - struct dm_btree_info *info; - - int count; - struct dm_block *nodes[2]; -}; - -void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info); -int exit_ro_spine(struct ro_spine *s); -int ro_step(struct ro_spine *s, dm_block_t new_child); -struct node *ro_node(struct ro_spine *s); - -struct shadow_spine { - struct dm_btree_info *info; - - int count; - struct dm_block *nodes[2]; - - dm_block_t root; -}; - -void init_shadow_spine(struct shadow_spine *s, struct dm_btree_info *info); -int exit_shadow_spine(struct shadow_spine *s); - -int shadow_step(struct shadow_spine *s, dm_block_t b, - struct dm_btree_value_type *vt); - -/* - * The spine must have at least one entry before calling this. - */ -struct dm_block *shadow_current(struct shadow_spine *s); - -/* - * The spine must have at least two entries before calling this. - */ -struct dm_block *shadow_parent(struct shadow_spine *s); - -int shadow_has_parent(struct shadow_spine *s); - -int shadow_root(struct shadow_spine *s); - -/* - * Some inlines. - */ -static inline __le64 *key_ptr(struct node *n, uint32_t index) -{ - return n->keys + index; -} - -static inline void *value_base(struct node *n) -{ - return &n->keys[le32_to_cpu(n->header.max_entries)]; -} - -static inline void *value_ptr(struct node *n, uint32_t index) -{ - uint32_t value_size = le32_to_cpu(n->header.value_size); - return value_base(n) + (value_size * index); -} - -/* - * Assumes the values are suitably-aligned and converts to core format. - */ -static inline uint64_t value64(struct node *n, uint32_t index) -{ - __le64 *values_le = value_base(n); - - return le64_to_cpu(values_le[index]); -} - -/* - * Searching for a key within a single node. - */ -int lower_bound(struct node *n, uint64_t key); - -extern struct dm_block_validator btree_node_validator; - -#endif /* DM_BTREE_INTERNAL_H */ diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-btree-remove.c b/ANDROID_3.4.5/drivers/md/persistent-data/dm-btree-remove.c deleted file mode 100644 index aa71e235..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-btree-remove.c +++ /dev/null @@ -1,590 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#include "dm-btree.h" -#include "dm-btree-internal.h" -#include "dm-transaction-manager.h" - -#include <linux/export.h> - -/* - * Removing an entry from a btree - * ============================== - * - * A very important constraint for our btree is that no node, except the - * root, may have fewer than a certain number of entries. - * (MIN_ENTRIES <= nr_entries <= MAX_ENTRIES). - * - * Ensuring this is complicated by the way we want to only ever hold the - * locks on 2 nodes concurrently, and only change nodes in a top to bottom - * fashion. - * - * Each node may have a left or right sibling. When decending the spine, - * if a node contains only MIN_ENTRIES then we try and increase this to at - * least MIN_ENTRIES + 1. We do this in the following ways: - * - * [A] No siblings => this can only happen if the node is the root, in which - * case we copy the childs contents over the root. 
- * - * [B] No left sibling - * ==> rebalance(node, right sibling) - * - * [C] No right sibling - * ==> rebalance(left sibling, node) - * - * [D] Both siblings, total_entries(left, node, right) <= DEL_THRESHOLD - * ==> delete node adding it's contents to left and right - * - * [E] Both siblings, total_entries(left, node, right) > DEL_THRESHOLD - * ==> rebalance(left, node, right) - * - * After these operations it's possible that the our original node no - * longer contains the desired sub tree. For this reason this rebalancing - * is performed on the children of the current node. This also avoids - * having a special case for the root. - * - * Once this rebalancing has occurred we can then step into the child node - * for internal nodes. Or delete the entry for leaf nodes. - */ - -/* - * Some little utilities for moving node data around. - */ -static void node_shift(struct node *n, int shift) -{ - uint32_t nr_entries = le32_to_cpu(n->header.nr_entries); - uint32_t value_size = le32_to_cpu(n->header.value_size); - - if (shift < 0) { - shift = -shift; - BUG_ON(shift > nr_entries); - BUG_ON((void *) key_ptr(n, shift) >= value_ptr(n, shift)); - memmove(key_ptr(n, 0), - key_ptr(n, shift), - (nr_entries - shift) * sizeof(__le64)); - memmove(value_ptr(n, 0), - value_ptr(n, shift), - (nr_entries - shift) * value_size); - } else { - BUG_ON(nr_entries + shift > le32_to_cpu(n->header.max_entries)); - memmove(key_ptr(n, shift), - key_ptr(n, 0), - nr_entries * sizeof(__le64)); - memmove(value_ptr(n, shift), - value_ptr(n, 0), - nr_entries * value_size); - } -} - -static void node_copy(struct node *left, struct node *right, int shift) -{ - uint32_t nr_left = le32_to_cpu(left->header.nr_entries); - uint32_t value_size = le32_to_cpu(left->header.value_size); - BUG_ON(value_size != le32_to_cpu(right->header.value_size)); - - if (shift < 0) { - shift = -shift; - BUG_ON(nr_left + shift > le32_to_cpu(left->header.max_entries)); - memcpy(key_ptr(left, nr_left), - key_ptr(right, 0), - shift * sizeof(__le64)); - memcpy(value_ptr(left, nr_left), - value_ptr(right, 0), - shift * value_size); - } else { - BUG_ON(shift > le32_to_cpu(right->header.max_entries)); - memcpy(key_ptr(right, 0), - key_ptr(left, nr_left - shift), - shift * sizeof(__le64)); - memcpy(value_ptr(right, 0), - value_ptr(left, nr_left - shift), - shift * value_size); - } -} - -/* - * Delete a specific entry from a leaf node. 
- */ -static void delete_at(struct node *n, unsigned index) -{ - unsigned nr_entries = le32_to_cpu(n->header.nr_entries); - unsigned nr_to_copy = nr_entries - (index + 1); - uint32_t value_size = le32_to_cpu(n->header.value_size); - BUG_ON(index >= nr_entries); - - if (nr_to_copy) { - memmove(key_ptr(n, index), - key_ptr(n, index + 1), - nr_to_copy * sizeof(__le64)); - - memmove(value_ptr(n, index), - value_ptr(n, index + 1), - nr_to_copy * value_size); - } - - n->header.nr_entries = cpu_to_le32(nr_entries - 1); -} - -static unsigned merge_threshold(struct node *n) -{ - return le32_to_cpu(n->header.max_entries) / 3; -} - -struct child { - unsigned index; - struct dm_block *block; - struct node *n; -}; - -static struct dm_btree_value_type le64_type = { - .context = NULL, - .size = sizeof(__le64), - .inc = NULL, - .dec = NULL, - .equal = NULL -}; - -static int init_child(struct dm_btree_info *info, struct node *parent, - unsigned index, struct child *result) -{ - int r, inc; - dm_block_t root; - - result->index = index; - root = value64(parent, index); - - r = dm_tm_shadow_block(info->tm, root, &btree_node_validator, - &result->block, &inc); - if (r) - return r; - - result->n = dm_block_data(result->block); - - if (inc) - inc_children(info->tm, result->n, &le64_type); - - *((__le64 *) value_ptr(parent, index)) = - cpu_to_le64(dm_block_location(result->block)); - - return 0; -} - -static int exit_child(struct dm_btree_info *info, struct child *c) -{ - return dm_tm_unlock(info->tm, c->block); -} - -static void shift(struct node *left, struct node *right, int count) -{ - uint32_t nr_left = le32_to_cpu(left->header.nr_entries); - uint32_t nr_right = le32_to_cpu(right->header.nr_entries); - uint32_t max_entries = le32_to_cpu(left->header.max_entries); - uint32_t r_max_entries = le32_to_cpu(right->header.max_entries); - - BUG_ON(max_entries != r_max_entries); - BUG_ON(nr_left - count > max_entries); - BUG_ON(nr_right + count > max_entries); - - if (!count) - return; - - if (count > 0) { - node_shift(right, count); - node_copy(left, right, count); - } else { - node_copy(left, right, count); - node_shift(right, count); - } - - left->header.nr_entries = cpu_to_le32(nr_left - count); - right->header.nr_entries = cpu_to_le32(nr_right + count); -} - -static void __rebalance2(struct dm_btree_info *info, struct node *parent, - struct child *l, struct child *r) -{ - struct node *left = l->n; - struct node *right = r->n; - uint32_t nr_left = le32_to_cpu(left->header.nr_entries); - uint32_t nr_right = le32_to_cpu(right->header.nr_entries); - unsigned threshold = 2 * merge_threshold(left) + 1; - - if (nr_left + nr_right < threshold) { - /* - * Merge - */ - node_copy(left, right, -nr_right); - left->header.nr_entries = cpu_to_le32(nr_left + nr_right); - delete_at(parent, r->index); - - /* - * We need to decrement the right block, but not it's - * children, since they're still referenced by left. - */ - dm_tm_dec(info->tm, dm_block_location(r->block)); - } else { - /* - * Rebalance. 
- */ - unsigned target_left = (nr_left + nr_right) / 2; - shift(left, right, nr_left - target_left); - *key_ptr(parent, r->index) = right->keys[0]; - } -} - -static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, - unsigned left_index) -{ - int r; - struct node *parent; - struct child left, right; - - parent = dm_block_data(shadow_current(s)); - - r = init_child(info, parent, left_index, &left); - if (r) - return r; - - r = init_child(info, parent, left_index + 1, &right); - if (r) { - exit_child(info, &left); - return r; - } - - __rebalance2(info, parent, &left, &right); - - r = exit_child(info, &left); - if (r) { - exit_child(info, &right); - return r; - } - - return exit_child(info, &right); -} - -/* - * We dump as many entries from center as possible into left, then the rest - * in right, then rebalance2. This wastes some cpu, but I want something - * simple atm. - */ -static void delete_center_node(struct dm_btree_info *info, struct node *parent, - struct child *l, struct child *c, struct child *r, - struct node *left, struct node *center, struct node *right, - uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) -{ - uint32_t max_entries = le32_to_cpu(left->header.max_entries); - unsigned shift = min(max_entries - nr_left, nr_center); - - BUG_ON(nr_left + shift > max_entries); - node_copy(left, center, -shift); - left->header.nr_entries = cpu_to_le32(nr_left + shift); - - if (shift != nr_center) { - shift = nr_center - shift; - BUG_ON((nr_right + shift) > max_entries); - node_shift(right, shift); - node_copy(center, right, shift); - right->header.nr_entries = cpu_to_le32(nr_right + shift); - } - *key_ptr(parent, r->index) = right->keys[0]; - - delete_at(parent, c->index); - r->index--; - - dm_tm_dec(info->tm, dm_block_location(c->block)); - __rebalance2(info, parent, l, r); -} - -/* - * Redistributes entries among 3 sibling nodes. 
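[Editor's worked example, not in the original comment: with max_entries = 252 (a 4096-byte block holding 8-byte values, see calc_max_entries() in dm-btree.c), merge_threshold() is 84. __rebalance2() above therefore merges two siblings only when they hold fewer than 2*84 + 1 = 169 entries between them, and __rebalance3() below collapses three siblings into two only when they hold fewer than 4*84 + 1 = 337; otherwise it calls redistribute3(), which follows, to level the three nodes out to roughly total/3 entries each.]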
- */ -static void redistribute3(struct dm_btree_info *info, struct node *parent, - struct child *l, struct child *c, struct child *r, - struct node *left, struct node *center, struct node *right, - uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) -{ - int s; - uint32_t max_entries = le32_to_cpu(left->header.max_entries); - unsigned target = (nr_left + nr_center + nr_right) / 3; - BUG_ON(target > max_entries); - - if (nr_left < nr_right) { - s = nr_left - target; - - if (s < 0 && nr_center < -s) { - /* not enough in central node */ - shift(left, center, nr_center); - s = nr_center - target; - shift(left, right, s); - nr_right += s; - } else - shift(left, center, s); - - shift(center, right, target - nr_right); - - } else { - s = target - nr_right; - if (s > 0 && nr_center < s) { - /* not enough in central node */ - shift(center, right, nr_center); - s = target - nr_center; - shift(left, right, s); - nr_left -= s; - } else - shift(center, right, s); - - shift(left, center, nr_left - target); - } - - *key_ptr(parent, c->index) = center->keys[0]; - *key_ptr(parent, r->index) = right->keys[0]; -} - -static void __rebalance3(struct dm_btree_info *info, struct node *parent, - struct child *l, struct child *c, struct child *r) -{ - struct node *left = l->n; - struct node *center = c->n; - struct node *right = r->n; - - uint32_t nr_left = le32_to_cpu(left->header.nr_entries); - uint32_t nr_center = le32_to_cpu(center->header.nr_entries); - uint32_t nr_right = le32_to_cpu(right->header.nr_entries); - - unsigned threshold = merge_threshold(left) * 4 + 1; - - BUG_ON(left->header.max_entries != center->header.max_entries); - BUG_ON(center->header.max_entries != right->header.max_entries); - - if ((nr_left + nr_center + nr_right) < threshold) - delete_center_node(info, parent, l, c, r, left, center, right, - nr_left, nr_center, nr_right); - else - redistribute3(info, parent, l, c, r, left, center, right, - nr_left, nr_center, nr_right); -} - -static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, - unsigned left_index) -{ - int r; - struct node *parent = dm_block_data(shadow_current(s)); - struct child left, center, right; - - /* - * FIXME: fill out an array? 
- */ - r = init_child(info, parent, left_index, &left); - if (r) - return r; - - r = init_child(info, parent, left_index + 1, ¢er); - if (r) { - exit_child(info, &left); - return r; - } - - r = init_child(info, parent, left_index + 2, &right); - if (r) { - exit_child(info, &left); - exit_child(info, ¢er); - return r; - } - - __rebalance3(info, parent, &left, ¢er, &right); - - r = exit_child(info, &left); - if (r) { - exit_child(info, ¢er); - exit_child(info, &right); - return r; - } - - r = exit_child(info, ¢er); - if (r) { - exit_child(info, &right); - return r; - } - - r = exit_child(info, &right); - if (r) - return r; - - return 0; -} - -static int get_nr_entries(struct dm_transaction_manager *tm, - dm_block_t b, uint32_t *result) -{ - int r; - struct dm_block *block; - struct node *n; - - r = dm_tm_read_lock(tm, b, &btree_node_validator, &block); - if (r) - return r; - - n = dm_block_data(block); - *result = le32_to_cpu(n->header.nr_entries); - - return dm_tm_unlock(tm, block); -} - -static int rebalance_children(struct shadow_spine *s, - struct dm_btree_info *info, uint64_t key) -{ - int i, r, has_left_sibling, has_right_sibling; - uint32_t child_entries; - struct node *n; - - n = dm_block_data(shadow_current(s)); - - if (le32_to_cpu(n->header.nr_entries) == 1) { - struct dm_block *child; - dm_block_t b = value64(n, 0); - - r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child); - if (r) - return r; - - memcpy(n, dm_block_data(child), - dm_bm_block_size(dm_tm_get_bm(info->tm))); - r = dm_tm_unlock(info->tm, child); - if (r) - return r; - - dm_tm_dec(info->tm, dm_block_location(child)); - return 0; - } - - i = lower_bound(n, key); - if (i < 0) - return -ENODATA; - - r = get_nr_entries(info->tm, value64(n, i), &child_entries); - if (r) - return r; - - has_left_sibling = i > 0; - has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1); - - if (!has_left_sibling) - r = rebalance2(s, info, i); - - else if (!has_right_sibling) - r = rebalance2(s, info, i - 1); - - else - r = rebalance3(s, info, i - 1); - - return r; -} - -static int do_leaf(struct node *n, uint64_t key, unsigned *index) -{ - int i = lower_bound(n, key); - - if ((i < 0) || - (i >= le32_to_cpu(n->header.nr_entries)) || - (le64_to_cpu(n->keys[i]) != key)) - return -ENODATA; - - *index = i; - - return 0; -} - -/* - * Prepares for removal from one level of the hierarchy. The caller must - * call delete_at() to remove the entry at index. - */ -static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, - struct dm_btree_value_type *vt, dm_block_t root, - uint64_t key, unsigned *index) -{ - int i = *index, r; - struct node *n; - - for (;;) { - r = shadow_step(s, root, vt); - if (r < 0) - break; - - /* - * We have to patch up the parent node, ugly, but I don't - * see a way to do this automatically as part of the spine - * op. 
- */ - if (shadow_has_parent(s)) { - __le64 location = cpu_to_le64(dm_block_location(shadow_current(s))); - memcpy(value_ptr(dm_block_data(shadow_parent(s)), i), - &location, sizeof(__le64)); - } - - n = dm_block_data(shadow_current(s)); - - if (le32_to_cpu(n->header.flags) & LEAF_NODE) - return do_leaf(n, key, index); - - r = rebalance_children(s, info, key); - if (r) - break; - - n = dm_block_data(shadow_current(s)); - if (le32_to_cpu(n->header.flags) & LEAF_NODE) - return do_leaf(n, key, index); - - i = lower_bound(n, key); - - /* - * We know the key is present, or else - * rebalance_children would have returned - * -ENODATA - */ - root = value64(n, i); - } - - return r; -} - -int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, - uint64_t *keys, dm_block_t *new_root) -{ - unsigned level, last_level = info->levels - 1; - int index = 0, r = 0; - struct shadow_spine spine; - struct node *n; - - init_shadow_spine(&spine, info); - for (level = 0; level < info->levels; level++) { - r = remove_raw(&spine, info, - (level == last_level ? - &info->value_type : &le64_type), - root, keys[level], (unsigned *)&index); - if (r < 0) - break; - - n = dm_block_data(shadow_current(&spine)); - if (level != last_level) { - root = value64(n, index); - continue; - } - - BUG_ON(index < 0 || index >= le32_to_cpu(n->header.nr_entries)); - - if (info->value_type.dec) - info->value_type.dec(info->value_type.context, - value_ptr(n, index)); - - delete_at(n, index); - } - - *new_root = shadow_root(&spine); - exit_shadow_spine(&spine); - - return r; -} -EXPORT_SYMBOL_GPL(dm_btree_remove); diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-btree-spine.c b/ANDROID_3.4.5/drivers/md/persistent-data/dm-btree-spine.c deleted file mode 100644 index d9a7912e..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-btree-spine.c +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. 
- */ - -#include "dm-btree-internal.h" -#include "dm-transaction-manager.h" - -#include <linux/device-mapper.h> - -#define DM_MSG_PREFIX "btree spine" - -/*----------------------------------------------------------------*/ - -#define BTREE_CSUM_XOR 121107 - -static int node_check(struct dm_block_validator *v, - struct dm_block *b, - size_t block_size); - -static void node_prepare_for_write(struct dm_block_validator *v, - struct dm_block *b, - size_t block_size) -{ - struct node *n = dm_block_data(b); - struct node_header *h = &n->header; - - h->blocknr = cpu_to_le64(dm_block_location(b)); - h->csum = cpu_to_le32(dm_bm_checksum(&h->flags, - block_size - sizeof(__le32), - BTREE_CSUM_XOR)); - - BUG_ON(node_check(v, b, 4096)); -} - -static int node_check(struct dm_block_validator *v, - struct dm_block *b, - size_t block_size) -{ - struct node *n = dm_block_data(b); - struct node_header *h = &n->header; - size_t value_size; - __le32 csum_disk; - uint32_t flags; - - if (dm_block_location(b) != le64_to_cpu(h->blocknr)) { - DMERR("node_check failed blocknr %llu wanted %llu", - le64_to_cpu(h->blocknr), dm_block_location(b)); - return -ENOTBLK; - } - - csum_disk = cpu_to_le32(dm_bm_checksum(&h->flags, - block_size - sizeof(__le32), - BTREE_CSUM_XOR)); - if (csum_disk != h->csum) { - DMERR("node_check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(h->csum)); - return -EILSEQ; - } - - value_size = le32_to_cpu(h->value_size); - - if (sizeof(struct node_header) + - (sizeof(__le64) + value_size) * le32_to_cpu(h->max_entries) > block_size) { - DMERR("node_check failed: max_entries too large"); - return -EILSEQ; - } - - if (le32_to_cpu(h->nr_entries) > le32_to_cpu(h->max_entries)) { - DMERR("node_check failed, too many entries"); - return -EILSEQ; - } - - /* - * The node must be either INTERNAL or LEAF. 
- */ - flags = le32_to_cpu(h->flags); - if (!(flags & INTERNAL_NODE) && !(flags & LEAF_NODE)) { - DMERR("node_check failed, node is neither INTERNAL or LEAF"); - return -EILSEQ; - } - - return 0; -} - -struct dm_block_validator btree_node_validator = { - .name = "btree_node", - .prepare_for_write = node_prepare_for_write, - .check = node_check -}; - -/*----------------------------------------------------------------*/ - -static int bn_read_lock(struct dm_btree_info *info, dm_block_t b, - struct dm_block **result) -{ - return dm_tm_read_lock(info->tm, b, &btree_node_validator, result); -} - -static int bn_shadow(struct dm_btree_info *info, dm_block_t orig, - struct dm_btree_value_type *vt, - struct dm_block **result) -{ - int r, inc; - - r = dm_tm_shadow_block(info->tm, orig, &btree_node_validator, - result, &inc); - if (!r && inc) - inc_children(info->tm, dm_block_data(*result), vt); - - return r; -} - -int new_block(struct dm_btree_info *info, struct dm_block **result) -{ - return dm_tm_new_block(info->tm, &btree_node_validator, result); -} - -int unlock_block(struct dm_btree_info *info, struct dm_block *b) -{ - return dm_tm_unlock(info->tm, b); -} - -/*----------------------------------------------------------------*/ - -void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info) -{ - s->info = info; - s->count = 0; - s->nodes[0] = NULL; - s->nodes[1] = NULL; -} - -int exit_ro_spine(struct ro_spine *s) -{ - int r = 0, i; - - for (i = 0; i < s->count; i++) { - int r2 = unlock_block(s->info, s->nodes[i]); - if (r2 < 0) - r = r2; - } - - return r; -} - -int ro_step(struct ro_spine *s, dm_block_t new_child) -{ - int r; - - if (s->count == 2) { - r = unlock_block(s->info, s->nodes[0]); - if (r < 0) - return r; - s->nodes[0] = s->nodes[1]; - s->count--; - } - - r = bn_read_lock(s->info, new_child, s->nodes + s->count); - if (!r) - s->count++; - - return r; -} - -struct node *ro_node(struct ro_spine *s) -{ - struct dm_block *block; - - BUG_ON(!s->count); - block = s->nodes[s->count - 1]; - - return dm_block_data(block); -} - -/*----------------------------------------------------------------*/ - -void init_shadow_spine(struct shadow_spine *s, struct dm_btree_info *info) -{ - s->info = info; - s->count = 0; -} - -int exit_shadow_spine(struct shadow_spine *s) -{ - int r = 0, i; - - for (i = 0; i < s->count; i++) { - int r2 = unlock_block(s->info, s->nodes[i]); - if (r2 < 0) - r = r2; - } - - return r; -} - -int shadow_step(struct shadow_spine *s, dm_block_t b, - struct dm_btree_value_type *vt) -{ - int r; - - if (s->count == 2) { - r = unlock_block(s->info, s->nodes[0]); - if (r < 0) - return r; - s->nodes[0] = s->nodes[1]; - s->count--; - } - - r = bn_shadow(s->info, b, vt, s->nodes + s->count); - if (!r) { - if (!s->count) - s->root = dm_block_location(s->nodes[0]); - - s->count++; - } - - return r; -} - -struct dm_block *shadow_current(struct shadow_spine *s) -{ - BUG_ON(!s->count); - - return s->nodes[s->count - 1]; -} - -struct dm_block *shadow_parent(struct shadow_spine *s) -{ - BUG_ON(s->count != 2); - - return s->count == 2 ? 
s->nodes[0] : NULL; -} - -int shadow_has_parent(struct shadow_spine *s) -{ - return s->count >= 2; -} - -int shadow_root(struct shadow_spine *s) -{ - return s->root; -} diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-btree.c b/ANDROID_3.4.5/drivers/md/persistent-data/dm-btree.c deleted file mode 100644 index d12b2cc5..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-btree.c +++ /dev/null @@ -1,804 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#include "dm-btree-internal.h" -#include "dm-space-map.h" -#include "dm-transaction-manager.h" - -#include <linux/export.h> -#include <linux/device-mapper.h> - -#define DM_MSG_PREFIX "btree" - -/*---------------------------------------------------------------- - * Array manipulation - *--------------------------------------------------------------*/ -static void memcpy_disk(void *dest, const void *src, size_t len) - __dm_written_to_disk(src) -{ - memcpy(dest, src, len); - __dm_unbless_for_disk(src); -} - -static void array_insert(void *base, size_t elt_size, unsigned nr_elts, - unsigned index, void *elt) - __dm_written_to_disk(elt) -{ - if (index < nr_elts) - memmove(base + (elt_size * (index + 1)), - base + (elt_size * index), - (nr_elts - index) * elt_size); - - memcpy_disk(base + (elt_size * index), elt, elt_size); -} - -/*----------------------------------------------------------------*/ - -/* makes the assumption that no two keys are the same. */ -static int bsearch(struct node *n, uint64_t key, int want_hi) -{ - int lo = -1, hi = le32_to_cpu(n->header.nr_entries); - - while (hi - lo > 1) { - int mid = lo + ((hi - lo) / 2); - uint64_t mid_key = le64_to_cpu(n->keys[mid]); - - if (mid_key == key) - return mid; - - if (mid_key < key) - lo = mid; - else - hi = mid; - } - - return want_hi ? hi : lo; -} - -int lower_bound(struct node *n, uint64_t key) -{ - return bsearch(n, key, 0); -} - -void inc_children(struct dm_transaction_manager *tm, struct node *n, - struct dm_btree_value_type *vt) -{ - unsigned i; - uint32_t nr_entries = le32_to_cpu(n->header.nr_entries); - - if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) - for (i = 0; i < nr_entries; i++) - dm_tm_inc(tm, value64(n, i)); - else if (vt->inc) - for (i = 0; i < nr_entries; i++) - vt->inc(vt->context, value_ptr(n, i)); -} - -static int insert_at(size_t value_size, struct node *node, unsigned index, - uint64_t key, void *value) - __dm_written_to_disk(value) -{ - uint32_t nr_entries = le32_to_cpu(node->header.nr_entries); - __le64 key_le = cpu_to_le64(key); - - if (index > nr_entries || - index >= le32_to_cpu(node->header.max_entries)) { - DMERR("too many entries in btree node for insert"); - __dm_unbless_for_disk(value); - return -ENOMEM; - } - - __dm_bless_for_disk(&key_le); - - array_insert(node->keys, sizeof(*node->keys), nr_entries, index, &key_le); - array_insert(value_base(node), value_size, nr_entries, index, value); - node->header.nr_entries = cpu_to_le32(nr_entries + 1); - - return 0; -} - -/*----------------------------------------------------------------*/ - -/* - * We want 3n entries (for some n). This works more nicely for repeated - * insert remove loops than (2n + 1). 
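[Editor's worked example: for a 4096-byte block and 8-byte values, sizeof(struct node_header) is 32, leaving 4064 bytes; each entry costs an 8-byte key plus an 8-byte value, so 254 entries fit, and rounding down to a multiple of 3 gives max_entries = 3 * 84 = 252.]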
- */ -static uint32_t calc_max_entries(size_t value_size, size_t block_size) -{ - uint32_t total, n; - size_t elt_size = sizeof(uint64_t) + value_size; /* key + value */ - - block_size -= sizeof(struct node_header); - total = block_size / elt_size; - n = total / 3; /* rounds down */ - - return 3 * n; -} - -int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root) -{ - int r; - struct dm_block *b; - struct node *n; - size_t block_size; - uint32_t max_entries; - - r = new_block(info, &b); - if (r < 0) - return r; - - block_size = dm_bm_block_size(dm_tm_get_bm(info->tm)); - max_entries = calc_max_entries(info->value_type.size, block_size); - - n = dm_block_data(b); - memset(n, 0, block_size); - n->header.flags = cpu_to_le32(LEAF_NODE); - n->header.nr_entries = cpu_to_le32(0); - n->header.max_entries = cpu_to_le32(max_entries); - n->header.value_size = cpu_to_le32(info->value_type.size); - - *root = dm_block_location(b); - return unlock_block(info, b); -} -EXPORT_SYMBOL_GPL(dm_btree_empty); - -/*----------------------------------------------------------------*/ - -/* - * Deletion uses a recursive algorithm, since we have limited stack space - * we explicitly manage our own stack on the heap. - */ -#define MAX_SPINE_DEPTH 64 -struct frame { - struct dm_block *b; - struct node *n; - unsigned level; - unsigned nr_children; - unsigned current_child; -}; - -struct del_stack { - struct dm_transaction_manager *tm; - int top; - struct frame spine[MAX_SPINE_DEPTH]; -}; - -static int top_frame(struct del_stack *s, struct frame **f) -{ - if (s->top < 0) { - DMERR("btree deletion stack empty"); - return -EINVAL; - } - - *f = s->spine + s->top; - - return 0; -} - -static int unprocessed_frames(struct del_stack *s) -{ - return s->top >= 0; -} - -static int push_frame(struct del_stack *s, dm_block_t b, unsigned level) -{ - int r; - uint32_t ref_count; - - if (s->top >= MAX_SPINE_DEPTH - 1) { - DMERR("btree deletion stack out of memory"); - return -ENOMEM; - } - - r = dm_tm_ref(s->tm, b, &ref_count); - if (r) - return r; - - if (ref_count > 1) - /* - * This is a shared node, so we can just decrement it's - * reference counter and leave the children. 
- */ - dm_tm_dec(s->tm, b); - - else { - struct frame *f = s->spine + ++s->top; - - r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b); - if (r) { - s->top--; - return r; - } - - f->n = dm_block_data(f->b); - f->level = level; - f->nr_children = le32_to_cpu(f->n->header.nr_entries); - f->current_child = 0; - } - - return 0; -} - -static void pop_frame(struct del_stack *s) -{ - struct frame *f = s->spine + s->top--; - - dm_tm_dec(s->tm, dm_block_location(f->b)); - dm_tm_unlock(s->tm, f->b); -} - -int dm_btree_del(struct dm_btree_info *info, dm_block_t root) -{ - int r; - struct del_stack *s; - - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return -ENOMEM; - s->tm = info->tm; - s->top = -1; - - r = push_frame(s, root, 1); - if (r) - goto out; - - while (unprocessed_frames(s)) { - uint32_t flags; - struct frame *f; - dm_block_t b; - - r = top_frame(s, &f); - if (r) - goto out; - - if (f->current_child >= f->nr_children) { - pop_frame(s); - continue; - } - - flags = le32_to_cpu(f->n->header.flags); - if (flags & INTERNAL_NODE) { - b = value64(f->n, f->current_child); - f->current_child++; - r = push_frame(s, b, f->level); - if (r) - goto out; - - } else if (f->level != (info->levels - 1)) { - b = value64(f->n, f->current_child); - f->current_child++; - r = push_frame(s, b, f->level + 1); - if (r) - goto out; - - } else { - if (info->value_type.dec) { - unsigned i; - - for (i = 0; i < f->nr_children; i++) - info->value_type.dec(info->value_type.context, - value_ptr(f->n, i)); - } - f->current_child = f->nr_children; - } - } - -out: - kfree(s); - return r; -} -EXPORT_SYMBOL_GPL(dm_btree_del); - -/*----------------------------------------------------------------*/ - -static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key, - int (*search_fn)(struct node *, uint64_t), - uint64_t *result_key, void *v, size_t value_size) -{ - int i, r; - uint32_t flags, nr_entries; - - do { - r = ro_step(s, block); - if (r < 0) - return r; - - i = search_fn(ro_node(s), key); - - flags = le32_to_cpu(ro_node(s)->header.flags); - nr_entries = le32_to_cpu(ro_node(s)->header.nr_entries); - if (i < 0 || i >= nr_entries) - return -ENODATA; - - if (flags & INTERNAL_NODE) - block = value64(ro_node(s), i); - - } while (!(flags & LEAF_NODE)); - - *result_key = le64_to_cpu(ro_node(s)->keys[i]); - memcpy(v, value_ptr(ro_node(s), i), value_size); - - return 0; -} - -int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root, - uint64_t *keys, void *value_le) -{ - unsigned level, last_level = info->levels - 1; - int r = -ENODATA; - uint64_t rkey; - __le64 internal_value_le; - struct ro_spine spine; - - init_ro_spine(&spine, info); - for (level = 0; level < info->levels; level++) { - size_t size; - void *value_p; - - if (level == last_level) { - value_p = value_le; - size = info->value_type.size; - - } else { - value_p = &internal_value_le; - size = sizeof(uint64_t); - } - - r = btree_lookup_raw(&spine, root, keys[level], - lower_bound, &rkey, - value_p, size); - - if (!r) { - if (rkey != keys[level]) { - exit_ro_spine(&spine); - return -ENODATA; - } - } else { - exit_ro_spine(&spine); - return r; - } - - root = le64_to_cpu(internal_value_le); - } - exit_ro_spine(&spine); - - return r; -} -EXPORT_SYMBOL_GPL(dm_btree_lookup); - -/* - * Splits a node by creating a sibling node and shifting half the nodes - * contents across. Assumes there is a parent node, and it has room for - * another child. 
- * - * Before: - * +--------+ - * | Parent | - * +--------+ - * | - * v - * +----------+ - * | A ++++++ | - * +----------+ - * - * - * After: - * +--------+ - * | Parent | - * +--------+ - * | | - * v +------+ - * +---------+ | - * | A* +++ | v - * +---------+ +-------+ - * | B +++ | - * +-------+ - * - * Where A* is a shadow of A. - */ -static int btree_split_sibling(struct shadow_spine *s, dm_block_t root, - unsigned parent_index, uint64_t key) -{ - int r; - size_t size; - unsigned nr_left, nr_right; - struct dm_block *left, *right, *parent; - struct node *ln, *rn, *pn; - __le64 location; - - left = shadow_current(s); - - r = new_block(s->info, &right); - if (r < 0) - return r; - - ln = dm_block_data(left); - rn = dm_block_data(right); - - nr_left = le32_to_cpu(ln->header.nr_entries) / 2; - nr_right = le32_to_cpu(ln->header.nr_entries) - nr_left; - - ln->header.nr_entries = cpu_to_le32(nr_left); - - rn->header.flags = ln->header.flags; - rn->header.nr_entries = cpu_to_le32(nr_right); - rn->header.max_entries = ln->header.max_entries; - rn->header.value_size = ln->header.value_size; - memcpy(rn->keys, ln->keys + nr_left, nr_right * sizeof(rn->keys[0])); - - size = le32_to_cpu(ln->header.flags) & INTERNAL_NODE ? - sizeof(uint64_t) : s->info->value_type.size; - memcpy(value_ptr(rn, 0), value_ptr(ln, nr_left), - size * nr_right); - - /* - * Patch up the parent - */ - parent = shadow_parent(s); - - pn = dm_block_data(parent); - location = cpu_to_le64(dm_block_location(left)); - __dm_bless_for_disk(&location); - memcpy_disk(value_ptr(pn, parent_index), - &location, sizeof(__le64)); - - location = cpu_to_le64(dm_block_location(right)); - __dm_bless_for_disk(&location); - - r = insert_at(sizeof(__le64), pn, parent_index + 1, - le64_to_cpu(rn->keys[0]), &location); - if (r) - return r; - - if (key < le64_to_cpu(rn->keys[0])) { - unlock_block(s->info, right); - s->nodes[1] = left; - } else { - unlock_block(s->info, left); - s->nodes[1] = right; - } - - return 0; -} - -/* - * Splits a node by creating two new children beneath the given node. - * - * Before: - * +----------+ - * | A ++++++ | - * +----------+ - * - * - * After: - * +------------+ - * | A (shadow) | - * +------------+ - * | | - * +------+ +----+ - * | | - * v v - * +-------+ +-------+ - * | B +++ | | C +++ | - * +-------+ +-------+ - */ -static int btree_split_beneath(struct shadow_spine *s, uint64_t key) -{ - int r; - size_t size; - unsigned nr_left, nr_right; - struct dm_block *left, *right, *new_parent; - struct node *pn, *ln, *rn; - __le64 val; - - new_parent = shadow_current(s); - - r = new_block(s->info, &left); - if (r < 0) - return r; - - r = new_block(s->info, &right); - if (r < 0) { - /* FIXME: put left */ - return r; - } - - pn = dm_block_data(new_parent); - ln = dm_block_data(left); - rn = dm_block_data(right); - - nr_left = le32_to_cpu(pn->header.nr_entries) / 2; - nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left; - - ln->header.flags = pn->header.flags; - ln->header.nr_entries = cpu_to_le32(nr_left); - ln->header.max_entries = pn->header.max_entries; - ln->header.value_size = pn->header.value_size; - - rn->header.flags = pn->header.flags; - rn->header.nr_entries = cpu_to_le32(nr_right); - rn->header.max_entries = pn->header.max_entries; - rn->header.value_size = pn->header.value_size; - - memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0])); - memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0])); - - size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ? 
- sizeof(__le64) : s->info->value_type.size; - memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size); - memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left), - nr_right * size); - - /* new_parent should just point to l and r now */ - pn->header.flags = cpu_to_le32(INTERNAL_NODE); - pn->header.nr_entries = cpu_to_le32(2); - pn->header.max_entries = cpu_to_le32( - calc_max_entries(sizeof(__le64), - dm_bm_block_size( - dm_tm_get_bm(s->info->tm)))); - pn->header.value_size = cpu_to_le32(sizeof(__le64)); - - val = cpu_to_le64(dm_block_location(left)); - __dm_bless_for_disk(&val); - pn->keys[0] = ln->keys[0]; - memcpy_disk(value_ptr(pn, 0), &val, sizeof(__le64)); - - val = cpu_to_le64(dm_block_location(right)); - __dm_bless_for_disk(&val); - pn->keys[1] = rn->keys[0]; - memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64)); - - /* - * rejig the spine. This is ugly, since it knows too - * much about the spine - */ - if (s->nodes[0] != new_parent) { - unlock_block(s->info, s->nodes[0]); - s->nodes[0] = new_parent; - } - if (key < le64_to_cpu(rn->keys[0])) { - unlock_block(s->info, right); - s->nodes[1] = left; - } else { - unlock_block(s->info, left); - s->nodes[1] = right; - } - s->count = 2; - - return 0; -} - -static int btree_insert_raw(struct shadow_spine *s, dm_block_t root, - struct dm_btree_value_type *vt, - uint64_t key, unsigned *index) -{ - int r, i = *index, top = 1; - struct node *node; - - for (;;) { - r = shadow_step(s, root, vt); - if (r < 0) - return r; - - node = dm_block_data(shadow_current(s)); - - /* - * We have to patch up the parent node, ugly, but I don't - * see a way to do this automatically as part of the spine - * op. - */ - if (shadow_has_parent(s) && i >= 0) { /* FIXME: second clause unness. */ - __le64 location = cpu_to_le64(dm_block_location(shadow_current(s))); - - __dm_bless_for_disk(&location); - memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i), - &location, sizeof(__le64)); - } - - node = dm_block_data(shadow_current(s)); - - if (node->header.nr_entries == node->header.max_entries) { - if (top) - r = btree_split_beneath(s, key); - else - r = btree_split_sibling(s, root, i, key); - - if (r < 0) - return r; - } - - node = dm_block_data(shadow_current(s)); - - i = lower_bound(node, key); - - if (le32_to_cpu(node->header.flags) & LEAF_NODE) - break; - - if (i < 0) { - /* change the bounds on the lowest key */ - node->keys[0] = cpu_to_le64(key); - i = 0; - } - - root = value64(node, i); - top = 0; - } - - if (i < 0 || le64_to_cpu(node->keys[i]) != key) - i++; - - *index = i; - return 0; -} - -static int insert(struct dm_btree_info *info, dm_block_t root, - uint64_t *keys, void *value, dm_block_t *new_root, - int *inserted) - __dm_written_to_disk(value) -{ - int r, need_insert; - unsigned level, index = -1, last_level = info->levels - 1; - dm_block_t block = root; - struct shadow_spine spine; - struct node *n; - struct dm_btree_value_type le64_type; - - le64_type.context = NULL; - le64_type.size = sizeof(__le64); - le64_type.inc = NULL; - le64_type.dec = NULL; - le64_type.equal = NULL; - - init_shadow_spine(&spine, info); - - for (level = 0; level < (info->levels - 1); level++) { - r = btree_insert_raw(&spine, block, &le64_type, keys[level], &index); - if (r < 0) - goto bad; - - n = dm_block_data(shadow_current(&spine)); - need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) || - (le64_to_cpu(n->keys[index]) != keys[level])); - - if (need_insert) { - dm_block_t new_tree; - __le64 new_le; - - r = dm_btree_empty(info, &new_tree); - if (r < 0) - goto bad; - 
- new_le = cpu_to_le64(new_tree); - __dm_bless_for_disk(&new_le); - - r = insert_at(sizeof(uint64_t), n, index, - keys[level], &new_le); - if (r) - goto bad; - } - - if (level < last_level) - block = value64(n, index); - } - - r = btree_insert_raw(&spine, block, &info->value_type, - keys[level], &index); - if (r < 0) - goto bad; - - n = dm_block_data(shadow_current(&spine)); - need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) || - (le64_to_cpu(n->keys[index]) != keys[level])); - - if (need_insert) { - if (inserted) - *inserted = 1; - - r = insert_at(info->value_type.size, n, index, - keys[level], value); - if (r) - goto bad_unblessed; - } else { - if (inserted) - *inserted = 0; - - if (info->value_type.dec && - (!info->value_type.equal || - !info->value_type.equal( - info->value_type.context, - value_ptr(n, index), - value))) { - info->value_type.dec(info->value_type.context, - value_ptr(n, index)); - } - memcpy_disk(value_ptr(n, index), - value, info->value_type.size); - } - - *new_root = shadow_root(&spine); - exit_shadow_spine(&spine); - - return 0; - -bad: - __dm_unbless_for_disk(value); -bad_unblessed: - exit_shadow_spine(&spine); - return r; -} - -int dm_btree_insert(struct dm_btree_info *info, dm_block_t root, - uint64_t *keys, void *value, dm_block_t *new_root) - __dm_written_to_disk(value) -{ - return insert(info, root, keys, value, new_root, NULL); -} -EXPORT_SYMBOL_GPL(dm_btree_insert); - -int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root, - uint64_t *keys, void *value, dm_block_t *new_root, - int *inserted) - __dm_written_to_disk(value) -{ - return insert(info, root, keys, value, new_root, inserted); -} -EXPORT_SYMBOL_GPL(dm_btree_insert_notify); - -/*----------------------------------------------------------------*/ - -static int find_highest_key(struct ro_spine *s, dm_block_t block, - uint64_t *result_key, dm_block_t *next_block) -{ - int i, r; - uint32_t flags; - - do { - r = ro_step(s, block); - if (r < 0) - return r; - - flags = le32_to_cpu(ro_node(s)->header.flags); - i = le32_to_cpu(ro_node(s)->header.nr_entries); - if (!i) - return -ENODATA; - else - i--; - - *result_key = le64_to_cpu(ro_node(s)->keys[i]); - if (next_block || flags & INTERNAL_NODE) - block = value64(ro_node(s), i); - - } while (flags & INTERNAL_NODE); - - if (next_block) - *next_block = block; - return 0; -} - -int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, - uint64_t *result_keys) -{ - int r = 0, count = 0, level; - struct ro_spine spine; - - init_ro_spine(&spine, info); - for (level = 0; level < info->levels; level++) { - r = find_highest_key(&spine, root, result_keys + level, - level == info->levels - 1 ? NULL : &root); - if (r == -ENODATA) { - r = 0; - break; - - } else if (r) - break; - - count++; - } - exit_ro_spine(&spine); - - return r ? r : count; -} -EXPORT_SYMBOL_GPL(dm_btree_find_highest_key); diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-btree.h b/ANDROID_3.4.5/drivers/md/persistent-data/dm-btree.h deleted file mode 100644 index ae02c844..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-btree.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ -#ifndef _LINUX_DM_BTREE_H -#define _LINUX_DM_BTREE_H - -#include "dm-block-manager.h" - -struct dm_transaction_manager; - -/*----------------------------------------------------------------*/ - -/* - * Annotations used to check on-disk metadata is handled as little-endian. 
- */ -#ifdef __CHECKER__ -# define __dm_written_to_disk(x) __releases(x) -# define __dm_reads_from_disk(x) __acquires(x) -# define __dm_bless_for_disk(x) __acquire(x) -# define __dm_unbless_for_disk(x) __release(x) -#else -# define __dm_written_to_disk(x) -# define __dm_reads_from_disk(x) -# define __dm_bless_for_disk(x) -# define __dm_unbless_for_disk(x) -#endif - -/*----------------------------------------------------------------*/ - -/* - * Manipulates hierarchical B+ trees with 64-bit keys and arbitrary-sized - * values. - */ - -/* - * Infomation about the values stored within the btree. - */ -struct dm_btree_value_type { - void *context; - - /* - * The size in bytes of each value. - */ - uint32_t size; - - /* - * Any of these methods can be safely set to NULL if you do not - * need the corresponding feature. - */ - - /* - * The btree is making a duplicate of the value, for instance - * because previously-shared btree nodes have now diverged. - * @value argument is the new copy that the copy function may modify. - * (Probably it just wants to increment a reference count - * somewhere.) This method is _not_ called for insertion of a new - * value: It is assumed the ref count is already 1. - */ - void (*inc)(void *context, void *value); - - /* - * This value is being deleted. The btree takes care of freeing - * the memory pointed to by @value. Often the del function just - * needs to decrement a reference count somewhere. - */ - void (*dec)(void *context, void *value); - - /* - * A test for equality between two values. When a value is - * overwritten with a new one, the old one has the dec method - * called _unless_ the new and old value are deemed equal. - */ - int (*equal)(void *context, void *value1, void *value2); -}; - -/* - * The shape and contents of a btree. - */ -struct dm_btree_info { - struct dm_transaction_manager *tm; - - /* - * Number of nested btrees. (Not the depth of a single tree.) - */ - unsigned levels; - struct dm_btree_value_type value_type; -}; - -/* - * Set up an empty tree. O(1). - */ -int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root); - -/* - * Delete a tree. O(n) - this is the slow one! It can also block, so - * please don't call it on an IO path. - */ -int dm_btree_del(struct dm_btree_info *info, dm_block_t root); - -/* - * All the lookup functions return -ENODATA if the key cannot be found. - */ - -/* - * Tries to find a key that matches exactly. O(ln(n)) - */ -int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root, - uint64_t *keys, void *value_le); - -/* - * Insertion (or overwrite an existing value). O(ln(n)) - */ -int dm_btree_insert(struct dm_btree_info *info, dm_block_t root, - uint64_t *keys, void *value, dm_block_t *new_root) - __dm_written_to_disk(value); - -/* - * A variant of insert that indicates whether it actually inserted or just - * overwrote. Useful if you're keeping track of the number of entries in a - * tree. - */ -int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root, - uint64_t *keys, void *value, dm_block_t *new_root, - int *inserted) - __dm_written_to_disk(value); - -/* - * Remove a key if present. This doesn't remove empty sub trees. Normally - * subtrees represent a separate entity, like a snapshot map, so this is - * correct behaviour. O(ln(n)). - */ -int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, - uint64_t *keys, dm_block_t *new_root); - -/* - * Returns < 0 on failure. Otherwise the number of key entries that have - * been filled out. 
Remember trees can have zero entries, and as such have - * no highest key. - */ -int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, - uint64_t *result_keys); - -#endif /* _LINUX_DM_BTREE_H */ diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-persistent-data-internal.h b/ANDROID_3.4.5/drivers/md/persistent-data/dm-persistent-data-internal.h deleted file mode 100644 index c49e26ff..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-persistent-data-internal.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#ifndef _DM_PERSISTENT_DATA_INTERNAL_H -#define _DM_PERSISTENT_DATA_INTERNAL_H - -#include "dm-block-manager.h" - -static inline unsigned dm_hash_block(dm_block_t b, unsigned hash_mask) -{ - const unsigned BIG_PRIME = 4294967291UL; - - return (((unsigned) b) * BIG_PRIME) & hash_mask; -} - -#endif /* _PERSISTENT_DATA_INTERNAL_H */ diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-checker.c b/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-checker.c deleted file mode 100644 index fc90c116..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-checker.c +++ /dev/null @@ -1,446 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#include "dm-space-map-checker.h" - -#include <linux/device-mapper.h> -#include <linux/export.h> -#include <linux/vmalloc.h> - -#ifdef CONFIG_DM_DEBUG_SPACE_MAPS - -#define DM_MSG_PREFIX "space map checker" - -/*----------------------------------------------------------------*/ - -struct count_array { - dm_block_t nr; - dm_block_t nr_free; - - uint32_t *counts; -}; - -static int ca_get_count(struct count_array *ca, dm_block_t b, uint32_t *count) -{ - if (b >= ca->nr) - return -EINVAL; - - *count = ca->counts[b]; - return 0; -} - -static int ca_count_more_than_one(struct count_array *ca, dm_block_t b, int *r) -{ - if (b >= ca->nr) - return -EINVAL; - - *r = ca->counts[b] > 1; - return 0; -} - -static int ca_set_count(struct count_array *ca, dm_block_t b, uint32_t count) -{ - uint32_t old_count; - - if (b >= ca->nr) - return -EINVAL; - - old_count = ca->counts[b]; - - if (!count && old_count) - ca->nr_free++; - - else if (count && !old_count) - ca->nr_free--; - - ca->counts[b] = count; - return 0; -} - -static int ca_inc_block(struct count_array *ca, dm_block_t b) -{ - if (b >= ca->nr) - return -EINVAL; - - ca_set_count(ca, b, ca->counts[b] + 1); - return 0; -} - -static int ca_dec_block(struct count_array *ca, dm_block_t b) -{ - if (b >= ca->nr) - return -EINVAL; - - BUG_ON(ca->counts[b] == 0); - ca_set_count(ca, b, ca->counts[b] - 1); - return 0; -} - -static int ca_create(struct count_array *ca, struct dm_space_map *sm) -{ - int r; - dm_block_t nr_blocks; - - r = dm_sm_get_nr_blocks(sm, &nr_blocks); - if (r) - return r; - - ca->nr = nr_blocks; - ca->nr_free = nr_blocks; - - if (!nr_blocks) - ca->counts = NULL; - else { - ca->counts = vzalloc(sizeof(*ca->counts) * nr_blocks); - if (!ca->counts) - return -ENOMEM; - } - - return 0; -} - -static void ca_destroy(struct count_array *ca) -{ - vfree(ca->counts); -} - -static int ca_load(struct count_array *ca, struct dm_space_map *sm) -{ - int r; - uint32_t count; - dm_block_t nr_blocks, i; - - r = dm_sm_get_nr_blocks(sm, &nr_blocks); - if (r) - return r; - - BUG_ON(ca->nr != nr_blocks); - - DMWARN("Loading debug space map from disk. 
This may take some time"); - for (i = 0; i < nr_blocks; i++) { - r = dm_sm_get_count(sm, i, &count); - if (r) { - DMERR("load failed"); - return r; - } - - ca_set_count(ca, i, count); - } - DMWARN("Load complete"); - - return 0; -} - -static int ca_extend(struct count_array *ca, dm_block_t extra_blocks) -{ - dm_block_t nr_blocks = ca->nr + extra_blocks; - uint32_t *counts = vzalloc(sizeof(*counts) * nr_blocks); - if (!counts) - return -ENOMEM; - - if (ca->counts) { - memcpy(counts, ca->counts, sizeof(*counts) * ca->nr); - ca_destroy(ca); - } - ca->nr = nr_blocks; - ca->nr_free += extra_blocks; - ca->counts = counts; - return 0; -} - -static int ca_commit(struct count_array *old, struct count_array *new) -{ - if (old->nr != new->nr) { - BUG_ON(old->nr > new->nr); - ca_extend(old, new->nr - old->nr); - } - - BUG_ON(old->nr != new->nr); - old->nr_free = new->nr_free; - memcpy(old->counts, new->counts, sizeof(*old->counts) * old->nr); - return 0; -} - -/*----------------------------------------------------------------*/ - -struct sm_checker { - struct dm_space_map sm; - - struct count_array old_counts; - struct count_array counts; - - struct dm_space_map *real_sm; -}; - -static void sm_checker_destroy(struct dm_space_map *sm) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - - dm_sm_destroy(smc->real_sm); - ca_destroy(&smc->old_counts); - ca_destroy(&smc->counts); - kfree(smc); -} - -static int sm_checker_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_get_nr_blocks(smc->real_sm, count); - if (!r) - BUG_ON(smc->old_counts.nr != *count); - return r; -} - -static int sm_checker_get_nr_free(struct dm_space_map *sm, dm_block_t *count) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_get_nr_free(smc->real_sm, count); - if (!r) { - /* - * Slow, but we know it's correct. 
- */ - dm_block_t b, n = 0; - for (b = 0; b < smc->old_counts.nr; b++) - if (smc->old_counts.counts[b] == 0 && - smc->counts.counts[b] == 0) - n++; - - if (n != *count) - DMERR("free block counts differ, checker %u, sm-disk:%u", - (unsigned) n, (unsigned) *count); - } - return r; -} - -static int sm_checker_new_block(struct dm_space_map *sm, dm_block_t *b) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_new_block(smc->real_sm, b); - - if (!r) { - BUG_ON(*b >= smc->old_counts.nr); - BUG_ON(smc->old_counts.counts[*b] != 0); - BUG_ON(*b >= smc->counts.nr); - BUG_ON(smc->counts.counts[*b] != 0); - ca_set_count(&smc->counts, *b, 1); - } - - return r; -} - -static int sm_checker_inc_block(struct dm_space_map *sm, dm_block_t b) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_inc_block(smc->real_sm, b); - int r2 = ca_inc_block(&smc->counts, b); - BUG_ON(r != r2); - return r; -} - -static int sm_checker_dec_block(struct dm_space_map *sm, dm_block_t b) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_dec_block(smc->real_sm, b); - int r2 = ca_dec_block(&smc->counts, b); - BUG_ON(r != r2); - return r; -} - -static int sm_checker_get_count(struct dm_space_map *sm, dm_block_t b, uint32_t *result) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - uint32_t result2 = 0; - int r = dm_sm_get_count(smc->real_sm, b, result); - int r2 = ca_get_count(&smc->counts, b, &result2); - - BUG_ON(r != r2); - if (!r) - BUG_ON(*result != result2); - return r; -} - -static int sm_checker_count_more_than_one(struct dm_space_map *sm, dm_block_t b, int *result) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int result2 = 0; - int r = dm_sm_count_is_more_than_one(smc->real_sm, b, result); - int r2 = ca_count_more_than_one(&smc->counts, b, &result2); - - BUG_ON(r != r2); - if (!r) - BUG_ON(!(*result) && result2); - return r; -} - -static int sm_checker_set_count(struct dm_space_map *sm, dm_block_t b, uint32_t count) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - uint32_t old_rc; - int r = dm_sm_set_count(smc->real_sm, b, count); - int r2; - - BUG_ON(b >= smc->counts.nr); - old_rc = smc->counts.counts[b]; - r2 = ca_set_count(&smc->counts, b, count); - BUG_ON(r != r2); - - return r; -} - -static int sm_checker_commit(struct dm_space_map *sm) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r; - - r = dm_sm_commit(smc->real_sm); - if (r) - return r; - - r = ca_commit(&smc->old_counts, &smc->counts); - if (r) - return r; - - return 0; -} - -static int sm_checker_extend(struct dm_space_map *sm, dm_block_t extra_blocks) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_extend(smc->real_sm, extra_blocks); - if (r) - return r; - - return ca_extend(&smc->counts, extra_blocks); -} - -static int sm_checker_root_size(struct dm_space_map *sm, size_t *result) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - return dm_sm_root_size(smc->real_sm, result); -} - -static int sm_checker_copy_root(struct dm_space_map *sm, void *copy_to_here_le, size_t len) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - return dm_sm_copy_root(smc->real_sm, copy_to_here_le, len); -} - -/*----------------------------------------------------------------*/ - -static struct dm_space_map ops_ = { - .destroy = sm_checker_destroy, - .get_nr_blocks = sm_checker_get_nr_blocks, 
- .get_nr_free = sm_checker_get_nr_free, - .inc_block = sm_checker_inc_block, - .dec_block = sm_checker_dec_block, - .new_block = sm_checker_new_block, - .get_count = sm_checker_get_count, - .count_is_more_than_one = sm_checker_count_more_than_one, - .set_count = sm_checker_set_count, - .commit = sm_checker_commit, - .extend = sm_checker_extend, - .root_size = sm_checker_root_size, - .copy_root = sm_checker_copy_root -}; - -struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm) -{ - int r; - struct sm_checker *smc; - - if (IS_ERR_OR_NULL(sm)) - return ERR_PTR(-EINVAL); - - smc = kmalloc(sizeof(*smc), GFP_KERNEL); - if (!smc) - return ERR_PTR(-ENOMEM); - - memcpy(&smc->sm, &ops_, sizeof(smc->sm)); - r = ca_create(&smc->old_counts, sm); - if (r) { - kfree(smc); - return ERR_PTR(r); - } - - r = ca_create(&smc->counts, sm); - if (r) { - ca_destroy(&smc->old_counts); - kfree(smc); - return ERR_PTR(r); - } - - smc->real_sm = sm; - - r = ca_load(&smc->counts, sm); - if (r) { - ca_destroy(&smc->counts); - ca_destroy(&smc->old_counts); - kfree(smc); - return ERR_PTR(r); - } - - r = ca_commit(&smc->old_counts, &smc->counts); - if (r) { - ca_destroy(&smc->counts); - ca_destroy(&smc->old_counts); - kfree(smc); - return ERR_PTR(r); - } - - return &smc->sm; -} -EXPORT_SYMBOL_GPL(dm_sm_checker_create); - -struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm) -{ - int r; - struct sm_checker *smc; - - if (IS_ERR_OR_NULL(sm)) - return ERR_PTR(-EINVAL); - - smc = kmalloc(sizeof(*smc), GFP_KERNEL); - if (!smc) - return ERR_PTR(-ENOMEM); - - memcpy(&smc->sm, &ops_, sizeof(smc->sm)); - r = ca_create(&smc->old_counts, sm); - if (r) { - kfree(smc); - return ERR_PTR(r); - } - - r = ca_create(&smc->counts, sm); - if (r) { - ca_destroy(&smc->old_counts); - kfree(smc); - return ERR_PTR(r); - } - - smc->real_sm = sm; - return &smc->sm; -} -EXPORT_SYMBOL_GPL(dm_sm_checker_create_fresh); - -/*----------------------------------------------------------------*/ - -#else - -struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm) -{ - return sm; -} -EXPORT_SYMBOL_GPL(dm_sm_checker_create); - -struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm) -{ - return sm; -} -EXPORT_SYMBOL_GPL(dm_sm_checker_create_fresh); - -/*----------------------------------------------------------------*/ - -#endif diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-checker.h b/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-checker.h deleted file mode 100644 index 444dccf6..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-checker.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#ifndef SNAPSHOTS_SPACE_MAP_CHECKER_H -#define SNAPSHOTS_SPACE_MAP_CHECKER_H - -#include "dm-space-map.h" - -/*----------------------------------------------------------------*/ - -/* - * This space map wraps a real on-disk space map, and verifies all of its - * operations. It uses a lot of memory, so only use if you have a specific - * problem that you're debugging. - * - * Ownership of @sm passes. 
- */ -struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm); -struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm); - -/*----------------------------------------------------------------*/ - -#endif diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-common.c b/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-common.c deleted file mode 100644 index ff3beed6..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-common.c +++ /dev/null @@ -1,702 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#include "dm-space-map-common.h" -#include "dm-transaction-manager.h" - -#include <linux/bitops.h> -#include <linux/device-mapper.h> - -#define DM_MSG_PREFIX "space map common" - -/*----------------------------------------------------------------*/ - -/* - * Index validator. - */ -#define INDEX_CSUM_XOR 160478 - -static void index_prepare_for_write(struct dm_block_validator *v, - struct dm_block *b, - size_t block_size) -{ - struct disk_metadata_index *mi_le = dm_block_data(b); - - mi_le->blocknr = cpu_to_le64(dm_block_location(b)); - mi_le->csum = cpu_to_le32(dm_bm_checksum(&mi_le->padding, - block_size - sizeof(__le32), - INDEX_CSUM_XOR)); -} - -static int index_check(struct dm_block_validator *v, - struct dm_block *b, - size_t block_size) -{ - struct disk_metadata_index *mi_le = dm_block_data(b); - __le32 csum_disk; - - if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) { - DMERR("index_check failed blocknr %llu wanted %llu", - le64_to_cpu(mi_le->blocknr), dm_block_location(b)); - return -ENOTBLK; - } - - csum_disk = cpu_to_le32(dm_bm_checksum(&mi_le->padding, - block_size - sizeof(__le32), - INDEX_CSUM_XOR)); - if (csum_disk != mi_le->csum) { - DMERR("index_check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); - return -EILSEQ; - } - - return 0; -} - -static struct dm_block_validator index_validator = { - .name = "index", - .prepare_for_write = index_prepare_for_write, - .check = index_check -}; - -/*----------------------------------------------------------------*/ - -/* - * Bitmap validator - */ -#define BITMAP_CSUM_XOR 240779 - -static void bitmap_prepare_for_write(struct dm_block_validator *v, - struct dm_block *b, - size_t block_size) -{ - struct disk_bitmap_header *disk_header = dm_block_data(b); - - disk_header->blocknr = cpu_to_le64(dm_block_location(b)); - disk_header->csum = cpu_to_le32(dm_bm_checksum(&disk_header->not_used, - block_size - sizeof(__le32), - BITMAP_CSUM_XOR)); -} - -static int bitmap_check(struct dm_block_validator *v, - struct dm_block *b, - size_t block_size) -{ - struct disk_bitmap_header *disk_header = dm_block_data(b); - __le32 csum_disk; - - if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) { - DMERR("bitmap check failed blocknr %llu wanted %llu", - le64_to_cpu(disk_header->blocknr), dm_block_location(b)); - return -ENOTBLK; - } - - csum_disk = cpu_to_le32(dm_bm_checksum(&disk_header->not_used, - block_size - sizeof(__le32), - BITMAP_CSUM_XOR)); - if (csum_disk != disk_header->csum) { - DMERR("bitmap check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum)); - return -EILSEQ; - } - - return 0; -} - -static struct dm_block_validator dm_sm_bitmap_validator = { - .name = "sm_bitmap", - .prepare_for_write = bitmap_prepare_for_write, - .check = bitmap_check -}; - -/*----------------------------------------------------------------*/ - -#define ENTRIES_PER_WORD 32 -#define 
ENTRIES_SHIFT 5 - -static void *dm_bitmap_data(struct dm_block *b) -{ - return dm_block_data(b) + sizeof(struct disk_bitmap_header); -} - -#define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL - -static unsigned bitmap_word_used(void *addr, unsigned b) -{ - __le64 *words_le = addr; - __le64 *w_le = words_le + (b >> ENTRIES_SHIFT); - - uint64_t bits = le64_to_cpu(*w_le); - uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH; - - return !(~bits & mask); -} - -static unsigned sm_lookup_bitmap(void *addr, unsigned b) -{ - __le64 *words_le = addr; - __le64 *w_le = words_le + (b >> ENTRIES_SHIFT); - unsigned hi, lo; - - b = (b & (ENTRIES_PER_WORD - 1)) << 1; - hi = !!test_bit_le(b, (void *) w_le); - lo = !!test_bit_le(b + 1, (void *) w_le); - return (hi << 1) | lo; -} - -static void sm_set_bitmap(void *addr, unsigned b, unsigned val) -{ - __le64 *words_le = addr; - __le64 *w_le = words_le + (b >> ENTRIES_SHIFT); - - b = (b & (ENTRIES_PER_WORD - 1)) << 1; - - if (val & 2) - __set_bit_le(b, (void *) w_le); - else - __clear_bit_le(b, (void *) w_le); - - if (val & 1) - __set_bit_le(b + 1, (void *) w_le); - else - __clear_bit_le(b + 1, (void *) w_le); -} - -static int sm_find_free(void *addr, unsigned begin, unsigned end, - unsigned *result) -{ - while (begin < end) { - if (!(begin & (ENTRIES_PER_WORD - 1)) && - bitmap_word_used(addr, begin)) { - begin += ENTRIES_PER_WORD; - continue; - } - - if (!sm_lookup_bitmap(addr, begin)) { - *result = begin; - return 0; - } - - begin++; - } - - return -ENOSPC; -} - -/*----------------------------------------------------------------*/ - -static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm) -{ - ll->tm = tm; - - ll->bitmap_info.tm = tm; - ll->bitmap_info.levels = 1; - - /* - * Because the new bitmap blocks are created via a shadow - * operation, the old entry has already had its reference count - * decremented and we don't need the btree to do any bookkeeping. 
- */ - ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry); - ll->bitmap_info.value_type.inc = NULL; - ll->bitmap_info.value_type.dec = NULL; - ll->bitmap_info.value_type.equal = NULL; - - ll->ref_count_info.tm = tm; - ll->ref_count_info.levels = 1; - ll->ref_count_info.value_type.size = sizeof(uint32_t); - ll->ref_count_info.value_type.inc = NULL; - ll->ref_count_info.value_type.dec = NULL; - ll->ref_count_info.value_type.equal = NULL; - - ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm)); - - if (ll->block_size > (1 << 30)) { - DMERR("block size too big to hold bitmaps"); - return -EINVAL; - } - - ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) * - ENTRIES_PER_BYTE; - ll->nr_blocks = 0; - ll->bitmap_root = 0; - ll->ref_count_root = 0; - - return 0; -} - -int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks) -{ - int r; - dm_block_t i, nr_blocks, nr_indexes; - unsigned old_blocks, blocks; - - nr_blocks = ll->nr_blocks + extra_blocks; - old_blocks = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block); - blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block); - - nr_indexes = dm_sector_div_up(nr_blocks, ll->entries_per_block); - if (nr_indexes > ll->max_entries(ll)) { - DMERR("space map too large"); - return -EINVAL; - } - - for (i = old_blocks; i < blocks; i++) { - struct dm_block *b; - struct disk_index_entry idx; - - r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b); - if (r < 0) - return r; - idx.blocknr = cpu_to_le64(dm_block_location(b)); - - r = dm_tm_unlock(ll->tm, b); - if (r < 0) - return r; - - idx.nr_free = cpu_to_le32(ll->entries_per_block); - idx.none_free_before = 0; - - r = ll->save_ie(ll, i, &idx); - if (r < 0) - return r; - } - - ll->nr_blocks = nr_blocks; - return 0; -} - -int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result) -{ - int r; - dm_block_t index = b; - struct disk_index_entry ie_disk; - struct dm_block *blk; - - b = do_div(index, ll->entries_per_block); - r = ll->load_ie(ll, index, &ie_disk); - if (r < 0) - return r; - - r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr), - &dm_sm_bitmap_validator, &blk); - if (r < 0) - return r; - - *result = sm_lookup_bitmap(dm_bitmap_data(blk), b); - - return dm_tm_unlock(ll->tm, blk); -} - -int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result) -{ - __le32 le_rc; - int r = sm_ll_lookup_bitmap(ll, b, result); - - if (r) - return r; - - if (*result != 3) - return r; - - r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc); - if (r < 0) - return r; - - *result = le32_to_cpu(le_rc); - - return r; -} - -int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, - dm_block_t end, dm_block_t *result) -{ - int r; - struct disk_index_entry ie_disk; - dm_block_t i, index_begin = begin; - dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block); - - /* - * FIXME: Use shifts - */ - begin = do_div(index_begin, ll->entries_per_block); - end = do_div(end, ll->entries_per_block); - - for (i = index_begin; i < index_end; i++, begin = 0) { - struct dm_block *blk; - unsigned position; - uint32_t bit_end; - - r = ll->load_ie(ll, i, &ie_disk); - if (r < 0) - return r; - - if (le32_to_cpu(ie_disk.nr_free) == 0) - continue; - - r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr), - &dm_sm_bitmap_validator, &blk); - if (r < 0) - return r; - - bit_end = (i == index_end - 1) ? 
end : ll->entries_per_block; - - r = sm_find_free(dm_bitmap_data(blk), - max_t(unsigned, begin, le32_to_cpu(ie_disk.none_free_before)), - bit_end, &position); - if (r == -ENOSPC) { - /* - * This might happen because we started searching - * part way through the bitmap. - */ - dm_tm_unlock(ll->tm, blk); - continue; - - } else if (r < 0) { - dm_tm_unlock(ll->tm, blk); - return r; - } - - r = dm_tm_unlock(ll->tm, blk); - if (r < 0) - return r; - - *result = i * ll->entries_per_block + (dm_block_t) position; - return 0; - } - - return -ENOSPC; -} - -int sm_ll_insert(struct ll_disk *ll, dm_block_t b, - uint32_t ref_count, enum allocation_event *ev) -{ - int r; - uint32_t bit, old; - struct dm_block *nb; - dm_block_t index = b; - struct disk_index_entry ie_disk; - void *bm_le; - int inc; - - bit = do_div(index, ll->entries_per_block); - r = ll->load_ie(ll, index, &ie_disk); - if (r < 0) - return r; - - r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk.blocknr), - &dm_sm_bitmap_validator, &nb, &inc); - if (r < 0) { - DMERR("dm_tm_shadow_block() failed"); - return r; - } - ie_disk.blocknr = cpu_to_le64(dm_block_location(nb)); - - bm_le = dm_bitmap_data(nb); - old = sm_lookup_bitmap(bm_le, bit); - - if (ref_count <= 2) { - sm_set_bitmap(bm_le, bit, ref_count); - - r = dm_tm_unlock(ll->tm, nb); - if (r < 0) - return r; - - if (old > 2) { - r = dm_btree_remove(&ll->ref_count_info, - ll->ref_count_root, - &b, &ll->ref_count_root); - if (r) - return r; - } - - } else { - __le32 le_rc = cpu_to_le32(ref_count); - - sm_set_bitmap(bm_le, bit, 3); - r = dm_tm_unlock(ll->tm, nb); - if (r < 0) - return r; - - __dm_bless_for_disk(&le_rc); - r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root, - &b, &le_rc, &ll->ref_count_root); - if (r < 0) { - DMERR("ref count insert failed"); - return r; - } - } - - if (ref_count && !old) { - *ev = SM_ALLOC; - ll->nr_allocated++; - ie_disk.nr_free = cpu_to_le32(le32_to_cpu(ie_disk.nr_free) - 1); - if (le32_to_cpu(ie_disk.none_free_before) == bit) - ie_disk.none_free_before = cpu_to_le32(bit + 1); - - } else if (old && !ref_count) { - *ev = SM_FREE; - ll->nr_allocated--; - ie_disk.nr_free = cpu_to_le32(le32_to_cpu(ie_disk.nr_free) + 1); - ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit)); - } - - return ll->save_ie(ll, index, &ie_disk); -} - -int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) -{ - int r; - uint32_t rc; - - r = sm_ll_lookup(ll, b, &rc); - if (r) - return r; - - return sm_ll_insert(ll, b, rc + 1, ev); -} - -int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) -{ - int r; - uint32_t rc; - - r = sm_ll_lookup(ll, b, &rc); - if (r) - return r; - - if (!rc) - return -EINVAL; - - return sm_ll_insert(ll, b, rc - 1, ev); -} - -int sm_ll_commit(struct ll_disk *ll) -{ - return ll->commit(ll); -} - -/*----------------------------------------------------------------*/ - -static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index, - struct disk_index_entry *ie) -{ - memcpy(ie, ll->mi_le.index + index, sizeof(*ie)); - return 0; -} - -static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index, - struct disk_index_entry *ie) -{ - memcpy(ll->mi_le.index + index, ie, sizeof(*ie)); - return 0; -} - -static int metadata_ll_init_index(struct ll_disk *ll) -{ - int r; - struct dm_block *b; - - r = dm_tm_new_block(ll->tm, &index_validator, &b); - if (r < 0) - return r; - - memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le)); - ll->bitmap_root = dm_block_location(b); - - 
return dm_tm_unlock(ll->tm, b); -} - -static int metadata_ll_open(struct ll_disk *ll) -{ - int r; - struct dm_block *block; - - r = dm_tm_read_lock(ll->tm, ll->bitmap_root, - &index_validator, &block); - if (r) - return r; - - memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le)); - return dm_tm_unlock(ll->tm, block); -} - -static dm_block_t metadata_ll_max_entries(struct ll_disk *ll) -{ - return MAX_METADATA_BITMAPS; -} - -static int metadata_ll_commit(struct ll_disk *ll) -{ - int r, inc; - struct dm_block *b; - - r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc); - if (r) - return r; - - memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le)); - ll->bitmap_root = dm_block_location(b); - - return dm_tm_unlock(ll->tm, b); -} - -int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm) -{ - int r; - - r = sm_ll_init(ll, tm); - if (r < 0) - return r; - - ll->load_ie = metadata_ll_load_ie; - ll->save_ie = metadata_ll_save_ie; - ll->init_index = metadata_ll_init_index; - ll->open_index = metadata_ll_open; - ll->max_entries = metadata_ll_max_entries; - ll->commit = metadata_ll_commit; - - ll->nr_blocks = 0; - ll->nr_allocated = 0; - - r = ll->init_index(ll); - if (r < 0) - return r; - - r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root); - if (r < 0) - return r; - - return 0; -} - -int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm, - void *root_le, size_t len) -{ - int r; - struct disk_sm_root *smr = root_le; - - if (len < sizeof(struct disk_sm_root)) { - DMERR("sm_metadata root too small"); - return -ENOMEM; - } - - r = sm_ll_init(ll, tm); - if (r < 0) - return r; - - ll->load_ie = metadata_ll_load_ie; - ll->save_ie = metadata_ll_save_ie; - ll->init_index = metadata_ll_init_index; - ll->open_index = metadata_ll_open; - ll->max_entries = metadata_ll_max_entries; - ll->commit = metadata_ll_commit; - - ll->nr_blocks = le64_to_cpu(smr->nr_blocks); - ll->nr_allocated = le64_to_cpu(smr->nr_allocated); - ll->bitmap_root = le64_to_cpu(smr->bitmap_root); - ll->ref_count_root = le64_to_cpu(smr->ref_count_root); - - return ll->open_index(ll); -} - -/*----------------------------------------------------------------*/ - -static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index, - struct disk_index_entry *ie) -{ - return dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie); -} - -static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index, - struct disk_index_entry *ie) -{ - __dm_bless_for_disk(ie); - return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root, - &index, ie, &ll->bitmap_root); -} - -static int disk_ll_init_index(struct ll_disk *ll) -{ - return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root); -} - -static int disk_ll_open(struct ll_disk *ll) -{ - /* nothing to do */ - return 0; -} - -static dm_block_t disk_ll_max_entries(struct ll_disk *ll) -{ - return -1ULL; -} - -static int disk_ll_commit(struct ll_disk *ll) -{ - return 0; -} - -int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm) -{ - int r; - - r = sm_ll_init(ll, tm); - if (r < 0) - return r; - - ll->load_ie = disk_ll_load_ie; - ll->save_ie = disk_ll_save_ie; - ll->init_index = disk_ll_init_index; - ll->open_index = disk_ll_open; - ll->max_entries = disk_ll_max_entries; - ll->commit = disk_ll_commit; - - ll->nr_blocks = 0; - ll->nr_allocated = 0; - - r = ll->init_index(ll); - if (r < 0) - return r; - - r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root); - if (r < 0) - return r; - - return 0; -} - 
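The ref-count bitmaps handled above store two bits per block, so with ENTRIES_PER_BYTE = 4 a 4KiB bitmap block (minus the 16-byte disk_bitmap_header) holds a little over 16k entries; counts 0, 1 and 2 live inline, and the value 3 means the real count has overflowed into the ref-count btree, which is why sm_ll_lookup() falls back to dm_btree_lookup() when sm_ll_lookup_bitmap() returns 3. The following is a minimal userspace sketch of that packing, with hypothetical lookup2()/set2() helpers and plain host-order shifts rather than the kernel's test_bit_le()/__set_bit_le() on __le64 on-disk words:

#include <stdint.h>
#include <stdio.h>

#define ENTRIES_PER_WORD 32	/* 64 bits per word / 2 bits per entry */

/* Stand-in for sm_lookup_bitmap(): read the 2-bit count of entry b. */
static unsigned lookup2(const uint64_t *words, unsigned b)
{
	unsigned shift = (b % ENTRIES_PER_WORD) * 2;

	return (words[b / ENTRIES_PER_WORD] >> shift) & 3;
}

/* Stand-in for sm_set_bitmap(): store a 2-bit count for entry b. */
static void set2(uint64_t *words, unsigned b, unsigned val)
{
	unsigned shift = (b % ENTRIES_PER_WORD) * 2;
	uint64_t *w = words + b / ENTRIES_PER_WORD;

	*w = (*w & ~(3ULL << shift)) | ((uint64_t)(val & 3) << shift);
}

int main(void)
{
	uint64_t bitmap[2] = { 0, 0 };	/* room for 64 entries */

	set2(bitmap, 10, 2);	/* count 2 is stored inline */
	set2(bitmap, 11, 3);	/* 3 == "many": real count lives in the ref-count btree */

	printf("block 10 -> %u, block 11 -> %u\n",
	       lookup2(bitmap, 10), lookup2(bitmap, 11));
	return 0;
}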
-int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm, - void *root_le, size_t len) -{ - int r; - struct disk_sm_root *smr = root_le; - - if (len < sizeof(struct disk_sm_root)) { - DMERR("sm_metadata root too small"); - return -ENOMEM; - } - - r = sm_ll_init(ll, tm); - if (r < 0) - return r; - - ll->load_ie = disk_ll_load_ie; - ll->save_ie = disk_ll_save_ie; - ll->init_index = disk_ll_init_index; - ll->open_index = disk_ll_open; - ll->max_entries = disk_ll_max_entries; - ll->commit = disk_ll_commit; - - ll->nr_blocks = le64_to_cpu(smr->nr_blocks); - ll->nr_allocated = le64_to_cpu(smr->nr_allocated); - ll->bitmap_root = le64_to_cpu(smr->bitmap_root); - ll->ref_count_root = le64_to_cpu(smr->ref_count_root); - - return ll->open_index(ll); -} - -/*----------------------------------------------------------------*/ diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-common.h b/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-common.h deleted file mode 100644 index 8f220821..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-common.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#ifndef DM_SPACE_MAP_COMMON_H -#define DM_SPACE_MAP_COMMON_H - -#include "dm-btree.h" - -/*----------------------------------------------------------------*/ - -/* - * Low level disk format - * - * Bitmap btree - * ------------ - * - * Each value stored in the btree is an index_entry. This points to a - * block that is used as a bitmap. Within the bitmap hold 2 bits per - * entry, which represent UNUSED = 0, REF_COUNT = 1, REF_COUNT = 2 and - * REF_COUNT = many. - * - * Refcount btree - * -------------- - * - * Any entry that has a ref count higher than 2 gets entered in the ref - * count tree. The leaf values for this tree is the 32-bit ref count. - */ - -struct disk_index_entry { - __le64 blocknr; - __le32 nr_free; - __le32 none_free_before; -} __packed; - - -#define MAX_METADATA_BITMAPS 255 -struct disk_metadata_index { - __le32 csum; - __le32 padding; - __le64 blocknr; - - struct disk_index_entry index[MAX_METADATA_BITMAPS]; -} __packed; - -struct ll_disk; - -typedef int (*load_ie_fn)(struct ll_disk *ll, dm_block_t index, struct disk_index_entry *result); -typedef int (*save_ie_fn)(struct ll_disk *ll, dm_block_t index, struct disk_index_entry *ie); -typedef int (*init_index_fn)(struct ll_disk *ll); -typedef int (*open_index_fn)(struct ll_disk *ll); -typedef dm_block_t (*max_index_entries_fn)(struct ll_disk *ll); -typedef int (*commit_fn)(struct ll_disk *ll); - -struct ll_disk { - struct dm_transaction_manager *tm; - struct dm_btree_info bitmap_info; - struct dm_btree_info ref_count_info; - - uint32_t block_size; - uint32_t entries_per_block; - dm_block_t nr_blocks; - dm_block_t nr_allocated; - - /* - * bitmap_root may be a btree root or a simple index. 
- */ - dm_block_t bitmap_root; - - dm_block_t ref_count_root; - - struct disk_metadata_index mi_le; - load_ie_fn load_ie; - save_ie_fn save_ie; - init_index_fn init_index; - open_index_fn open_index; - max_index_entries_fn max_entries; - commit_fn commit; -}; - -struct disk_sm_root { - __le64 nr_blocks; - __le64 nr_allocated; - __le64 bitmap_root; - __le64 ref_count_root; -} __packed; - -#define ENTRIES_PER_BYTE 4 - -struct disk_bitmap_header { - __le32 csum; - __le32 not_used; - __le64 blocknr; -} __packed; - -enum allocation_event { - SM_NONE, - SM_ALLOC, - SM_FREE, -}; - -/*----------------------------------------------------------------*/ - -int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks); -int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result); -int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result); -int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, - dm_block_t end, dm_block_t *result); -int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, enum allocation_event *ev); -int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev); -int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev); -int sm_ll_commit(struct ll_disk *ll); - -int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm); -int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm, - void *root_le, size_t len); - -int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm); -int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm, - void *root_le, size_t len); - -/*----------------------------------------------------------------*/ - -#endif /* DM_SPACE_MAP_COMMON_H */ diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-disk.c b/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-disk.c deleted file mode 100644 index 3d0ed533..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-disk.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#include "dm-space-map-checker.h" -#include "dm-space-map-common.h" -#include "dm-space-map-disk.h" -#include "dm-space-map.h" -#include "dm-transaction-manager.h" - -#include <linux/list.h> -#include <linux/slab.h> -#include <linux/export.h> -#include <linux/device-mapper.h> - -#define DM_MSG_PREFIX "space map disk" - -/*----------------------------------------------------------------*/ - -/* - * Space map interface. 
- */ -struct sm_disk { - struct dm_space_map sm; - - struct ll_disk ll; - struct ll_disk old_ll; - - dm_block_t begin; - dm_block_t nr_allocated_this_transaction; -}; - -static void sm_disk_destroy(struct dm_space_map *sm) -{ - struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - - kfree(smd); -} - -static int sm_disk_extend(struct dm_space_map *sm, dm_block_t extra_blocks) -{ - struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - - return sm_ll_extend(&smd->ll, extra_blocks); -} - -static int sm_disk_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) -{ - struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - *count = smd->old_ll.nr_blocks; - - return 0; -} - -static int sm_disk_get_nr_free(struct dm_space_map *sm, dm_block_t *count) -{ - struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - *count = (smd->old_ll.nr_blocks - smd->old_ll.nr_allocated) - smd->nr_allocated_this_transaction; - - return 0; -} - -static int sm_disk_get_count(struct dm_space_map *sm, dm_block_t b, - uint32_t *result) -{ - struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - return sm_ll_lookup(&smd->ll, b, result); -} - -static int sm_disk_count_is_more_than_one(struct dm_space_map *sm, dm_block_t b, - int *result) -{ - int r; - uint32_t count; - - r = sm_disk_get_count(sm, b, &count); - if (r) - return r; - - return count > 1; -} - -static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b, - uint32_t count) -{ - int r; - uint32_t old_count; - enum allocation_event ev; - struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - - r = sm_ll_insert(&smd->ll, b, count, &ev); - if (!r) { - switch (ev) { - case SM_NONE: - break; - - case SM_ALLOC: - /* - * This _must_ be free in the prior transaction - * otherwise we've lost atomicity. - */ - smd->nr_allocated_this_transaction++; - break; - - case SM_FREE: - /* - * It's only free if it's also free in the last - * transaction. - */ - r = sm_ll_lookup(&smd->old_ll, b, &old_count); - if (r) - return r; - - if (!old_count) - smd->nr_allocated_this_transaction--; - break; - } - } - - return r; -} - -static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b) -{ - int r; - enum allocation_event ev; - struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - - r = sm_ll_inc(&smd->ll, b, &ev); - if (!r && (ev == SM_ALLOC)) - /* - * This _must_ be free in the prior transaction - * otherwise we've lost atomicity. - */ - smd->nr_allocated_this_transaction++; - - return r; -} - -static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b) -{ - int r; - uint32_t old_count; - enum allocation_event ev; - struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - - r = sm_ll_dec(&smd->ll, b, &ev); - if (!r && (ev == SM_FREE)) { - /* - * It's only free if it's also free in the last - * transaction. 
- */ - r = sm_ll_lookup(&smd->old_ll, b, &old_count); - if (r) - return r; - - if (!old_count) - smd->nr_allocated_this_transaction--; - } - - return r; -} - -static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) -{ - int r; - enum allocation_event ev; - struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - - /* FIXME: we should loop round a couple of times */ - r = sm_ll_find_free_block(&smd->old_ll, smd->begin, smd->old_ll.nr_blocks, b); - if (r) - return r; - - smd->begin = *b + 1; - r = sm_ll_inc(&smd->ll, *b, &ev); - if (!r) { - BUG_ON(ev != SM_ALLOC); - smd->nr_allocated_this_transaction++; - } - - return r; -} - -static int sm_disk_commit(struct dm_space_map *sm) -{ - int r; - dm_block_t nr_free; - struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - - r = sm_disk_get_nr_free(sm, &nr_free); - if (r) - return r; - - r = sm_ll_commit(&smd->ll); - if (r) - return r; - - memcpy(&smd->old_ll, &smd->ll, sizeof(smd->old_ll)); - smd->begin = 0; - smd->nr_allocated_this_transaction = 0; - - r = sm_disk_get_nr_free(sm, &nr_free); - if (r) - return r; - - return 0; -} - -static int sm_disk_root_size(struct dm_space_map *sm, size_t *result) -{ - *result = sizeof(struct disk_sm_root); - - return 0; -} - -static int sm_disk_copy_root(struct dm_space_map *sm, void *where_le, size_t max) -{ - struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - struct disk_sm_root root_le; - - root_le.nr_blocks = cpu_to_le64(smd->ll.nr_blocks); - root_le.nr_allocated = cpu_to_le64(smd->ll.nr_allocated); - root_le.bitmap_root = cpu_to_le64(smd->ll.bitmap_root); - root_le.ref_count_root = cpu_to_le64(smd->ll.ref_count_root); - - if (max < sizeof(root_le)) - return -ENOSPC; - - memcpy(where_le, &root_le, sizeof(root_le)); - - return 0; -} - -/*----------------------------------------------------------------*/ - -static struct dm_space_map ops = { - .destroy = sm_disk_destroy, - .extend = sm_disk_extend, - .get_nr_blocks = sm_disk_get_nr_blocks, - .get_nr_free = sm_disk_get_nr_free, - .get_count = sm_disk_get_count, - .count_is_more_than_one = sm_disk_count_is_more_than_one, - .set_count = sm_disk_set_count, - .inc_block = sm_disk_inc_block, - .dec_block = sm_disk_dec_block, - .new_block = sm_disk_new_block, - .commit = sm_disk_commit, - .root_size = sm_disk_root_size, - .copy_root = sm_disk_copy_root -}; - -static struct dm_space_map *dm_sm_disk_create_real( - struct dm_transaction_manager *tm, - dm_block_t nr_blocks) -{ - int r; - struct sm_disk *smd; - - smd = kmalloc(sizeof(*smd), GFP_KERNEL); - if (!smd) - return ERR_PTR(-ENOMEM); - - smd->begin = 0; - smd->nr_allocated_this_transaction = 0; - memcpy(&smd->sm, &ops, sizeof(smd->sm)); - - r = sm_ll_new_disk(&smd->ll, tm); - if (r) - goto bad; - - r = sm_ll_extend(&smd->ll, nr_blocks); - if (r) - goto bad; - - r = sm_disk_commit(&smd->sm); - if (r) - goto bad; - - return &smd->sm; - -bad: - kfree(smd); - return ERR_PTR(r); -} - -struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, - dm_block_t nr_blocks) -{ - struct dm_space_map *sm = dm_sm_disk_create_real(tm, nr_blocks); - struct dm_space_map *smc; - - if (IS_ERR_OR_NULL(sm)) - return sm; - - smc = dm_sm_checker_create_fresh(sm); - if (IS_ERR(smc)) - dm_sm_destroy(sm); - - return smc; -} -EXPORT_SYMBOL_GPL(dm_sm_disk_create); - -static struct dm_space_map *dm_sm_disk_open_real( - struct dm_transaction_manager *tm, - void *root_le, size_t len) -{ - int r; - struct sm_disk *smd; - - smd = kmalloc(sizeof(*smd), GFP_KERNEL); - if (!smd) - return 
ERR_PTR(-ENOMEM); - - smd->begin = 0; - smd->nr_allocated_this_transaction = 0; - memcpy(&smd->sm, &ops, sizeof(smd->sm)); - - r = sm_ll_open_disk(&smd->ll, tm, root_le, len); - if (r) - goto bad; - - r = sm_disk_commit(&smd->sm); - if (r) - goto bad; - - return &smd->sm; - -bad: - kfree(smd); - return ERR_PTR(r); -} - -struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm, - void *root_le, size_t len) -{ - return dm_sm_checker_create( - dm_sm_disk_open_real(tm, root_le, len)); -} -EXPORT_SYMBOL_GPL(dm_sm_disk_open); - -/*----------------------------------------------------------------*/ diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-disk.h b/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-disk.h deleted file mode 100644 index 447a0a9a..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-disk.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#ifndef _LINUX_DM_SPACE_MAP_DISK_H -#define _LINUX_DM_SPACE_MAP_DISK_H - -#include "dm-block-manager.h" - -struct dm_space_map; -struct dm_transaction_manager; - -/* - * Unfortunately we have to use two-phase construction due to the cycle - * between the tm and sm. - */ -struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, - dm_block_t nr_blocks); - -struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm, - void *root, size_t len); - -#endif /* _LINUX_DM_SPACE_MAP_DISK_H */ diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-metadata.c b/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-metadata.c deleted file mode 100644 index e89ae5e7..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-metadata.c +++ /dev/null @@ -1,596 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#include "dm-space-map.h" -#include "dm-space-map-common.h" -#include "dm-space-map-metadata.h" - -#include <linux/list.h> -#include <linux/slab.h> -#include <linux/device-mapper.h> - -#define DM_MSG_PREFIX "space map metadata" - -/*----------------------------------------------------------------*/ - -/* - * Space map interface. - * - * The low level disk format is written using the standard btree and - * transaction manager. This means that performing disk operations may - * cause us to recurse into the space map in order to allocate new blocks. - * For this reason we have a pool of pre-allocated blocks large enough to - * service any metadata_ll_disk operation. - */ - -/* - * FIXME: we should calculate this based on the size of the device. - * Only the metadata space map needs this functionality. 
- */ -#define MAX_RECURSIVE_ALLOCATIONS 1024 - -enum block_op_type { - BOP_INC, - BOP_DEC -}; - -struct block_op { - enum block_op_type type; - dm_block_t block; -}; - -struct sm_metadata { - struct dm_space_map sm; - - struct ll_disk ll; - struct ll_disk old_ll; - - dm_block_t begin; - - unsigned recursion_count; - unsigned allocated_this_transaction; - unsigned nr_uncommitted; - struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS]; -}; - -static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b) -{ - struct block_op *op; - - if (smm->nr_uncommitted == MAX_RECURSIVE_ALLOCATIONS) { - DMERR("too many recursive allocations"); - return -ENOMEM; - } - - op = smm->uncommitted + smm->nr_uncommitted++; - op->type = type; - op->block = b; - - return 0; -} - -static int commit_bop(struct sm_metadata *smm, struct block_op *op) -{ - int r = 0; - enum allocation_event ev; - - switch (op->type) { - case BOP_INC: - r = sm_ll_inc(&smm->ll, op->block, &ev); - break; - - case BOP_DEC: - r = sm_ll_dec(&smm->ll, op->block, &ev); - break; - } - - return r; -} - -static void in(struct sm_metadata *smm) -{ - smm->recursion_count++; -} - -static int out(struct sm_metadata *smm) -{ - int r = 0; - - /* - * If we're not recursing then very bad things are happening. - */ - if (!smm->recursion_count) { - DMERR("lost track of recursion depth"); - return -ENOMEM; - } - - if (smm->recursion_count == 1 && smm->nr_uncommitted) { - while (smm->nr_uncommitted && !r) { - smm->nr_uncommitted--; - r = commit_bop(smm, smm->uncommitted + - smm->nr_uncommitted); - if (r) - break; - } - } - - smm->recursion_count--; - - return r; -} - -/* - * When using the out() function above, we often want to combine an error - * code for the operation run in the recursive context with that from - * out(). - */ -static int combine_errors(int r1, int r2) -{ - return r1 ? r1 : r2; -} - -static int recursing(struct sm_metadata *smm) -{ - return smm->recursion_count; -} - -static void sm_metadata_destroy(struct dm_space_map *sm) -{ - struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - - kfree(smm); -} - -static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) -{ - DMERR("doesn't support extend"); - return -EINVAL; -} - -static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) -{ - struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - - *count = smm->ll.nr_blocks; - - return 0; -} - -static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count) -{ - struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - - *count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated - - smm->allocated_this_transaction; - - return 0; -} - -static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, - uint32_t *result) -{ - int r, i; - struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - unsigned adjustment = 0; - - /* - * We may have some uncommitted adjustments to add. This list - * should always be really short. 
- */ - for (i = 0; i < smm->nr_uncommitted; i++) { - struct block_op *op = smm->uncommitted + i; - - if (op->block != b) - continue; - - switch (op->type) { - case BOP_INC: - adjustment++; - break; - - case BOP_DEC: - adjustment--; - break; - } - } - - r = sm_ll_lookup(&smm->ll, b, result); - if (r) - return r; - - *result += adjustment; - - return 0; -} - -static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm, - dm_block_t b, int *result) -{ - int r, i, adjustment = 0; - struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - uint32_t rc; - - /* - * We may have some uncommitted adjustments to add. This list - * should always be really short. - */ - for (i = 0; i < smm->nr_uncommitted; i++) { - struct block_op *op = smm->uncommitted + i; - - if (op->block != b) - continue; - - switch (op->type) { - case BOP_INC: - adjustment++; - break; - - case BOP_DEC: - adjustment--; - break; - } - } - - if (adjustment > 1) { - *result = 1; - return 0; - } - - r = sm_ll_lookup_bitmap(&smm->ll, b, &rc); - if (r) - return r; - - if (rc == 3) - /* - * We err on the side of caution, and always return true. - */ - *result = 1; - else - *result = rc + adjustment > 1; - - return 0; -} - -static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b, - uint32_t count) -{ - int r, r2; - enum allocation_event ev; - struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - - if (smm->recursion_count) { - DMERR("cannot recurse set_count()"); - return -EINVAL; - } - - in(smm); - r = sm_ll_insert(&smm->ll, b, count, &ev); - r2 = out(smm); - - return combine_errors(r, r2); -} - -static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b) -{ - int r, r2 = 0; - enum allocation_event ev; - struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - - if (recursing(smm)) - r = add_bop(smm, BOP_INC, b); - else { - in(smm); - r = sm_ll_inc(&smm->ll, b, &ev); - r2 = out(smm); - } - - return combine_errors(r, r2); -} - -static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b) -{ - int r, r2 = 0; - enum allocation_event ev; - struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - - if (recursing(smm)) - r = add_bop(smm, BOP_DEC, b); - else { - in(smm); - r = sm_ll_dec(&smm->ll, b, &ev); - r2 = out(smm); - } - - return combine_errors(r, r2); -} - -static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b) -{ - int r, r2 = 0; - enum allocation_event ev; - struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - - r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b); - if (r) - return r; - - smm->begin = *b + 1; - - if (recursing(smm)) - r = add_bop(smm, BOP_INC, *b); - else { - in(smm); - r = sm_ll_inc(&smm->ll, *b, &ev); - r2 = out(smm); - } - - if (!r) - smm->allocated_this_transaction++; - - return combine_errors(r, r2); -} - -static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) -{ - int r = sm_metadata_new_block_(sm, b); - if (r) - DMERR("out of metadata space"); - return r; -} - -static int sm_metadata_commit(struct dm_space_map *sm) -{ - int r; - struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - - r = sm_ll_commit(&smm->ll); - if (r) - return r; - - memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); - smm->begin = 0; - smm->allocated_this_transaction = 0; - - return 0; -} - -static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result) -{ - *result = sizeof(struct disk_sm_root); - - return 0; -} - -static int 
sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
-{
-        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
-        struct disk_sm_root root_le;
-
-        root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
-        root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
-        root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
-        root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);
-
-        if (max < sizeof(root_le))
-                return -ENOSPC;
-
-        memcpy(where_le, &root_le, sizeof(root_le));
-
-        return 0;
-}
-
-static struct dm_space_map ops = {
-        .destroy = sm_metadata_destroy,
-        .extend = sm_metadata_extend,
-        .get_nr_blocks = sm_metadata_get_nr_blocks,
-        .get_nr_free = sm_metadata_get_nr_free,
-        .get_count = sm_metadata_get_count,
-        .count_is_more_than_one = sm_metadata_count_is_more_than_one,
-        .set_count = sm_metadata_set_count,
-        .inc_block = sm_metadata_inc_block,
-        .dec_block = sm_metadata_dec_block,
-        .new_block = sm_metadata_new_block,
-        .commit = sm_metadata_commit,
-        .root_size = sm_metadata_root_size,
-        .copy_root = sm_metadata_copy_root
-};
-
-/*----------------------------------------------------------------*/
-
-/*
- * When a new space map is created that manages its own space, we use
- * this tiny bootstrap allocator.
- */
-static void sm_bootstrap_destroy(struct dm_space_map *sm)
-{
-}
-
-static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
-{
-        DMERR("bootstrap doesn't support extend");
-
-        return -EINVAL;
-}
-
-static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
-{
-        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
-
-        *count = smm->ll.nr_blocks;
-
-        return 0;
-}
-
-static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
-{
-        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
-
-        *count = smm->ll.nr_blocks - smm->begin;
-
-        return 0;
-}
-
-static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
-                                  uint32_t *result)
-{
-        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
-
-        /* blocks below smm->begin have already been handed out once */
-        *result = (b < smm->begin) ? 1 : 0;
-
-        return 0;
-}
-
-static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
-                                               dm_block_t b, int *result)
-{
-        *result = 0;
-
-        return 0;
-}
-
-static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
-                                  uint32_t count)
-{
-        DMERR("bootstrap doesn't support set_count");
-
-        return -EINVAL;
-}
-
-static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
-{
-        struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
-
-        /*
-         * We know the entire device is unused.
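-         * Allocation is simply a matter of handing out smm->begin
-         * and advancing it.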
- */ - if (smm->begin == smm->ll.nr_blocks) - return -ENOSPC; - - *b = smm->begin++; - - return 0; -} - -static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b) -{ - struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - - return add_bop(smm, BOP_INC, b); -} - -static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b) -{ - struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - - return add_bop(smm, BOP_DEC, b); -} - -static int sm_bootstrap_commit(struct dm_space_map *sm) -{ - return 0; -} - -static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result) -{ - DMERR("boostrap doesn't support root_size"); - - return -EINVAL; -} - -static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where, - size_t max) -{ - DMERR("boostrap doesn't support copy_root"); - - return -EINVAL; -} - -static struct dm_space_map bootstrap_ops = { - .destroy = sm_bootstrap_destroy, - .extend = sm_bootstrap_extend, - .get_nr_blocks = sm_bootstrap_get_nr_blocks, - .get_nr_free = sm_bootstrap_get_nr_free, - .get_count = sm_bootstrap_get_count, - .count_is_more_than_one = sm_bootstrap_count_is_more_than_one, - .set_count = sm_bootstrap_set_count, - .inc_block = sm_bootstrap_inc_block, - .dec_block = sm_bootstrap_dec_block, - .new_block = sm_bootstrap_new_block, - .commit = sm_bootstrap_commit, - .root_size = sm_bootstrap_root_size, - .copy_root = sm_bootstrap_copy_root -}; - -/*----------------------------------------------------------------*/ - -struct dm_space_map *dm_sm_metadata_init(void) -{ - struct sm_metadata *smm; - - smm = kmalloc(sizeof(*smm), GFP_KERNEL); - if (!smm) - return ERR_PTR(-ENOMEM); - - memcpy(&smm->sm, &ops, sizeof(smm->sm)); - - return &smm->sm; -} - -int dm_sm_metadata_create(struct dm_space_map *sm, - struct dm_transaction_manager *tm, - dm_block_t nr_blocks, - dm_block_t superblock) -{ - int r; - dm_block_t i; - enum allocation_event ev; - struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - - smm->begin = superblock + 1; - smm->recursion_count = 0; - smm->allocated_this_transaction = 0; - smm->nr_uncommitted = 0; - - memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); - - r = sm_ll_new_metadata(&smm->ll, tm); - if (r) - return r; - - r = sm_ll_extend(&smm->ll, nr_blocks); - if (r) - return r; - - memcpy(&smm->sm, &ops, sizeof(smm->sm)); - - /* - * Now we need to update the newly created data structures with the - * allocated blocks that they were built from. - */ - for (i = superblock; !r && i < smm->begin; i++) - r = sm_ll_inc(&smm->ll, i, &ev); - - if (r) - return r; - - return sm_metadata_commit(sm); -} - -int dm_sm_metadata_open(struct dm_space_map *sm, - struct dm_transaction_manager *tm, - void *root_le, size_t len) -{ - int r; - struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - - r = sm_ll_open_metadata(&smm->ll, tm, root_le, len); - if (r) - return r; - - smm->begin = 0; - smm->recursion_count = 0; - smm->allocated_this_transaction = 0; - smm->nr_uncommitted = 0; - - memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); - return 0; -} diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-metadata.h b/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-metadata.h deleted file mode 100644 index 39bba080..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map-metadata.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. 
- */ - -#ifndef DM_SPACE_MAP_METADATA_H -#define DM_SPACE_MAP_METADATA_H - -#include "dm-transaction-manager.h" - -/* - * Unfortunately we have to use two-phase construction due to the cycle - * between the tm and sm. - */ -struct dm_space_map *dm_sm_metadata_init(void); - -/* - * Create a fresh space map. - */ -int dm_sm_metadata_create(struct dm_space_map *sm, - struct dm_transaction_manager *tm, - dm_block_t nr_blocks, - dm_block_t superblock); - -/* - * Open from a previously-recorded root. - */ -int dm_sm_metadata_open(struct dm_space_map *sm, - struct dm_transaction_manager *tm, - void *root_le, size_t len); - -#endif /* DM_SPACE_MAP_METADATA_H */ diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map.h b/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map.h deleted file mode 100644 index 1cbfc6b1..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-space-map.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#ifndef _LINUX_DM_SPACE_MAP_H -#define _LINUX_DM_SPACE_MAP_H - -#include "dm-block-manager.h" - -/* - * struct dm_space_map keeps a record of how many times each block in a device - * is referenced. It needs to be fixed on disk as part of the transaction. - */ -struct dm_space_map { - void (*destroy)(struct dm_space_map *sm); - - /* - * You must commit before allocating the newly added space. - */ - int (*extend)(struct dm_space_map *sm, dm_block_t extra_blocks); - - /* - * Extensions do not appear in this count until after commit has - * been called. - */ - int (*get_nr_blocks)(struct dm_space_map *sm, dm_block_t *count); - - /* - * Space maps must never allocate a block from the previous - * transaction, in case we need to rollback. This complicates the - * semantics of get_nr_free(), it should return the number of blocks - * that are available for allocation _now_. For instance you may - * have blocks with a zero reference count that will not be - * available for allocation until after the next commit. - */ - int (*get_nr_free)(struct dm_space_map *sm, dm_block_t *count); - - int (*get_count)(struct dm_space_map *sm, dm_block_t b, uint32_t *result); - int (*count_is_more_than_one)(struct dm_space_map *sm, dm_block_t b, - int *result); - int (*set_count)(struct dm_space_map *sm, dm_block_t b, uint32_t count); - - int (*commit)(struct dm_space_map *sm); - - int (*inc_block)(struct dm_space_map *sm, dm_block_t b); - int (*dec_block)(struct dm_space_map *sm, dm_block_t b); - - /* - * new_block will increment the returned block. - */ - int (*new_block)(struct dm_space_map *sm, dm_block_t *b); - - /* - * The root contains all the information needed to fix the space map. - * Generally this info is small, so squirrel it away in a disk block - * along with other info. 
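- * (The metadata space map's root, for example, is just four
- * little-endian 64-bit fields.)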
- */ - int (*root_size)(struct dm_space_map *sm, size_t *result); - int (*copy_root)(struct dm_space_map *sm, void *copy_to_here_le, size_t len); -}; - -/*----------------------------------------------------------------*/ - -static inline void dm_sm_destroy(struct dm_space_map *sm) -{ - sm->destroy(sm); -} - -static inline int dm_sm_extend(struct dm_space_map *sm, dm_block_t extra_blocks) -{ - return sm->extend(sm, extra_blocks); -} - -static inline int dm_sm_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) -{ - return sm->get_nr_blocks(sm, count); -} - -static inline int dm_sm_get_nr_free(struct dm_space_map *sm, dm_block_t *count) -{ - return sm->get_nr_free(sm, count); -} - -static inline int dm_sm_get_count(struct dm_space_map *sm, dm_block_t b, - uint32_t *result) -{ - return sm->get_count(sm, b, result); -} - -static inline int dm_sm_count_is_more_than_one(struct dm_space_map *sm, - dm_block_t b, int *result) -{ - return sm->count_is_more_than_one(sm, b, result); -} - -static inline int dm_sm_set_count(struct dm_space_map *sm, dm_block_t b, - uint32_t count) -{ - return sm->set_count(sm, b, count); -} - -static inline int dm_sm_commit(struct dm_space_map *sm) -{ - return sm->commit(sm); -} - -static inline int dm_sm_inc_block(struct dm_space_map *sm, dm_block_t b) -{ - return sm->inc_block(sm, b); -} - -static inline int dm_sm_dec_block(struct dm_space_map *sm, dm_block_t b) -{ - return sm->dec_block(sm, b); -} - -static inline int dm_sm_new_block(struct dm_space_map *sm, dm_block_t *b) -{ - return sm->new_block(sm, b); -} - -static inline int dm_sm_root_size(struct dm_space_map *sm, size_t *result) -{ - return sm->root_size(sm, result); -} - -static inline int dm_sm_copy_root(struct dm_space_map *sm, void *copy_to_here_le, size_t len) -{ - return sm->copy_root(sm, copy_to_here_le, len); -} - -#endif /* _LINUX_DM_SPACE_MAP_H */ diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-transaction-manager.c b/ANDROID_3.4.5/drivers/md/persistent-data/dm-transaction-manager.c deleted file mode 100644 index ba54aacf..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-transaction-manager.c +++ /dev/null @@ -1,407 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ -#include "dm-transaction-manager.h" -#include "dm-space-map.h" -#include "dm-space-map-checker.h" -#include "dm-space-map-disk.h" -#include "dm-space-map-metadata.h" -#include "dm-persistent-data-internal.h" - -#include <linux/export.h> -#include <linux/slab.h> -#include <linux/device-mapper.h> - -#define DM_MSG_PREFIX "transaction manager" - -/*----------------------------------------------------------------*/ - -struct shadow_info { - struct hlist_node hlist; - dm_block_t where; -}; - -/* - * It would be nice if we scaled with the size of transaction. 
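- * For now it is a fixed hash table of HASH_SIZE buckets.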
- */ -#define HASH_SIZE 256 -#define HASH_MASK (HASH_SIZE - 1) - -struct dm_transaction_manager { - int is_clone; - struct dm_transaction_manager *real; - - struct dm_block_manager *bm; - struct dm_space_map *sm; - - spinlock_t lock; - struct hlist_head buckets[HASH_SIZE]; -}; - -/*----------------------------------------------------------------*/ - -static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b) -{ - int r = 0; - unsigned bucket = dm_hash_block(b, HASH_MASK); - struct shadow_info *si; - struct hlist_node *n; - - spin_lock(&tm->lock); - hlist_for_each_entry(si, n, tm->buckets + bucket, hlist) - if (si->where == b) { - r = 1; - break; - } - spin_unlock(&tm->lock); - - return r; -} - -/* - * This can silently fail if there's no memory. We're ok with this since - * creating redundant shadows causes no harm. - */ -static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b) -{ - unsigned bucket; - struct shadow_info *si; - - si = kmalloc(sizeof(*si), GFP_NOIO); - if (si) { - si->where = b; - bucket = dm_hash_block(b, HASH_MASK); - spin_lock(&tm->lock); - hlist_add_head(&si->hlist, tm->buckets + bucket); - spin_unlock(&tm->lock); - } -} - -static void wipe_shadow_table(struct dm_transaction_manager *tm) -{ - struct shadow_info *si; - struct hlist_node *n, *tmp; - struct hlist_head *bucket; - int i; - - spin_lock(&tm->lock); - for (i = 0; i < HASH_SIZE; i++) { - bucket = tm->buckets + i; - hlist_for_each_entry_safe(si, n, tmp, bucket, hlist) - kfree(si); - - INIT_HLIST_HEAD(bucket); - } - - spin_unlock(&tm->lock); -} - -/*----------------------------------------------------------------*/ - -static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm, - struct dm_space_map *sm) -{ - int i; - struct dm_transaction_manager *tm; - - tm = kmalloc(sizeof(*tm), GFP_KERNEL); - if (!tm) - return ERR_PTR(-ENOMEM); - - tm->is_clone = 0; - tm->real = NULL; - tm->bm = bm; - tm->sm = sm; - - spin_lock_init(&tm->lock); - for (i = 0; i < HASH_SIZE; i++) - INIT_HLIST_HEAD(tm->buckets + i); - - return tm; -} - -struct dm_transaction_manager *dm_tm_create_non_blocking_clone(struct dm_transaction_manager *real) -{ - struct dm_transaction_manager *tm; - - tm = kmalloc(sizeof(*tm), GFP_KERNEL); - if (tm) { - tm->is_clone = 1; - tm->real = real; - } - - return tm; -} -EXPORT_SYMBOL_GPL(dm_tm_create_non_blocking_clone); - -void dm_tm_destroy(struct dm_transaction_manager *tm) -{ - if (!tm->is_clone) - wipe_shadow_table(tm); - - kfree(tm); -} -EXPORT_SYMBOL_GPL(dm_tm_destroy); - -int dm_tm_pre_commit(struct dm_transaction_manager *tm) -{ - int r; - - if (tm->is_clone) - return -EWOULDBLOCK; - - r = dm_sm_commit(tm->sm); - if (r < 0) - return r; - - return 0; -} -EXPORT_SYMBOL_GPL(dm_tm_pre_commit); - -int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root) -{ - if (tm->is_clone) - return -EWOULDBLOCK; - - wipe_shadow_table(tm); - - return dm_bm_flush_and_unlock(tm->bm, root); -} -EXPORT_SYMBOL_GPL(dm_tm_commit); - -int dm_tm_new_block(struct dm_transaction_manager *tm, - struct dm_block_validator *v, - struct dm_block **result) -{ - int r; - dm_block_t new_block; - - if (tm->is_clone) - return -EWOULDBLOCK; - - r = dm_sm_new_block(tm->sm, &new_block); - if (r < 0) - return r; - - r = dm_bm_write_lock_zero(tm->bm, new_block, v, result); - if (r < 0) { - dm_sm_dec_block(tm->sm, new_block); - return r; - } - - /* - * New blocks count as shadows in that they don't need to be - * shadowed again. 
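- * They are entered into the shadow table straight away.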
- */ - insert_shadow(tm, new_block); - - return 0; -} - -static int __shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, - struct dm_block_validator *v, - struct dm_block **result) -{ - int r; - dm_block_t new; - struct dm_block *orig_block; - - r = dm_sm_new_block(tm->sm, &new); - if (r < 0) - return r; - - r = dm_sm_dec_block(tm->sm, orig); - if (r < 0) - return r; - - r = dm_bm_read_lock(tm->bm, orig, v, &orig_block); - if (r < 0) - return r; - - r = dm_bm_unlock_move(orig_block, new); - if (r < 0) { - dm_bm_unlock(orig_block); - return r; - } - - return dm_bm_write_lock(tm->bm, new, v, result); -} - -int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, - struct dm_block_validator *v, struct dm_block **result, - int *inc_children) -{ - int r; - - if (tm->is_clone) - return -EWOULDBLOCK; - - r = dm_sm_count_is_more_than_one(tm->sm, orig, inc_children); - if (r < 0) - return r; - - if (is_shadow(tm, orig) && !*inc_children) - return dm_bm_write_lock(tm->bm, orig, v, result); - - r = __shadow_block(tm, orig, v, result); - if (r < 0) - return r; - insert_shadow(tm, dm_block_location(*result)); - - return r; -} - -int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b, - struct dm_block_validator *v, - struct dm_block **blk) -{ - if (tm->is_clone) - return dm_bm_read_try_lock(tm->real->bm, b, v, blk); - - return dm_bm_read_lock(tm->bm, b, v, blk); -} - -int dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b) -{ - return dm_bm_unlock(b); -} -EXPORT_SYMBOL_GPL(dm_tm_unlock); - -void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b) -{ - /* - * The non-blocking clone doesn't support this. - */ - BUG_ON(tm->is_clone); - - dm_sm_inc_block(tm->sm, b); -} -EXPORT_SYMBOL_GPL(dm_tm_inc); - -void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b) -{ - /* - * The non-blocking clone doesn't support this. 
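- * A refcount change may block, so calling this on a clone is a caller bug.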
- */ - BUG_ON(tm->is_clone); - - dm_sm_dec_block(tm->sm, b); -} -EXPORT_SYMBOL_GPL(dm_tm_dec); - -int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b, - uint32_t *result) -{ - if (tm->is_clone) - return -EWOULDBLOCK; - - return dm_sm_get_count(tm->sm, b, result); -} - -struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm) -{ - return tm->bm; -} - -/*----------------------------------------------------------------*/ - -static int dm_tm_create_internal(struct dm_block_manager *bm, - dm_block_t sb_location, - struct dm_block_validator *sb_validator, - size_t root_offset, size_t root_max_len, - struct dm_transaction_manager **tm, - struct dm_space_map **sm, - struct dm_block **sblock, - int create) -{ - int r; - struct dm_space_map *inner; - - inner = dm_sm_metadata_init(); - if (IS_ERR(inner)) - return PTR_ERR(inner); - - *tm = dm_tm_create(bm, inner); - if (IS_ERR(*tm)) { - dm_sm_destroy(inner); - return PTR_ERR(*tm); - } - - if (create) { - r = dm_bm_write_lock_zero(dm_tm_get_bm(*tm), sb_location, - sb_validator, sblock); - if (r < 0) { - DMERR("couldn't lock superblock"); - goto bad1; - } - - r = dm_sm_metadata_create(inner, *tm, dm_bm_nr_blocks(bm), - sb_location); - if (r) { - DMERR("couldn't create metadata space map"); - goto bad2; - } - - *sm = dm_sm_checker_create(inner); - if (IS_ERR(*sm)) { - r = PTR_ERR(*sm); - goto bad2; - } - - } else { - r = dm_bm_write_lock(dm_tm_get_bm(*tm), sb_location, - sb_validator, sblock); - if (r < 0) { - DMERR("couldn't lock superblock"); - goto bad1; - } - - r = dm_sm_metadata_open(inner, *tm, - dm_block_data(*sblock) + root_offset, - root_max_len); - if (r) { - DMERR("couldn't open metadata space map"); - goto bad2; - } - - *sm = dm_sm_checker_create(inner); - if (IS_ERR(*sm)) { - r = PTR_ERR(*sm); - goto bad2; - } - } - - return 0; - -bad2: - dm_tm_unlock(*tm, *sblock); -bad1: - dm_tm_destroy(*tm); - dm_sm_destroy(inner); - return r; -} - -int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, - struct dm_transaction_manager **tm, - struct dm_space_map **sm, struct dm_block **sblock) -{ - return dm_tm_create_internal(bm, sb_location, sb_validator, - 0, 0, tm, sm, sblock, 1); -} -EXPORT_SYMBOL_GPL(dm_tm_create_with_sm); - -int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, - size_t root_offset, size_t root_max_len, - struct dm_transaction_manager **tm, - struct dm_space_map **sm, struct dm_block **sblock) -{ - return dm_tm_create_internal(bm, sb_location, sb_validator, root_offset, - root_max_len, tm, sm, sblock, 0); -} -EXPORT_SYMBOL_GPL(dm_tm_open_with_sm); - -/*----------------------------------------------------------------*/ diff --git a/ANDROID_3.4.5/drivers/md/persistent-data/dm-transaction-manager.h b/ANDROID_3.4.5/drivers/md/persistent-data/dm-transaction-manager.h deleted file mode 100644 index 6da78487..00000000 --- a/ANDROID_3.4.5/drivers/md/persistent-data/dm-transaction-manager.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#ifndef _LINUX_DM_TRANSACTION_MANAGER_H -#define _LINUX_DM_TRANSACTION_MANAGER_H - -#include "dm-block-manager.h" - -struct dm_transaction_manager; -struct dm_space_map; - -/*----------------------------------------------------------------*/ - -/* - * This manages the scope of a transaction. 
It also enforces immutability - * of the on-disk data structures by limiting access to writeable blocks. - * - * Clients should not fiddle with the block manager directly. - */ - -void dm_tm_destroy(struct dm_transaction_manager *tm); - -/* - * The non-blocking version of a transaction manager is intended for use in - * fast path code that needs to do lookups e.g. a dm mapping function. - * You create the non-blocking variant from a normal tm. The interface is - * the same, except that most functions will just return -EWOULDBLOCK. - * Methods that return void yet may block should not be called on a clone - * viz. dm_tm_inc, dm_tm_dec. Call dm_tm_destroy() as you would with a normal - * tm when you've finished with it. You may not destroy the original prior - * to clones. - */ -struct dm_transaction_manager *dm_tm_create_non_blocking_clone(struct dm_transaction_manager *real); - -/* - * We use a 2-phase commit here. - * - * i) In the first phase the block manager is told to start flushing, and - * the changes to the space map are written to disk. You should interrogate - * your particular space map to get detail of its root node etc. to be - * included in your superblock. - * - * ii) @root will be committed last. You shouldn't use more than the - * first 512 bytes of @root if you wish the transaction to survive a power - * failure. You *must* have a write lock held on @root for both stage (i) - * and (ii). The commit will drop the write lock. - */ -int dm_tm_pre_commit(struct dm_transaction_manager *tm); -int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root); - -/* - * These methods are the only way to get hold of a writeable block. - */ - -/* - * dm_tm_new_block() is pretty self-explanatory. Make sure you do actually - * write to the whole of @data before you unlock, otherwise you could get - * a data leak. (The other option is for tm_new_block() to zero new blocks - * before handing them out, which will be redundant in most, if not all, - * cases). - * Zeroes the new block and returns with write lock held. - */ -int dm_tm_new_block(struct dm_transaction_manager *tm, - struct dm_block_validator *v, - struct dm_block **result); - -/* - * dm_tm_shadow_block() allocates a new block and copies the data from @orig - * to it. It then decrements the reference count on original block. Use - * this to update the contents of a block in a data structure, don't - * confuse this with a clone - you shouldn't access the orig block after - * this operation. Because the tm knows the scope of the transaction it - * can optimise requests for a shadow of a shadow to a no-op. Don't forget - * to unlock when you've finished with the shadow. - * - * The @inc_children flag is used to tell the caller whether it needs to - * adjust reference counts for children. (Data in the block may refer to - * other blocks.) - * - * Shadowing implicitly drops a reference on @orig so you must not have - * it locked when you call this. - */ -int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, - struct dm_block_validator *v, - struct dm_block **result, int *inc_children); - -/* - * Read access. You can lock any block you want. If there's a write lock - * on it outstanding then it'll block. - */ -int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b, - struct dm_block_validator *v, - struct dm_block **result); - -int dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b); - -/* - * Functions for altering the reference count of a block directly. 
- */ -void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b); - -void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b); - -int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b, - uint32_t *result); - -struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm); - -/* - * A little utility that ties the knot by producing a transaction manager - * that has a space map managed by the transaction manager... - * - * Returns a tm that has an open transaction to write the new disk sm. - * Caller should store the new sm root and commit. - */ -int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, - struct dm_transaction_manager **tm, - struct dm_space_map **sm, struct dm_block **sblock); - -int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, - size_t root_offset, size_t root_max_len, - struct dm_transaction_manager **tm, - struct dm_space_map **sm, struct dm_block **sblock); - -#endif /* _LINUX_DM_TRANSACTION_MANAGER_H */ diff --git a/ANDROID_3.4.5/drivers/md/raid0.c b/ANDROID_3.4.5/drivers/md/raid0.c deleted file mode 100644 index de63a1fc..00000000 --- a/ANDROID_3.4.5/drivers/md/raid0.c +++ /dev/null @@ -1,739 +0,0 @@ -/* - raid0.c : Multiple Devices driver for Linux - Copyright (C) 1994-96 Marc ZYNGIER - <zyngier@ufr-info-p7.ibp.fr> or - <maz@gloups.fdn.fr> - Copyright (C) 1999, 2000 Ingo Molnar, Red Hat - - - RAID-0 management functions. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - You should have received a copy of the GNU General Public License - (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#include <linux/blkdev.h> -#include <linux/seq_file.h> -#include <linux/module.h> -#include <linux/slab.h> -#include "md.h" -#include "raid0.h" -#include "raid5.h" - -static int raid0_congested(void *data, int bits) -{ - struct mddev *mddev = data; - struct r0conf *conf = mddev->private; - struct md_rdev **devlist = conf->devlist; - int raid_disks = conf->strip_zone[0].nb_dev; - int i, ret = 0; - - if (mddev_congested(mddev, bits)) - return 1; - - for (i = 0; i < raid_disks && !ret ; i++) { - struct request_queue *q = bdev_get_queue(devlist[i]->bdev); - - ret |= bdi_congested(&q->backing_dev_info, bits); - } - return ret; -} - -/* - * inform the user of the raid configuration -*/ -static void dump_zones(struct mddev *mddev) -{ - int j, k; - sector_t zone_size = 0; - sector_t zone_start = 0; - char b[BDEVNAME_SIZE]; - struct r0conf *conf = mddev->private; - int raid_disks = conf->strip_zone[0].nb_dev; - printk(KERN_INFO "md: RAID0 configuration for %s - %d zone%s\n", - mdname(mddev), - conf->nr_strip_zones, conf->nr_strip_zones==1?"":"s"); - for (j = 0; j < conf->nr_strip_zones; j++) { - printk(KERN_INFO "md: zone%d=[", j); - for (k = 0; k < conf->strip_zone[j].nb_dev; k++) - printk(KERN_CONT "%s%s", k?"/":"", - bdevname(conf->devlist[j*raid_disks - + k]->bdev, b)); - printk(KERN_CONT "]\n"); - - zone_size = conf->strip_zone[j].zone_end - zone_start; - printk(KERN_INFO " zone-offset=%10lluKB, " - "device-offset=%10lluKB, size=%10lluKB\n", - (unsigned long long)zone_start>>1, - (unsigned long long)conf->strip_zone[j].dev_start>>1, - (unsigned long long)zone_size>>1); - zone_start = conf->strip_zone[j].zone_end; - } - printk(KERN_INFO "\n"); -} - -static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) -{ - int i, c, err; - sector_t curr_zone_end, sectors; - struct md_rdev *smallest, *rdev1, *rdev2, *rdev, **dev; - struct strip_zone *zone; - int cnt; - char b[BDEVNAME_SIZE]; - char b2[BDEVNAME_SIZE]; - struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL); - - if (!conf) - return -ENOMEM; - rdev_for_each(rdev1, mddev) { - pr_debug("md/raid0:%s: looking at %s\n", - mdname(mddev), - bdevname(rdev1->bdev, b)); - c = 0; - - /* round size to chunk_size */ - sectors = rdev1->sectors; - sector_div(sectors, mddev->chunk_sectors); - rdev1->sectors = sectors * mddev->chunk_sectors; - - rdev_for_each(rdev2, mddev) { - pr_debug("md/raid0:%s: comparing %s(%llu)" - " with %s(%llu)\n", - mdname(mddev), - bdevname(rdev1->bdev,b), - (unsigned long long)rdev1->sectors, - bdevname(rdev2->bdev,b2), - (unsigned long long)rdev2->sectors); - if (rdev2 == rdev1) { - pr_debug("md/raid0:%s: END\n", - mdname(mddev)); - break; - } - if (rdev2->sectors == rdev1->sectors) { - /* - * Not unique, don't count it as a new - * group - */ - pr_debug("md/raid0:%s: EQUAL\n", - mdname(mddev)); - c = 1; - break; - } - pr_debug("md/raid0:%s: NOT EQUAL\n", - mdname(mddev)); - } - if (!c) { - pr_debug("md/raid0:%s: ==> UNIQUE\n", - mdname(mddev)); - conf->nr_strip_zones++; - pr_debug("md/raid0:%s: %d zones\n", - mdname(mddev), conf->nr_strip_zones); - } - } - pr_debug("md/raid0:%s: FINAL %d zones\n", - mdname(mddev), conf->nr_strip_zones); - err = -ENOMEM; - conf->strip_zone = kzalloc(sizeof(struct strip_zone)* - conf->nr_strip_zones, GFP_KERNEL); - if (!conf->strip_zone) - goto abort; - conf->devlist = kzalloc(sizeof(struct md_rdev*)* - conf->nr_strip_zones*mddev->raid_disks, - GFP_KERNEL); - if (!conf->devlist) - goto abort; - - /* The first zone must contain all devices, so here we check that - * 
there is a proper alignment of slots to devices and find them all - */ - zone = &conf->strip_zone[0]; - cnt = 0; - smallest = NULL; - dev = conf->devlist; - err = -EINVAL; - rdev_for_each(rdev1, mddev) { - int j = rdev1->raid_disk; - - if (mddev->level == 10) { - /* taking over a raid10-n2 array */ - j /= 2; - rdev1->new_raid_disk = j; - } - - if (mddev->level == 1) { - /* taiking over a raid1 array- - * we have only one active disk - */ - j = 0; - rdev1->new_raid_disk = j; - } - - if (j < 0 || j >= mddev->raid_disks) { - printk(KERN_ERR "md/raid0:%s: bad disk number %d - " - "aborting!\n", mdname(mddev), j); - goto abort; - } - if (dev[j]) { - printk(KERN_ERR "md/raid0:%s: multiple devices for %d - " - "aborting!\n", mdname(mddev), j); - goto abort; - } - dev[j] = rdev1; - - disk_stack_limits(mddev->gendisk, rdev1->bdev, - rdev1->data_offset << 9); - - if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) - conf->has_merge_bvec = 1; - - if (!smallest || (rdev1->sectors < smallest->sectors)) - smallest = rdev1; - cnt++; - } - if (cnt != mddev->raid_disks) { - printk(KERN_ERR "md/raid0:%s: too few disks (%d of %d) - " - "aborting!\n", mdname(mddev), cnt, mddev->raid_disks); - goto abort; - } - zone->nb_dev = cnt; - zone->zone_end = smallest->sectors * cnt; - - curr_zone_end = zone->zone_end; - - /* now do the other zones */ - for (i = 1; i < conf->nr_strip_zones; i++) - { - int j; - - zone = conf->strip_zone + i; - dev = conf->devlist + i * mddev->raid_disks; - - pr_debug("md/raid0:%s: zone %d\n", mdname(mddev), i); - zone->dev_start = smallest->sectors; - smallest = NULL; - c = 0; - - for (j=0; j<cnt; j++) { - rdev = conf->devlist[j]; - if (rdev->sectors <= zone->dev_start) { - pr_debug("md/raid0:%s: checking %s ... nope\n", - mdname(mddev), - bdevname(rdev->bdev, b)); - continue; - } - pr_debug("md/raid0:%s: checking %s ..." 
- " contained as device %d\n", - mdname(mddev), - bdevname(rdev->bdev, b), c); - dev[c] = rdev; - c++; - if (!smallest || rdev->sectors < smallest->sectors) { - smallest = rdev; - pr_debug("md/raid0:%s: (%llu) is smallest!.\n", - mdname(mddev), - (unsigned long long)rdev->sectors); - } - } - - zone->nb_dev = c; - sectors = (smallest->sectors - zone->dev_start) * c; - pr_debug("md/raid0:%s: zone->nb_dev: %d, sectors: %llu\n", - mdname(mddev), - zone->nb_dev, (unsigned long long)sectors); - - curr_zone_end += sectors; - zone->zone_end = curr_zone_end; - - pr_debug("md/raid0:%s: current zone start: %llu\n", - mdname(mddev), - (unsigned long long)smallest->sectors); - } - mddev->queue->backing_dev_info.congested_fn = raid0_congested; - mddev->queue->backing_dev_info.congested_data = mddev; - - /* - * now since we have the hard sector sizes, we can make sure - * chunk size is a multiple of that sector size - */ - if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) { - printk(KERN_ERR "md/raid0:%s: chunk_size of %d not valid\n", - mdname(mddev), - mddev->chunk_sectors << 9); - goto abort; - } - - blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); - blk_queue_io_opt(mddev->queue, - (mddev->chunk_sectors << 9) * mddev->raid_disks); - - pr_debug("md/raid0:%s: done.\n", mdname(mddev)); - *private_conf = conf; - - return 0; -abort: - kfree(conf->strip_zone); - kfree(conf->devlist); - kfree(conf); - *private_conf = NULL; - return err; -} - -/* Find the zone which holds a particular offset - * Update *sectorp to be an offset in that zone - */ -static struct strip_zone *find_zone(struct r0conf *conf, - sector_t *sectorp) -{ - int i; - struct strip_zone *z = conf->strip_zone; - sector_t sector = *sectorp; - - for (i = 0; i < conf->nr_strip_zones; i++) - if (sector < z[i].zone_end) { - if (i) - *sectorp = sector - z[i-1].zone_end; - return z + i; - } - BUG(); -} - -/* - * remaps the bio to the target device. we separate two flows. - * power 2 flow and a general flow for the sake of perfromance -*/ -static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone, - sector_t sector, sector_t *sector_offset) -{ - unsigned int sect_in_chunk; - sector_t chunk; - struct r0conf *conf = mddev->private; - int raid_disks = conf->strip_zone[0].nb_dev; - unsigned int chunk_sects = mddev->chunk_sectors; - - if (is_power_of_2(chunk_sects)) { - int chunksect_bits = ffz(~chunk_sects); - /* find the sector offset inside the chunk */ - sect_in_chunk = sector & (chunk_sects - 1); - sector >>= chunksect_bits; - /* chunk in zone */ - chunk = *sector_offset; - /* quotient is the chunk in real device*/ - sector_div(chunk, zone->nb_dev << chunksect_bits); - } else{ - sect_in_chunk = sector_div(sector, chunk_sects); - chunk = *sector_offset; - sector_div(chunk, chunk_sects * zone->nb_dev); - } - /* - * position the bio over the real device - * real sector = chunk in device + starting of zone - * + the position in the chunk - */ - *sector_offset = (chunk * chunk_sects) + sect_in_chunk; - return conf->devlist[(zone - conf->strip_zone)*raid_disks - + sector_div(sector, zone->nb_dev)]; -} - -/** - * raid0_mergeable_bvec -- tell bio layer if two requests can be merged - * @q: request queue - * @bvm: properties of new bio - * @biovec: the request that could be merged to it. 
- * - * Return amount of bytes we can accept at this offset - */ -static int raid0_mergeable_bvec(struct request_queue *q, - struct bvec_merge_data *bvm, - struct bio_vec *biovec) -{ - struct mddev *mddev = q->queuedata; - struct r0conf *conf = mddev->private; - sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); - sector_t sector_offset = sector; - int max; - unsigned int chunk_sectors = mddev->chunk_sectors; - unsigned int bio_sectors = bvm->bi_size >> 9; - struct strip_zone *zone; - struct md_rdev *rdev; - struct request_queue *subq; - - if (is_power_of_2(chunk_sectors)) - max = (chunk_sectors - ((sector & (chunk_sectors-1)) - + bio_sectors)) << 9; - else - max = (chunk_sectors - (sector_div(sector, chunk_sectors) - + bio_sectors)) << 9; - if (max < 0) - max = 0; /* bio_add cannot handle a negative return */ - if (max <= biovec->bv_len && bio_sectors == 0) - return biovec->bv_len; - if (max < biovec->bv_len) - /* too small already, no need to check further */ - return max; - if (!conf->has_merge_bvec) - return max; - - /* May need to check subordinate device */ - sector = sector_offset; - zone = find_zone(mddev->private, §or_offset); - rdev = map_sector(mddev, zone, sector, §or_offset); - subq = bdev_get_queue(rdev->bdev); - if (subq->merge_bvec_fn) { - bvm->bi_bdev = rdev->bdev; - bvm->bi_sector = sector_offset + zone->dev_start + - rdev->data_offset; - return min(max, subq->merge_bvec_fn(subq, bvm, biovec)); - } else - return max; -} - -static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks) -{ - sector_t array_sectors = 0; - struct md_rdev *rdev; - - WARN_ONCE(sectors || raid_disks, - "%s does not support generic reshape\n", __func__); - - rdev_for_each(rdev, mddev) - array_sectors += rdev->sectors; - - return array_sectors; -} - -static int raid0_stop(struct mddev *mddev); - -static int raid0_run(struct mddev *mddev) -{ - struct r0conf *conf; - int ret; - - if (mddev->chunk_sectors == 0) { - printk(KERN_ERR "md/raid0:%s: chunk size must be set.\n", - mdname(mddev)); - return -EINVAL; - } - if (md_check_no_bitmap(mddev)) - return -EINVAL; - blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); - - /* if private is not null, we are here after takeover */ - if (mddev->private == NULL) { - ret = create_strip_zones(mddev, &conf); - if (ret < 0) - return ret; - mddev->private = conf; - } - conf = mddev->private; - - /* calculate array device size */ - md_set_array_sectors(mddev, raid0_size(mddev, 0, 0)); - - printk(KERN_INFO "md/raid0:%s: md_size is %llu sectors.\n", - mdname(mddev), - (unsigned long long)mddev->array_sectors); - /* calculate the max read-ahead size. - * For read-ahead of large files to be effective, we need to - * readahead at least twice a whole stripe. i.e. number of devices - * multiplied by chunk size times 2. - * If an individual device has an ra_pages greater than the - * chunk size, then we will not drive that device as hard as it - * wants. We consider this a configuration error: a larger - * chunksize should be used in that case. 
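- * (For example, 4 disks with 512KB chunks want at least 4MB of read-ahead.)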
- */ - { - int stripe = mddev->raid_disks * - (mddev->chunk_sectors << 9) / PAGE_SIZE; - if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) - mddev->queue->backing_dev_info.ra_pages = 2* stripe; - } - - blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); - dump_zones(mddev); - - ret = md_integrity_register(mddev); - if (ret) - raid0_stop(mddev); - - return ret; -} - -static int raid0_stop(struct mddev *mddev) -{ - struct r0conf *conf = mddev->private; - - blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ - kfree(conf->strip_zone); - kfree(conf->devlist); - kfree(conf); - mddev->private = NULL; - return 0; -} - -/* - * Is io distribute over 1 or more chunks ? -*/ -static inline int is_io_in_chunk_boundary(struct mddev *mddev, - unsigned int chunk_sects, struct bio *bio) -{ - if (likely(is_power_of_2(chunk_sects))) { - return chunk_sects >= ((bio->bi_sector & (chunk_sects-1)) - + (bio->bi_size >> 9)); - } else{ - sector_t sector = bio->bi_sector; - return chunk_sects >= (sector_div(sector, chunk_sects) - + (bio->bi_size >> 9)); - } -} - -static void raid0_make_request(struct mddev *mddev, struct bio *bio) -{ - unsigned int chunk_sects; - sector_t sector_offset; - struct strip_zone *zone; - struct md_rdev *tmp_dev; - - if (unlikely(bio->bi_rw & REQ_FLUSH)) { - md_flush_request(mddev, bio); - return; - } - - chunk_sects = mddev->chunk_sectors; - if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) { - sector_t sector = bio->bi_sector; - struct bio_pair *bp; - /* Sanity check -- queue functions should prevent this happening */ - if (bio->bi_vcnt != 1 || - bio->bi_idx != 0) - goto bad_map; - /* This is a one page bio that upper layers - * refuse to split for us, so we need to split it. - */ - if (likely(is_power_of_2(chunk_sects))) - bp = bio_split(bio, chunk_sects - (sector & - (chunk_sects-1))); - else - bp = bio_split(bio, chunk_sects - - sector_div(sector, chunk_sects)); - raid0_make_request(mddev, &bp->bio1); - raid0_make_request(mddev, &bp->bio2); - bio_pair_release(bp); - return; - } - - sector_offset = bio->bi_sector; - zone = find_zone(mddev->private, §or_offset); - tmp_dev = map_sector(mddev, zone, bio->bi_sector, - §or_offset); - bio->bi_bdev = tmp_dev->bdev; - bio->bi_sector = sector_offset + zone->dev_start + - tmp_dev->data_offset; - - generic_make_request(bio); - return; - -bad_map: - printk("md/raid0:%s: make_request bug: can't convert block across chunks" - " or bigger than %dk %llu %d\n", - mdname(mddev), chunk_sects / 2, - (unsigned long long)bio->bi_sector, bio->bi_size >> 10); - - bio_io_error(bio); - return; -} - -static void raid0_status(struct seq_file *seq, struct mddev *mddev) -{ - seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2); - return; -} - -static void *raid0_takeover_raid45(struct mddev *mddev) -{ - struct md_rdev *rdev; - struct r0conf *priv_conf; - - if (mddev->degraded != 1) { - printk(KERN_ERR "md/raid0:%s: raid5 must be degraded! 
Degraded disks: %d\n", - mdname(mddev), - mddev->degraded); - return ERR_PTR(-EINVAL); - } - - rdev_for_each(rdev, mddev) { - /* check slot number for a disk */ - if (rdev->raid_disk == mddev->raid_disks-1) { - printk(KERN_ERR "md/raid0:%s: raid5 must have missing parity disk!\n", - mdname(mddev)); - return ERR_PTR(-EINVAL); - } - } - - /* Set new parameters */ - mddev->new_level = 0; - mddev->new_layout = 0; - mddev->new_chunk_sectors = mddev->chunk_sectors; - mddev->raid_disks--; - mddev->delta_disks = -1; - /* make sure it will be not marked as dirty */ - mddev->recovery_cp = MaxSector; - - create_strip_zones(mddev, &priv_conf); - return priv_conf; -} - -static void *raid0_takeover_raid10(struct mddev *mddev) -{ - struct r0conf *priv_conf; - - /* Check layout: - * - far_copies must be 1 - * - near_copies must be 2 - * - disks number must be even - * - all mirrors must be already degraded - */ - if (mddev->layout != ((1 << 8) + 2)) { - printk(KERN_ERR "md/raid0:%s:: Raid0 cannot takover layout: 0x%x\n", - mdname(mddev), - mddev->layout); - return ERR_PTR(-EINVAL); - } - if (mddev->raid_disks & 1) { - printk(KERN_ERR "md/raid0:%s: Raid0 cannot takover Raid10 with odd disk number.\n", - mdname(mddev)); - return ERR_PTR(-EINVAL); - } - if (mddev->degraded != (mddev->raid_disks>>1)) { - printk(KERN_ERR "md/raid0:%s: All mirrors must be already degraded!\n", - mdname(mddev)); - return ERR_PTR(-EINVAL); - } - - /* Set new parameters */ - mddev->new_level = 0; - mddev->new_layout = 0; - mddev->new_chunk_sectors = mddev->chunk_sectors; - mddev->delta_disks = - mddev->raid_disks / 2; - mddev->raid_disks += mddev->delta_disks; - mddev->degraded = 0; - /* make sure it will be not marked as dirty */ - mddev->recovery_cp = MaxSector; - - create_strip_zones(mddev, &priv_conf); - return priv_conf; -} - -static void *raid0_takeover_raid1(struct mddev *mddev) -{ - struct r0conf *priv_conf; - int chunksect; - - /* Check layout: - * - (N - 1) mirror drives must be already faulty - */ - if ((mddev->raid_disks - 1) != mddev->degraded) { - printk(KERN_ERR "md/raid0:%s: (N - 1) mirrors drives must be already faulty!\n", - mdname(mddev)); - return ERR_PTR(-EINVAL); - } - - /* - * a raid1 doesn't have the notion of chunk size, so - * figure out the largest suitable size we can use. - */ - chunksect = 64 * 2; /* 64K by default */ - - /* The array must be an exact multiple of chunksize */ - while (chunksect && (mddev->array_sectors & (chunksect - 1))) - chunksect >>= 1; - - if ((chunksect << 9) < PAGE_SIZE) - /* array size does not allow a suitable chunk size */ - return ERR_PTR(-EINVAL); - - /* Set new parameters */ - mddev->new_level = 0; - mddev->new_layout = 0; - mddev->new_chunk_sectors = chunksect; - mddev->chunk_sectors = chunksect; - mddev->delta_disks = 1 - mddev->raid_disks; - mddev->raid_disks = 1; - /* make sure it will be not marked as dirty */ - mddev->recovery_cp = MaxSector; - - create_strip_zones(mddev, &priv_conf); - return priv_conf; -} - -static void *raid0_takeover(struct mddev *mddev) -{ - /* raid0 can take over: - * raid4 - if all data disks are active. 
- * raid5 - providing it is Raid4 layout and one disk is faulty - * raid10 - assuming we have all necessary active disks - * raid1 - with (N -1) mirror drives faulty - */ - if (mddev->level == 4) - return raid0_takeover_raid45(mddev); - - if (mddev->level == 5) { - if (mddev->layout == ALGORITHM_PARITY_N) - return raid0_takeover_raid45(mddev); - - printk(KERN_ERR "md/raid0:%s: Raid can only takeover Raid5 with layout: %d\n", - mdname(mddev), ALGORITHM_PARITY_N); - } - - if (mddev->level == 10) - return raid0_takeover_raid10(mddev); - - if (mddev->level == 1) - return raid0_takeover_raid1(mddev); - - printk(KERN_ERR "Takeover from raid%i to raid0 not supported\n", - mddev->level); - - return ERR_PTR(-EINVAL); -} - -static void raid0_quiesce(struct mddev *mddev, int state) -{ -} - -static struct md_personality raid0_personality= -{ - .name = "raid0", - .level = 0, - .owner = THIS_MODULE, - .make_request = raid0_make_request, - .run = raid0_run, - .stop = raid0_stop, - .status = raid0_status, - .size = raid0_size, - .takeover = raid0_takeover, - .quiesce = raid0_quiesce, -}; - -static int __init raid0_init (void) -{ - return register_md_personality (&raid0_personality); -} - -static void raid0_exit (void) -{ - unregister_md_personality (&raid0_personality); -} - -module_init(raid0_init); -module_exit(raid0_exit); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("RAID0 (striping) personality for MD"); -MODULE_ALIAS("md-personality-2"); /* RAID0 */ -MODULE_ALIAS("md-raid0"); -MODULE_ALIAS("md-level-0"); diff --git a/ANDROID_3.4.5/drivers/md/raid0.h b/ANDROID_3.4.5/drivers/md/raid0.h deleted file mode 100644 index 05539d9c..00000000 --- a/ANDROID_3.4.5/drivers/md/raid0.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _RAID0_H -#define _RAID0_H - -struct strip_zone { - sector_t zone_end; /* Start of the next zone (in sectors) */ - sector_t dev_start; /* Zone offset in real dev (in sectors) */ - int nb_dev; /* # of devices attached to the zone */ -}; - -struct r0conf { - struct strip_zone *strip_zone; - struct md_rdev **devlist; /* lists of rdevs, pointed to - * by strip_zone->dev */ - int nr_strip_zones; - int has_merge_bvec; /* at least one member has - * a merge_bvec_fn */ -}; - -#endif diff --git a/ANDROID_3.4.5/drivers/md/raid1.c b/ANDROID_3.4.5/drivers/md/raid1.c deleted file mode 100644 index d7e95772..00000000 --- a/ANDROID_3.4.5/drivers/md/raid1.c +++ /dev/null @@ -1,2953 +0,0 @@ -/* - * raid1.c : Multiple Devices driver for Linux - * - * Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat - * - * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman - * - * RAID-1 management functions. - * - * Better read-balancing code written by Mika Kuoppala <miku@iki.fi>, 2000 - * - * Fixes to reconstruction by Jakob Østergaard" <jakob@ostenfeld.dk> - * Various fixes by Neil Brown <neilb@cse.unsw.edu.au> - * - * Changes by Peter T. Breuer <ptb@it.uc3m.es> 31/1/2003 to support - * bitmapped intelligence in resync: - * - * - bitmap marked during normal i/o - * - bitmap used to skip nondirty blocks during sync - * - * Additions to bitmap code, (C) 2003-2004 Paul Clements, SteelEye Technology: - * - persistent bitmap code - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. 
- * - * You should have received a copy of the GNU General Public License - * (for example /usr/src/linux/COPYING); if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/slab.h> -#include <linux/delay.h> -#include <linux/blkdev.h> -#include <linux/module.h> -#include <linux/seq_file.h> -#include <linux/ratelimit.h> -#include "md.h" -#include "raid1.h" -#include "bitmap.h" - -/* - * Number of guaranteed r1bios in case of extreme VM load: - */ -#define NR_RAID1_BIOS 256 - -/* When there are this many requests queue to be written by - * the raid1 thread, we become 'congested' to provide back-pressure - * for writeback. - */ -static int max_queued_requests = 1024; - -static void allow_barrier(struct r1conf *conf); -static void lower_barrier(struct r1conf *conf); - -static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) -{ - struct pool_info *pi = data; - int size = offsetof(struct r1bio, bios[pi->raid_disks]); - - /* allocate a r1bio with room for raid_disks entries in the bios array */ - return kzalloc(size, gfp_flags); -} - -static void r1bio_pool_free(void *r1_bio, void *data) -{ - kfree(r1_bio); -} - -#define RESYNC_BLOCK_SIZE (64*1024) -//#define RESYNC_BLOCK_SIZE PAGE_SIZE -#define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9) -#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) -#define RESYNC_WINDOW (2048*1024) - -static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) -{ - struct pool_info *pi = data; - struct page *page; - struct r1bio *r1_bio; - struct bio *bio; - int i, j; - - r1_bio = r1bio_pool_alloc(gfp_flags, pi); - if (!r1_bio) - return NULL; - - /* - * Allocate bios : 1 for reading, n-1 for writing - */ - for (j = pi->raid_disks ; j-- ; ) { - bio = bio_kmalloc(gfp_flags, RESYNC_PAGES); - if (!bio) - goto out_free_bio; - r1_bio->bios[j] = bio; - } - /* - * Allocate RESYNC_PAGES data pages and attach them to - * the first bio. - * If this is a user-requested check/repair, allocate - * RESYNC_PAGES for each bio. 
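- * That way the copies read from each disk can be compared; otherwise
- * the pages are shared between all the bios.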
- */ - if (test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) - j = pi->raid_disks; - else - j = 1; - while(j--) { - bio = r1_bio->bios[j]; - for (i = 0; i < RESYNC_PAGES; i++) { - page = alloc_page(gfp_flags); - if (unlikely(!page)) - goto out_free_pages; - - bio->bi_io_vec[i].bv_page = page; - bio->bi_vcnt = i+1; - } - } - /* If not user-requests, copy the page pointers to all bios */ - if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) { - for (i=0; i<RESYNC_PAGES ; i++) - for (j=1; j<pi->raid_disks; j++) - r1_bio->bios[j]->bi_io_vec[i].bv_page = - r1_bio->bios[0]->bi_io_vec[i].bv_page; - } - - r1_bio->master_bio = NULL; - - return r1_bio; - -out_free_pages: - for (j=0 ; j < pi->raid_disks; j++) - for (i=0; i < r1_bio->bios[j]->bi_vcnt ; i++) - put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page); - j = -1; -out_free_bio: - while (++j < pi->raid_disks) - bio_put(r1_bio->bios[j]); - r1bio_pool_free(r1_bio, data); - return NULL; -} - -static void r1buf_pool_free(void *__r1_bio, void *data) -{ - struct pool_info *pi = data; - int i,j; - struct r1bio *r1bio = __r1_bio; - - for (i = 0; i < RESYNC_PAGES; i++) - for (j = pi->raid_disks; j-- ;) { - if (j == 0 || - r1bio->bios[j]->bi_io_vec[i].bv_page != - r1bio->bios[0]->bi_io_vec[i].bv_page) - safe_put_page(r1bio->bios[j]->bi_io_vec[i].bv_page); - } - for (i=0 ; i < pi->raid_disks; i++) - bio_put(r1bio->bios[i]); - - r1bio_pool_free(r1bio, data); -} - -static void put_all_bios(struct r1conf *conf, struct r1bio *r1_bio) -{ - int i; - - for (i = 0; i < conf->raid_disks * 2; i++) { - struct bio **bio = r1_bio->bios + i; - if (!BIO_SPECIAL(*bio)) - bio_put(*bio); - *bio = NULL; - } -} - -static void free_r1bio(struct r1bio *r1_bio) -{ - struct r1conf *conf = r1_bio->mddev->private; - - put_all_bios(conf, r1_bio); - mempool_free(r1_bio, conf->r1bio_pool); -} - -static void put_buf(struct r1bio *r1_bio) -{ - struct r1conf *conf = r1_bio->mddev->private; - int i; - - for (i = 0; i < conf->raid_disks * 2; i++) { - struct bio *bio = r1_bio->bios[i]; - if (bio->bi_end_io) - rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev); - } - - mempool_free(r1_bio, conf->r1buf_pool); - - lower_barrier(conf); -} - -static void reschedule_retry(struct r1bio *r1_bio) -{ - unsigned long flags; - struct mddev *mddev = r1_bio->mddev; - struct r1conf *conf = mddev->private; - - spin_lock_irqsave(&conf->device_lock, flags); - list_add(&r1_bio->retry_list, &conf->retry_list); - conf->nr_queued ++; - spin_unlock_irqrestore(&conf->device_lock, flags); - - wake_up(&conf->wait_barrier); - md_wakeup_thread(mddev->thread); -} - -/* - * raid_end_bio_io() is called when we have finished servicing a mirrored - * operation and are ready to return a success/failure code to the buffer - * cache layer. - */ -static void call_bio_endio(struct r1bio *r1_bio) -{ - struct bio *bio = r1_bio->master_bio; - int done; - struct r1conf *conf = r1_bio->mddev->private; - - if (bio->bi_phys_segments) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - bio->bi_phys_segments--; - done = (bio->bi_phys_segments == 0); - spin_unlock_irqrestore(&conf->device_lock, flags); - } else - done = 1; - - if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - if (done) { - bio_endio(bio, 0); - /* - * Wake up any possible resync thread that waits for the device - * to go idle. 
- */ - allow_barrier(conf); - } -} - -static void raid_end_bio_io(struct r1bio *r1_bio) -{ - struct bio *bio = r1_bio->master_bio; - - /* if nobody has done the final endio yet, do it now */ - if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { - pr_debug("raid1: sync end %s on sectors %llu-%llu\n", - (bio_data_dir(bio) == WRITE) ? "write" : "read", - (unsigned long long) bio->bi_sector, - (unsigned long long) bio->bi_sector + - (bio->bi_size >> 9) - 1); - - call_bio_endio(r1_bio); - } - free_r1bio(r1_bio); -} - -/* - * Update disk head position estimator based on IRQ completion info. - */ -static inline void update_head_pos(int disk, struct r1bio *r1_bio) -{ - struct r1conf *conf = r1_bio->mddev->private; - - conf->mirrors[disk].head_position = - r1_bio->sector + (r1_bio->sectors); -} - -/* - * Find the disk number which triggered given bio - */ -static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio) -{ - int mirror; - struct r1conf *conf = r1_bio->mddev->private; - int raid_disks = conf->raid_disks; - - for (mirror = 0; mirror < raid_disks * 2; mirror++) - if (r1_bio->bios[mirror] == bio) - break; - - BUG_ON(mirror == raid_disks * 2); - update_head_pos(mirror, r1_bio); - - return mirror; -} - -static void raid1_end_read_request(struct bio *bio, int error) -{ - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct r1bio *r1_bio = bio->bi_private; - int mirror; - struct r1conf *conf = r1_bio->mddev->private; - - mirror = r1_bio->read_disk; - /* - * this branch is our 'one mirror IO has finished' event handler: - */ - update_head_pos(mirror, r1_bio); - - if (uptodate) - set_bit(R1BIO_Uptodate, &r1_bio->state); - else { - /* If all other devices have failed, we want to return - * the error upwards rather than fail the last device. 
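- * (Failing it would take the whole array down.)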
- * Here we redefine "uptodate" to mean "Don't want to retry" - */ - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - if (r1_bio->mddev->degraded == conf->raid_disks || - (r1_bio->mddev->degraded == conf->raid_disks-1 && - !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))) - uptodate = 1; - spin_unlock_irqrestore(&conf->device_lock, flags); - } - - if (uptodate) - raid_end_bio_io(r1_bio); - else { - /* - * oops, read error: - */ - char b[BDEVNAME_SIZE]; - printk_ratelimited( - KERN_ERR "md/raid1:%s: %s: " - "rescheduling sector %llu\n", - mdname(conf->mddev), - bdevname(conf->mirrors[mirror].rdev->bdev, - b), - (unsigned long long)r1_bio->sector); - set_bit(R1BIO_ReadError, &r1_bio->state); - reschedule_retry(r1_bio); - } - - rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); -} - -static void close_write(struct r1bio *r1_bio) -{ - /* it really is the end of this request */ - if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { - /* free extra copy of the data pages */ - int i = r1_bio->behind_page_count; - while (i--) - safe_put_page(r1_bio->behind_bvecs[i].bv_page); - kfree(r1_bio->behind_bvecs); - r1_bio->behind_bvecs = NULL; - } - /* clear the bitmap if all writes complete successfully */ - bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, - r1_bio->sectors, - !test_bit(R1BIO_Degraded, &r1_bio->state), - test_bit(R1BIO_BehindIO, &r1_bio->state)); - md_write_end(r1_bio->mddev); -} - -static void r1_bio_write_done(struct r1bio *r1_bio) -{ - if (!atomic_dec_and_test(&r1_bio->remaining)) - return; - - if (test_bit(R1BIO_WriteError, &r1_bio->state)) - reschedule_retry(r1_bio); - else { - close_write(r1_bio); - if (test_bit(R1BIO_MadeGood, &r1_bio->state)) - reschedule_retry(r1_bio); - else - raid_end_bio_io(r1_bio); - } -} - -static void raid1_end_write_request(struct bio *bio, int error) -{ - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct r1bio *r1_bio = bio->bi_private; - int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state); - struct r1conf *conf = r1_bio->mddev->private; - struct bio *to_put = NULL; - - mirror = find_bio_disk(r1_bio, bio); - - /* - * 'one mirror IO has finished' event handler: - */ - if (!uptodate) { - set_bit(WriteErrorSeen, - &conf->mirrors[mirror].rdev->flags); - if (!test_and_set_bit(WantReplacement, - &conf->mirrors[mirror].rdev->flags)) - set_bit(MD_RECOVERY_NEEDED, & - conf->mddev->recovery); - - set_bit(R1BIO_WriteError, &r1_bio->state); - } else { - /* - * Set R1BIO_Uptodate in our master bio, so that we - * will return a good error code for to the higher - * levels even if IO on some other mirrored buffer - * fails. - * - * The 'master' represents the composite IO operation - * to user-side. So if something waits for IO, then it - * will wait for the 'master' bio. - */ - sector_t first_bad; - int bad_sectors; - - r1_bio->bios[mirror] = NULL; - to_put = bio; - set_bit(R1BIO_Uptodate, &r1_bio->state); - - /* Maybe we can clear some bad blocks. */ - if (is_badblock(conf->mirrors[mirror].rdev, - r1_bio->sector, r1_bio->sectors, - &first_bad, &bad_sectors)) { - r1_bio->bios[mirror] = IO_MADE_GOOD; - set_bit(R1BIO_MadeGood, &r1_bio->state); - } - } - - if (behind) { - if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags)) - atomic_dec(&r1_bio->behind_remaining); - - /* - * In behind mode, we ACK the master bio once the I/O - * has safely reached all non-writemostly - * disks. 
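- * (The write-mostly devices are allowed to lag behind.)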
Setting the Returned bit ensures that this - * gets done only once -- we don't ever want to return - * -EIO here, instead we'll wait - */ - if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) && - test_bit(R1BIO_Uptodate, &r1_bio->state)) { - /* Maybe we can return now */ - if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { - struct bio *mbio = r1_bio->master_bio; - pr_debug("raid1: behind end write sectors" - " %llu-%llu\n", - (unsigned long long) mbio->bi_sector, - (unsigned long long) mbio->bi_sector + - (mbio->bi_size >> 9) - 1); - call_bio_endio(r1_bio); - } - } - } - if (r1_bio->bios[mirror] == NULL) - rdev_dec_pending(conf->mirrors[mirror].rdev, - conf->mddev); - - /* - * Let's see if all mirrored write operations have finished - * already. - */ - r1_bio_write_done(r1_bio); - - if (to_put) - bio_put(to_put); -} - - -/* - * This routine returns the disk from which the requested read should - * be done. There is a per-array 'next expected sequential IO' sector - * number - if this matches on the next IO then we use the last disk. - * There is also a per-disk 'last know head position' sector that is - * maintained from IRQ contexts, both the normal and the resync IO - * completion handlers update this position correctly. If there is no - * perfect sequential match then we pick the disk whose head is closest. - * - * If there are 2 mirrors in the same 2 devices, performance degrades - * because position is mirror, not device based. - * - * The rdev for the device selected will have nr_pending incremented. - */ -static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors) -{ - const sector_t this_sector = r1_bio->sector; - int sectors; - int best_good_sectors; - int start_disk; - int best_disk; - int i; - sector_t best_dist; - struct md_rdev *rdev; - int choose_first; - - rcu_read_lock(); - /* - * Check if we can balance. We can balance on the whole - * device if no resync is going on, or below the resync window. - * We take the first readable disk when above the resync window. - */ - retry: - sectors = r1_bio->sectors; - best_disk = -1; - best_dist = MaxSector; - best_good_sectors = 0; - - if (conf->mddev->recovery_cp < MaxSector && - (this_sector + sectors >= conf->next_resync)) { - choose_first = 1; - start_disk = 0; - } else { - choose_first = 0; - start_disk = conf->last_used; - } - - for (i = 0 ; i < conf->raid_disks * 2 ; i++) { - sector_t dist; - sector_t first_bad; - int bad_sectors; - - int disk = start_disk + i; - if (disk >= conf->raid_disks) - disk -= conf->raid_disks; - - rdev = rcu_dereference(conf->mirrors[disk].rdev); - if (r1_bio->bios[disk] == IO_BLOCKED - || rdev == NULL - || test_bit(Unmerged, &rdev->flags) - || test_bit(Faulty, &rdev->flags)) - continue; - if (!test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < this_sector + sectors) - continue; - if (test_bit(WriteMostly, &rdev->flags)) { - /* Don't balance among write-mostly, just - * use the first as a last resort */ - if (best_disk < 0) { - if (is_badblock(rdev, this_sector, sectors, - &first_bad, &bad_sectors)) { - if (first_bad < this_sector) - /* Cannot use this */ - continue; - best_good_sectors = first_bad - this_sector; - } else - best_good_sectors = sectors; - best_disk = disk; - } - continue; - } - /* This is a reasonable device to use. It might - * even be best. 
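The selection loop in read_balance() above reduces to a simple heuristic: take a sequential or idle disk immediately, otherwise remember the disk whose last known head position is closest to the requested sector. A stripped-down userspace model of just that heuristic (bad blocks, write-mostly handling and the resync window are deliberately omitted; all names are illustrative):

#include <stdio.h>
#include <stdlib.h>

struct disk_model {
	long long head_position;   /* last known head position */
	int nr_pending;            /* outstanding I/O on this disk */
	int usable;                /* stands in for In_sync && !Faulty */
};

static int pick_read_disk(const struct disk_model *d, int ndisks,
			  long long this_sector, long long next_seq_sect)
{
	int best = -1;
	long long best_dist = -1;

	for (int i = 0; i < ndisks; i++) {
		if (!d[i].usable)
			continue;
		long long dist = llabs(this_sector - d[i].head_position);
		/* sequential continuation or an idle disk: take it now */
		if (next_seq_sect == this_sector || dist == 0 ||
		    d[i].nr_pending == 0)
			return i;
		if (best < 0 || dist < best_dist) {
			best_dist = dist;
			best = i;
		}
	}
	return best;               /* -1 means nothing readable */
}

int main(void)
{
	struct disk_model disks[2] = {
		{ .head_position = 5000, .nr_pending = 3, .usable = 1 },
		{ .head_position = 1200, .nr_pending = 2, .usable = 1 },
	};

	printf("read from disk %d\n", pick_read_disk(disks, 2, 1000, 0));
	return 0;
}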
- */ - if (is_badblock(rdev, this_sector, sectors, - &first_bad, &bad_sectors)) { - if (best_dist < MaxSector) - /* already have a better device */ - continue; - if (first_bad <= this_sector) { - /* cannot read here. If this is the 'primary' - * device, then we must not read beyond - * bad_sectors from another device.. - */ - bad_sectors -= (this_sector - first_bad); - if (choose_first && sectors > bad_sectors) - sectors = bad_sectors; - if (best_good_sectors > sectors) - best_good_sectors = sectors; - - } else { - sector_t good_sectors = first_bad - this_sector; - if (good_sectors > best_good_sectors) { - best_good_sectors = good_sectors; - best_disk = disk; - } - if (choose_first) - break; - } - continue; - } else - best_good_sectors = sectors; - - dist = abs(this_sector - conf->mirrors[disk].head_position); - if (choose_first - /* Don't change to another disk for sequential reads */ - || conf->next_seq_sect == this_sector - || dist == 0 - /* If device is idle, use it */ - || atomic_read(&rdev->nr_pending) == 0) { - best_disk = disk; - break; - } - if (dist < best_dist) { - best_dist = dist; - best_disk = disk; - } - } - - if (best_disk >= 0) { - rdev = rcu_dereference(conf->mirrors[best_disk].rdev); - if (!rdev) - goto retry; - atomic_inc(&rdev->nr_pending); - if (test_bit(Faulty, &rdev->flags)) { - /* cannot risk returning a device that failed - * before we inc'ed nr_pending - */ - rdev_dec_pending(rdev, conf->mddev); - goto retry; - } - sectors = best_good_sectors; - conf->next_seq_sect = this_sector + sectors; - conf->last_used = best_disk; - } - rcu_read_unlock(); - *max_sectors = sectors; - - return best_disk; -} - -static int raid1_mergeable_bvec(struct request_queue *q, - struct bvec_merge_data *bvm, - struct bio_vec *biovec) -{ - struct mddev *mddev = q->queuedata; - struct r1conf *conf = mddev->private; - sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); - int max = biovec->bv_len; - - if (mddev->merge_check_needed) { - int disk; - rcu_read_lock(); - for (disk = 0; disk < conf->raid_disks * 2; disk++) { - struct md_rdev *rdev = rcu_dereference( - conf->mirrors[disk].rdev); - if (rdev && !test_bit(Faulty, &rdev->flags)) { - struct request_queue *q = - bdev_get_queue(rdev->bdev); - if (q->merge_bvec_fn) { - bvm->bi_sector = sector + - rdev->data_offset; - bvm->bi_bdev = rdev->bdev; - max = min(max, q->merge_bvec_fn( - q, bvm, biovec)); - } - } - } - rcu_read_unlock(); - } - return max; - -} - -int md_raid1_congested(struct mddev *mddev, int bits) -{ - struct r1conf *conf = mddev->private; - int i, ret = 0; - - if ((bits & (1 << BDI_async_congested)) && - conf->pending_count >= max_queued_requests) - return 1; - - rcu_read_lock(); - for (i = 0; i < conf->raid_disks * 2; i++) { - struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); - if (rdev && !test_bit(Faulty, &rdev->flags)) { - struct request_queue *q = bdev_get_queue(rdev->bdev); - - BUG_ON(!q); - - /* Note the '|| 1' - when read_balance prefers - * non-congested targets, it can be removed - */ - if ((bits & (1<<BDI_async_congested)) || 1) - ret |= bdi_congested(&q->backing_dev_info, bits); - else - ret &= bdi_congested(&q->backing_dev_info, bits); - } - } - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL_GPL(md_raid1_congested); - -static int raid1_congested(void *data, int bits) -{ - struct mddev *mddev = data; - - return mddev_congested(mddev, bits) || - md_raid1_congested(mddev, bits); -} - -static void flush_pending_writes(struct r1conf *conf) -{ - /* Any writes that have been queued but are 
awaiting - * bitmap updates get flushed here. - */ - spin_lock_irq(&conf->device_lock); - - if (conf->pending_bio_list.head) { - struct bio *bio; - bio = bio_list_get(&conf->pending_bio_list); - conf->pending_count = 0; - spin_unlock_irq(&conf->device_lock); - /* flush any pending bitmap writes to - * disk before proceeding w/ I/O */ - bitmap_unplug(conf->mddev->bitmap); - wake_up(&conf->wait_barrier); - - while (bio) { /* submit pending writes */ - struct bio *next = bio->bi_next; - bio->bi_next = NULL; - generic_make_request(bio); - bio = next; - } - } else - spin_unlock_irq(&conf->device_lock); -} - -/* Barriers.... - * Sometimes we need to suspend IO while we do something else, - * either some resync/recovery, or reconfigure the array. - * To do this we raise a 'barrier'. - * The 'barrier' is a counter that can be raised multiple times - * to count how many activities are happening which preclude - * normal IO. - * We can only raise the barrier if there is no pending IO. - * i.e. if nr_pending == 0. - * We choose only to raise the barrier if no-one is waiting for the - * barrier to go down. This means that as soon as an IO request - * is ready, no other operations which require a barrier will start - * until the IO request has had a chance. - * - * So: regular IO calls 'wait_barrier'. When that returns there - * is no backgroup IO happening, It must arrange to call - * allow_barrier when it has finished its IO. - * backgroup IO calls must call raise_barrier. Once that returns - * there is no normal IO happeing. It must arrange to call - * lower_barrier when the particular background IO completes. - */ -#define RESYNC_DEPTH 32 - -static void raise_barrier(struct r1conf *conf) -{ - spin_lock_irq(&conf->resync_lock); - - /* Wait until no block IO is waiting */ - wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, - conf->resync_lock, ); - - /* block any new IO from starting */ - conf->barrier++; - - /* Now wait for all pending IO to complete */ - wait_event_lock_irq(conf->wait_barrier, - !conf->nr_pending && conf->barrier < RESYNC_DEPTH, - conf->resync_lock, ); - - spin_unlock_irq(&conf->resync_lock); -} - -static void lower_barrier(struct r1conf *conf) -{ - unsigned long flags; - BUG_ON(conf->barrier <= 0); - spin_lock_irqsave(&conf->resync_lock, flags); - conf->barrier--; - spin_unlock_irqrestore(&conf->resync_lock, flags); - wake_up(&conf->wait_barrier); -} - -static void wait_barrier(struct r1conf *conf) -{ - spin_lock_irq(&conf->resync_lock); - if (conf->barrier) { - conf->nr_waiting++; - /* Wait for the barrier to drop. - * However if there are already pending - * requests (preventing the barrier from - * rising completely), and the - * pre-process bio queue isn't empty, - * then don't wait, as we need to empty - * that queue to get the nr_pending - * count down. - */ - wait_event_lock_irq(conf->wait_barrier, - !conf->barrier || - (conf->nr_pending && - current->bio_list && - !bio_list_empty(current->bio_list)), - conf->resync_lock, - ); - conf->nr_waiting--; - } - conf->nr_pending++; - spin_unlock_irq(&conf->resync_lock); -} - -static void allow_barrier(struct r1conf *conf) -{ - unsigned long flags; - spin_lock_irqsave(&conf->resync_lock, flags); - conf->nr_pending--; - spin_unlock_irqrestore(&conf->resync_lock, flags); - wake_up(&conf->wait_barrier); -} - -static void freeze_array(struct r1conf *conf) -{ - /* stop syncio and normal IO and wait for everything to - * go quite. 
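The barrier scheme described above is essentially three counters behind one lock: 'barrier' counts resync-style activities that exclude normal I/O, 'nr_pending' counts normal I/O in flight, and 'nr_waiting' counts normal I/O parked behind a barrier. A rough pthread-based model of the four entry points; it drops the RESYNC_DEPTH cap and the current->bio_list escape hatch, and the field names only mimic struct r1conf:

#include <pthread.h>
#include <stdio.h>

struct barrier_model {
	pthread_mutex_t lock;      /* plays the role of resync_lock */
	pthread_cond_t  wait;      /* plays the role of wait_barrier */
	int barrier;
	int nr_pending;
	int nr_waiting;
};

void model_raise_barrier(struct barrier_model *c)        /* resync entry */
{
	pthread_mutex_lock(&c->lock);
	while (c->nr_waiting)              /* wait until no normal IO waits */
		pthread_cond_wait(&c->wait, &c->lock);
	c->barrier++;                      /* block any new normal IO */
	while (c->nr_pending)              /* let pending IO drain */
		pthread_cond_wait(&c->wait, &c->lock);
	pthread_mutex_unlock(&c->lock);
}

void model_lower_barrier(struct barrier_model *c)        /* resync exit */
{
	pthread_mutex_lock(&c->lock);
	c->barrier--;
	pthread_mutex_unlock(&c->lock);
	pthread_cond_broadcast(&c->wait);
}

void model_wait_barrier(struct barrier_model *c)         /* normal IO entry */
{
	pthread_mutex_lock(&c->lock);
	if (c->barrier) {
		c->nr_waiting++;
		while (c->barrier)
			pthread_cond_wait(&c->wait, &c->lock);
		c->nr_waiting--;
	}
	c->nr_pending++;
	pthread_mutex_unlock(&c->lock);
	pthread_cond_broadcast(&c->wait);  /* let a raiser re-check nr_waiting */
}

void model_allow_barrier(struct barrier_model *c)        /* normal IO exit */
{
	pthread_mutex_lock(&c->lock);
	c->nr_pending--;
	pthread_mutex_unlock(&c->lock);
	pthread_cond_broadcast(&c->wait);
}

int main(void)
{
	struct barrier_model c = { PTHREAD_MUTEX_INITIALIZER,
				   PTHREAD_COND_INITIALIZER, 0, 0, 0 };

	model_wait_barrier(&c);   /* normal IO */
	model_allow_barrier(&c);
	model_raise_barrier(&c);  /* resync */
	model_lower_barrier(&c);
	printf("barrier=%d pending=%d waiting=%d\n",
	       c.barrier, c.nr_pending, c.nr_waiting);
	return 0;
}

freeze_array(), defined just below, layers one more condition on the same counters: it waits until nr_pending matches nr_queued+1, so everything else is either finished or queued for retry.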
- * We increment barrier and nr_waiting, and then - * wait until nr_pending match nr_queued+1 - * This is called in the context of one normal IO request - * that has failed. Thus any sync request that might be pending - * will be blocked by nr_pending, and we need to wait for - * pending IO requests to complete or be queued for re-try. - * Thus the number queued (nr_queued) plus this request (1) - * must match the number of pending IOs (nr_pending) before - * we continue. - */ - spin_lock_irq(&conf->resync_lock); - conf->barrier++; - conf->nr_waiting++; - wait_event_lock_irq(conf->wait_barrier, - conf->nr_pending == conf->nr_queued+1, - conf->resync_lock, - flush_pending_writes(conf)); - spin_unlock_irq(&conf->resync_lock); -} -static void unfreeze_array(struct r1conf *conf) -{ - /* reverse the effect of the freeze */ - spin_lock_irq(&conf->resync_lock); - conf->barrier--; - conf->nr_waiting--; - wake_up(&conf->wait_barrier); - spin_unlock_irq(&conf->resync_lock); -} - - -/* duplicate the data pages for behind I/O - */ -static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio) -{ - int i; - struct bio_vec *bvec; - struct bio_vec *bvecs = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec), - GFP_NOIO); - if (unlikely(!bvecs)) - return; - - bio_for_each_segment(bvec, bio, i) { - bvecs[i] = *bvec; - bvecs[i].bv_page = alloc_page(GFP_NOIO); - if (unlikely(!bvecs[i].bv_page)) - goto do_sync_io; - memcpy(kmap(bvecs[i].bv_page) + bvec->bv_offset, - kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len); - kunmap(bvecs[i].bv_page); - kunmap(bvec->bv_page); - } - r1_bio->behind_bvecs = bvecs; - r1_bio->behind_page_count = bio->bi_vcnt; - set_bit(R1BIO_BehindIO, &r1_bio->state); - return; - -do_sync_io: - for (i = 0; i < bio->bi_vcnt; i++) - if (bvecs[i].bv_page) - put_page(bvecs[i].bv_page); - kfree(bvecs); - pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_size); -} - -static void make_request(struct mddev *mddev, struct bio * bio) -{ - struct r1conf *conf = mddev->private; - struct mirror_info *mirror; - struct r1bio *r1_bio; - struct bio *read_bio; - int i, disks; - struct bitmap *bitmap; - unsigned long flags; - const int rw = bio_data_dir(bio); - const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); - const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); - struct md_rdev *blocked_rdev; - int plugged; - int first_clone; - int sectors_handled; - int max_sectors; - - /* - * Register the new request and wait if the reconstruction - * thread has put up a bar for new requests. - * Continue immediately if no resync is active currently. - */ - - md_write_start(mddev, bio); /* wait on superblock update early */ - - if (bio_data_dir(bio) == WRITE && - bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo && - bio->bi_sector < mddev->suspend_hi) { - /* As the suspend_* range is controlled by - * userspace, we want an interruptible - * wait. - */ - DEFINE_WAIT(w); - for (;;) { - flush_signals(current); - prepare_to_wait(&conf->wait_barrier, - &w, TASK_INTERRUPTIBLE); - if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo || - bio->bi_sector >= mddev->suspend_hi) - break; - schedule(); - } - finish_wait(&conf->wait_barrier, &w); - } - - wait_barrier(conf); - - bitmap = mddev->bitmap; - - /* - * make_request() can abort the operation when READA is being - * used and no empty request is available. 
- * - */ - r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); - - r1_bio->master_bio = bio; - r1_bio->sectors = bio->bi_size >> 9; - r1_bio->state = 0; - r1_bio->mddev = mddev; - r1_bio->sector = bio->bi_sector; - - /* We might need to issue multiple reads to different - * devices if there are bad blocks around, so we keep - * track of the number of reads in bio->bi_phys_segments. - * If this is 0, there is only one r1_bio and no locking - * will be needed when requests complete. If it is - * non-zero, then it is the number of not-completed requests. - */ - bio->bi_phys_segments = 0; - clear_bit(BIO_SEG_VALID, &bio->bi_flags); - - if (rw == READ) { - /* - * read balancing logic: - */ - int rdisk; - -read_again: - rdisk = read_balance(conf, r1_bio, &max_sectors); - - if (rdisk < 0) { - /* couldn't find anywhere to read from */ - raid_end_bio_io(r1_bio); - return; - } - mirror = conf->mirrors + rdisk; - - if (test_bit(WriteMostly, &mirror->rdev->flags) && - bitmap) { - /* Reading from a write-mostly device must - * take care not to over-take any writes - * that are 'behind' - */ - wait_event(bitmap->behind_wait, - atomic_read(&bitmap->behind_writes) == 0); - } - r1_bio->read_disk = rdisk; - - read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev); - md_trim_bio(read_bio, r1_bio->sector - bio->bi_sector, - max_sectors); - - r1_bio->bios[rdisk] = read_bio; - - read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset; - read_bio->bi_bdev = mirror->rdev->bdev; - read_bio->bi_end_io = raid1_end_read_request; - read_bio->bi_rw = READ | do_sync; - read_bio->bi_private = r1_bio; - - if (max_sectors < r1_bio->sectors) { - /* could not read all from this device, so we will - * need another r1_bio. - */ - - sectors_handled = (r1_bio->sector + max_sectors - - bio->bi_sector); - r1_bio->sectors = max_sectors; - spin_lock_irq(&conf->device_lock); - if (bio->bi_phys_segments == 0) - bio->bi_phys_segments = 2; - else - bio->bi_phys_segments++; - spin_unlock_irq(&conf->device_lock); - /* Cannot call generic_make_request directly - * as that will be queued in __make_request - * and subsequent mempool_alloc might block waiting - * for it. So hand bio over to raid1d. - */ - reschedule_retry(r1_bio); - - r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); - - r1_bio->master_bio = bio; - r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled; - r1_bio->state = 0; - r1_bio->mddev = mddev; - r1_bio->sector = bio->bi_sector + sectors_handled; - goto read_again; - } else - generic_make_request(read_bio); - return; - } - - /* - * WRITE: - */ - if (conf->pending_count >= max_queued_requests) { - md_wakeup_thread(mddev->thread); - wait_event(conf->wait_barrier, - conf->pending_count < max_queued_requests); - } - /* first select target devices under rcu_lock and - * inc refcount on their rdev. Record them by setting - * bios[x] to bio - * If there are known/acknowledged bad blocks on any device on - * which we have seen a write error, we want to avoid writing those - * blocks. - * This potentially requires several writes to write around - * the bad blocks. Each set of writes gets it's own r1bio - * with a set of bios attached. 
- */ - plugged = mddev_check_plugged(mddev); - - disks = conf->raid_disks * 2; - retry_write: - blocked_rdev = NULL; - rcu_read_lock(); - max_sectors = r1_bio->sectors; - for (i = 0; i < disks; i++) { - struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); - if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { - atomic_inc(&rdev->nr_pending); - blocked_rdev = rdev; - break; - } - r1_bio->bios[i] = NULL; - if (!rdev || test_bit(Faulty, &rdev->flags) - || test_bit(Unmerged, &rdev->flags)) { - if (i < conf->raid_disks) - set_bit(R1BIO_Degraded, &r1_bio->state); - continue; - } - - atomic_inc(&rdev->nr_pending); - if (test_bit(WriteErrorSeen, &rdev->flags)) { - sector_t first_bad; - int bad_sectors; - int is_bad; - - is_bad = is_badblock(rdev, r1_bio->sector, - max_sectors, - &first_bad, &bad_sectors); - if (is_bad < 0) { - /* mustn't write here until the bad block is - * acknowledged*/ - set_bit(BlockedBadBlocks, &rdev->flags); - blocked_rdev = rdev; - break; - } - if (is_bad && first_bad <= r1_bio->sector) { - /* Cannot write here at all */ - bad_sectors -= (r1_bio->sector - first_bad); - if (bad_sectors < max_sectors) - /* mustn't write more than bad_sectors - * to other devices yet - */ - max_sectors = bad_sectors; - rdev_dec_pending(rdev, mddev); - /* We don't set R1BIO_Degraded as that - * only applies if the disk is - * missing, so it might be re-added, - * and we want to know to recover this - * chunk. - * In this case the device is here, - * and the fact that this chunk is not - * in-sync is recorded in the bad - * block log - */ - continue; - } - if (is_bad) { - int good_sectors = first_bad - r1_bio->sector; - if (good_sectors < max_sectors) - max_sectors = good_sectors; - } - } - r1_bio->bios[i] = bio; - } - rcu_read_unlock(); - - if (unlikely(blocked_rdev)) { - /* Wait for this device to become unblocked */ - int j; - - for (j = 0; j < i; j++) - if (r1_bio->bios[j]) - rdev_dec_pending(conf->mirrors[j].rdev, mddev); - r1_bio->state = 0; - allow_barrier(conf); - md_wait_for_blocked_rdev(blocked_rdev, mddev); - wait_barrier(conf); - goto retry_write; - } - - if (max_sectors < r1_bio->sectors) { - /* We are splitting this write into multiple parts, so - * we need to prepare for allocating another r1_bio. - */ - r1_bio->sectors = max_sectors; - spin_lock_irq(&conf->device_lock); - if (bio->bi_phys_segments == 0) - bio->bi_phys_segments = 2; - else - bio->bi_phys_segments++; - spin_unlock_irq(&conf->device_lock); - } - sectors_handled = r1_bio->sector + max_sectors - bio->bi_sector; - - atomic_set(&r1_bio->remaining, 1); - atomic_set(&r1_bio->behind_remaining, 0); - - first_clone = 1; - for (i = 0; i < disks; i++) { - struct bio *mbio; - if (!r1_bio->bios[i]) - continue; - - mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); - md_trim_bio(mbio, r1_bio->sector - bio->bi_sector, max_sectors); - - if (first_clone) { - /* do behind I/O ? - * Not if there are too many, or cannot - * allocate memory, or a reader on WriteMostly - * is waiting for behind writes to flush */ - if (bitmap && - (atomic_read(&bitmap->behind_writes) - < mddev->bitmap_info.max_write_behind) && - !waitqueue_active(&bitmap->behind_wait)) - alloc_behind_pages(mbio, r1_bio); - - bitmap_startwrite(bitmap, r1_bio->sector, - r1_bio->sectors, - test_bit(R1BIO_BehindIO, - &r1_bio->state)); - first_clone = 0; - } - if (r1_bio->behind_bvecs) { - struct bio_vec *bvec; - int j; - - /* Yes, I really want the '__' version so that - * we clear any unused pointer in the io_vec, rather - * than leave them unchanged. 
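The target-selection loop above trims max_sectors whenever a device reports a bad block inside the write range, so that a single r1bio never mixes writable and unwritable sectors; whatever is left over is picked up by a follow-up r1bio, counted via bio->bi_phys_segments. The clipping itself is plain interval arithmetic, sketched here with a hypothetical helper:

#include <stdbool.h>
#include <stdio.h>

/* For a write starting at 'sector', currently limited to 'max_sectors',
 * and a device with a known bad range [first_bad, first_bad + bad_sectors),
 * decide whether the device takes part in this pass and how far the whole
 * r1bio may extend.  Illustrative helper, not a kernel function. */
static long long clip_max_sectors(long long sector, long long max_sectors,
				  long long first_bad, long long bad_sectors,
				  bool *device_writable)
{
	if (first_bad <= sector) {
		/* Write begins inside the bad range: skip this device, and
		 * keep the other devices from writing past the end of the
		 * bad range so the skipped chunk stays one recoverable unit. */
		long long remaining_bad = bad_sectors - (sector - first_bad);
		*device_writable = false;
		return remaining_bad < max_sectors ? remaining_bad : max_sectors;
	}
	/* Bad range begins later: write up to, but not into, it. */
	long long good = first_bad - sector;
	*device_writable = true;
	return good < max_sectors ? good : max_sectors;
}

int main(void)
{
	bool ok;
	long long n;

	n = clip_max_sectors(1000, 64, 1016, 8, &ok);
	printf("max_sectors=%lld participates=%d\n", n, ok);   /* 16, 1 */
	n = clip_max_sectors(1018, 64, 1016, 8, &ok);
	printf("max_sectors=%lld participates=%d\n", n, ok);   /* 6, 0 */
	return 0;
}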
This is important - * because when we come to free the pages, we won't - * know the original bi_idx, so we just free - * them all - */ - __bio_for_each_segment(bvec, mbio, j, 0) - bvec->bv_page = r1_bio->behind_bvecs[j].bv_page; - if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) - atomic_inc(&r1_bio->behind_remaining); - } - - r1_bio->bios[i] = mbio; - - mbio->bi_sector = (r1_bio->sector + - conf->mirrors[i].rdev->data_offset); - mbio->bi_bdev = conf->mirrors[i].rdev->bdev; - mbio->bi_end_io = raid1_end_write_request; - mbio->bi_rw = WRITE | do_flush_fua | do_sync; - mbio->bi_private = r1_bio; - - atomic_inc(&r1_bio->remaining); - spin_lock_irqsave(&conf->device_lock, flags); - bio_list_add(&conf->pending_bio_list, mbio); - conf->pending_count++; - spin_unlock_irqrestore(&conf->device_lock, flags); - } - /* Mustn't call r1_bio_write_done before this next test, - * as it could result in the bio being freed. - */ - if (sectors_handled < (bio->bi_size >> 9)) { - r1_bio_write_done(r1_bio); - /* We need another r1_bio. It has already been counted - * in bio->bi_phys_segments - */ - r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); - r1_bio->master_bio = bio; - r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled; - r1_bio->state = 0; - r1_bio->mddev = mddev; - r1_bio->sector = bio->bi_sector + sectors_handled; - goto retry_write; - } - - r1_bio_write_done(r1_bio); - - /* In case raid1d snuck in to freeze_array */ - wake_up(&conf->wait_barrier); - - if (do_sync || !bitmap || !plugged) - md_wakeup_thread(mddev->thread); -} - -static void status(struct seq_file *seq, struct mddev *mddev) -{ - struct r1conf *conf = mddev->private; - int i; - - seq_printf(seq, " [%d/%d] [", conf->raid_disks, - conf->raid_disks - mddev->degraded); - rcu_read_lock(); - for (i = 0; i < conf->raid_disks; i++) { - struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); - seq_printf(seq, "%s", - rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); - } - rcu_read_unlock(); - seq_printf(seq, "]"); -} - - -static void error(struct mddev *mddev, struct md_rdev *rdev) -{ - char b[BDEVNAME_SIZE]; - struct r1conf *conf = mddev->private; - - /* - * If it is not operational, then we have already marked it as dead - * else if it is the last working disks, ignore the error, let the - * next level up know. - * else mark the drive as failed - */ - if (test_bit(In_sync, &rdev->flags) - && (conf->raid_disks - mddev->degraded) == 1) { - /* - * Don't fail the drive, act as though we were just a - * normal single drive. - * However don't try a recovery from this drive as - * it is very likely to fail. - */ - conf->recovery_disabled = mddev->recovery_disabled; - return; - } - set_bit(Blocked, &rdev->flags); - if (test_and_clear_bit(In_sync, &rdev->flags)) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded++; - set_bit(Faulty, &rdev->flags); - spin_unlock_irqrestore(&conf->device_lock, flags); - /* - * if recovery is running, make sure it aborts. 
- */ - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - } else - set_bit(Faulty, &rdev->flags); - set_bit(MD_CHANGE_DEVS, &mddev->flags); - printk(KERN_ALERT - "md/raid1:%s: Disk failure on %s, disabling device.\n" - "md/raid1:%s: Operation continuing on %d devices.\n", - mdname(mddev), bdevname(rdev->bdev, b), - mdname(mddev), conf->raid_disks - mddev->degraded); -} - -static void print_conf(struct r1conf *conf) -{ - int i; - - printk(KERN_DEBUG "RAID1 conf printout:\n"); - if (!conf) { - printk(KERN_DEBUG "(!conf)\n"); - return; - } - printk(KERN_DEBUG " --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded, - conf->raid_disks); - - rcu_read_lock(); - for (i = 0; i < conf->raid_disks; i++) { - char b[BDEVNAME_SIZE]; - struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); - if (rdev) - printk(KERN_DEBUG " disk %d, wo:%d, o:%d, dev:%s\n", - i, !test_bit(In_sync, &rdev->flags), - !test_bit(Faulty, &rdev->flags), - bdevname(rdev->bdev,b)); - } - rcu_read_unlock(); -} - -static void close_sync(struct r1conf *conf) -{ - wait_barrier(conf); - allow_barrier(conf); - - mempool_destroy(conf->r1buf_pool); - conf->r1buf_pool = NULL; -} - -static int raid1_spare_active(struct mddev *mddev) -{ - int i; - struct r1conf *conf = mddev->private; - int count = 0; - unsigned long flags; - - /* - * Find all failed disks within the RAID1 configuration - * and mark them readable. - * Called under mddev lock, so rcu protection not needed. - */ - for (i = 0; i < conf->raid_disks; i++) { - struct md_rdev *rdev = conf->mirrors[i].rdev; - struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev; - if (repl - && repl->recovery_offset == MaxSector - && !test_bit(Faulty, &repl->flags) - && !test_and_set_bit(In_sync, &repl->flags)) { - /* replacement has just become active */ - if (!rdev || - !test_and_clear_bit(In_sync, &rdev->flags)) - count++; - if (rdev) { - /* Replaced device not technically - * faulty, but we need to be sure - * it gets removed and never re-added - */ - set_bit(Faulty, &rdev->flags); - sysfs_notify_dirent_safe( - rdev->sysfs_state); - } - } - if (rdev - && !test_bit(Faulty, &rdev->flags) - && !test_and_set_bit(In_sync, &rdev->flags)) { - count++; - sysfs_notify_dirent_safe(rdev->sysfs_state); - } - } - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded -= count; - spin_unlock_irqrestore(&conf->device_lock, flags); - - print_conf(conf); - return count; -} - - -static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) -{ - struct r1conf *conf = mddev->private; - int err = -EEXIST; - int mirror = 0; - struct mirror_info *p; - int first = 0; - int last = conf->raid_disks - 1; - struct request_queue *q = bdev_get_queue(rdev->bdev); - - if (mddev->recovery_disabled == conf->recovery_disabled) - return -EBUSY; - - if (rdev->raid_disk >= 0) - first = last = rdev->raid_disk; - - if (q->merge_bvec_fn) { - set_bit(Unmerged, &rdev->flags); - mddev->merge_check_needed = 1; - } - - for (mirror = first; mirror <= last; mirror++) { - p = conf->mirrors+mirror; - if (!p->rdev) { - - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - - p->head_position = 0; - rdev->raid_disk = mirror; - err = 0; - /* As all devices are equivalent, we don't need a full recovery - * if this was recently any drive of the array - */ - if (rdev->saved_raid_disk < 0) - conf->fullsync = 1; - rcu_assign_pointer(p->rdev, rdev); - break; - } - if (test_bit(WantReplacement, &p->rdev->flags) && - p[conf->raid_disks].rdev == NULL) { - /* Add this device as a replacement */ - 
clear_bit(In_sync, &rdev->flags); - set_bit(Replacement, &rdev->flags); - rdev->raid_disk = mirror; - err = 0; - conf->fullsync = 1; - rcu_assign_pointer(p[conf->raid_disks].rdev, rdev); - break; - } - } - if (err == 0 && test_bit(Unmerged, &rdev->flags)) { - /* Some requests might not have seen this new - * merge_bvec_fn. We must wait for them to complete - * before merging the device fully. - * First we make sure any code which has tested - * our function has submitted the request, then - * we wait for all outstanding requests to complete. - */ - synchronize_sched(); - raise_barrier(conf); - lower_barrier(conf); - clear_bit(Unmerged, &rdev->flags); - } - md_integrity_add_rdev(rdev, mddev); - print_conf(conf); - return err; -} - -static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) -{ - struct r1conf *conf = mddev->private; - int err = 0; - int number = rdev->raid_disk; - struct mirror_info *p = conf->mirrors+ number; - - if (rdev != p->rdev) - p = conf->mirrors + conf->raid_disks + number; - - print_conf(conf); - if (rdev == p->rdev) { - if (test_bit(In_sync, &rdev->flags) || - atomic_read(&rdev->nr_pending)) { - err = -EBUSY; - goto abort; - } - /* Only remove non-faulty devices if recovery - * is not possible. - */ - if (!test_bit(Faulty, &rdev->flags) && - mddev->recovery_disabled != conf->recovery_disabled && - mddev->degraded < conf->raid_disks) { - err = -EBUSY; - goto abort; - } - p->rdev = NULL; - synchronize_rcu(); - if (atomic_read(&rdev->nr_pending)) { - /* lost the race, try later */ - err = -EBUSY; - p->rdev = rdev; - goto abort; - } else if (conf->mirrors[conf->raid_disks + number].rdev) { - /* We just removed a device that is being replaced. - * Move down the replacement. We drain all IO before - * doing this to avoid confusion. - */ - struct md_rdev *repl = - conf->mirrors[conf->raid_disks + number].rdev; - raise_barrier(conf); - clear_bit(Replacement, &repl->flags); - p->rdev = repl; - conf->mirrors[conf->raid_disks + number].rdev = NULL; - lower_barrier(conf); - clear_bit(WantReplacement, &rdev->flags); - } else - clear_bit(WantReplacement, &rdev->flags); - err = md_integrity_register(mddev); - } -abort: - - print_conf(conf); - return err; -} - - -static void end_sync_read(struct bio *bio, int error) -{ - struct r1bio *r1_bio = bio->bi_private; - - update_head_pos(r1_bio->read_disk, r1_bio); - - /* - * we have read a block, now it needs to be re-written, - * or re-read if the read failed. - * We don't do much here, just schedule handling by raid1d - */ - if (test_bit(BIO_UPTODATE, &bio->bi_flags)) - set_bit(R1BIO_Uptodate, &r1_bio->state); - - if (atomic_dec_and_test(&r1_bio->remaining)) - reschedule_retry(r1_bio); -} - -static void end_sync_write(struct bio *bio, int error) -{ - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct r1bio *r1_bio = bio->bi_private; - struct mddev *mddev = r1_bio->mddev; - struct r1conf *conf = mddev->private; - int mirror=0; - sector_t first_bad; - int bad_sectors; - - mirror = find_bio_disk(r1_bio, bio); - - if (!uptodate) { - sector_t sync_blocks = 0; - sector_t s = r1_bio->sector; - long sectors_to_go = r1_bio->sectors; - /* make sure these bits doesn't get cleared. 
*/ - do { - bitmap_end_sync(mddev->bitmap, s, - &sync_blocks, 1); - s += sync_blocks; - sectors_to_go -= sync_blocks; - } while (sectors_to_go > 0); - set_bit(WriteErrorSeen, - &conf->mirrors[mirror].rdev->flags); - if (!test_and_set_bit(WantReplacement, - &conf->mirrors[mirror].rdev->flags)) - set_bit(MD_RECOVERY_NEEDED, & - mddev->recovery); - set_bit(R1BIO_WriteError, &r1_bio->state); - } else if (is_badblock(conf->mirrors[mirror].rdev, - r1_bio->sector, - r1_bio->sectors, - &first_bad, &bad_sectors) && - !is_badblock(conf->mirrors[r1_bio->read_disk].rdev, - r1_bio->sector, - r1_bio->sectors, - &first_bad, &bad_sectors) - ) - set_bit(R1BIO_MadeGood, &r1_bio->state); - - if (atomic_dec_and_test(&r1_bio->remaining)) { - int s = r1_bio->sectors; - if (test_bit(R1BIO_MadeGood, &r1_bio->state) || - test_bit(R1BIO_WriteError, &r1_bio->state)) - reschedule_retry(r1_bio); - else { - put_buf(r1_bio); - md_done_sync(mddev, s, uptodate); - } - } -} - -static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector, - int sectors, struct page *page, int rw) -{ - if (sync_page_io(rdev, sector, sectors << 9, page, rw, false)) - /* success */ - return 1; - if (rw == WRITE) { - set_bit(WriteErrorSeen, &rdev->flags); - if (!test_and_set_bit(WantReplacement, - &rdev->flags)) - set_bit(MD_RECOVERY_NEEDED, & - rdev->mddev->recovery); - } - /* need to record an error - either for the block or the device */ - if (!rdev_set_badblocks(rdev, sector, sectors, 0)) - md_error(rdev->mddev, rdev); - return 0; -} - -static int fix_sync_read_error(struct r1bio *r1_bio) -{ - /* Try some synchronous reads of other devices to get - * good data, much like with normal read errors. Only - * read into the pages we already have so we don't - * need to re-issue the read request. - * We don't need to freeze the array, because being in an - * active sync request, there is no normal IO, and - * no overlapping syncs. - * We don't need to check is_badblock() again as we - * made sure that anything with a bad block in range - * will have bi_end_io clear. - */ - struct mddev *mddev = r1_bio->mddev; - struct r1conf *conf = mddev->private; - struct bio *bio = r1_bio->bios[r1_bio->read_disk]; - sector_t sect = r1_bio->sector; - int sectors = r1_bio->sectors; - int idx = 0; - - while(sectors) { - int s = sectors; - int d = r1_bio->read_disk; - int success = 0; - struct md_rdev *rdev; - int start; - - if (s > (PAGE_SIZE>>9)) - s = PAGE_SIZE >> 9; - do { - if (r1_bio->bios[d]->bi_end_io == end_sync_read) { - /* No rcu protection needed here devices - * can only be removed when no resync is - * active, and resync is currently active - */ - rdev = conf->mirrors[d].rdev; - if (sync_page_io(rdev, sect, s<<9, - bio->bi_io_vec[idx].bv_page, - READ, false)) { - success = 1; - break; - } - } - d++; - if (d == conf->raid_disks * 2) - d = 0; - } while (!success && d != r1_bio->read_disk); - - if (!success) { - char b[BDEVNAME_SIZE]; - int abort = 0; - /* Cannot read from anywhere, this block is lost. - * Record a bad block on each device. If that doesn't - * work just disable and interrupt the recovery. - * Don't fail devices as that won't really help. 
- */ - printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error" - " for block %llu\n", - mdname(mddev), - bdevname(bio->bi_bdev, b), - (unsigned long long)r1_bio->sector); - for (d = 0; d < conf->raid_disks * 2; d++) { - rdev = conf->mirrors[d].rdev; - if (!rdev || test_bit(Faulty, &rdev->flags)) - continue; - if (!rdev_set_badblocks(rdev, sect, s, 0)) - abort = 1; - } - if (abort) { - conf->recovery_disabled = - mddev->recovery_disabled; - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - md_done_sync(mddev, r1_bio->sectors, 0); - put_buf(r1_bio); - return 0; - } - /* Try next page */ - sectors -= s; - sect += s; - idx++; - continue; - } - - start = d; - /* write it back and re-read */ - while (d != r1_bio->read_disk) { - if (d == 0) - d = conf->raid_disks * 2; - d--; - if (r1_bio->bios[d]->bi_end_io != end_sync_read) - continue; - rdev = conf->mirrors[d].rdev; - if (r1_sync_page_io(rdev, sect, s, - bio->bi_io_vec[idx].bv_page, - WRITE) == 0) { - r1_bio->bios[d]->bi_end_io = NULL; - rdev_dec_pending(rdev, mddev); - } - } - d = start; - while (d != r1_bio->read_disk) { - if (d == 0) - d = conf->raid_disks * 2; - d--; - if (r1_bio->bios[d]->bi_end_io != end_sync_read) - continue; - rdev = conf->mirrors[d].rdev; - if (r1_sync_page_io(rdev, sect, s, - bio->bi_io_vec[idx].bv_page, - READ) != 0) - atomic_add(s, &rdev->corrected_errors); - } - sectors -= s; - sect += s; - idx ++; - } - set_bit(R1BIO_Uptodate, &r1_bio->state); - set_bit(BIO_UPTODATE, &bio->bi_flags); - return 1; -} - -static int process_checks(struct r1bio *r1_bio) -{ - /* We have read all readable devices. If we haven't - * got the block, then there is no hope left. - * If we have, then we want to do a comparison - * and skip the write if everything is the same. - * If any blocks failed to read, then we need to - * attempt an over-write - */ - struct mddev *mddev = r1_bio->mddev; - struct r1conf *conf = mddev->private; - int primary; - int i; - int vcnt; - - for (primary = 0; primary < conf->raid_disks * 2; primary++) - if (r1_bio->bios[primary]->bi_end_io == end_sync_read && - test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) { - r1_bio->bios[primary]->bi_end_io = NULL; - rdev_dec_pending(conf->mirrors[primary].rdev, mddev); - break; - } - r1_bio->read_disk = primary; - vcnt = (r1_bio->sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9); - for (i = 0; i < conf->raid_disks * 2; i++) { - int j; - struct bio *pbio = r1_bio->bios[primary]; - struct bio *sbio = r1_bio->bios[i]; - int size; - - if (r1_bio->bios[i]->bi_end_io != end_sync_read) - continue; - - if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) { - for (j = vcnt; j-- ; ) { - struct page *p, *s; - p = pbio->bi_io_vec[j].bv_page; - s = sbio->bi_io_vec[j].bv_page; - if (memcmp(page_address(p), - page_address(s), - sbio->bi_io_vec[j].bv_len)) - break; - } - } else - j = 0; - if (j >= 0) - mddev->resync_mismatches += r1_bio->sectors; - if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery) - && test_bit(BIO_UPTODATE, &sbio->bi_flags))) { - /* No need to write to this device. 
*/ - sbio->bi_end_io = NULL; - rdev_dec_pending(conf->mirrors[i].rdev, mddev); - continue; - } - /* fixup the bio for reuse */ - sbio->bi_vcnt = vcnt; - sbio->bi_size = r1_bio->sectors << 9; - sbio->bi_idx = 0; - sbio->bi_phys_segments = 0; - sbio->bi_flags &= ~(BIO_POOL_MASK - 1); - sbio->bi_flags |= 1 << BIO_UPTODATE; - sbio->bi_next = NULL; - sbio->bi_sector = r1_bio->sector + - conf->mirrors[i].rdev->data_offset; - sbio->bi_bdev = conf->mirrors[i].rdev->bdev; - size = sbio->bi_size; - for (j = 0; j < vcnt ; j++) { - struct bio_vec *bi; - bi = &sbio->bi_io_vec[j]; - bi->bv_offset = 0; - if (size > PAGE_SIZE) - bi->bv_len = PAGE_SIZE; - else - bi->bv_len = size; - size -= PAGE_SIZE; - memcpy(page_address(bi->bv_page), - page_address(pbio->bi_io_vec[j].bv_page), - PAGE_SIZE); - } - } - return 0; -} - -static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) -{ - struct r1conf *conf = mddev->private; - int i; - int disks = conf->raid_disks * 2; - struct bio *bio, *wbio; - - bio = r1_bio->bios[r1_bio->read_disk]; - - if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) - /* ouch - failed to read all of that. */ - if (!fix_sync_read_error(r1_bio)) - return; - - if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) - if (process_checks(r1_bio) < 0) - return; - /* - * schedule writes - */ - atomic_set(&r1_bio->remaining, 1); - for (i = 0; i < disks ; i++) { - wbio = r1_bio->bios[i]; - if (wbio->bi_end_io == NULL || - (wbio->bi_end_io == end_sync_read && - (i == r1_bio->read_disk || - !test_bit(MD_RECOVERY_SYNC, &mddev->recovery)))) - continue; - - wbio->bi_rw = WRITE; - wbio->bi_end_io = end_sync_write; - atomic_inc(&r1_bio->remaining); - md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9); - - generic_make_request(wbio); - } - - if (atomic_dec_and_test(&r1_bio->remaining)) { - /* if we're here, all write(s) have completed, so clean up */ - md_done_sync(mddev, r1_bio->sectors, 1); - put_buf(r1_bio); - } -} - -/* - * This is a kernel thread which: - * - * 1. Retries failed read operations on working mirrors. - * 2. Updates the raid superblock when problems encounter. - * 3. Performs writes following reads for array synchronising. - */ - -static void fix_read_error(struct r1conf *conf, int read_disk, - sector_t sect, int sectors) -{ - struct mddev *mddev = conf->mddev; - while(sectors) { - int s = sectors; - int d = read_disk; - int success = 0; - int start; - struct md_rdev *rdev; - - if (s > (PAGE_SIZE>>9)) - s = PAGE_SIZE >> 9; - - do { - /* Note: no rcu protection needed here - * as this is synchronous in the raid1d thread - * which is the thread that might remove - * a device. If raid1d ever becomes multi-threaded.... 
- */ - sector_t first_bad; - int bad_sectors; - - rdev = conf->mirrors[d].rdev; - if (rdev && - test_bit(In_sync, &rdev->flags) && - is_badblock(rdev, sect, s, - &first_bad, &bad_sectors) == 0 && - sync_page_io(rdev, sect, s<<9, - conf->tmppage, READ, false)) - success = 1; - else { - d++; - if (d == conf->raid_disks * 2) - d = 0; - } - } while (!success && d != read_disk); - - if (!success) { - /* Cannot read from anywhere - mark it bad */ - struct md_rdev *rdev = conf->mirrors[read_disk].rdev; - if (!rdev_set_badblocks(rdev, sect, s, 0)) - md_error(mddev, rdev); - break; - } - /* write it back and re-read */ - start = d; - while (d != read_disk) { - if (d==0) - d = conf->raid_disks * 2; - d--; - rdev = conf->mirrors[d].rdev; - if (rdev && - test_bit(In_sync, &rdev->flags)) - r1_sync_page_io(rdev, sect, s, - conf->tmppage, WRITE); - } - d = start; - while (d != read_disk) { - char b[BDEVNAME_SIZE]; - if (d==0) - d = conf->raid_disks * 2; - d--; - rdev = conf->mirrors[d].rdev; - if (rdev && - test_bit(In_sync, &rdev->flags)) { - if (r1_sync_page_io(rdev, sect, s, - conf->tmppage, READ)) { - atomic_add(s, &rdev->corrected_errors); - printk(KERN_INFO - "md/raid1:%s: read error corrected " - "(%d sectors at %llu on %s)\n", - mdname(mddev), s, - (unsigned long long)(sect + - rdev->data_offset), - bdevname(rdev->bdev, b)); - } - } - } - sectors -= s; - sect += s; - } -} - -static void bi_complete(struct bio *bio, int error) -{ - complete((struct completion *)bio->bi_private); -} - -static int submit_bio_wait(int rw, struct bio *bio) -{ - struct completion event; - rw |= REQ_SYNC; - - init_completion(&event); - bio->bi_private = &event; - bio->bi_end_io = bi_complete; - submit_bio(rw, bio); - wait_for_completion(&event); - - return test_bit(BIO_UPTODATE, &bio->bi_flags); -} - -static int narrow_write_error(struct r1bio *r1_bio, int i) -{ - struct mddev *mddev = r1_bio->mddev; - struct r1conf *conf = mddev->private; - struct md_rdev *rdev = conf->mirrors[i].rdev; - int vcnt, idx; - struct bio_vec *vec; - - /* bio has the data to be written to device 'i' where - * we just recently had a write error. - * We repeatedly clone the bio and trim down to one block, - * then try the write. Where the write fails we record - * a bad block. - * It is conceivable that the bio doesn't exactly align with - * blocks. We must handle this somehow. - * - * We currently own a reference on the rdev. 
- */ - - int block_sectors; - sector_t sector; - int sectors; - int sect_to_write = r1_bio->sectors; - int ok = 1; - - if (rdev->badblocks.shift < 0) - return 0; - - block_sectors = 1 << rdev->badblocks.shift; - sector = r1_bio->sector; - sectors = ((sector + block_sectors) - & ~(sector_t)(block_sectors - 1)) - - sector; - - if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { - vcnt = r1_bio->behind_page_count; - vec = r1_bio->behind_bvecs; - idx = 0; - while (vec[idx].bv_page == NULL) - idx++; - } else { - vcnt = r1_bio->master_bio->bi_vcnt; - vec = r1_bio->master_bio->bi_io_vec; - idx = r1_bio->master_bio->bi_idx; - } - while (sect_to_write) { - struct bio *wbio; - if (sectors > sect_to_write) - sectors = sect_to_write; - /* Write at 'sector' for 'sectors'*/ - - wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev); - memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec)); - wbio->bi_sector = r1_bio->sector; - wbio->bi_rw = WRITE; - wbio->bi_vcnt = vcnt; - wbio->bi_size = r1_bio->sectors << 9; - wbio->bi_idx = idx; - - md_trim_bio(wbio, sector - r1_bio->sector, sectors); - wbio->bi_sector += rdev->data_offset; - wbio->bi_bdev = rdev->bdev; - if (submit_bio_wait(WRITE, wbio) == 0) - /* failure! */ - ok = rdev_set_badblocks(rdev, sector, - sectors, 0) - && ok; - - bio_put(wbio); - sect_to_write -= sectors; - sector += sectors; - sectors = block_sectors; - } - return ok; -} - -static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio) -{ - int m; - int s = r1_bio->sectors; - for (m = 0; m < conf->raid_disks * 2 ; m++) { - struct md_rdev *rdev = conf->mirrors[m].rdev; - struct bio *bio = r1_bio->bios[m]; - if (bio->bi_end_io == NULL) - continue; - if (test_bit(BIO_UPTODATE, &bio->bi_flags) && - test_bit(R1BIO_MadeGood, &r1_bio->state)) { - rdev_clear_badblocks(rdev, r1_bio->sector, s); - } - if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && - test_bit(R1BIO_WriteError, &r1_bio->state)) { - if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0)) - md_error(conf->mddev, rdev); - } - } - put_buf(r1_bio); - md_done_sync(conf->mddev, s, 1); -} - -static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio) -{ - int m; - for (m = 0; m < conf->raid_disks * 2 ; m++) - if (r1_bio->bios[m] == IO_MADE_GOOD) { - struct md_rdev *rdev = conf->mirrors[m].rdev; - rdev_clear_badblocks(rdev, - r1_bio->sector, - r1_bio->sectors); - rdev_dec_pending(rdev, conf->mddev); - } else if (r1_bio->bios[m] != NULL) { - /* This drive got a write error. We need to - * narrow down and record precise write - * errors. - */ - if (!narrow_write_error(r1_bio, m)) { - md_error(conf->mddev, - conf->mirrors[m].rdev); - /* an I/O failed, we can't clear the bitmap */ - set_bit(R1BIO_Degraded, &r1_bio->state); - } - rdev_dec_pending(conf->mirrors[m].rdev, - conf->mddev); - } - if (test_bit(R1BIO_WriteError, &r1_bio->state)) - close_write(r1_bio); - raid_end_bio_io(r1_bio); -} - -static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) -{ - int disk; - int max_sectors; - struct mddev *mddev = conf->mddev; - struct bio *bio; - char b[BDEVNAME_SIZE]; - struct md_rdev *rdev; - - clear_bit(R1BIO_ReadError, &r1_bio->state); - /* we got a read error. Maybe the drive is bad. Maybe just - * the block and we can fix it. - * We freeze all other IO, and try reading the block from - * other devices. When we find one, we re-write - * and check it that fixes the read error. 
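narrow_write_error() above retries the failed write one bad-block-sized piece at a time, so each piece that still fails can be recorded as a single bad block; the first piece is trimmed so that every later piece lands on a bad-block boundary. The chunking arithmetic in isolation (plain C, 'shift' standing in for rdev->badblocks.shift):

#include <stdio.h>

static void walk_chunks(long long sector, long long total, int shift)
{
	long long block_sectors = 1LL << shift;
	/* first (possibly short) chunk runs up to the next aligned boundary */
	long long chunk = ((sector + block_sectors) &
			   ~(block_sectors - 1)) - sector;

	while (total > 0) {
		if (chunk > total)
			chunk = total;
		printf("write %lld sectors at %lld\n", chunk, sector);
		sector += chunk;
		total  -= chunk;
		chunk = block_sectors;     /* all later chunks are aligned */
	}
}

int main(void)
{
	/* a 24-sector request at sector 1003 with 8-sector bad blocks is
	 * retried as chunks of 5, 8, 8 and 3 sectors */
	walk_chunks(1003, 24, 3);
	return 0;
}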
- * This is all done synchronously while the array is - * frozen - */ - if (mddev->ro == 0) { - freeze_array(conf); - fix_read_error(conf, r1_bio->read_disk, - r1_bio->sector, r1_bio->sectors); - unfreeze_array(conf); - } else - md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); - - bio = r1_bio->bios[r1_bio->read_disk]; - bdevname(bio->bi_bdev, b); -read_more: - disk = read_balance(conf, r1_bio, &max_sectors); - if (disk == -1) { - printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O" - " read error for block %llu\n", - mdname(mddev), b, (unsigned long long)r1_bio->sector); - raid_end_bio_io(r1_bio); - } else { - const unsigned long do_sync - = r1_bio->master_bio->bi_rw & REQ_SYNC; - if (bio) { - r1_bio->bios[r1_bio->read_disk] = - mddev->ro ? IO_BLOCKED : NULL; - bio_put(bio); - } - r1_bio->read_disk = disk; - bio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev); - md_trim_bio(bio, r1_bio->sector - bio->bi_sector, max_sectors); - r1_bio->bios[r1_bio->read_disk] = bio; - rdev = conf->mirrors[disk].rdev; - printk_ratelimited(KERN_ERR - "md/raid1:%s: redirecting sector %llu" - " to other mirror: %s\n", - mdname(mddev), - (unsigned long long)r1_bio->sector, - bdevname(rdev->bdev, b)); - bio->bi_sector = r1_bio->sector + rdev->data_offset; - bio->bi_bdev = rdev->bdev; - bio->bi_end_io = raid1_end_read_request; - bio->bi_rw = READ | do_sync; - bio->bi_private = r1_bio; - if (max_sectors < r1_bio->sectors) { - /* Drat - have to split this up more */ - struct bio *mbio = r1_bio->master_bio; - int sectors_handled = (r1_bio->sector + max_sectors - - mbio->bi_sector); - r1_bio->sectors = max_sectors; - spin_lock_irq(&conf->device_lock); - if (mbio->bi_phys_segments == 0) - mbio->bi_phys_segments = 2; - else - mbio->bi_phys_segments++; - spin_unlock_irq(&conf->device_lock); - generic_make_request(bio); - bio = NULL; - - r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); - - r1_bio->master_bio = mbio; - r1_bio->sectors = (mbio->bi_size >> 9) - - sectors_handled; - r1_bio->state = 0; - set_bit(R1BIO_ReadError, &r1_bio->state); - r1_bio->mddev = mddev; - r1_bio->sector = mbio->bi_sector + sectors_handled; - - goto read_more; - } else - generic_make_request(bio); - } -} - -static void raid1d(struct mddev *mddev) -{ - struct r1bio *r1_bio; - unsigned long flags; - struct r1conf *conf = mddev->private; - struct list_head *head = &conf->retry_list; - struct blk_plug plug; - - md_check_recovery(mddev); - - blk_start_plug(&plug); - for (;;) { - - if (atomic_read(&mddev->plug_cnt) == 0) - flush_pending_writes(conf); - - spin_lock_irqsave(&conf->device_lock, flags); - if (list_empty(head)) { - spin_unlock_irqrestore(&conf->device_lock, flags); - break; - } - r1_bio = list_entry(head->prev, struct r1bio, retry_list); - list_del(head->prev); - conf->nr_queued--; - spin_unlock_irqrestore(&conf->device_lock, flags); - - mddev = r1_bio->mddev; - conf = mddev->private; - if (test_bit(R1BIO_IsSync, &r1_bio->state)) { - if (test_bit(R1BIO_MadeGood, &r1_bio->state) || - test_bit(R1BIO_WriteError, &r1_bio->state)) - handle_sync_write_finished(conf, r1_bio); - else - sync_request_write(mddev, r1_bio); - } else if (test_bit(R1BIO_MadeGood, &r1_bio->state) || - test_bit(R1BIO_WriteError, &r1_bio->state)) - handle_write_finished(conf, r1_bio); - else if (test_bit(R1BIO_ReadError, &r1_bio->state)) - handle_read_error(conf, r1_bio); - else - /* just a partial read to be scheduled from separate - * context - */ - generic_make_request(r1_bio->bios[r1_bio->read_disk]); - - cond_resched(); - if (mddev->flags & 
~(1<<MD_CHANGE_PENDING)) - md_check_recovery(mddev); - } - blk_finish_plug(&plug); -} - - -static int init_resync(struct r1conf *conf) -{ - int buffs; - - buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE; - BUG_ON(conf->r1buf_pool); - conf->r1buf_pool = mempool_create(buffs, r1buf_pool_alloc, r1buf_pool_free, - conf->poolinfo); - if (!conf->r1buf_pool) - return -ENOMEM; - conf->next_resync = 0; - return 0; -} - -/* - * perform a "sync" on one "block" - * - * We need to make sure that no normal I/O request - particularly write - * requests - conflict with active sync requests. - * - * This is achieved by tracking pending requests and a 'barrier' concept - * that can be installed to exclude normal IO requests. - */ - -static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped, int go_faster) -{ - struct r1conf *conf = mddev->private; - struct r1bio *r1_bio; - struct bio *bio; - sector_t max_sector, nr_sectors; - int disk = -1; - int i; - int wonly = -1; - int write_targets = 0, read_targets = 0; - sector_t sync_blocks; - int still_degraded = 0; - int good_sectors = RESYNC_SECTORS; - int min_bad = 0; /* number of sectors that are bad in all devices */ - - if (!conf->r1buf_pool) - if (init_resync(conf)) - return 0; - - max_sector = mddev->dev_sectors; - if (sector_nr >= max_sector) { - /* If we aborted, we need to abort the - * sync on the 'current' bitmap chunk (there will - * only be one in raid1 resync. - * We can find the current addess in mddev->curr_resync - */ - if (mddev->curr_resync < max_sector) /* aborted */ - bitmap_end_sync(mddev->bitmap, mddev->curr_resync, - &sync_blocks, 1); - else /* completed sync */ - conf->fullsync = 0; - - bitmap_close_sync(mddev->bitmap); - close_sync(conf); - return 0; - } - - if (mddev->bitmap == NULL && - mddev->recovery_cp == MaxSector && - !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && - conf->fullsync == 0) { - *skipped = 1; - return max_sector - sector_nr; - } - /* before building a request, check if we can skip these blocks.. - * This call the bitmap_start_sync doesn't actually record anything - */ - if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && - !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { - /* We can skip this block, and probably several more */ - *skipped = 1; - return sync_blocks; - } - /* - * If there is non-resync activity waiting for a turn, - * and resync is going fast enough, - * then let it though before starting on this new sync request. - */ - if (!go_faster && conf->nr_waiting) - msleep_interruptible(1000); - - bitmap_cond_end_sync(mddev->bitmap, sector_nr); - r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO); - raise_barrier(conf); - - conf->next_resync = sector_nr; - - rcu_read_lock(); - /* - * If we get a correctably read error during resync or recovery, - * we might want to read from a different device. So we - * flag all drives that could conceivably be read from for READ, - * and any others (which will be non-In_sync devices) for WRITE. - * If a read fails, we try reading from something else for which READ - * is OK. 
- */ - - r1_bio->mddev = mddev; - r1_bio->sector = sector_nr; - r1_bio->state = 0; - set_bit(R1BIO_IsSync, &r1_bio->state); - - for (i = 0; i < conf->raid_disks * 2; i++) { - struct md_rdev *rdev; - bio = r1_bio->bios[i]; - - /* take from bio_init */ - bio->bi_next = NULL; - bio->bi_flags &= ~(BIO_POOL_MASK-1); - bio->bi_flags |= 1 << BIO_UPTODATE; - bio->bi_rw = READ; - bio->bi_vcnt = 0; - bio->bi_idx = 0; - bio->bi_phys_segments = 0; - bio->bi_size = 0; - bio->bi_end_io = NULL; - bio->bi_private = NULL; - - rdev = rcu_dereference(conf->mirrors[i].rdev); - if (rdev == NULL || - test_bit(Faulty, &rdev->flags)) { - if (i < conf->raid_disks) - still_degraded = 1; - } else if (!test_bit(In_sync, &rdev->flags)) { - bio->bi_rw = WRITE; - bio->bi_end_io = end_sync_write; - write_targets ++; - } else { - /* may need to read from here */ - sector_t first_bad = MaxSector; - int bad_sectors; - - if (is_badblock(rdev, sector_nr, good_sectors, - &first_bad, &bad_sectors)) { - if (first_bad > sector_nr) - good_sectors = first_bad - sector_nr; - else { - bad_sectors -= (sector_nr - first_bad); - if (min_bad == 0 || - min_bad > bad_sectors) - min_bad = bad_sectors; - } - } - if (sector_nr < first_bad) { - if (test_bit(WriteMostly, &rdev->flags)) { - if (wonly < 0) - wonly = i; - } else { - if (disk < 0) - disk = i; - } - bio->bi_rw = READ; - bio->bi_end_io = end_sync_read; - read_targets++; - } - } - if (bio->bi_end_io) { - atomic_inc(&rdev->nr_pending); - bio->bi_sector = sector_nr + rdev->data_offset; - bio->bi_bdev = rdev->bdev; - bio->bi_private = r1_bio; - } - } - rcu_read_unlock(); - if (disk < 0) - disk = wonly; - r1_bio->read_disk = disk; - - if (read_targets == 0 && min_bad > 0) { - /* These sectors are bad on all InSync devices, so we - * need to mark them bad on all write targets - */ - int ok = 1; - for (i = 0 ; i < conf->raid_disks * 2 ; i++) - if (r1_bio->bios[i]->bi_end_io == end_sync_write) { - struct md_rdev *rdev = conf->mirrors[i].rdev; - ok = rdev_set_badblocks(rdev, sector_nr, - min_bad, 0 - ) && ok; - } - set_bit(MD_CHANGE_DEVS, &mddev->flags); - *skipped = 1; - put_buf(r1_bio); - - if (!ok) { - /* Cannot record the badblocks, so need to - * abort the resync. - * If there are multiple read targets, could just - * fail the really bad ones ??? 
- */ - conf->recovery_disabled = mddev->recovery_disabled; - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - return 0; - } else - return min_bad; - - } - if (min_bad > 0 && min_bad < good_sectors) { - /* only resync enough to reach the next bad->good - * transition */ - good_sectors = min_bad; - } - - if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0) - /* extra read targets are also write targets */ - write_targets += read_targets-1; - - if (write_targets == 0 || read_targets == 0) { - /* There is nowhere to write, so all non-sync - * drives must be failed - so we are finished - */ - sector_t rv = max_sector - sector_nr; - *skipped = 1; - put_buf(r1_bio); - return rv; - } - - if (max_sector > mddev->resync_max) - max_sector = mddev->resync_max; /* Don't do IO beyond here */ - if (max_sector > sector_nr + good_sectors) - max_sector = sector_nr + good_sectors; - nr_sectors = 0; - sync_blocks = 0; - do { - struct page *page; - int len = PAGE_SIZE; - if (sector_nr + (len>>9) > max_sector) - len = (max_sector - sector_nr) << 9; - if (len == 0) - break; - if (sync_blocks == 0) { - if (!bitmap_start_sync(mddev->bitmap, sector_nr, - &sync_blocks, still_degraded) && - !conf->fullsync && - !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) - break; - BUG_ON(sync_blocks < (PAGE_SIZE>>9)); - if ((len >> 9) > sync_blocks) - len = sync_blocks<<9; - } - - for (i = 0 ; i < conf->raid_disks * 2; i++) { - bio = r1_bio->bios[i]; - if (bio->bi_end_io) { - page = bio->bi_io_vec[bio->bi_vcnt].bv_page; - if (bio_add_page(bio, page, len, 0) == 0) { - /* stop here */ - bio->bi_io_vec[bio->bi_vcnt].bv_page = page; - while (i > 0) { - i--; - bio = r1_bio->bios[i]; - if (bio->bi_end_io==NULL) - continue; - /* remove last page from this bio */ - bio->bi_vcnt--; - bio->bi_size -= len; - bio->bi_flags &= ~(1<< BIO_SEG_VALID); - } - goto bio_full; - } - } - } - nr_sectors += len>>9; - sector_nr += len>>9; - sync_blocks -= (len>>9); - } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); - bio_full: - r1_bio->sectors = nr_sectors; - - /* For a user-requested sync, we read all readable devices and do a - * compare - */ - if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { - atomic_set(&r1_bio->remaining, read_targets); - for (i = 0; i < conf->raid_disks * 2; i++) { - bio = r1_bio->bios[i]; - if (bio->bi_end_io == end_sync_read) { - md_sync_acct(bio->bi_bdev, nr_sectors); - generic_make_request(bio); - } - } - } else { - atomic_set(&r1_bio->remaining, 1); - bio = r1_bio->bios[r1_bio->read_disk]; - md_sync_acct(bio->bi_bdev, nr_sectors); - generic_make_request(bio); - - } - return nr_sectors; -} - -static sector_t raid1_size(struct mddev *mddev, sector_t sectors, int raid_disks) -{ - if (sectors) - return sectors; - - return mddev->dev_sectors; -} - -static struct r1conf *setup_conf(struct mddev *mddev) -{ - struct r1conf *conf; - int i; - struct mirror_info *disk; - struct md_rdev *rdev; - int err = -ENOMEM; - - conf = kzalloc(sizeof(struct r1conf), GFP_KERNEL); - if (!conf) - goto abort; - - conf->mirrors = kzalloc(sizeof(struct mirror_info) - * mddev->raid_disks * 2, - GFP_KERNEL); - if (!conf->mirrors) - goto abort; - - conf->tmppage = alloc_page(GFP_KERNEL); - if (!conf->tmppage) - goto abort; - - conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL); - if (!conf->poolinfo) - goto abort; - conf->poolinfo->raid_disks = mddev->raid_disks * 2; - conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc, - r1bio_pool_free, - conf->poolinfo); - if (!conf->r1bio_pool) - goto abort; - - 
conf->poolinfo->mddev = mddev; - - err = -EINVAL; - spin_lock_init(&conf->device_lock); - rdev_for_each(rdev, mddev) { - struct request_queue *q; - int disk_idx = rdev->raid_disk; - if (disk_idx >= mddev->raid_disks - || disk_idx < 0) - continue; - if (test_bit(Replacement, &rdev->flags)) - disk = conf->mirrors + conf->raid_disks + disk_idx; - else - disk = conf->mirrors + disk_idx; - - if (disk->rdev) - goto abort; - disk->rdev = rdev; - q = bdev_get_queue(rdev->bdev); - if (q->merge_bvec_fn) - mddev->merge_check_needed = 1; - - disk->head_position = 0; - } - conf->raid_disks = mddev->raid_disks; - conf->mddev = mddev; - INIT_LIST_HEAD(&conf->retry_list); - - spin_lock_init(&conf->resync_lock); - init_waitqueue_head(&conf->wait_barrier); - - bio_list_init(&conf->pending_bio_list); - conf->pending_count = 0; - conf->recovery_disabled = mddev->recovery_disabled - 1; - - err = -EIO; - conf->last_used = -1; - for (i = 0; i < conf->raid_disks * 2; i++) { - - disk = conf->mirrors + i; - - if (i < conf->raid_disks && - disk[conf->raid_disks].rdev) { - /* This slot has a replacement. */ - if (!disk->rdev) { - /* No original, just make the replacement - * a recovering spare - */ - disk->rdev = - disk[conf->raid_disks].rdev; - disk[conf->raid_disks].rdev = NULL; - } else if (!test_bit(In_sync, &disk->rdev->flags)) - /* Original is not in_sync - bad */ - goto abort; - } - - if (!disk->rdev || - !test_bit(In_sync, &disk->rdev->flags)) { - disk->head_position = 0; - if (disk->rdev) - conf->fullsync = 1; - } else if (conf->last_used < 0) - /* - * The first working device is used as a - * starting point to read balancing. - */ - conf->last_used = i; - } - - if (conf->last_used < 0) { - printk(KERN_ERR "md/raid1:%s: no operational mirrors\n", - mdname(mddev)); - goto abort; - } - err = -ENOMEM; - conf->thread = md_register_thread(raid1d, mddev, NULL); - if (!conf->thread) { - printk(KERN_ERR - "md/raid1:%s: couldn't allocate thread\n", - mdname(mddev)); - goto abort; - } - - return conf; - - abort: - if (conf) { - if (conf->r1bio_pool) - mempool_destroy(conf->r1bio_pool); - kfree(conf->mirrors); - safe_put_page(conf->tmppage); - kfree(conf->poolinfo); - kfree(conf); - } - return ERR_PTR(err); -} - -static int stop(struct mddev *mddev); -static int run(struct mddev *mddev) -{ - struct r1conf *conf; - int i; - struct md_rdev *rdev; - int ret; - - if (mddev->level != 1) { - printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n", - mdname(mddev), mddev->level); - return -EIO; - } - if (mddev->reshape_position != MaxSector) { - printk(KERN_ERR "md/raid1:%s: reshape_position set but not supported\n", - mdname(mddev)); - return -EIO; - } - /* - * copy the already verified devices into our private RAID1 - * bookkeeping area. 
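setup_conf() above lays the mirrors array out as raid_disks original slots followed by raid_disks replacement slots, and promotes a replacement into the original slot when no original device was found. A minimal standalone model of just that promotion pass, with made-up device numbers (the real code also rejects an out-of-sync original that has a replacement):

#include <stdio.h>

#define RAID_DISKS 3

int main(void)
{
    /* index 0..2 = originals, 3..5 = replacements; 0 means "no device" */
    int slots[RAID_DISKS * 2] = { 11, 0, 13, 0, 22, 23 };
    int i;

    for (i = 0; i < RAID_DISKS; i++) {
        if (slots[RAID_DISKS + i] && !slots[i]) {
            /* no original: the replacement becomes a recovering spare */
            slots[i] = slots[RAID_DISKS + i];
            slots[RAID_DISKS + i] = 0;
        }
    }
    for (i = 0; i < RAID_DISKS * 2; i++)
        printf("slot %d: %d\n", i, slots[i]);
    return 0;
}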
[whatever we allocate in run(), - * should be freed in stop()] - */ - if (mddev->private == NULL) - conf = setup_conf(mddev); - else - conf = mddev->private; - - if (IS_ERR(conf)) - return PTR_ERR(conf); - - rdev_for_each(rdev, mddev) { - if (!mddev->gendisk) - continue; - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - } - - mddev->degraded = 0; - for (i=0; i < conf->raid_disks; i++) - if (conf->mirrors[i].rdev == NULL || - !test_bit(In_sync, &conf->mirrors[i].rdev->flags) || - test_bit(Faulty, &conf->mirrors[i].rdev->flags)) - mddev->degraded++; - - if (conf->raid_disks - mddev->degraded == 1) - mddev->recovery_cp = MaxSector; - - if (mddev->recovery_cp != MaxSector) - printk(KERN_NOTICE "md/raid1:%s: not clean" - " -- starting background reconstruction\n", - mdname(mddev)); - printk(KERN_INFO - "md/raid1:%s: active with %d out of %d mirrors\n", - mdname(mddev), mddev->raid_disks - mddev->degraded, - mddev->raid_disks); - - /* - * Ok, everything is just fine now - */ - mddev->thread = conf->thread; - conf->thread = NULL; - mddev->private = conf; - - md_set_array_sectors(mddev, raid1_size(mddev, 0, 0)); - - if (mddev->queue) { - mddev->queue->backing_dev_info.congested_fn = raid1_congested; - mddev->queue->backing_dev_info.congested_data = mddev; - blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec); - } - - ret = md_integrity_register(mddev); - if (ret) - stop(mddev); - return ret; -} - -static int stop(struct mddev *mddev) -{ - struct r1conf *conf = mddev->private; - struct bitmap *bitmap = mddev->bitmap; - - /* wait for behind writes to complete */ - if (bitmap && atomic_read(&bitmap->behind_writes) > 0) { - printk(KERN_INFO "md/raid1:%s: behind writes in progress - waiting to stop.\n", - mdname(mddev)); - /* need to kick something here to make sure I/O goes? */ - wait_event(bitmap->behind_wait, - atomic_read(&bitmap->behind_writes) == 0); - } - - raise_barrier(conf); - lower_barrier(conf); - - md_unregister_thread(&mddev->thread); - if (conf->r1bio_pool) - mempool_destroy(conf->r1bio_pool); - kfree(conf->mirrors); - kfree(conf->poolinfo); - kfree(conf); - mddev->private = NULL; - return 0; -} - -static int raid1_resize(struct mddev *mddev, sector_t sectors) -{ - /* no resync is happening, and there is enough space - * on all devices, so we can resize. - * We need to make sure resync covers any new space. - * If the array is shrinking we should possibly wait until - * any io in the removed space completes, but it hardly seems - * worth it. - */ - md_set_array_sectors(mddev, raid1_size(mddev, sectors, 0)); - if (mddev->array_sectors > raid1_size(mddev, sectors, 0)) - return -EINVAL; - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); - if (sectors > mddev->dev_sectors && - mddev->recovery_cp > mddev->dev_sectors) { - mddev->recovery_cp = mddev->dev_sectors; - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - } - mddev->dev_sectors = sectors; - mddev->resync_max_sectors = sectors; - return 0; -} - -static int raid1_reshape(struct mddev *mddev) -{ - /* We need to: - * 1/ resize the r1bio_pool - * 2/ resize conf->mirrors - * - * We allocate a new r1bio_pool if we can. - * Then raise a device barrier and wait until all IO stops. - * Then resize conf->mirrors and swap in the new r1bio pool. - * - * At the same time, we "pack" the devices so that all the missing - * devices have the higher raid_disk numbers. 
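raid1_resize() above only has real work to do when the array grows: if the resync checkpoint already sat at or beyond the old device size, it is pulled back to the old size so the newly exposed space gets resynced. A small standalone sketch of that decision, with illustrative sector counts and ~0 standing in for MaxSector:

#include <stdio.h>

int main(void)
{
    unsigned long long dev_sectors = 1ULL << 20;   /* old per-device size */
    unsigned long long new_sectors = 1ULL << 21;   /* grown per-device size */
    unsigned long long recovery_cp = ~0ULL;        /* "MaxSector": array was fully clean */
    int recovery_needed = 0;

    if (new_sectors > dev_sectors && recovery_cp > dev_sectors) {
        recovery_cp = dev_sectors;   /* resync must cover the new space */
        recovery_needed = 1;
    }
    dev_sectors = new_sectors;

    printf("recovery_cp=%llu, recovery needed=%d, dev_sectors=%llu\n",
           recovery_cp, recovery_needed, dev_sectors);
    return 0;
}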
- */ - mempool_t *newpool, *oldpool; - struct pool_info *newpoolinfo; - struct mirror_info *newmirrors; - struct r1conf *conf = mddev->private; - int cnt, raid_disks; - unsigned long flags; - int d, d2, err; - - /* Cannot change chunk_size, layout, or level */ - if (mddev->chunk_sectors != mddev->new_chunk_sectors || - mddev->layout != mddev->new_layout || - mddev->level != mddev->new_level) { - mddev->new_chunk_sectors = mddev->chunk_sectors; - mddev->new_layout = mddev->layout; - mddev->new_level = mddev->level; - return -EINVAL; - } - - err = md_allow_write(mddev); - if (err) - return err; - - raid_disks = mddev->raid_disks + mddev->delta_disks; - - if (raid_disks < conf->raid_disks) { - cnt=0; - for (d= 0; d < conf->raid_disks; d++) - if (conf->mirrors[d].rdev) - cnt++; - if (cnt > raid_disks) - return -EBUSY; - } - - newpoolinfo = kmalloc(sizeof(*newpoolinfo), GFP_KERNEL); - if (!newpoolinfo) - return -ENOMEM; - newpoolinfo->mddev = mddev; - newpoolinfo->raid_disks = raid_disks * 2; - - newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc, - r1bio_pool_free, newpoolinfo); - if (!newpool) { - kfree(newpoolinfo); - return -ENOMEM; - } - newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks * 2, - GFP_KERNEL); - if (!newmirrors) { - kfree(newpoolinfo); - mempool_destroy(newpool); - return -ENOMEM; - } - - raise_barrier(conf); - - /* ok, everything is stopped */ - oldpool = conf->r1bio_pool; - conf->r1bio_pool = newpool; - - for (d = d2 = 0; d < conf->raid_disks; d++) { - struct md_rdev *rdev = conf->mirrors[d].rdev; - if (rdev && rdev->raid_disk != d2) { - sysfs_unlink_rdev(mddev, rdev); - rdev->raid_disk = d2; - sysfs_unlink_rdev(mddev, rdev); - if (sysfs_link_rdev(mddev, rdev)) - printk(KERN_WARNING - "md/raid1:%s: cannot register rd%d\n", - mdname(mddev), rdev->raid_disk); - } - if (rdev) - newmirrors[d2++].rdev = rdev; - } - kfree(conf->mirrors); - conf->mirrors = newmirrors; - kfree(conf->poolinfo); - conf->poolinfo = newpoolinfo; - - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded += (raid_disks - conf->raid_disks); - spin_unlock_irqrestore(&conf->device_lock, flags); - conf->raid_disks = mddev->raid_disks = raid_disks; - mddev->delta_disks = 0; - - conf->last_used = 0; /* just make sure it is in-range */ - lower_barrier(conf); - - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - md_wakeup_thread(mddev->thread); - - mempool_destroy(oldpool); - return 0; -} - -static void raid1_quiesce(struct mddev *mddev, int state) -{ - struct r1conf *conf = mddev->private; - - switch(state) { - case 2: /* wake for suspend */ - wake_up(&conf->wait_barrier); - break; - case 1: - raise_barrier(conf); - break; - case 0: - lower_barrier(conf); - break; - } -} - -static void *raid1_takeover(struct mddev *mddev) -{ - /* raid1 can take over: - * raid5 with 2 devices, any layout or chunk size - */ - if (mddev->level == 5 && mddev->raid_disks == 2) { - struct r1conf *conf; - mddev->new_level = 1; - mddev->new_layout = 0; - mddev->new_chunk_sectors = 0; - conf = setup_conf(mddev); - if (!IS_ERR(conf)) - conf->barrier = 1; - return conf; - } - return ERR_PTR(-EINVAL); -} - -static struct md_personality raid1_personality = -{ - .name = "raid1", - .level = 1, - .owner = THIS_MODULE, - .make_request = make_request, - .run = run, - .stop = stop, - .status = status, - .error_handler = error, - .hot_add_disk = raid1_add_disk, - .hot_remove_disk= raid1_remove_disk, - .spare_active = raid1_spare_active, - .sync_request = sync_request, - .resize = raid1_resize, - .size = raid1_size, - 
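The d/d2 loop in raid1_reshape() above packs the surviving devices into the lowest slots of the new mirrors array, so every hole ends up at the high raid_disk numbers. A standalone model of that packing step with placeholder device names:

#include <stdio.h>
#include <string.h>

int main(void)
{
    const char *old[5] = { "sda", NULL, "sdc", NULL, "sde" };
    const char *new[5];
    int d, d2 = 0;

    memset(new, 0, sizeof(new));
    for (d = 0; d < 5; d++)
        if (old[d])
            new[d2++] = old[d];   /* present device keeps working, but moves to slot d2 */

    for (d = 0; d < 5; d++)
        printf("new slot %d: %s\n", d, new[d] ? new[d] : "(empty)");
    return 0;
}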
.check_reshape = raid1_reshape, - .quiesce = raid1_quiesce, - .takeover = raid1_takeover, -}; - -static int __init raid_init(void) -{ - return register_md_personality(&raid1_personality); -} - -static void raid_exit(void) -{ - unregister_md_personality(&raid1_personality); -} - -module_init(raid_init); -module_exit(raid_exit); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("RAID1 (mirroring) personality for MD"); -MODULE_ALIAS("md-personality-3"); /* RAID1 */ -MODULE_ALIAS("md-raid1"); -MODULE_ALIAS("md-level-1"); - -module_param(max_queued_requests, int, S_IRUGO|S_IWUSR); diff --git a/ANDROID_3.4.5/drivers/md/raid1.h b/ANDROID_3.4.5/drivers/md/raid1.h deleted file mode 100644 index 80ded139..00000000 --- a/ANDROID_3.4.5/drivers/md/raid1.h +++ /dev/null @@ -1,177 +0,0 @@ -#ifndef _RAID1_H -#define _RAID1_H - -struct mirror_info { - struct md_rdev *rdev; - sector_t head_position; -}; - -/* - * memory pools need a pointer to the mddev, so they can force an unplug - * when memory is tight, and a count of the number of drives that the - * pool was allocated for, so they know how much to allocate and free. - * mddev->raid_disks cannot be used, as it can change while a pool is active - * These two datums are stored in a kmalloced struct. - * The 'raid_disks' here is twice the raid_disks in r1conf. - * This allows space for each 'real' device can have a replacement in the - * second half of the array. - */ - -struct pool_info { - struct mddev *mddev; - int raid_disks; -}; - -struct r1conf { - struct mddev *mddev; - struct mirror_info *mirrors; /* twice 'raid_disks' to - * allow for replacements. - */ - int raid_disks; - - /* When choose the best device for a read (read_balance()) - * we try to keep sequential reads one the same device - * using 'last_used' and 'next_seq_sect' - */ - int last_used; - sector_t next_seq_sect; - /* During resync, read_balancing is only allowed on the part - * of the array that has been resynced. 'next_resync' tells us - * where that is. - */ - sector_t next_resync; - - spinlock_t device_lock; - - /* list of 'struct r1bio' that need to be processed by raid1d, - * whether to retry a read, writeout a resync or recovery - * block, or anything else. - */ - struct list_head retry_list; - - /* queue pending writes to be submitted on unplug */ - struct bio_list pending_bio_list; - int pending_count; - - /* for use when syncing mirrors: - * We don't allow both normal IO and resync/recovery IO at - * the same time - resync/recovery can only happen when there - * is no other IO. So when either is active, the other has to wait. - * See more details description in raid1.c near raise_barrier(). - */ - wait_queue_head_t wait_barrier; - spinlock_t resync_lock; - int nr_pending; - int nr_waiting; - int nr_queued; - int barrier; - - /* Set to 1 if a full sync is needed, (fresh device added). - * Cleared when a sync completes. - */ - int fullsync; - - /* When the same as mddev->recovery_disabled we don't allow - * recovery to be attempted as we expect a read error. - */ - int recovery_disabled; - - - /* poolinfo contains information about the content of the - * mempools - it changes when the array grows or shrinks - */ - struct pool_info *poolinfo; - mempool_t *r1bio_pool; - mempool_t *r1buf_pool; - - /* temporary buffer to synchronous IO when attempting to repair - * a read error. - */ - struct page *tmppage; - - - /* When taking over an array from a different personality, we store - * the new thread here until we fully activate the array. 
- */ - struct md_thread *thread; -}; - -/* - * this is our 'private' RAID1 bio. - * - * it contains information about what kind of IO operations were started - * for this RAID1 operation, and about their status: - */ - -struct r1bio { - atomic_t remaining; /* 'have we finished' count, - * used from IRQ handlers - */ - atomic_t behind_remaining; /* number of write-behind ios remaining - * in this BehindIO request - */ - sector_t sector; - int sectors; - unsigned long state; - struct mddev *mddev; - /* - * original bio going to /dev/mdx - */ - struct bio *master_bio; - /* - * if the IO is in READ direction, then this is where we read - */ - int read_disk; - - struct list_head retry_list; - /* Next two are only valid when R1BIO_BehindIO is set */ - struct bio_vec *behind_bvecs; - int behind_page_count; - /* - * if the IO is in WRITE direction, then multiple bios are used. - * We choose the number when they are allocated. - */ - struct bio *bios[0]; - /* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/ -}; - -/* when we get a read error on a read-only array, we redirect to another - * device without failing the first device, or trying to over-write to - * correct the read error. To keep track of bad blocks on a per-bio - * level, we store IO_BLOCKED in the appropriate 'bios' pointer - */ -#define IO_BLOCKED ((struct bio *)1) -/* When we successfully write to a known bad-block, we need to remove the - * bad-block marking which must be done from process context. So we record - * the success by setting bios[n] to IO_MADE_GOOD - */ -#define IO_MADE_GOOD ((struct bio *)2) - -#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) - -/* bits for r1bio.state */ -#define R1BIO_Uptodate 0 -#define R1BIO_IsSync 1 -#define R1BIO_Degraded 2 -#define R1BIO_BehindIO 3 -/* Set ReadError on bios that experience a readerror so that - * raid1d knows what to do with them. - */ -#define R1BIO_ReadError 4 -/* For write-behind requests, we call bi_end_io when - * the last non-write-behind device completes, providing - * any write was successful. Otherwise we call when - * any write-behind write succeeds, otherwise we call - * with failure when last write completes (and all failed). - * Record that bi_end_io was called with this flag... - */ -#define R1BIO_Returned 6 -/* If a write for this request means we can clear some - * known-bad-block records, we set this flag - */ -#define R1BIO_MadeGood 7 -#define R1BIO_WriteError 8 - -extern int md_raid1_congested(struct mddev *mddev, int bits); - -#endif diff --git a/ANDROID_3.4.5/drivers/md/raid10.c b/ANDROID_3.4.5/drivers/md/raid10.c deleted file mode 100644 index a954c95d..00000000 --- a/ANDROID_3.4.5/drivers/md/raid10.c +++ /dev/null @@ -1,3584 +0,0 @@ -/* - * raid10.c : Multiple Devices driver for Linux - * - * Copyright (C) 2000-2004 Neil Brown - * - * RAID-10 support for md. - * - * Base on code in raid1.c. See raid1.c for further copyright information. - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * You should have received a copy of the GNU General Public License - * (for example /usr/src/linux/COPYING); if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
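IO_BLOCKED and IO_MADE_GOOD above reuse the bios[] pointer slots themselves to carry per-device state: the integers 1 and 2 are stored where a real struct bio pointer would live, and BIO_SPECIAL() tells the markers apart from real pointers, which never have such small addresses. A standalone sketch of the same sentinel-pointer technique (assumes a pointer fits in an unsigned long, as the kernel does here):

#include <stdio.h>

struct bio { int dummy; };

#define IO_BLOCKED   ((struct bio *)1)
#define IO_MADE_GOOD ((struct bio *)2)
#define BIO_SPECIAL(bio) ((unsigned long)(bio) <= 2)

int main(void)
{
    struct bio real;
    struct bio *slots[3] = { &real, IO_BLOCKED, IO_MADE_GOOD };
    int i;

    for (i = 0; i < 3; i++) {
        if (BIO_SPECIAL(slots[i]))
            printf("slot %d: marker %lu\n", i, (unsigned long)slots[i]);
        else
            printf("slot %d: real bio\n", i);
    }
    return 0;
}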
- */ - -#include <linux/slab.h> -#include <linux/delay.h> -#include <linux/blkdev.h> -#include <linux/module.h> -#include <linux/seq_file.h> -#include <linux/ratelimit.h> -#include "md.h" -#include "raid10.h" -#include "raid0.h" -#include "bitmap.h" - -/* - * RAID10 provides a combination of RAID0 and RAID1 functionality. - * The layout of data is defined by - * chunk_size - * raid_disks - * near_copies (stored in low byte of layout) - * far_copies (stored in second byte of layout) - * far_offset (stored in bit 16 of layout ) - * - * The data to be stored is divided into chunks using chunksize. - * Each device is divided into far_copies sections. - * In each section, chunks are laid out in a style similar to raid0, but - * near_copies copies of each chunk is stored (each on a different drive). - * The starting device for each section is offset near_copies from the starting - * device of the previous section. - * Thus they are (near_copies*far_copies) of each chunk, and each is on a different - * drive. - * near_copies and far_copies must be at least one, and their product is at most - * raid_disks. - * - * If far_offset is true, then the far_copies are handled a bit differently. - * The copies are still in different stripes, but instead of be very far apart - * on disk, there are adjacent stripes. - */ - -/* - * Number of guaranteed r10bios in case of extreme VM load: - */ -#define NR_RAID10_BIOS 256 - -/* When there are this many requests queue to be written by - * the raid10 thread, we become 'congested' to provide back-pressure - * for writeback. - */ -static int max_queued_requests = 1024; - -static void allow_barrier(struct r10conf *conf); -static void lower_barrier(struct r10conf *conf); -static int enough(struct r10conf *conf, int ignore); - -static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) -{ - struct r10conf *conf = data; - int size = offsetof(struct r10bio, devs[conf->copies]); - - /* allocate a r10bio with room for raid_disks entries in the - * bios array */ - return kzalloc(size, gfp_flags); -} - -static void r10bio_pool_free(void *r10_bio, void *data) -{ - kfree(r10_bio); -} - -/* Maximum size of each resync request */ -#define RESYNC_BLOCK_SIZE (64*1024) -#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) -/* amount of memory to reserve for resync requests */ -#define RESYNC_WINDOW (1024*1024) -/* maximum number of concurrent requests, memory permitting */ -#define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE) - -/* - * When performing a resync, we need to read and compare, so - * we need as many pages are there are copies. - * When performing a recovery, we need 2 bios, one for read, - * one for write (we recover only one drive per r10buf) - * - */ -static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) -{ - struct r10conf *conf = data; - struct page *page; - struct r10bio *r10_bio; - struct bio *bio; - int i, j; - int nalloc; - - r10_bio = r10bio_pool_alloc(gfp_flags, conf); - if (!r10_bio) - return NULL; - - if (test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery)) - nalloc = conf->copies; /* resync */ - else - nalloc = 2; /* recovery */ - - /* - * Allocate bios. - */ - for (j = nalloc ; j-- ; ) { - bio = bio_kmalloc(gfp_flags, RESYNC_PAGES); - if (!bio) - goto out_free_bio; - r10_bio->devs[j].bio = bio; - if (!conf->have_replacement) - continue; - bio = bio_kmalloc(gfp_flags, RESYNC_PAGES); - if (!bio) - goto out_free_bio; - r10_bio->devs[j].repl_bio = bio; - } - /* - * Allocate RESYNC_PAGES data pages and attach them - * where needed. 
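r10bio_pool_alloc() above sizes each allocation as offsetof(struct r10bio, devs[conf->copies]), so the trailing devs[] array gets exactly one slot per copy. A standalone userspace sketch of the same flexible-array-member sizing, written with an equivalent offsetof-plus-multiply expression and invented struct names:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct dev_slot { int devnum; long addr; };

struct r10bio_like {
    long sector;
    int  sectors;
    struct dev_slot devs[];   /* flexible array member, sized at allocation time */
};

int main(void)
{
    int copies = 3;   /* hypothetical copies count */
    size_t size = offsetof(struct r10bio_like, devs)
                  + copies * sizeof(struct dev_slot);
    struct r10bio_like *r = calloc(1, size);

    if (!r)
        return 1;
    printf("allocated %zu bytes for %d copies\n", size, copies);
    r->devs[copies - 1].devnum = 42;   /* the last slot is within the allocation */
    free(r);
    return 0;
}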
- */ - for (j = 0 ; j < nalloc; j++) { - struct bio *rbio = r10_bio->devs[j].repl_bio; - bio = r10_bio->devs[j].bio; - for (i = 0; i < RESYNC_PAGES; i++) { - if (j == 1 && !test_bit(MD_RECOVERY_SYNC, - &conf->mddev->recovery)) { - /* we can share bv_page's during recovery */ - struct bio *rbio = r10_bio->devs[0].bio; - page = rbio->bi_io_vec[i].bv_page; - get_page(page); - } else - page = alloc_page(gfp_flags); - if (unlikely(!page)) - goto out_free_pages; - - bio->bi_io_vec[i].bv_page = page; - if (rbio) - rbio->bi_io_vec[i].bv_page = page; - } - } - - return r10_bio; - -out_free_pages: - for ( ; i > 0 ; i--) - safe_put_page(bio->bi_io_vec[i-1].bv_page); - while (j--) - for (i = 0; i < RESYNC_PAGES ; i++) - safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page); - j = -1; -out_free_bio: - while (++j < nalloc) { - bio_put(r10_bio->devs[j].bio); - if (r10_bio->devs[j].repl_bio) - bio_put(r10_bio->devs[j].repl_bio); - } - r10bio_pool_free(r10_bio, conf); - return NULL; -} - -static void r10buf_pool_free(void *__r10_bio, void *data) -{ - int i; - struct r10conf *conf = data; - struct r10bio *r10bio = __r10_bio; - int j; - - for (j=0; j < conf->copies; j++) { - struct bio *bio = r10bio->devs[j].bio; - if (bio) { - for (i = 0; i < RESYNC_PAGES; i++) { - safe_put_page(bio->bi_io_vec[i].bv_page); - bio->bi_io_vec[i].bv_page = NULL; - } - bio_put(bio); - } - bio = r10bio->devs[j].repl_bio; - if (bio) - bio_put(bio); - } - r10bio_pool_free(r10bio, conf); -} - -static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio) -{ - int i; - - for (i = 0; i < conf->copies; i++) { - struct bio **bio = & r10_bio->devs[i].bio; - if (!BIO_SPECIAL(*bio)) - bio_put(*bio); - *bio = NULL; - bio = &r10_bio->devs[i].repl_bio; - if (r10_bio->read_slot < 0 && !BIO_SPECIAL(*bio)) - bio_put(*bio); - *bio = NULL; - } -} - -static void free_r10bio(struct r10bio *r10_bio) -{ - struct r10conf *conf = r10_bio->mddev->private; - - put_all_bios(conf, r10_bio); - mempool_free(r10_bio, conf->r10bio_pool); -} - -static void put_buf(struct r10bio *r10_bio) -{ - struct r10conf *conf = r10_bio->mddev->private; - - mempool_free(r10_bio, conf->r10buf_pool); - - lower_barrier(conf); -} - -static void reschedule_retry(struct r10bio *r10_bio) -{ - unsigned long flags; - struct mddev *mddev = r10_bio->mddev; - struct r10conf *conf = mddev->private; - - spin_lock_irqsave(&conf->device_lock, flags); - list_add(&r10_bio->retry_list, &conf->retry_list); - conf->nr_queued ++; - spin_unlock_irqrestore(&conf->device_lock, flags); - - /* wake up frozen array... */ - wake_up(&conf->wait_barrier); - - md_wakeup_thread(mddev->thread); -} - -/* - * raid_end_bio_io() is called when we have finished servicing a mirrored - * operation and are ready to return a success/failure code to the buffer - * cache layer. - */ -static void raid_end_bio_io(struct r10bio *r10_bio) -{ - struct bio *bio = r10_bio->master_bio; - int done; - struct r10conf *conf = r10_bio->mddev->private; - - if (bio->bi_phys_segments) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - bio->bi_phys_segments--; - done = (bio->bi_phys_segments == 0); - spin_unlock_irqrestore(&conf->device_lock, flags); - } else - done = 1; - if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - if (done) { - bio_endio(bio, 0); - /* - * Wake up any possible resync thread that waits for the device - * to go idle. 
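The out_free_pages/out_free_bio labels above are the usual goto-based unwind: on failure, walk backwards and release only what was actually allocated. A stripped-down standalone illustration of the pattern, using plain malloc/free and nothing md-specific:

#include <stdio.h>
#include <stdlib.h>

#define NBUF 4

int main(void)
{
    void *buf[NBUF] = { NULL };
    int i;

    for (i = 0; i < NBUF; i++) {
        buf[i] = malloc(4096);
        if (!buf[i])
            goto out_free;   /* unwind only the earlier allocations */
    }
    printf("all %d buffers allocated\n", NBUF);
    /* ... use the buffers ... */
out_free:
    while (i--)              /* on either path, free whatever exists */
        free(buf[i]);
    return 0;
}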
- */ - allow_barrier(conf); - } - free_r10bio(r10_bio); -} - -/* - * Update disk head position estimator based on IRQ completion info. - */ -static inline void update_head_pos(int slot, struct r10bio *r10_bio) -{ - struct r10conf *conf = r10_bio->mddev->private; - - conf->mirrors[r10_bio->devs[slot].devnum].head_position = - r10_bio->devs[slot].addr + (r10_bio->sectors); -} - -/* - * Find the disk number which triggered given bio - */ -static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio, - struct bio *bio, int *slotp, int *replp) -{ - int slot; - int repl = 0; - - for (slot = 0; slot < conf->copies; slot++) { - if (r10_bio->devs[slot].bio == bio) - break; - if (r10_bio->devs[slot].repl_bio == bio) { - repl = 1; - break; - } - } - - BUG_ON(slot == conf->copies); - update_head_pos(slot, r10_bio); - - if (slotp) - *slotp = slot; - if (replp) - *replp = repl; - return r10_bio->devs[slot].devnum; -} - -static void raid10_end_read_request(struct bio *bio, int error) -{ - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct r10bio *r10_bio = bio->bi_private; - int slot, dev; - struct md_rdev *rdev; - struct r10conf *conf = r10_bio->mddev->private; - - - slot = r10_bio->read_slot; - dev = r10_bio->devs[slot].devnum; - rdev = r10_bio->devs[slot].rdev; - /* - * this branch is our 'one mirror IO has finished' event handler: - */ - update_head_pos(slot, r10_bio); - - if (uptodate) { - /* - * Set R10BIO_Uptodate in our master bio, so that - * we will return a good error code to the higher - * levels even if IO on some other mirrored buffer fails. - * - * The 'master' represents the composite IO operation to - * user-side. So if something waits for IO, then it will - * wait for the 'master' bio. - */ - set_bit(R10BIO_Uptodate, &r10_bio->state); - } else { - /* If all other devices that store this block have - * failed, we want to return the error upwards rather - * than fail the last device. 
Here we redefine - * "uptodate" to mean "Don't want to retry" - */ - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - if (!enough(conf, rdev->raid_disk)) - uptodate = 1; - spin_unlock_irqrestore(&conf->device_lock, flags); - } - if (uptodate) { - raid_end_bio_io(r10_bio); - rdev_dec_pending(rdev, conf->mddev); - } else { - /* - * oops, read error - keep the refcount on the rdev - */ - char b[BDEVNAME_SIZE]; - printk_ratelimited(KERN_ERR - "md/raid10:%s: %s: rescheduling sector %llu\n", - mdname(conf->mddev), - bdevname(rdev->bdev, b), - (unsigned long long)r10_bio->sector); - set_bit(R10BIO_ReadError, &r10_bio->state); - reschedule_retry(r10_bio); - } -} - -static void close_write(struct r10bio *r10_bio) -{ - /* clear the bitmap if all writes complete successfully */ - bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector, - r10_bio->sectors, - !test_bit(R10BIO_Degraded, &r10_bio->state), - 0); - md_write_end(r10_bio->mddev); -} - -static void one_write_done(struct r10bio *r10_bio) -{ - if (atomic_dec_and_test(&r10_bio->remaining)) { - if (test_bit(R10BIO_WriteError, &r10_bio->state)) - reschedule_retry(r10_bio); - else { - close_write(r10_bio); - if (test_bit(R10BIO_MadeGood, &r10_bio->state)) - reschedule_retry(r10_bio); - else - raid_end_bio_io(r10_bio); - } - } -} - -static void raid10_end_write_request(struct bio *bio, int error) -{ - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct r10bio *r10_bio = bio->bi_private; - int dev; - int dec_rdev = 1; - struct r10conf *conf = r10_bio->mddev->private; - int slot, repl; - struct md_rdev *rdev = NULL; - - dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl); - - if (repl) - rdev = conf->mirrors[dev].replacement; - if (!rdev) { - smp_rmb(); - repl = 0; - rdev = conf->mirrors[dev].rdev; - } - /* - * this branch is our 'one mirror IO has finished' event handler: - */ - if (!uptodate) { - if (repl) - /* Never record new bad blocks to replacement, - * just fail it. - */ - md_error(rdev->mddev, rdev); - else { - set_bit(WriteErrorSeen, &rdev->flags); - if (!test_and_set_bit(WantReplacement, &rdev->flags)) - set_bit(MD_RECOVERY_NEEDED, - &rdev->mddev->recovery); - set_bit(R10BIO_WriteError, &r10_bio->state); - dec_rdev = 0; - } - } else { - /* - * Set R10BIO_Uptodate in our master bio, so that - * we will return a good error code for to the higher - * levels even if IO on some other mirrored buffer fails. - * - * The 'master' represents the composite IO operation to - * user-side. So if something waits for IO, then it will - * wait for the 'master' bio. - */ - sector_t first_bad; - int bad_sectors; - - set_bit(R10BIO_Uptodate, &r10_bio->state); - - /* Maybe we can clear some bad blocks. */ - if (is_badblock(rdev, - r10_bio->devs[slot].addr, - r10_bio->sectors, - &first_bad, &bad_sectors)) { - bio_put(bio); - if (repl) - r10_bio->devs[slot].repl_bio = IO_MADE_GOOD; - else - r10_bio->devs[slot].bio = IO_MADE_GOOD; - dec_rdev = 0; - set_bit(R10BIO_MadeGood, &r10_bio->state); - } - } - - /* - * - * Let's see if all mirrored write operations have finished - * already. - */ - one_write_done(r10_bio); - if (dec_rdev) - rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev); -} - -/* - * RAID10 layout manager - * As well as the chunksize and raid_disks count, there are two - * parameters: near_copies and far_copies. - * near_copies * far_copies must be <= raid_disks. - * Normally one of these will be 1. - * If both are 1, we get raid0. - * If near_copies == raid_disks, we get raid1. 
- * - * Chunks are laid out in raid0 style with near_copies copies of the - * first chunk, followed by near_copies copies of the next chunk and - * so on. - * If far_copies > 1, then after 1/far_copies of the array has been assigned - * as described above, we start again with a device offset of near_copies. - * So we effectively have another copy of the whole array further down all - * the drives, but with blocks on different drives. - * With this layout, and block is never stored twice on the one device. - * - * raid10_find_phys finds the sector offset of a given virtual sector - * on each device that it is on. - * - * raid10_find_virt does the reverse mapping, from a device and a - * sector offset to a virtual address - */ - -static void raid10_find_phys(struct r10conf *conf, struct r10bio *r10bio) -{ - int n,f; - sector_t sector; - sector_t chunk; - sector_t stripe; - int dev; - - int slot = 0; - - /* now calculate first sector/dev */ - chunk = r10bio->sector >> conf->chunk_shift; - sector = r10bio->sector & conf->chunk_mask; - - chunk *= conf->near_copies; - stripe = chunk; - dev = sector_div(stripe, conf->raid_disks); - if (conf->far_offset) - stripe *= conf->far_copies; - - sector += stripe << conf->chunk_shift; - - /* and calculate all the others */ - for (n=0; n < conf->near_copies; n++) { - int d = dev; - sector_t s = sector; - r10bio->devs[slot].addr = sector; - r10bio->devs[slot].devnum = d; - slot++; - - for (f = 1; f < conf->far_copies; f++) { - d += conf->near_copies; - if (d >= conf->raid_disks) - d -= conf->raid_disks; - s += conf->stride; - r10bio->devs[slot].devnum = d; - r10bio->devs[slot].addr = s; - slot++; - } - dev++; - if (dev >= conf->raid_disks) { - dev = 0; - sector += (conf->chunk_mask + 1); - } - } - BUG_ON(slot != conf->copies); -} - -static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) -{ - sector_t offset, chunk, vchunk; - - offset = sector & conf->chunk_mask; - if (conf->far_offset) { - int fc; - chunk = sector >> conf->chunk_shift; - fc = sector_div(chunk, conf->far_copies); - dev -= fc * conf->near_copies; - if (dev < 0) - dev += conf->raid_disks; - } else { - while (sector >= conf->stride) { - sector -= conf->stride; - if (dev < conf->near_copies) - dev += conf->raid_disks - conf->near_copies; - else - dev -= conf->near_copies; - } - chunk = sector >> conf->chunk_shift; - } - vchunk = chunk * conf->raid_disks + dev; - sector_div(vchunk, conf->near_copies); - return (vchunk << conf->chunk_shift) + offset; -} - -/** - * raid10_mergeable_bvec -- tell bio layer if a two requests can be merged - * @q: request queue - * @bvm: properties of new bio - * @biovec: the request that could be merged to it. - * - * Return amount of bytes we can accept at this offset - * This requires checking for end-of-chunk if near_copies != raid_disks, - * and for subordinate merge_bvec_fns if merge_check_needed. 
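raid10_find_phys() above turns one logical sector into conf->copies (device, device-sector) pairs: the near copies sit on adjacent devices in the same stripe, and each additional far copy moves near_copies devices along and one stride further into each disk. A standalone model of the far_offset == 0 case with invented geometry (4 disks, 2 near and 2 far copies, 64-sector chunks, a fixed 1024-sector stride; the real stride is derived from the device size):

#include <stdio.h>

#define RAID_DISKS   4
#define NEAR_COPIES  2
#define FAR_COPIES   2
#define CHUNK_SECTS  64                 /* power of two, as in md */
#define CHUNK_MASK   (CHUNK_SECTS - 1)
#define STRIDE       1024               /* sectors per far section on each disk */

static void find_phys(long long logical)
{
    long long chunk  = logical / CHUNK_SECTS;
    long long offset = logical & CHUNK_MASK;
    long long stripe;
    int dev, n, f;

    chunk *= NEAR_COPIES;               /* each stripe holds fewer distinct chunks */
    stripe = chunk / RAID_DISKS;
    dev    = chunk % RAID_DISKS;
    printf("logical %lld:", logical);
    for (n = 0; n < NEAR_COPIES; n++) {
        int d = dev;
        long long s = stripe * CHUNK_SECTS + offset;
        printf("  (dev %d, sect %lld)", d, s);
        for (f = 1; f < FAR_COPIES; f++) {
            d = (d + NEAR_COPIES) % RAID_DISKS;   /* far section starts NEAR_COPIES later */
            s += STRIDE;
            printf("  (dev %d, sect %lld)", d, s);
        }
        dev = (dev + 1) % RAID_DISKS;
    }
    printf("\n");
}

int main(void)
{
    long long sect;

    for (sect = 0; sect < 4 * CHUNK_SECTS; sect += CHUNK_SECTS)
        find_phys(sect);
    return 0;
}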
- */ -static int raid10_mergeable_bvec(struct request_queue *q, - struct bvec_merge_data *bvm, - struct bio_vec *biovec) -{ - struct mddev *mddev = q->queuedata; - struct r10conf *conf = mddev->private; - sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); - int max; - unsigned int chunk_sectors = mddev->chunk_sectors; - unsigned int bio_sectors = bvm->bi_size >> 9; - - if (conf->near_copies < conf->raid_disks) { - max = (chunk_sectors - ((sector & (chunk_sectors - 1)) - + bio_sectors)) << 9; - if (max < 0) - /* bio_add cannot handle a negative return */ - max = 0; - if (max <= biovec->bv_len && bio_sectors == 0) - return biovec->bv_len; - } else - max = biovec->bv_len; - - if (mddev->merge_check_needed) { - struct r10bio r10_bio; - int s; - r10_bio.sector = sector; - raid10_find_phys(conf, &r10_bio); - rcu_read_lock(); - for (s = 0; s < conf->copies; s++) { - int disk = r10_bio.devs[s].devnum; - struct md_rdev *rdev = rcu_dereference( - conf->mirrors[disk].rdev); - if (rdev && !test_bit(Faulty, &rdev->flags)) { - struct request_queue *q = - bdev_get_queue(rdev->bdev); - if (q->merge_bvec_fn) { - bvm->bi_sector = r10_bio.devs[s].addr - + rdev->data_offset; - bvm->bi_bdev = rdev->bdev; - max = min(max, q->merge_bvec_fn( - q, bvm, biovec)); - } - } - rdev = rcu_dereference(conf->mirrors[disk].replacement); - if (rdev && !test_bit(Faulty, &rdev->flags)) { - struct request_queue *q = - bdev_get_queue(rdev->bdev); - if (q->merge_bvec_fn) { - bvm->bi_sector = r10_bio.devs[s].addr - + rdev->data_offset; - bvm->bi_bdev = rdev->bdev; - max = min(max, q->merge_bvec_fn( - q, bvm, biovec)); - } - } - } - rcu_read_unlock(); - } - return max; -} - -/* - * This routine returns the disk from which the requested read should - * be done. There is a per-array 'next expected sequential IO' sector - * number - if this matches on the next IO then we use the last disk. - * There is also a per-disk 'last know head position' sector that is - * maintained from IRQ contexts, both the normal and the resync IO - * completion handlers update this position correctly. If there is no - * perfect sequential match then we pick the disk whose head is closest. - * - * If there are 2 mirrors in the same 2 devices, performance degrades - * because position is mirror, not device based. - * - * The rdev for the device selected will have nr_pending incremented. - */ - -/* - * FIXME: possibly should rethink readbalancing and do it differently - * depending on near_copies / far_copies geometry. - */ -static struct md_rdev *read_balance(struct r10conf *conf, - struct r10bio *r10_bio, - int *max_sectors) -{ - const sector_t this_sector = r10_bio->sector; - int disk, slot; - int sectors = r10_bio->sectors; - int best_good_sectors; - sector_t new_distance, best_dist; - struct md_rdev *rdev, *best_rdev; - int do_balance; - int best_slot; - - raid10_find_phys(conf, r10_bio); - rcu_read_lock(); -retry: - sectors = r10_bio->sectors; - best_slot = -1; - best_rdev = NULL; - best_dist = MaxSector; - best_good_sectors = 0; - do_balance = 1; - /* - * Check if we can balance. We can balance on the whole - * device if no resync is going on (recovery is ok), or below - * the resync window. We take the first readable disk when - * above the resync window. 
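read_balance() above, as its comment says, prefers the copy whose last known head position is closest to the requested sector (the extra rules for sequential IO, resync windows and far layouts are omitted here). A standalone sketch of the core seek-distance heuristic with made-up head positions:

#include <stdio.h>
#include <stdlib.h>

struct copy { int devnum; long long addr; long long head_position; };

int main(void)
{
    struct copy copies[3] = {
        { 0, 5000, 200 },    /* head far away */
        { 2, 5000, 4990 },   /* head almost on top of the data */
        { 4, 9000, 9100 },   /* far copy at a different device address */
    };
    long long best_dist = -1;
    int best = -1, i;

    for (i = 0; i < 3; i++) {
        long long dist = llabs(copies[i].addr - copies[i].head_position);
        if (best < 0 || dist < best_dist) {
            best_dist = dist;
            best = i;
        }
    }
    printf("read from dev %d (seek distance %lld)\n",
           copies[best].devnum, best_dist);
    return 0;
}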
- */ - if (conf->mddev->recovery_cp < MaxSector - && (this_sector + sectors >= conf->next_resync)) - do_balance = 0; - - for (slot = 0; slot < conf->copies ; slot++) { - sector_t first_bad; - int bad_sectors; - sector_t dev_sector; - - if (r10_bio->devs[slot].bio == IO_BLOCKED) - continue; - disk = r10_bio->devs[slot].devnum; - rdev = rcu_dereference(conf->mirrors[disk].replacement); - if (rdev == NULL || test_bit(Faulty, &rdev->flags) || - test_bit(Unmerged, &rdev->flags) || - r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) - rdev = rcu_dereference(conf->mirrors[disk].rdev); - if (rdev == NULL || - test_bit(Faulty, &rdev->flags) || - test_bit(Unmerged, &rdev->flags)) - continue; - if (!test_bit(In_sync, &rdev->flags) && - r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) - continue; - - dev_sector = r10_bio->devs[slot].addr; - if (is_badblock(rdev, dev_sector, sectors, - &first_bad, &bad_sectors)) { - if (best_dist < MaxSector) - /* Already have a better slot */ - continue; - if (first_bad <= dev_sector) { - /* Cannot read here. If this is the - * 'primary' device, then we must not read - * beyond 'bad_sectors' from another device. - */ - bad_sectors -= (dev_sector - first_bad); - if (!do_balance && sectors > bad_sectors) - sectors = bad_sectors; - if (best_good_sectors > sectors) - best_good_sectors = sectors; - } else { - sector_t good_sectors = - first_bad - dev_sector; - if (good_sectors > best_good_sectors) { - best_good_sectors = good_sectors; - best_slot = slot; - best_rdev = rdev; - } - if (!do_balance) - /* Must read from here */ - break; - } - continue; - } else - best_good_sectors = sectors; - - if (!do_balance) - break; - - /* This optimisation is debatable, and completely destroys - * sequential read speed for 'far copies' arrays. So only - * keep it for 'near' arrays, and review those later. - */ - if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending)) - break; - - /* for far > 1 always use the lowest address */ - if (conf->far_copies > 1) - new_distance = r10_bio->devs[slot].addr; - else - new_distance = abs(r10_bio->devs[slot].addr - - conf->mirrors[disk].head_position); - if (new_distance < best_dist) { - best_dist = new_distance; - best_slot = slot; - best_rdev = rdev; - } - } - if (slot >= conf->copies) { - slot = best_slot; - rdev = best_rdev; - } - - if (slot >= 0) { - atomic_inc(&rdev->nr_pending); - if (test_bit(Faulty, &rdev->flags)) { - /* Cannot risk returning a device that failed - * before we inc'ed nr_pending - */ - rdev_dec_pending(rdev, conf->mddev); - goto retry; - } - r10_bio->read_slot = slot; - } else - rdev = NULL; - rcu_read_unlock(); - *max_sectors = best_good_sectors; - - return rdev; -} - -static int raid10_congested(void *data, int bits) -{ - struct mddev *mddev = data; - struct r10conf *conf = mddev->private; - int i, ret = 0; - - if ((bits & (1 << BDI_async_congested)) && - conf->pending_count >= max_queued_requests) - return 1; - - if (mddev_congested(mddev, bits)) - return 1; - rcu_read_lock(); - for (i = 0; i < conf->raid_disks && ret == 0; i++) { - struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); - if (rdev && !test_bit(Faulty, &rdev->flags)) { - struct request_queue *q = bdev_get_queue(rdev->bdev); - - ret |= bdi_congested(&q->backing_dev_info, bits); - } - } - rcu_read_unlock(); - return ret; -} - -static void flush_pending_writes(struct r10conf *conf) -{ - /* Any writes that have been queued but are awaiting - * bitmap updates get flushed here. 
- */ - spin_lock_irq(&conf->device_lock); - - if (conf->pending_bio_list.head) { - struct bio *bio; - bio = bio_list_get(&conf->pending_bio_list); - conf->pending_count = 0; - spin_unlock_irq(&conf->device_lock); - /* flush any pending bitmap writes to disk - * before proceeding w/ I/O */ - bitmap_unplug(conf->mddev->bitmap); - wake_up(&conf->wait_barrier); - - while (bio) { /* submit pending writes */ - struct bio *next = bio->bi_next; - bio->bi_next = NULL; - generic_make_request(bio); - bio = next; - } - } else - spin_unlock_irq(&conf->device_lock); -} - -/* Barriers.... - * Sometimes we need to suspend IO while we do something else, - * either some resync/recovery, or reconfigure the array. - * To do this we raise a 'barrier'. - * The 'barrier' is a counter that can be raised multiple times - * to count how many activities are happening which preclude - * normal IO. - * We can only raise the barrier if there is no pending IO. - * i.e. if nr_pending == 0. - * We choose only to raise the barrier if no-one is waiting for the - * barrier to go down. This means that as soon as an IO request - * is ready, no other operations which require a barrier will start - * until the IO request has had a chance. - * - * So: regular IO calls 'wait_barrier'. When that returns there - * is no backgroup IO happening, It must arrange to call - * allow_barrier when it has finished its IO. - * backgroup IO calls must call raise_barrier. Once that returns - * there is no normal IO happeing. It must arrange to call - * lower_barrier when the particular background IO completes. - */ - -static void raise_barrier(struct r10conf *conf, int force) -{ - BUG_ON(force && !conf->barrier); - spin_lock_irq(&conf->resync_lock); - - /* Wait until no block IO is waiting (unless 'force') */ - wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting, - conf->resync_lock, ); - - /* block any new IO from starting */ - conf->barrier++; - - /* Now wait for all pending IO to complete */ - wait_event_lock_irq(conf->wait_barrier, - !conf->nr_pending && conf->barrier < RESYNC_DEPTH, - conf->resync_lock, ); - - spin_unlock_irq(&conf->resync_lock); -} - -static void lower_barrier(struct r10conf *conf) -{ - unsigned long flags; - spin_lock_irqsave(&conf->resync_lock, flags); - conf->barrier--; - spin_unlock_irqrestore(&conf->resync_lock, flags); - wake_up(&conf->wait_barrier); -} - -static void wait_barrier(struct r10conf *conf) -{ - spin_lock_irq(&conf->resync_lock); - if (conf->barrier) { - conf->nr_waiting++; - /* Wait for the barrier to drop. - * However if there are already pending - * requests (preventing the barrier from - * rising completely), and the - * pre-process bio queue isn't empty, - * then don't wait, as we need to empty - * that queue to get the nr_pending - * count down. - */ - wait_event_lock_irq(conf->wait_barrier, - !conf->barrier || - (conf->nr_pending && - current->bio_list && - !bio_list_empty(current->bio_list)), - conf->resync_lock, - ); - conf->nr_waiting--; - } - conf->nr_pending++; - spin_unlock_irq(&conf->resync_lock); -} - -static void allow_barrier(struct r10conf *conf) -{ - unsigned long flags; - spin_lock_irqsave(&conf->resync_lock, flags); - conf->nr_pending--; - spin_unlock_irqrestore(&conf->resync_lock, flags); - wake_up(&conf->wait_barrier); -} - -static void freeze_array(struct r10conf *conf) -{ - /* stop syncio and normal IO and wait for everything to - * go quiet. 
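The barrier scheme described above boils down to two counters under one lock: nr_pending counts in-flight normal IO, barrier counts resync/recovery activities, and each side waits for the other to drain before proceeding. A minimal POSIX-threads model of that handshake (illustrative only: the nr_waiting fairness rule, the RESYNC_DEPTH limit and the spinlock/wait_event details of the real code are left out; compile with -pthread):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static int barrier;      /* >0 while resync/recovery excludes normal IO */
static int nr_pending;   /* normal IO currently in flight */

static void wait_barrier(void)
{
    pthread_mutex_lock(&lock);
    while (barrier)                  /* normal IO waits out the barrier */
        pthread_cond_wait(&cond, &lock);
    nr_pending++;
    pthread_mutex_unlock(&lock);
}

static void allow_barrier(void)
{
    pthread_mutex_lock(&lock);
    nr_pending--;
    pthread_cond_broadcast(&cond);
    pthread_mutex_unlock(&lock);
}

static void raise_barrier(void)
{
    pthread_mutex_lock(&lock);
    barrier++;
    while (nr_pending)               /* wait for in-flight IO to finish */
        pthread_cond_wait(&cond, &lock);
    pthread_mutex_unlock(&lock);
}

static void lower_barrier(void)
{
    pthread_mutex_lock(&lock);
    barrier--;
    pthread_cond_broadcast(&cond);
    pthread_mutex_unlock(&lock);
}

static void *io_thread(void *arg)
{
    (void)arg;
    wait_barrier();
    printf("normal IO running\n");
    usleep(1000);
    allow_barrier();
    return NULL;
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, io_thread, NULL);
    raise_barrier();
    printf("resync window: no normal IO in flight\n");
    lower_barrier();
    pthread_join(t, NULL);
    return 0;
}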
- * We increment barrier and nr_waiting, and then - * wait until nr_pending match nr_queued+1 - * This is called in the context of one normal IO request - * that has failed. Thus any sync request that might be pending - * will be blocked by nr_pending, and we need to wait for - * pending IO requests to complete or be queued for re-try. - * Thus the number queued (nr_queued) plus this request (1) - * must match the number of pending IOs (nr_pending) before - * we continue. - */ - spin_lock_irq(&conf->resync_lock); - conf->barrier++; - conf->nr_waiting++; - wait_event_lock_irq(conf->wait_barrier, - conf->nr_pending == conf->nr_queued+1, - conf->resync_lock, - flush_pending_writes(conf)); - - spin_unlock_irq(&conf->resync_lock); -} - -static void unfreeze_array(struct r10conf *conf) -{ - /* reverse the effect of the freeze */ - spin_lock_irq(&conf->resync_lock); - conf->barrier--; - conf->nr_waiting--; - wake_up(&conf->wait_barrier); - spin_unlock_irq(&conf->resync_lock); -} - -static void make_request(struct mddev *mddev, struct bio * bio) -{ - struct r10conf *conf = mddev->private; - struct r10bio *r10_bio; - struct bio *read_bio; - int i; - int chunk_sects = conf->chunk_mask + 1; - const int rw = bio_data_dir(bio); - const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); - const unsigned long do_fua = (bio->bi_rw & REQ_FUA); - unsigned long flags; - struct md_rdev *blocked_rdev; - int plugged; - int sectors_handled; - int max_sectors; - - if (unlikely(bio->bi_rw & REQ_FLUSH)) { - md_flush_request(mddev, bio); - return; - } - - /* If this request crosses a chunk boundary, we need to - * split it. This will only happen for 1 PAGE (or less) requests. - */ - if (unlikely( (bio->bi_sector & conf->chunk_mask) + (bio->bi_size >> 9) - > chunk_sects && - conf->near_copies < conf->raid_disks)) { - struct bio_pair *bp; - /* Sanity check -- queue functions should prevent this happening */ - if (bio->bi_vcnt != 1 || - bio->bi_idx != 0) - goto bad_map; - /* This is a one page bio that upper layers - * refuse to split for us, so we need to split it. - */ - bp = bio_split(bio, - chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); - - /* Each of these 'make_request' calls will call 'wait_barrier'. - * If the first succeeds but the second blocks due to the resync - * thread raising the barrier, we will deadlock because the - * IO to the underlying device will be queued in generic_make_request - * and will never complete, so will never reduce nr_pending. - * So increment nr_waiting here so no new raise_barriers will - * succeed, and so the second wait_barrier cannot block. - */ - spin_lock_irq(&conf->resync_lock); - conf->nr_waiting++; - spin_unlock_irq(&conf->resync_lock); - - make_request(mddev, &bp->bio1); - make_request(mddev, &bp->bio2); - - spin_lock_irq(&conf->resync_lock); - conf->nr_waiting--; - wake_up(&conf->wait_barrier); - spin_unlock_irq(&conf->resync_lock); - - bio_pair_release(bp); - return; - bad_map: - printk("md/raid10:%s: make_request bug: can't convert block across chunks" - " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2, - (unsigned long long)bio->bi_sector, bio->bi_size >> 10); - - bio_io_error(bio); - return; - } - - md_write_start(mddev, bio); - - /* - * Register the new request and wait if the reconstruction - * thread has put up a bar for new requests. - * Continue immediately if no resync is active currently. 
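make_request() above splits a request that straddles a chunk boundary at chunk_sects - (bi_sector & (chunk_sects - 1)) sectors, i.e. exactly at the end of the current chunk. A standalone check of that arithmetic with illustrative numbers:

#include <stdio.h>

int main(void)
{
    unsigned long long chunk_sects = 128;   /* 64K chunks with 512-byte sectors */
    unsigned long long bi_sector = 1000;    /* request start */
    unsigned long long bi_sectors = 40;     /* request length */
    unsigned long long in_chunk = bi_sector & (chunk_sects - 1);

    if (in_chunk + bi_sectors > chunk_sects) {
        unsigned long long first = chunk_sects - in_chunk;
        printf("split: first part %llu sectors, second part %llu sectors\n",
               first, bi_sectors - first);
    } else {
        printf("no split needed\n");
    }
    return 0;
}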
- */ - wait_barrier(conf); - - r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); - - r10_bio->master_bio = bio; - r10_bio->sectors = bio->bi_size >> 9; - - r10_bio->mddev = mddev; - r10_bio->sector = bio->bi_sector; - r10_bio->state = 0; - - /* We might need to issue multiple reads to different - * devices if there are bad blocks around, so we keep - * track of the number of reads in bio->bi_phys_segments. - * If this is 0, there is only one r10_bio and no locking - * will be needed when the request completes. If it is - * non-zero, then it is the number of not-completed requests. - */ - bio->bi_phys_segments = 0; - clear_bit(BIO_SEG_VALID, &bio->bi_flags); - - if (rw == READ) { - /* - * read balancing logic: - */ - struct md_rdev *rdev; - int slot; - -read_again: - rdev = read_balance(conf, r10_bio, &max_sectors); - if (!rdev) { - raid_end_bio_io(r10_bio); - return; - } - slot = r10_bio->read_slot; - - read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev); - md_trim_bio(read_bio, r10_bio->sector - bio->bi_sector, - max_sectors); - - r10_bio->devs[slot].bio = read_bio; - r10_bio->devs[slot].rdev = rdev; - - read_bio->bi_sector = r10_bio->devs[slot].addr + - rdev->data_offset; - read_bio->bi_bdev = rdev->bdev; - read_bio->bi_end_io = raid10_end_read_request; - read_bio->bi_rw = READ | do_sync; - read_bio->bi_private = r10_bio; - - if (max_sectors < r10_bio->sectors) { - /* Could not read all from this device, so we will - * need another r10_bio. - */ - sectors_handled = (r10_bio->sectors + max_sectors - - bio->bi_sector); - r10_bio->sectors = max_sectors; - spin_lock_irq(&conf->device_lock); - if (bio->bi_phys_segments == 0) - bio->bi_phys_segments = 2; - else - bio->bi_phys_segments++; - spin_unlock(&conf->device_lock); - /* Cannot call generic_make_request directly - * as that will be queued in __generic_make_request - * and subsequent mempool_alloc might block - * waiting for it. so hand bio over to raid10d. - */ - reschedule_retry(r10_bio); - - r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); - - r10_bio->master_bio = bio; - r10_bio->sectors = ((bio->bi_size >> 9) - - sectors_handled); - r10_bio->state = 0; - r10_bio->mddev = mddev; - r10_bio->sector = bio->bi_sector + sectors_handled; - goto read_again; - } else - generic_make_request(read_bio); - return; - } - - /* - * WRITE: - */ - if (conf->pending_count >= max_queued_requests) { - md_wakeup_thread(mddev->thread); - wait_event(conf->wait_barrier, - conf->pending_count < max_queued_requests); - } - /* first select target devices under rcu_lock and - * inc refcount on their rdev. Record them by setting - * bios[x] to bio - * If there are known/acknowledged bad blocks on any device - * on which we have seen a write error, we want to avoid - * writing to those blocks. This potentially requires several - * writes to write around the bad blocks. Each set of writes - * gets its own r10_bio with a set of bios attached. The number - * of r10_bios is recored in bio->bi_phys_segments just as with - * the read case. 
- */ - plugged = mddev_check_plugged(mddev); - - r10_bio->read_slot = -1; /* make sure repl_bio gets freed */ - raid10_find_phys(conf, r10_bio); -retry_write: - blocked_rdev = NULL; - rcu_read_lock(); - max_sectors = r10_bio->sectors; - - for (i = 0; i < conf->copies; i++) { - int d = r10_bio->devs[i].devnum; - struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev); - struct md_rdev *rrdev = rcu_dereference( - conf->mirrors[d].replacement); - if (rdev == rrdev) - rrdev = NULL; - if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { - atomic_inc(&rdev->nr_pending); - blocked_rdev = rdev; - break; - } - if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) { - atomic_inc(&rrdev->nr_pending); - blocked_rdev = rrdev; - break; - } - if (rrdev && (test_bit(Faulty, &rrdev->flags) - || test_bit(Unmerged, &rrdev->flags))) - rrdev = NULL; - - r10_bio->devs[i].bio = NULL; - r10_bio->devs[i].repl_bio = NULL; - if (!rdev || test_bit(Faulty, &rdev->flags) || - test_bit(Unmerged, &rdev->flags)) { - set_bit(R10BIO_Degraded, &r10_bio->state); - continue; - } - if (test_bit(WriteErrorSeen, &rdev->flags)) { - sector_t first_bad; - sector_t dev_sector = r10_bio->devs[i].addr; - int bad_sectors; - int is_bad; - - is_bad = is_badblock(rdev, dev_sector, - max_sectors, - &first_bad, &bad_sectors); - if (is_bad < 0) { - /* Mustn't write here until the bad block - * is acknowledged - */ - atomic_inc(&rdev->nr_pending); - set_bit(BlockedBadBlocks, &rdev->flags); - blocked_rdev = rdev; - break; - } - if (is_bad && first_bad <= dev_sector) { - /* Cannot write here at all */ - bad_sectors -= (dev_sector - first_bad); - if (bad_sectors < max_sectors) - /* Mustn't write more than bad_sectors - * to other devices yet - */ - max_sectors = bad_sectors; - /* We don't set R10BIO_Degraded as that - * only applies if the disk is missing, - * so it might be re-added, and we want to - * know to recover this chunk. - * In this case the device is here, and the - * fact that this chunk is not in-sync is - * recorded in the bad block log. - */ - continue; - } - if (is_bad) { - int good_sectors = first_bad - dev_sector; - if (good_sectors < max_sectors) - max_sectors = good_sectors; - } - } - r10_bio->devs[i].bio = bio; - atomic_inc(&rdev->nr_pending); - if (rrdev) { - r10_bio->devs[i].repl_bio = bio; - atomic_inc(&rrdev->nr_pending); - } - } - rcu_read_unlock(); - - if (unlikely(blocked_rdev)) { - /* Have to wait for this device to get unblocked, then retry */ - int j; - int d; - - for (j = 0; j < i; j++) { - if (r10_bio->devs[j].bio) { - d = r10_bio->devs[j].devnum; - rdev_dec_pending(conf->mirrors[d].rdev, mddev); - } - if (r10_bio->devs[j].repl_bio) { - struct md_rdev *rdev; - d = r10_bio->devs[j].devnum; - rdev = conf->mirrors[d].replacement; - if (!rdev) { - /* Race with remove_disk */ - smp_mb(); - rdev = conf->mirrors[d].rdev; - } - rdev_dec_pending(rdev, mddev); - } - } - allow_barrier(conf); - md_wait_for_blocked_rdev(blocked_rdev, mddev); - wait_barrier(conf); - goto retry_write; - } - - if (max_sectors < r10_bio->sectors) { - /* We are splitting this into multiple parts, so - * we need to prepare for allocating another r10_bio. 
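The write loop above clips max_sectors around known bad blocks: if the bad range starts at or before this device's target sector, the device is skipped and the write is limited to the remaining length of the bad range; otherwise the write is shortened so it stops just before the bad range. A standalone sketch of that decision with one hypothetical bad range:

#include <stdio.h>

struct badrange { long long first_bad; int bad_sectors; };

int main(void)
{
    long long dev_sector = 1000;      /* where this write would start */
    long long max_sectors = 64;       /* how much we want to write */
    struct badrange bb = { 1040, 8 }; /* known bad blocks on this device */

    if (bb.first_bad <= dev_sector) {
        /* cannot write here at all; the other devices may only be written
         * up to the end of the bad range for now */
        long long remaining_bad = bb.bad_sectors - (dev_sector - bb.first_bad);
        if (remaining_bad < max_sectors)
            max_sectors = remaining_bad;
        printf("skip this device, clip write to %lld sectors\n", max_sectors);
    } else if (bb.first_bad < dev_sector + max_sectors) {
        max_sectors = bb.first_bad - dev_sector;   /* stop before the bad range */
        printf("write only %lld sectors on every device\n", max_sectors);
    } else {
        printf("bad range not hit, write all %lld sectors\n", max_sectors);
    }
    return 0;
}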
- */ - r10_bio->sectors = max_sectors; - spin_lock_irq(&conf->device_lock); - if (bio->bi_phys_segments == 0) - bio->bi_phys_segments = 2; - else - bio->bi_phys_segments++; - spin_unlock_irq(&conf->device_lock); - } - sectors_handled = r10_bio->sector + max_sectors - bio->bi_sector; - - atomic_set(&r10_bio->remaining, 1); - bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0); - - for (i = 0; i < conf->copies; i++) { - struct bio *mbio; - int d = r10_bio->devs[i].devnum; - if (!r10_bio->devs[i].bio) - continue; - - mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); - md_trim_bio(mbio, r10_bio->sector - bio->bi_sector, - max_sectors); - r10_bio->devs[i].bio = mbio; - - mbio->bi_sector = (r10_bio->devs[i].addr+ - conf->mirrors[d].rdev->data_offset); - mbio->bi_bdev = conf->mirrors[d].rdev->bdev; - mbio->bi_end_io = raid10_end_write_request; - mbio->bi_rw = WRITE | do_sync | do_fua; - mbio->bi_private = r10_bio; - - atomic_inc(&r10_bio->remaining); - spin_lock_irqsave(&conf->device_lock, flags); - bio_list_add(&conf->pending_bio_list, mbio); - conf->pending_count++; - spin_unlock_irqrestore(&conf->device_lock, flags); - - if (!r10_bio->devs[i].repl_bio) - continue; - - mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); - md_trim_bio(mbio, r10_bio->sector - bio->bi_sector, - max_sectors); - r10_bio->devs[i].repl_bio = mbio; - - /* We are actively writing to the original device - * so it cannot disappear, so the replacement cannot - * become NULL here - */ - mbio->bi_sector = (r10_bio->devs[i].addr+ - conf->mirrors[d].replacement->data_offset); - mbio->bi_bdev = conf->mirrors[d].replacement->bdev; - mbio->bi_end_io = raid10_end_write_request; - mbio->bi_rw = WRITE | do_sync | do_fua; - mbio->bi_private = r10_bio; - - atomic_inc(&r10_bio->remaining); - spin_lock_irqsave(&conf->device_lock, flags); - bio_list_add(&conf->pending_bio_list, mbio); - conf->pending_count++; - spin_unlock_irqrestore(&conf->device_lock, flags); - } - - /* Don't remove the bias on 'remaining' (one_write_done) until - * after checking if we need to go around again. - */ - - if (sectors_handled < (bio->bi_size >> 9)) { - one_write_done(r10_bio); - /* We need another r10_bio. It has already been counted - * in bio->bi_phys_segments. - */ - r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); - - r10_bio->master_bio = bio; - r10_bio->sectors = (bio->bi_size >> 9) - sectors_handled; - - r10_bio->mddev = mddev; - r10_bio->sector = bio->bi_sector + sectors_handled; - r10_bio->state = 0; - goto retry_write; - } - one_write_done(r10_bio); - - /* In case raid10d snuck in to freeze_array */ - wake_up(&conf->wait_barrier); - - if (do_sync || !mddev->bitmap || !plugged) - md_wakeup_thread(mddev->thread); -} - -static void status(struct seq_file *seq, struct mddev *mddev) -{ - struct r10conf *conf = mddev->private; - int i; - - if (conf->near_copies < conf->raid_disks) - seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2); - if (conf->near_copies > 1) - seq_printf(seq, " %d near-copies", conf->near_copies); - if (conf->far_copies > 1) { - if (conf->far_offset) - seq_printf(seq, " %d offset-copies", conf->far_copies); - else - seq_printf(seq, " %d far-copies", conf->far_copies); - } - seq_printf(seq, " [%d/%d] [", conf->raid_disks, - conf->raid_disks - mddev->degraded); - for (i = 0; i < conf->raid_disks; i++) - seq_printf(seq, "%s", - conf->mirrors[i].rdev && - test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? 
"U" : "_"); - seq_printf(seq, "]"); -} - -/* check if there are enough drives for - * every block to appear on atleast one. - * Don't consider the device numbered 'ignore' - * as we might be about to remove it. - */ -static int enough(struct r10conf *conf, int ignore) -{ - int first = 0; - - do { - int n = conf->copies; - int cnt = 0; - while (n--) { - if (conf->mirrors[first].rdev && - first != ignore) - cnt++; - first = (first+1) % conf->raid_disks; - } - if (cnt == 0) - return 0; - } while (first != 0); - return 1; -} - -static void error(struct mddev *mddev, struct md_rdev *rdev) -{ - char b[BDEVNAME_SIZE]; - struct r10conf *conf = mddev->private; - - /* - * If it is not operational, then we have already marked it as dead - * else if it is the last working disks, ignore the error, let the - * next level up know. - * else mark the drive as failed - */ - if (test_bit(In_sync, &rdev->flags) - && !enough(conf, rdev->raid_disk)) - /* - * Don't fail the drive, just return an IO error. - */ - return; - if (test_and_clear_bit(In_sync, &rdev->flags)) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded++; - spin_unlock_irqrestore(&conf->device_lock, flags); - /* - * if recovery is running, make sure it aborts. - */ - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - } - set_bit(Blocked, &rdev->flags); - set_bit(Faulty, &rdev->flags); - set_bit(MD_CHANGE_DEVS, &mddev->flags); - printk(KERN_ALERT - "md/raid10:%s: Disk failure on %s, disabling device.\n" - "md/raid10:%s: Operation continuing on %d devices.\n", - mdname(mddev), bdevname(rdev->bdev, b), - mdname(mddev), conf->raid_disks - mddev->degraded); -} - -static void print_conf(struct r10conf *conf) -{ - int i; - struct mirror_info *tmp; - - printk(KERN_DEBUG "RAID10 conf printout:\n"); - if (!conf) { - printk(KERN_DEBUG "(!conf)\n"); - return; - } - printk(KERN_DEBUG " --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded, - conf->raid_disks); - - for (i = 0; i < conf->raid_disks; i++) { - char b[BDEVNAME_SIZE]; - tmp = conf->mirrors + i; - if (tmp->rdev) - printk(KERN_DEBUG " disk %d, wo:%d, o:%d, dev:%s\n", - i, !test_bit(In_sync, &tmp->rdev->flags), - !test_bit(Faulty, &tmp->rdev->flags), - bdevname(tmp->rdev->bdev,b)); - } -} - -static void close_sync(struct r10conf *conf) -{ - wait_barrier(conf); - allow_barrier(conf); - - mempool_destroy(conf->r10buf_pool); - conf->r10buf_pool = NULL; -} - -static int raid10_spare_active(struct mddev *mddev) -{ - int i; - struct r10conf *conf = mddev->private; - struct mirror_info *tmp; - int count = 0; - unsigned long flags; - - /* - * Find all non-in_sync disks within the RAID10 configuration - * and mark them in_sync - */ - for (i = 0; i < conf->raid_disks; i++) { - tmp = conf->mirrors + i; - if (tmp->replacement - && tmp->replacement->recovery_offset == MaxSector - && !test_bit(Faulty, &tmp->replacement->flags) - && !test_and_set_bit(In_sync, &tmp->replacement->flags)) { - /* Replacement has just become active */ - if (!tmp->rdev - || !test_and_clear_bit(In_sync, &tmp->rdev->flags)) - count++; - if (tmp->rdev) { - /* Replaced device not technically faulty, - * but we need to be sure it gets removed - * and never re-added. 
- */ - set_bit(Faulty, &tmp->rdev->flags); - sysfs_notify_dirent_safe( - tmp->rdev->sysfs_state); - } - sysfs_notify_dirent_safe(tmp->replacement->sysfs_state); - } else if (tmp->rdev - && !test_bit(Faulty, &tmp->rdev->flags) - && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { - count++; - sysfs_notify_dirent(tmp->rdev->sysfs_state); - } - } - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded -= count; - spin_unlock_irqrestore(&conf->device_lock, flags); - - print_conf(conf); - return count; -} - - -static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) -{ - struct r10conf *conf = mddev->private; - int err = -EEXIST; - int mirror; - int first = 0; - int last = conf->raid_disks - 1; - struct request_queue *q = bdev_get_queue(rdev->bdev); - - if (mddev->recovery_cp < MaxSector) - /* only hot-add to in-sync arrays, as recovery is - * very different from resync - */ - return -EBUSY; - if (rdev->saved_raid_disk < 0 && !enough(conf, -1)) - return -EINVAL; - - if (rdev->raid_disk >= 0) - first = last = rdev->raid_disk; - - if (q->merge_bvec_fn) { - set_bit(Unmerged, &rdev->flags); - mddev->merge_check_needed = 1; - } - - if (rdev->saved_raid_disk >= first && - conf->mirrors[rdev->saved_raid_disk].rdev == NULL) - mirror = rdev->saved_raid_disk; - else - mirror = first; - for ( ; mirror <= last ; mirror++) { - struct mirror_info *p = &conf->mirrors[mirror]; - if (p->recovery_disabled == mddev->recovery_disabled) - continue; - if (p->rdev) { - if (!test_bit(WantReplacement, &p->rdev->flags) || - p->replacement != NULL) - continue; - clear_bit(In_sync, &rdev->flags); - set_bit(Replacement, &rdev->flags); - rdev->raid_disk = mirror; - err = 0; - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - conf->fullsync = 1; - rcu_assign_pointer(p->replacement, rdev); - break; - } - - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - - p->head_position = 0; - p->recovery_disabled = mddev->recovery_disabled - 1; - rdev->raid_disk = mirror; - err = 0; - if (rdev->saved_raid_disk != mirror) - conf->fullsync = 1; - rcu_assign_pointer(p->rdev, rdev); - break; - } - if (err == 0 && test_bit(Unmerged, &rdev->flags)) { - /* Some requests might not have seen this new - * merge_bvec_fn. We must wait for them to complete - * before merging the device fully. - * First we make sure any code which has tested - * our function has submitted the request, then - * we wait for all outstanding requests to complete. - */ - synchronize_sched(); - raise_barrier(conf, 0); - lower_barrier(conf); - clear_bit(Unmerged, &rdev->flags); - } - md_integrity_add_rdev(rdev, mddev); - print_conf(conf); - return err; -} - -static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) -{ - struct r10conf *conf = mddev->private; - int err = 0; - int number = rdev->raid_disk; - struct md_rdev **rdevp; - struct mirror_info *p = conf->mirrors + number; - - print_conf(conf); - if (rdev == p->rdev) - rdevp = &p->rdev; - else if (rdev == p->replacement) - rdevp = &p->replacement; - else - return 0; - - if (test_bit(In_sync, &rdev->flags) || - atomic_read(&rdev->nr_pending)) { - err = -EBUSY; - goto abort; - } - /* Only remove faulty devices if recovery - * is not possible. 
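/*
 * Editor's note: a simplified model of the slot-selection policy in
 * raid10_add_disk() above: prefer the disk's previous slot if it is free,
 * otherwise take the first slot that is empty or has asked for a replacement.
 * The enum and pick_slot() are illustrative stand-ins, not kernel API.
 */
#include <stdio.h>

enum placement { SLOT_EMPTY, SLOT_WANTS_REPLACEMENT, SLOT_FULL };

static int pick_slot(const enum placement *slot, int nslots, int saved)
{
	if (saved >= 0 && saved < nslots && slot[saved] == SLOT_EMPTY)
		return saved;				/* reuse old position */
	for (int i = 0; i < nslots; i++)
		if (slot[i] == SLOT_EMPTY || slot[i] == SLOT_WANTS_REPLACEMENT)
			return i;			/* first usable slot */
	return -1;					/* nothing to do: -EEXIST */
}

int main(void)
{
	enum placement slots[3] = { SLOT_FULL, SLOT_EMPTY, SLOT_WANTS_REPLACEMENT };
	printf("%d\n", pick_slot(slots, 3, 1));		/* prints 1 */
	return 0;
}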
- */ - if (!test_bit(Faulty, &rdev->flags) && - mddev->recovery_disabled != p->recovery_disabled && - (!p->replacement || p->replacement == rdev) && - enough(conf, -1)) { - err = -EBUSY; - goto abort; - } - *rdevp = NULL; - synchronize_rcu(); - if (atomic_read(&rdev->nr_pending)) { - /* lost the race, try later */ - err = -EBUSY; - *rdevp = rdev; - goto abort; - } else if (p->replacement) { - /* We must have just cleared 'rdev' */ - p->rdev = p->replacement; - clear_bit(Replacement, &p->replacement->flags); - smp_mb(); /* Make sure other CPUs may see both as identical - * but will never see neither -- if they are careful. - */ - p->replacement = NULL; - clear_bit(WantReplacement, &rdev->flags); - } else - /* We might have just remove the Replacement as faulty - * Clear the flag just in case - */ - clear_bit(WantReplacement, &rdev->flags); - - err = md_integrity_register(mddev); - -abort: - - print_conf(conf); - return err; -} - - -static void end_sync_read(struct bio *bio, int error) -{ - struct r10bio *r10_bio = bio->bi_private; - struct r10conf *conf = r10_bio->mddev->private; - int d; - - d = find_bio_disk(conf, r10_bio, bio, NULL, NULL); - - if (test_bit(BIO_UPTODATE, &bio->bi_flags)) - set_bit(R10BIO_Uptodate, &r10_bio->state); - else - /* The write handler will notice the lack of - * R10BIO_Uptodate and record any errors etc - */ - atomic_add(r10_bio->sectors, - &conf->mirrors[d].rdev->corrected_errors); - - /* for reconstruct, we always reschedule after a read. - * for resync, only after all reads - */ - rdev_dec_pending(conf->mirrors[d].rdev, conf->mddev); - if (test_bit(R10BIO_IsRecover, &r10_bio->state) || - atomic_dec_and_test(&r10_bio->remaining)) { - /* we have read all the blocks, - * do the comparison in process context in raid10d - */ - reschedule_retry(r10_bio); - } -} - -static void end_sync_request(struct r10bio *r10_bio) -{ - struct mddev *mddev = r10_bio->mddev; - - while (atomic_dec_and_test(&r10_bio->remaining)) { - if (r10_bio->master_bio == NULL) { - /* the primary of several recovery bios */ - sector_t s = r10_bio->sectors; - if (test_bit(R10BIO_MadeGood, &r10_bio->state) || - test_bit(R10BIO_WriteError, &r10_bio->state)) - reschedule_retry(r10_bio); - else - put_buf(r10_bio); - md_done_sync(mddev, s, 1); - break; - } else { - struct r10bio *r10_bio2 = (struct r10bio *)r10_bio->master_bio; - if (test_bit(R10BIO_MadeGood, &r10_bio->state) || - test_bit(R10BIO_WriteError, &r10_bio->state)) - reschedule_retry(r10_bio); - else - put_buf(r10_bio); - r10_bio = r10_bio2; - } - } -} - -static void end_sync_write(struct bio *bio, int error) -{ - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct r10bio *r10_bio = bio->bi_private; - struct mddev *mddev = r10_bio->mddev; - struct r10conf *conf = mddev->private; - int d; - sector_t first_bad; - int bad_sectors; - int slot; - int repl; - struct md_rdev *rdev = NULL; - - d = find_bio_disk(conf, r10_bio, bio, &slot, &repl); - if (repl) - rdev = conf->mirrors[d].replacement; - else - rdev = conf->mirrors[d].rdev; - - if (!uptodate) { - if (repl) - md_error(mddev, rdev); - else { - set_bit(WriteErrorSeen, &rdev->flags); - if (!test_and_set_bit(WantReplacement, &rdev->flags)) - set_bit(MD_RECOVERY_NEEDED, - &rdev->mddev->recovery); - set_bit(R10BIO_WriteError, &r10_bio->state); - } - } else if (is_badblock(rdev, - r10_bio->devs[slot].addr, - r10_bio->sectors, - &first_bad, &bad_sectors)) - set_bit(R10BIO_MadeGood, &r10_bio->state); - - rdev_dec_pending(rdev, mddev); - - end_sync_request(r10_bio); -} - -/* - * Note: 
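/*
 * Editor's note: a userspace model of the removal dance in raid10_remove_disk()
 * above: publish NULL, wait for old readers to drain (synchronize_rcu() in the
 * kernel, modeled here as a comment only), then check whether an in-flight
 * request raced with us and back out if so.  'device', 'slot' and 'nr_pending'
 * are illustrative stand-ins.
 */
#include <stdio.h>
#include <stdatomic.h>

struct device { const char *name; };

static struct device disk = { "rdev0" };
static struct device *_Atomic slot = &disk;
static atomic_int nr_pending;              /* requests still holding a reference */

static int try_remove(void)
{
	struct device *d = atomic_exchange(&slot, NULL);  /* publish NULL */
	/* synchronize_rcu() would go here: wait for existing readers to finish */
	if (atomic_load(&nr_pending)) {
		atomic_store(&slot, d);    /* lost the race, try again later */
		return -1;                 /* models -EBUSY */
	}
	printf("%s removed\n", d->name);
	return 0;
}

int main(void)
{
	atomic_store(&nr_pending, 1);
	printf("%d\n", try_remove());      /* -1: still busy */
	atomic_store(&nr_pending, 0);
	printf("%d\n", try_remove());      /* 0: removed */
	return 0;
}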
sync and recover and handled very differently for raid10 - * This code is for resync. - * For resync, we read through virtual addresses and read all blocks. - * If there is any error, we schedule a write. The lowest numbered - * drive is authoritative. - * However requests come for physical address, so we need to map. - * For every physical address there are raid_disks/copies virtual addresses, - * which is always are least one, but is not necessarly an integer. - * This means that a physical address can span multiple chunks, so we may - * have to submit multiple io requests for a single sync request. - */ -/* - * We check if all blocks are in-sync and only write to blocks that - * aren't in sync - */ -static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) -{ - struct r10conf *conf = mddev->private; - int i, first; - struct bio *tbio, *fbio; - int vcnt; - - atomic_set(&r10_bio->remaining, 1); - - /* find the first device with a block */ - for (i=0; i<conf->copies; i++) - if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) - break; - - if (i == conf->copies) - goto done; - - first = i; - fbio = r10_bio->devs[i].bio; - - vcnt = (r10_bio->sectors + (PAGE_SIZE >> 9) - 1) >> (PAGE_SHIFT - 9); - /* now find blocks with errors */ - for (i=0 ; i < conf->copies ; i++) { - int j, d; - - tbio = r10_bio->devs[i].bio; - - if (tbio->bi_end_io != end_sync_read) - continue; - if (i == first) - continue; - if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) { - /* We know that the bi_io_vec layout is the same for - * both 'first' and 'i', so we just compare them. - * All vec entries are PAGE_SIZE; - */ - for (j = 0; j < vcnt; j++) - if (memcmp(page_address(fbio->bi_io_vec[j].bv_page), - page_address(tbio->bi_io_vec[j].bv_page), - fbio->bi_io_vec[j].bv_len)) - break; - if (j == vcnt) - continue; - mddev->resync_mismatches += r10_bio->sectors; - if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) - /* Don't fix anything. */ - continue; - } - /* Ok, we need to write this bio, either to correct an - * inconsistency or to correct an unreadable block. 
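/*
 * Editor's note: a minimal model of the comparison step in sync_request_write()
 * above: the first readable copy is authoritative, every other copy is memcmp'd
 * page by page, and only copies that differ get rewritten.  Buffer contents and
 * the toy page size are made up for illustration.
 */
#include <stdio.h>
#include <string.h>

#define NPAGES 4
#define PAGE   16			/* toy page size */

static int copy_needs_rewrite(char first[NPAGES][PAGE], char other[NPAGES][PAGE])
{
	for (int j = 0; j < NPAGES; j++)
		if (memcmp(first[j], other[j], PAGE))
			return 1;	/* mismatch: schedule a write */
	return 0;			/* identical: nothing to do */
}

int main(void)
{
	char a[NPAGES][PAGE] = { "data0", "data1", "data2", "data3" };
	char b[NPAGES][PAGE] = { "data0", "dataX", "data2", "data3" };
	printf("%d\n", copy_needs_rewrite(a, a));	/* prints 0 */
	printf("%d\n", copy_needs_rewrite(a, b));	/* prints 1 */
	return 0;
}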
- * First we need to fixup bv_offset, bv_len and - * bi_vecs, as the read request might have corrupted these - */ - tbio->bi_vcnt = vcnt; - tbio->bi_size = r10_bio->sectors << 9; - tbio->bi_idx = 0; - tbio->bi_phys_segments = 0; - tbio->bi_flags &= ~(BIO_POOL_MASK - 1); - tbio->bi_flags |= 1 << BIO_UPTODATE; - tbio->bi_next = NULL; - tbio->bi_rw = WRITE; - tbio->bi_private = r10_bio; - tbio->bi_sector = r10_bio->devs[i].addr; - - for (j=0; j < vcnt ; j++) { - tbio->bi_io_vec[j].bv_offset = 0; - tbio->bi_io_vec[j].bv_len = PAGE_SIZE; - - memcpy(page_address(tbio->bi_io_vec[j].bv_page), - page_address(fbio->bi_io_vec[j].bv_page), - PAGE_SIZE); - } - tbio->bi_end_io = end_sync_write; - - d = r10_bio->devs[i].devnum; - atomic_inc(&conf->mirrors[d].rdev->nr_pending); - atomic_inc(&r10_bio->remaining); - md_sync_acct(conf->mirrors[d].rdev->bdev, tbio->bi_size >> 9); - - tbio->bi_sector += conf->mirrors[d].rdev->data_offset; - tbio->bi_bdev = conf->mirrors[d].rdev->bdev; - generic_make_request(tbio); - } - - /* Now write out to any replacement devices - * that are active - */ - for (i = 0; i < conf->copies; i++) { - int j, d; - - tbio = r10_bio->devs[i].repl_bio; - if (!tbio || !tbio->bi_end_io) - continue; - if (r10_bio->devs[i].bio->bi_end_io != end_sync_write - && r10_bio->devs[i].bio != fbio) - for (j = 0; j < vcnt; j++) - memcpy(page_address(tbio->bi_io_vec[j].bv_page), - page_address(fbio->bi_io_vec[j].bv_page), - PAGE_SIZE); - d = r10_bio->devs[i].devnum; - atomic_inc(&r10_bio->remaining); - md_sync_acct(conf->mirrors[d].replacement->bdev, - tbio->bi_size >> 9); - generic_make_request(tbio); - } - -done: - if (atomic_dec_and_test(&r10_bio->remaining)) { - md_done_sync(mddev, r10_bio->sectors, 1); - put_buf(r10_bio); - } -} - -/* - * Now for the recovery code. - * Recovery happens across physical sectors. - * We recover all non-is_sync drives by finding the virtual address of - * each, and then choose a working drive that also has that virt address. - * There is a separate r10_bio for each non-in_sync drive. - * Only the first two slots are in use. The first for reading, - * The second for writing. - * - */ -static void fix_recovery_read_error(struct r10bio *r10_bio) -{ - /* We got a read error during recovery. - * We repeat the read in smaller page-sized sections. - * If a read succeeds, write it to the new device or record - * a bad block if we cannot. - * If a read fails, record a bad block on both old and - * new devices. 
- */ - struct mddev *mddev = r10_bio->mddev; - struct r10conf *conf = mddev->private; - struct bio *bio = r10_bio->devs[0].bio; - sector_t sect = 0; - int sectors = r10_bio->sectors; - int idx = 0; - int dr = r10_bio->devs[0].devnum; - int dw = r10_bio->devs[1].devnum; - - while (sectors) { - int s = sectors; - struct md_rdev *rdev; - sector_t addr; - int ok; - - if (s > (PAGE_SIZE>>9)) - s = PAGE_SIZE >> 9; - - rdev = conf->mirrors[dr].rdev; - addr = r10_bio->devs[0].addr + sect, - ok = sync_page_io(rdev, - addr, - s << 9, - bio->bi_io_vec[idx].bv_page, - READ, false); - if (ok) { - rdev = conf->mirrors[dw].rdev; - addr = r10_bio->devs[1].addr + sect; - ok = sync_page_io(rdev, - addr, - s << 9, - bio->bi_io_vec[idx].bv_page, - WRITE, false); - if (!ok) { - set_bit(WriteErrorSeen, &rdev->flags); - if (!test_and_set_bit(WantReplacement, - &rdev->flags)) - set_bit(MD_RECOVERY_NEEDED, - &rdev->mddev->recovery); - } - } - if (!ok) { - /* We don't worry if we cannot set a bad block - - * it really is bad so there is no loss in not - * recording it yet - */ - rdev_set_badblocks(rdev, addr, s, 0); - - if (rdev != conf->mirrors[dw].rdev) { - /* need bad block on destination too */ - struct md_rdev *rdev2 = conf->mirrors[dw].rdev; - addr = r10_bio->devs[1].addr + sect; - ok = rdev_set_badblocks(rdev2, addr, s, 0); - if (!ok) { - /* just abort the recovery */ - printk(KERN_NOTICE - "md/raid10:%s: recovery aborted" - " due to read error\n", - mdname(mddev)); - - conf->mirrors[dw].recovery_disabled - = mddev->recovery_disabled; - set_bit(MD_RECOVERY_INTR, - &mddev->recovery); - break; - } - } - } - - sectors -= s; - sect += s; - idx++; - } -} - -static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) -{ - struct r10conf *conf = mddev->private; - int d; - struct bio *wbio, *wbio2; - - if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) { - fix_recovery_read_error(r10_bio); - end_sync_request(r10_bio); - return; - } - - /* - * share the pages with the first bio - * and submit the write request - */ - d = r10_bio->devs[1].devnum; - wbio = r10_bio->devs[1].bio; - wbio2 = r10_bio->devs[1].repl_bio; - if (wbio->bi_end_io) { - atomic_inc(&conf->mirrors[d].rdev->nr_pending); - md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9); - generic_make_request(wbio); - } - if (wbio2 && wbio2->bi_end_io) { - atomic_inc(&conf->mirrors[d].replacement->nr_pending); - md_sync_acct(conf->mirrors[d].replacement->bdev, - wbio2->bi_size >> 9); - generic_make_request(wbio2); - } -} - - -/* - * Used by fix_read_error() to decay the per rdev read_errors. - * We halve the read error count for every hour that has elapsed - * since the last recorded read error. - * - */ -static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev) -{ - struct timespec cur_time_mon; - unsigned long hours_since_last; - unsigned int read_errors = atomic_read(&rdev->read_errors); - - ktime_get_ts(&cur_time_mon); - - if (rdev->last_read_error.tv_sec == 0 && - rdev->last_read_error.tv_nsec == 0) { - /* first time we've seen a read error */ - rdev->last_read_error = cur_time_mon; - return; - } - - hours_since_last = (cur_time_mon.tv_sec - - rdev->last_read_error.tv_sec) / 3600; - - rdev->last_read_error = cur_time_mon; - - /* - * if hours_since_last is > the number of bits in read_errors - * just set read errors to 0. We do this to avoid - * overflowing the shift of read_errors by hours_since_last. 
- */ - if (hours_since_last >= 8 * sizeof(read_errors)) - atomic_set(&rdev->read_errors, 0); - else - atomic_set(&rdev->read_errors, read_errors >> hours_since_last); -} - -static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector, - int sectors, struct page *page, int rw) -{ - sector_t first_bad; - int bad_sectors; - - if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors) - && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags))) - return -1; - if (sync_page_io(rdev, sector, sectors << 9, page, rw, false)) - /* success */ - return 1; - if (rw == WRITE) { - set_bit(WriteErrorSeen, &rdev->flags); - if (!test_and_set_bit(WantReplacement, &rdev->flags)) - set_bit(MD_RECOVERY_NEEDED, - &rdev->mddev->recovery); - } - /* need to record an error - either for the block or the device */ - if (!rdev_set_badblocks(rdev, sector, sectors, 0)) - md_error(rdev->mddev, rdev); - return 0; -} - -/* - * This is a kernel thread which: - * - * 1. Retries failed read operations on working mirrors. - * 2. Updates the raid superblock when problems encounter. - * 3. Performs writes following reads for array synchronising. - */ - -static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio) -{ - int sect = 0; /* Offset from r10_bio->sector */ - int sectors = r10_bio->sectors; - struct md_rdev*rdev; - int max_read_errors = atomic_read(&mddev->max_corr_read_errors); - int d = r10_bio->devs[r10_bio->read_slot].devnum; - - /* still own a reference to this rdev, so it cannot - * have been cleared recently. - */ - rdev = conf->mirrors[d].rdev; - - if (test_bit(Faulty, &rdev->flags)) - /* drive has already been failed, just ignore any - more fix_read_error() attempts */ - return; - - check_decay_read_errors(mddev, rdev); - atomic_inc(&rdev->read_errors); - if (atomic_read(&rdev->read_errors) > max_read_errors) { - char b[BDEVNAME_SIZE]; - bdevname(rdev->bdev, b); - - printk(KERN_NOTICE - "md/raid10:%s: %s: Raid device exceeded " - "read_error threshold [cur %d:max %d]\n", - mdname(mddev), b, - atomic_read(&rdev->read_errors), max_read_errors); - printk(KERN_NOTICE - "md/raid10:%s: %s: Failing raid device\n", - mdname(mddev), b); - md_error(mddev, conf->mirrors[d].rdev); - r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED; - return; - } - - while(sectors) { - int s = sectors; - int sl = r10_bio->read_slot; - int success = 0; - int start; - - if (s > (PAGE_SIZE>>9)) - s = PAGE_SIZE >> 9; - - rcu_read_lock(); - do { - sector_t first_bad; - int bad_sectors; - - d = r10_bio->devs[sl].devnum; - rdev = rcu_dereference(conf->mirrors[d].rdev); - if (rdev && - !test_bit(Unmerged, &rdev->flags) && - test_bit(In_sync, &rdev->flags) && - is_badblock(rdev, r10_bio->devs[sl].addr + sect, s, - &first_bad, &bad_sectors) == 0) { - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - success = sync_page_io(rdev, - r10_bio->devs[sl].addr + - sect, - s<<9, - conf->tmppage, READ, false); - rdev_dec_pending(rdev, mddev); - rcu_read_lock(); - if (success) - break; - } - sl++; - if (sl == conf->copies) - sl = 0; - } while (!success && sl != r10_bio->read_slot); - rcu_read_unlock(); - - if (!success) { - /* Cannot read from anywhere, just mark the block - * as bad on the first device to discourage future - * reads. 
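/*
 * Editor's note: a userspace model of the read-error accounting above: the
 * per-device counter is halved for every elapsed hour (with a guard against
 * shifting by more than the counter width) and the device is failed once the
 * decayed count crosses the threshold.  Plain integers stand in for the
 * kernel's atomics and timespecs; the numbers are made up.
 */
#include <stdio.h>

static unsigned int decay(unsigned int read_errors, unsigned long hours)
{
	if (hours >= 8 * sizeof(read_errors))
		return 0;			/* would shift out every bit */
	return read_errors >> hours;		/* halve once per hour */
}

int main(void)
{
	unsigned int max_read_errors = 20;
	unsigned int errors = 40;

	errors = decay(errors, 3);		/* 40 >> 3 = 5 */
	errors += 1;				/* one new read error */
	printf("decayed+1 = %u, fail device: %s\n",
	       errors, errors > max_read_errors ? "yes" : "no");
	return 0;
}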
- */ - int dn = r10_bio->devs[r10_bio->read_slot].devnum; - rdev = conf->mirrors[dn].rdev; - - if (!rdev_set_badblocks( - rdev, - r10_bio->devs[r10_bio->read_slot].addr - + sect, - s, 0)) { - md_error(mddev, rdev); - r10_bio->devs[r10_bio->read_slot].bio - = IO_BLOCKED; - } - break; - } - - start = sl; - /* write it back and re-read */ - rcu_read_lock(); - while (sl != r10_bio->read_slot) { - char b[BDEVNAME_SIZE]; - - if (sl==0) - sl = conf->copies; - sl--; - d = r10_bio->devs[sl].devnum; - rdev = rcu_dereference(conf->mirrors[d].rdev); - if (!rdev || - test_bit(Unmerged, &rdev->flags) || - !test_bit(In_sync, &rdev->flags)) - continue; - - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - if (r10_sync_page_io(rdev, - r10_bio->devs[sl].addr + - sect, - s, conf->tmppage, WRITE) - == 0) { - /* Well, this device is dead */ - printk(KERN_NOTICE - "md/raid10:%s: read correction " - "write failed" - " (%d sectors at %llu on %s)\n", - mdname(mddev), s, - (unsigned long long)( - sect + rdev->data_offset), - bdevname(rdev->bdev, b)); - printk(KERN_NOTICE "md/raid10:%s: %s: failing " - "drive\n", - mdname(mddev), - bdevname(rdev->bdev, b)); - } - rdev_dec_pending(rdev, mddev); - rcu_read_lock(); - } - sl = start; - while (sl != r10_bio->read_slot) { - char b[BDEVNAME_SIZE]; - - if (sl==0) - sl = conf->copies; - sl--; - d = r10_bio->devs[sl].devnum; - rdev = rcu_dereference(conf->mirrors[d].rdev); - if (!rdev || - !test_bit(In_sync, &rdev->flags)) - continue; - - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - switch (r10_sync_page_io(rdev, - r10_bio->devs[sl].addr + - sect, - s, conf->tmppage, - READ)) { - case 0: - /* Well, this device is dead */ - printk(KERN_NOTICE - "md/raid10:%s: unable to read back " - "corrected sectors" - " (%d sectors at %llu on %s)\n", - mdname(mddev), s, - (unsigned long long)( - sect + rdev->data_offset), - bdevname(rdev->bdev, b)); - printk(KERN_NOTICE "md/raid10:%s: %s: failing " - "drive\n", - mdname(mddev), - bdevname(rdev->bdev, b)); - break; - case 1: - printk(KERN_INFO - "md/raid10:%s: read error corrected" - " (%d sectors at %llu on %s)\n", - mdname(mddev), s, - (unsigned long long)( - sect + rdev->data_offset), - bdevname(rdev->bdev, b)); - atomic_add(s, &rdev->corrected_errors); - } - - rdev_dec_pending(rdev, mddev); - rcu_read_lock(); - } - rcu_read_unlock(); - - sectors -= s; - sect += s; - } -} - -static void bi_complete(struct bio *bio, int error) -{ - complete((struct completion *)bio->bi_private); -} - -static int submit_bio_wait(int rw, struct bio *bio) -{ - struct completion event; - rw |= REQ_SYNC; - - init_completion(&event); - bio->bi_private = &event; - bio->bi_end_io = bi_complete; - submit_bio(rw, bio); - wait_for_completion(&event); - - return test_bit(BIO_UPTODATE, &bio->bi_flags); -} - -static int narrow_write_error(struct r10bio *r10_bio, int i) -{ - struct bio *bio = r10_bio->master_bio; - struct mddev *mddev = r10_bio->mddev; - struct r10conf *conf = mddev->private; - struct md_rdev *rdev = conf->mirrors[r10_bio->devs[i].devnum].rdev; - /* bio has the data to be written to slot 'i' where - * we just recently had a write error. - * We repeatedly clone the bio and trim down to one block, - * then try the write. Where the write fails we record - * a bad block. - * It is conceivable that the bio doesn't exactly align with - * blocks. We must handle this. - * - * We currently own a reference to the rdev. 
- */ - - int block_sectors; - sector_t sector; - int sectors; - int sect_to_write = r10_bio->sectors; - int ok = 1; - - if (rdev->badblocks.shift < 0) - return 0; - - block_sectors = 1 << rdev->badblocks.shift; - sector = r10_bio->sector; - sectors = ((r10_bio->sector + block_sectors) - & ~(sector_t)(block_sectors - 1)) - - sector; - - while (sect_to_write) { - struct bio *wbio; - if (sectors > sect_to_write) - sectors = sect_to_write; - /* Write at 'sector' for 'sectors' */ - wbio = bio_clone_mddev(bio, GFP_NOIO, mddev); - md_trim_bio(wbio, sector - bio->bi_sector, sectors); - wbio->bi_sector = (r10_bio->devs[i].addr+ - rdev->data_offset+ - (sector - r10_bio->sector)); - wbio->bi_bdev = rdev->bdev; - if (submit_bio_wait(WRITE, wbio) == 0) - /* Failure! */ - ok = rdev_set_badblocks(rdev, sector, - sectors, 0) - && ok; - - bio_put(wbio); - sect_to_write -= sectors; - sector += sectors; - sectors = block_sectors; - } - return ok; -} - -static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) -{ - int slot = r10_bio->read_slot; - struct bio *bio; - struct r10conf *conf = mddev->private; - struct md_rdev *rdev = r10_bio->devs[slot].rdev; - char b[BDEVNAME_SIZE]; - unsigned long do_sync; - int max_sectors; - - /* we got a read error. Maybe the drive is bad. Maybe just - * the block and we can fix it. - * We freeze all other IO, and try reading the block from - * other devices. When we find one, we re-write - * and check it that fixes the read error. - * This is all done synchronously while the array is - * frozen. - */ - bio = r10_bio->devs[slot].bio; - bdevname(bio->bi_bdev, b); - bio_put(bio); - r10_bio->devs[slot].bio = NULL; - - if (mddev->ro == 0) { - freeze_array(conf); - fix_read_error(conf, mddev, r10_bio); - unfreeze_array(conf); - } else - r10_bio->devs[slot].bio = IO_BLOCKED; - - rdev_dec_pending(rdev, mddev); - -read_more: - rdev = read_balance(conf, r10_bio, &max_sectors); - if (rdev == NULL) { - printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O" - " read error for block %llu\n", - mdname(mddev), b, - (unsigned long long)r10_bio->sector); - raid_end_bio_io(r10_bio); - return; - } - - do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); - slot = r10_bio->read_slot; - printk_ratelimited( - KERN_ERR - "md/raid10:%s: %s: redirecting " - "sector %llu to another mirror\n", - mdname(mddev), - bdevname(rdev->bdev, b), - (unsigned long long)r10_bio->sector); - bio = bio_clone_mddev(r10_bio->master_bio, - GFP_NOIO, mddev); - md_trim_bio(bio, - r10_bio->sector - bio->bi_sector, - max_sectors); - r10_bio->devs[slot].bio = bio; - r10_bio->devs[slot].rdev = rdev; - bio->bi_sector = r10_bio->devs[slot].addr - + rdev->data_offset; - bio->bi_bdev = rdev->bdev; - bio->bi_rw = READ | do_sync; - bio->bi_private = r10_bio; - bio->bi_end_io = raid10_end_read_request; - if (max_sectors < r10_bio->sectors) { - /* Drat - have to split this up more */ - struct bio *mbio = r10_bio->master_bio; - int sectors_handled = - r10_bio->sector + max_sectors - - mbio->bi_sector; - r10_bio->sectors = max_sectors; - spin_lock_irq(&conf->device_lock); - if (mbio->bi_phys_segments == 0) - mbio->bi_phys_segments = 2; - else - mbio->bi_phys_segments++; - spin_unlock_irq(&conf->device_lock); - generic_make_request(bio); - - r10_bio = mempool_alloc(conf->r10bio_pool, - GFP_NOIO); - r10_bio->master_bio = mbio; - r10_bio->sectors = (mbio->bi_size >> 9) - - sectors_handled; - r10_bio->state = 0; - set_bit(R10BIO_ReadError, - &r10_bio->state); - r10_bio->mddev = mddev; - r10_bio->sector = mbio->bi_sector - + 
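/*
 * Editor's note: a worked example of the bad-block alignment done in
 * narrow_write_error() above.  With a badblock granularity of 2^shift sectors,
 * the first chunk is trimmed so that every later chunk starts on a granule
 * boundary.  The sector number and shift are made up for illustration.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long sector = 1003;	/* start of the failed write */
	int shift = 3;				/* bad blocks recorded in 8-sector units */
	unsigned long long block_sectors = 1ULL << shift;

	/* distance from 'sector' up to the next 8-sector boundary */
	unsigned long long first_chunk =
		((sector + block_sectors) & ~(block_sectors - 1)) - sector;

	printf("first chunk: %llu sectors (%llu..%llu), then full %llu-sector chunks\n",
	       first_chunk, sector, sector + first_chunk - 1, block_sectors);
	/* prints: first chunk: 5 sectors (1003..1007), then full 8-sector chunks */
	return 0;
}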
sectors_handled; - - goto read_more; - } else - generic_make_request(bio); -} - -static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) -{ - /* Some sort of write request has finished and it - * succeeded in writing where we thought there was a - * bad block. So forget the bad block. - * Or possibly if failed and we need to record - * a bad block. - */ - int m; - struct md_rdev *rdev; - - if (test_bit(R10BIO_IsSync, &r10_bio->state) || - test_bit(R10BIO_IsRecover, &r10_bio->state)) { - for (m = 0; m < conf->copies; m++) { - int dev = r10_bio->devs[m].devnum; - rdev = conf->mirrors[dev].rdev; - if (r10_bio->devs[m].bio == NULL) - continue; - if (test_bit(BIO_UPTODATE, - &r10_bio->devs[m].bio->bi_flags)) { - rdev_clear_badblocks( - rdev, - r10_bio->devs[m].addr, - r10_bio->sectors); - } else { - if (!rdev_set_badblocks( - rdev, - r10_bio->devs[m].addr, - r10_bio->sectors, 0)) - md_error(conf->mddev, rdev); - } - rdev = conf->mirrors[dev].replacement; - if (r10_bio->devs[m].repl_bio == NULL) - continue; - if (test_bit(BIO_UPTODATE, - &r10_bio->devs[m].repl_bio->bi_flags)) { - rdev_clear_badblocks( - rdev, - r10_bio->devs[m].addr, - r10_bio->sectors); - } else { - if (!rdev_set_badblocks( - rdev, - r10_bio->devs[m].addr, - r10_bio->sectors, 0)) - md_error(conf->mddev, rdev); - } - } - put_buf(r10_bio); - } else { - for (m = 0; m < conf->copies; m++) { - int dev = r10_bio->devs[m].devnum; - struct bio *bio = r10_bio->devs[m].bio; - rdev = conf->mirrors[dev].rdev; - if (bio == IO_MADE_GOOD) { - rdev_clear_badblocks( - rdev, - r10_bio->devs[m].addr, - r10_bio->sectors); - rdev_dec_pending(rdev, conf->mddev); - } else if (bio != NULL && - !test_bit(BIO_UPTODATE, &bio->bi_flags)) { - if (!narrow_write_error(r10_bio, m)) { - md_error(conf->mddev, rdev); - set_bit(R10BIO_Degraded, - &r10_bio->state); - } - rdev_dec_pending(rdev, conf->mddev); - } - bio = r10_bio->devs[m].repl_bio; - rdev = conf->mirrors[dev].replacement; - if (rdev && bio == IO_MADE_GOOD) { - rdev_clear_badblocks( - rdev, - r10_bio->devs[m].addr, - r10_bio->sectors); - rdev_dec_pending(rdev, conf->mddev); - } - } - if (test_bit(R10BIO_WriteError, - &r10_bio->state)) - close_write(r10_bio); - raid_end_bio_io(r10_bio); - } -} - -static void raid10d(struct mddev *mddev) -{ - struct r10bio *r10_bio; - unsigned long flags; - struct r10conf *conf = mddev->private; - struct list_head *head = &conf->retry_list; - struct blk_plug plug; - - md_check_recovery(mddev); - - blk_start_plug(&plug); - for (;;) { - - flush_pending_writes(conf); - - spin_lock_irqsave(&conf->device_lock, flags); - if (list_empty(head)) { - spin_unlock_irqrestore(&conf->device_lock, flags); - break; - } - r10_bio = list_entry(head->prev, struct r10bio, retry_list); - list_del(head->prev); - conf->nr_queued--; - spin_unlock_irqrestore(&conf->device_lock, flags); - - mddev = r10_bio->mddev; - conf = mddev->private; - if (test_bit(R10BIO_MadeGood, &r10_bio->state) || - test_bit(R10BIO_WriteError, &r10_bio->state)) - handle_write_completed(conf, r10_bio); - else if (test_bit(R10BIO_IsSync, &r10_bio->state)) - sync_request_write(mddev, r10_bio); - else if (test_bit(R10BIO_IsRecover, &r10_bio->state)) - recovery_request_write(mddev, r10_bio); - else if (test_bit(R10BIO_ReadError, &r10_bio->state)) - handle_read_error(mddev, r10_bio); - else { - /* just a partial read to be scheduled from a - * separate context - */ - int slot = r10_bio->read_slot; - generic_make_request(r10_bio->devs[slot].bio); - } - - cond_resched(); - if (mddev->flags & 
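/*
 * Editor's note: a sketch of the dispatch policy in raid10d() above: each queued
 * r10bio is routed by its state bits, checked in the same order as the kernel
 * code.  The bit names and handler strings are illustrative stand-ins for the
 * real R10BIO_* flags and functions.
 */
#include <stdio.h>

enum { BIT_MADE_GOOD = 1, BIT_WRITE_ERR = 2, BIT_SYNC = 4,
       BIT_RECOVER = 8, BIT_READ_ERR = 16 };

static const char *dispatch(unsigned state)
{
	if (state & (BIT_MADE_GOOD | BIT_WRITE_ERR))
		return "handle_write_completed";
	if (state & BIT_SYNC)
		return "sync_request_write";
	if (state & BIT_RECOVER)
		return "recovery_request_write";
	if (state & BIT_READ_ERR)
		return "handle_read_error";
	return "resubmit partial read";
}

int main(void)
{
	printf("%s\n", dispatch(BIT_SYNC));		/* sync_request_write */
	printf("%s\n", dispatch(BIT_READ_ERR));		/* handle_read_error */
	printf("%s\n", dispatch(0));			/* resubmit partial read */
	return 0;
}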
~(1<<MD_CHANGE_PENDING)) - md_check_recovery(mddev); - } - blk_finish_plug(&plug); -} - - -static int init_resync(struct r10conf *conf) -{ - int buffs; - int i; - - buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE; - BUG_ON(conf->r10buf_pool); - conf->have_replacement = 0; - for (i = 0; i < conf->raid_disks; i++) - if (conf->mirrors[i].replacement) - conf->have_replacement = 1; - conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf); - if (!conf->r10buf_pool) - return -ENOMEM; - conf->next_resync = 0; - return 0; -} - -/* - * perform a "sync" on one "block" - * - * We need to make sure that no normal I/O request - particularly write - * requests - conflict with active sync requests. - * - * This is achieved by tracking pending requests and a 'barrier' concept - * that can be installed to exclude normal IO requests. - * - * Resync and recovery are handled very differently. - * We differentiate by looking at MD_RECOVERY_SYNC in mddev->recovery. - * - * For resync, we iterate over virtual addresses, read all copies, - * and update if there are differences. If only one copy is live, - * skip it. - * For recovery, we iterate over physical addresses, read a good - * value for each non-in_sync drive, and over-write. - * - * So, for recovery we may have several outstanding complex requests for a - * given address, one for each out-of-sync device. We model this by allocating - * a number of r10_bio structures, one for each out-of-sync device. - * As we setup these structures, we collect all bio's together into a list - * which we then process collectively to add pages, and then process again - * to pass to generic_make_request. - * - * The r10_bio structures are linked using a borrowed master_bio pointer. - * This link is counted in ->remaining. When the r10_bio that points to NULL - * has its remaining count decremented to 0, the whole complex operation - * is complete. - * - */ - -static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, - int *skipped, int go_faster) -{ - struct r10conf *conf = mddev->private; - struct r10bio *r10_bio; - struct bio *biolist = NULL, *bio; - sector_t max_sector, nr_sectors; - int i; - int max_sync; - sector_t sync_blocks; - sector_t sectors_skipped = 0; - int chunks_skipped = 0; - - if (!conf->r10buf_pool) - if (init_resync(conf)) - return 0; - - skipped: - max_sector = mddev->dev_sectors; - if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) - max_sector = mddev->resync_max_sectors; - if (sector_nr >= max_sector) { - /* If we aborted, we need to abort the - * sync on the 'current' bitmap chucks (there can - * be several when recovering multiple devices). - * as we may have started syncing it but not finished. - * We can find the current address in - * mddev->curr_resync, but for recovery, - * we need to convert that to several - * virtual addresses. - */ - if (mddev->curr_resync < max_sector) { /* aborted */ - if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) - bitmap_end_sync(mddev->bitmap, mddev->curr_resync, - &sync_blocks, 1); - else for (i=0; i<conf->raid_disks; i++) { - sector_t sect = - raid10_find_virt(conf, mddev->curr_resync, i); - bitmap_end_sync(mddev->bitmap, sect, - &sync_blocks, 1); - } - } else { - /* completed sync */ - if ((!mddev->bitmap || conf->fullsync) - && conf->have_replacement - && test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { - /* Completed a full sync so the replacements - * are now fully recovered. 
- */ - for (i = 0; i < conf->raid_disks; i++) - if (conf->mirrors[i].replacement) - conf->mirrors[i].replacement - ->recovery_offset - = MaxSector; - } - conf->fullsync = 0; - } - bitmap_close_sync(mddev->bitmap); - close_sync(conf); - *skipped = 1; - return sectors_skipped; - } - if (chunks_skipped >= conf->raid_disks) { - /* if there has been nothing to do on any drive, - * then there is nothing to do at all.. - */ - *skipped = 1; - return (max_sector - sector_nr) + sectors_skipped; - } - - if (max_sector > mddev->resync_max) - max_sector = mddev->resync_max; /* Don't do IO beyond here */ - - /* make sure whole request will fit in a chunk - if chunks - * are meaningful - */ - if (conf->near_copies < conf->raid_disks && - max_sector > (sector_nr | conf->chunk_mask)) - max_sector = (sector_nr | conf->chunk_mask) + 1; - /* - * If there is non-resync activity waiting for us then - * put in a delay to throttle resync. - */ - if (!go_faster && conf->nr_waiting) - msleep_interruptible(1000); - - /* Again, very different code for resync and recovery. - * Both must result in an r10bio with a list of bios that - * have bi_end_io, bi_sector, bi_bdev set, - * and bi_private set to the r10bio. - * For recovery, we may actually create several r10bios - * with 2 bios in each, that correspond to the bios in the main one. - * In this case, the subordinate r10bios link back through a - * borrowed master_bio pointer, and the counter in the master - * includes a ref from each subordinate. - */ - /* First, we decide what to do and set ->bi_end_io - * To end_sync_read if we want to read, and - * end_sync_write if we will want to write. - */ - - max_sync = RESYNC_PAGES << (PAGE_SHIFT-9); - if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { - /* recovery... the complicated one */ - int j; - r10_bio = NULL; - - for (i=0 ; i<conf->raid_disks; i++) { - int still_degraded; - struct r10bio *rb2; - sector_t sect; - int must_sync; - int any_working; - struct mirror_info *mirror = &conf->mirrors[i]; - - if ((mirror->rdev == NULL || - test_bit(In_sync, &mirror->rdev->flags)) - && - (mirror->replacement == NULL || - test_bit(Faulty, - &mirror->replacement->flags))) - continue; - - still_degraded = 0; - /* want to reconstruct this device */ - rb2 = r10_bio; - sect = raid10_find_virt(conf, sector_nr, i); - if (sect >= mddev->resync_max_sectors) { - /* last stripe is not complete - don't - * try to recover this sector. 
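/*
 * Editor's note: a worked example of the chunk clamp in sync_request() above:
 * OR-ing the current sector with chunk_mask and adding one gives the first
 * sector of the next chunk, so a single sync pass never crosses a chunk
 * boundary.  The numbers are made up for illustration.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long sector_nr = 1000;
	unsigned long long chunk_mask = 127;	/* 64KiB chunks = 128 sectors */

	unsigned long long end = (sector_nr | chunk_mask) + 1;
	printf("sync %llu..%llu, next chunk starts at %llu\n",
	       sector_nr, end - 1, end);
	/* 1000 | 127 = 1023, so this pass covers 1000..1023; 1024 is chunk-aligned */
	return 0;
}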
- */ - continue; - } - /* Unless we are doing a full sync, or a replacement - * we only need to recover the block if it is set in - * the bitmap - */ - must_sync = bitmap_start_sync(mddev->bitmap, sect, - &sync_blocks, 1); - if (sync_blocks < max_sync) - max_sync = sync_blocks; - if (!must_sync && - mirror->replacement == NULL && - !conf->fullsync) { - /* yep, skip the sync_blocks here, but don't assume - * that there will never be anything to do here - */ - chunks_skipped = -1; - continue; - } - - r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); - raise_barrier(conf, rb2 != NULL); - atomic_set(&r10_bio->remaining, 0); - - r10_bio->master_bio = (struct bio*)rb2; - if (rb2) - atomic_inc(&rb2->remaining); - r10_bio->mddev = mddev; - set_bit(R10BIO_IsRecover, &r10_bio->state); - r10_bio->sector = sect; - - raid10_find_phys(conf, r10_bio); - - /* Need to check if the array will still be - * degraded - */ - for (j=0; j<conf->raid_disks; j++) - if (conf->mirrors[j].rdev == NULL || - test_bit(Faulty, &conf->mirrors[j].rdev->flags)) { - still_degraded = 1; - break; - } - - must_sync = bitmap_start_sync(mddev->bitmap, sect, - &sync_blocks, still_degraded); - - any_working = 0; - for (j=0; j<conf->copies;j++) { - int k; - int d = r10_bio->devs[j].devnum; - sector_t from_addr, to_addr; - struct md_rdev *rdev; - sector_t sector, first_bad; - int bad_sectors; - if (!conf->mirrors[d].rdev || - !test_bit(In_sync, &conf->mirrors[d].rdev->flags)) - continue; - /* This is where we read from */ - any_working = 1; - rdev = conf->mirrors[d].rdev; - sector = r10_bio->devs[j].addr; - - if (is_badblock(rdev, sector, max_sync, - &first_bad, &bad_sectors)) { - if (first_bad > sector) - max_sync = first_bad - sector; - else { - bad_sectors -= (sector - - first_bad); - if (max_sync > bad_sectors) - max_sync = bad_sectors; - continue; - } - } - bio = r10_bio->devs[0].bio; - bio->bi_next = biolist; - biolist = bio; - bio->bi_private = r10_bio; - bio->bi_end_io = end_sync_read; - bio->bi_rw = READ; - from_addr = r10_bio->devs[j].addr; - bio->bi_sector = from_addr + rdev->data_offset; - bio->bi_bdev = rdev->bdev; - atomic_inc(&rdev->nr_pending); - /* and we write to 'i' (if not in_sync) */ - - for (k=0; k<conf->copies; k++) - if (r10_bio->devs[k].devnum == i) - break; - BUG_ON(k == conf->copies); - to_addr = r10_bio->devs[k].addr; - r10_bio->devs[0].devnum = d; - r10_bio->devs[0].addr = from_addr; - r10_bio->devs[1].devnum = i; - r10_bio->devs[1].addr = to_addr; - - rdev = mirror->rdev; - if (!test_bit(In_sync, &rdev->flags)) { - bio = r10_bio->devs[1].bio; - bio->bi_next = biolist; - biolist = bio; - bio->bi_private = r10_bio; - bio->bi_end_io = end_sync_write; - bio->bi_rw = WRITE; - bio->bi_sector = to_addr - + rdev->data_offset; - bio->bi_bdev = rdev->bdev; - atomic_inc(&r10_bio->remaining); - } else - r10_bio->devs[1].bio->bi_end_io = NULL; - - /* and maybe write to replacement */ - bio = r10_bio->devs[1].repl_bio; - if (bio) - bio->bi_end_io = NULL; - rdev = mirror->replacement; - /* Note: if rdev != NULL, then bio - * cannot be NULL as r10buf_pool_alloc will - * have allocated it. - * So the second test here is pointless. - * But it keeps semantic-checkers happy, and - * this comment keeps human reviewers - * happy. 
- */ - if (rdev == NULL || bio == NULL || - test_bit(Faulty, &rdev->flags)) - break; - bio->bi_next = biolist; - biolist = bio; - bio->bi_private = r10_bio; - bio->bi_end_io = end_sync_write; - bio->bi_rw = WRITE; - bio->bi_sector = to_addr + rdev->data_offset; - bio->bi_bdev = rdev->bdev; - atomic_inc(&r10_bio->remaining); - break; - } - if (j == conf->copies) { - /* Cannot recover, so abort the recovery or - * record a bad block */ - put_buf(r10_bio); - if (rb2) - atomic_dec(&rb2->remaining); - r10_bio = rb2; - if (any_working) { - /* problem is that there are bad blocks - * on other device(s) - */ - int k; - for (k = 0; k < conf->copies; k++) - if (r10_bio->devs[k].devnum == i) - break; - if (!test_bit(In_sync, - &mirror->rdev->flags) - && !rdev_set_badblocks( - mirror->rdev, - r10_bio->devs[k].addr, - max_sync, 0)) - any_working = 0; - if (mirror->replacement && - !rdev_set_badblocks( - mirror->replacement, - r10_bio->devs[k].addr, - max_sync, 0)) - any_working = 0; - } - if (!any_working) { - if (!test_and_set_bit(MD_RECOVERY_INTR, - &mddev->recovery)) - printk(KERN_INFO "md/raid10:%s: insufficient " - "working devices for recovery.\n", - mdname(mddev)); - mirror->recovery_disabled - = mddev->recovery_disabled; - } - break; - } - } - if (biolist == NULL) { - while (r10_bio) { - struct r10bio *rb2 = r10_bio; - r10_bio = (struct r10bio*) rb2->master_bio; - rb2->master_bio = NULL; - put_buf(rb2); - } - goto giveup; - } - } else { - /* resync. Schedule a read for every block at this virt offset */ - int count = 0; - - bitmap_cond_end_sync(mddev->bitmap, sector_nr); - - if (!bitmap_start_sync(mddev->bitmap, sector_nr, - &sync_blocks, mddev->degraded) && - !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, - &mddev->recovery)) { - /* We can skip this block */ - *skipped = 1; - return sync_blocks + sectors_skipped; - } - if (sync_blocks < max_sync) - max_sync = sync_blocks; - r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO); - - r10_bio->mddev = mddev; - atomic_set(&r10_bio->remaining, 0); - raise_barrier(conf, 0); - conf->next_resync = sector_nr; - - r10_bio->master_bio = NULL; - r10_bio->sector = sector_nr; - set_bit(R10BIO_IsSync, &r10_bio->state); - raid10_find_phys(conf, r10_bio); - r10_bio->sectors = (sector_nr | conf->chunk_mask) - sector_nr +1; - - for (i=0; i<conf->copies; i++) { - int d = r10_bio->devs[i].devnum; - sector_t first_bad, sector; - int bad_sectors; - - if (r10_bio->devs[i].repl_bio) - r10_bio->devs[i].repl_bio->bi_end_io = NULL; - - bio = r10_bio->devs[i].bio; - bio->bi_end_io = NULL; - clear_bit(BIO_UPTODATE, &bio->bi_flags); - if (conf->mirrors[d].rdev == NULL || - test_bit(Faulty, &conf->mirrors[d].rdev->flags)) - continue; - sector = r10_bio->devs[i].addr; - if (is_badblock(conf->mirrors[d].rdev, - sector, max_sync, - &first_bad, &bad_sectors)) { - if (first_bad > sector) - max_sync = first_bad - sector; - else { - bad_sectors -= (sector - first_bad); - if (max_sync > bad_sectors) - max_sync = max_sync; - continue; - } - } - atomic_inc(&conf->mirrors[d].rdev->nr_pending); - atomic_inc(&r10_bio->remaining); - bio->bi_next = biolist; - biolist = bio; - bio->bi_private = r10_bio; - bio->bi_end_io = end_sync_read; - bio->bi_rw = READ; - bio->bi_sector = sector + - conf->mirrors[d].rdev->data_offset; - bio->bi_bdev = conf->mirrors[d].rdev->bdev; - count++; - - if (conf->mirrors[d].replacement == NULL || - test_bit(Faulty, - &conf->mirrors[d].replacement->flags)) - continue; - - /* Need to set up for writing to the replacement */ - bio = r10_bio->devs[i].repl_bio; - 
clear_bit(BIO_UPTODATE, &bio->bi_flags); - - sector = r10_bio->devs[i].addr; - atomic_inc(&conf->mirrors[d].rdev->nr_pending); - bio->bi_next = biolist; - biolist = bio; - bio->bi_private = r10_bio; - bio->bi_end_io = end_sync_write; - bio->bi_rw = WRITE; - bio->bi_sector = sector + - conf->mirrors[d].replacement->data_offset; - bio->bi_bdev = conf->mirrors[d].replacement->bdev; - count++; - } - - if (count < 2) { - for (i=0; i<conf->copies; i++) { - int d = r10_bio->devs[i].devnum; - if (r10_bio->devs[i].bio->bi_end_io) - rdev_dec_pending(conf->mirrors[d].rdev, - mddev); - if (r10_bio->devs[i].repl_bio && - r10_bio->devs[i].repl_bio->bi_end_io) - rdev_dec_pending( - conf->mirrors[d].replacement, - mddev); - } - put_buf(r10_bio); - biolist = NULL; - goto giveup; - } - } - - for (bio = biolist; bio ; bio=bio->bi_next) { - - bio->bi_flags &= ~(BIO_POOL_MASK - 1); - if (bio->bi_end_io) - bio->bi_flags |= 1 << BIO_UPTODATE; - bio->bi_vcnt = 0; - bio->bi_idx = 0; - bio->bi_phys_segments = 0; - bio->bi_size = 0; - } - - nr_sectors = 0; - if (sector_nr + max_sync < max_sector) - max_sector = sector_nr + max_sync; - do { - struct page *page; - int len = PAGE_SIZE; - if (sector_nr + (len>>9) > max_sector) - len = (max_sector - sector_nr) << 9; - if (len == 0) - break; - for (bio= biolist ; bio ; bio=bio->bi_next) { - struct bio *bio2; - page = bio->bi_io_vec[bio->bi_vcnt].bv_page; - if (bio_add_page(bio, page, len, 0)) - continue; - - /* stop here */ - bio->bi_io_vec[bio->bi_vcnt].bv_page = page; - for (bio2 = biolist; - bio2 && bio2 != bio; - bio2 = bio2->bi_next) { - /* remove last page from this bio */ - bio2->bi_vcnt--; - bio2->bi_size -= len; - bio2->bi_flags &= ~(1<< BIO_SEG_VALID); - } - goto bio_full; - } - nr_sectors += len>>9; - sector_nr += len>>9; - } while (biolist->bi_vcnt < RESYNC_PAGES); - bio_full: - r10_bio->sectors = nr_sectors; - - while (biolist) { - bio = biolist; - biolist = biolist->bi_next; - - bio->bi_next = NULL; - r10_bio = bio->bi_private; - r10_bio->sectors = nr_sectors; - - if (bio->bi_end_io == end_sync_read) { - md_sync_acct(bio->bi_bdev, nr_sectors); - generic_make_request(bio); - } - } - - if (sectors_skipped) - /* pretend they weren't skipped, it makes - * no important difference in this case - */ - md_done_sync(mddev, sectors_skipped, 1); - - return sectors_skipped + nr_sectors; - giveup: - /* There is nowhere to write, so all non-sync - * drives must be failed or in resync, all drives - * have a bad block, so try the next chunk... 
- */ - if (sector_nr + max_sync < max_sector) - max_sector = sector_nr + max_sync; - - sectors_skipped += (max_sector - sector_nr); - chunks_skipped ++; - sector_nr = max_sector; - goto skipped; -} - -static sector_t -raid10_size(struct mddev *mddev, sector_t sectors, int raid_disks) -{ - sector_t size; - struct r10conf *conf = mddev->private; - - if (!raid_disks) - raid_disks = conf->raid_disks; - if (!sectors) - sectors = conf->dev_sectors; - - size = sectors >> conf->chunk_shift; - sector_div(size, conf->far_copies); - size = size * raid_disks; - sector_div(size, conf->near_copies); - - return size << conf->chunk_shift; -} - -static void calc_sectors(struct r10conf *conf, sector_t size) -{ - /* Calculate the number of sectors-per-device that will - * actually be used, and set conf->dev_sectors and - * conf->stride - */ - - size = size >> conf->chunk_shift; - sector_div(size, conf->far_copies); - size = size * conf->raid_disks; - sector_div(size, conf->near_copies); - /* 'size' is now the number of chunks in the array */ - /* calculate "used chunks per device" */ - size = size * conf->copies; - - /* We need to round up when dividing by raid_disks to - * get the stride size. - */ - size = DIV_ROUND_UP_SECTOR_T(size, conf->raid_disks); - - conf->dev_sectors = size << conf->chunk_shift; - - if (conf->far_offset) - conf->stride = 1 << conf->chunk_shift; - else { - sector_div(size, conf->far_copies); - conf->stride = size << conf->chunk_shift; - } -} - -static struct r10conf *setup_conf(struct mddev *mddev) -{ - struct r10conf *conf = NULL; - int nc, fc, fo; - int err = -EINVAL; - - if (mddev->new_chunk_sectors < (PAGE_SIZE >> 9) || - !is_power_of_2(mddev->new_chunk_sectors)) { - printk(KERN_ERR "md/raid10:%s: chunk size must be " - "at least PAGE_SIZE(%ld) and be a power of 2.\n", - mdname(mddev), PAGE_SIZE); - goto out; - } - - nc = mddev->new_layout & 255; - fc = (mddev->new_layout >> 8) & 255; - fo = mddev->new_layout & (1<<16); - - if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || - (mddev->new_layout >> 17)) { - printk(KERN_ERR "md/raid10:%s: unsupported raid10 layout: 0x%8x\n", - mdname(mddev), mddev->new_layout); - goto out; - } - - err = -ENOMEM; - conf = kzalloc(sizeof(struct r10conf), GFP_KERNEL); - if (!conf) - goto out; - - conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, - GFP_KERNEL); - if (!conf->mirrors) - goto out; - - conf->tmppage = alloc_page(GFP_KERNEL); - if (!conf->tmppage) - goto out; - - - conf->raid_disks = mddev->raid_disks; - conf->near_copies = nc; - conf->far_copies = fc; - conf->copies = nc*fc; - conf->far_offset = fo; - conf->chunk_mask = mddev->new_chunk_sectors - 1; - conf->chunk_shift = ffz(~mddev->new_chunk_sectors); - - conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc, - r10bio_pool_free, conf); - if (!conf->r10bio_pool) - goto out; - - calc_sectors(conf, mddev->dev_sectors); - - spin_lock_init(&conf->device_lock); - INIT_LIST_HEAD(&conf->retry_list); - - spin_lock_init(&conf->resync_lock); - init_waitqueue_head(&conf->wait_barrier); - - conf->thread = md_register_thread(raid10d, mddev, NULL); - if (!conf->thread) - goto out; - - conf->mddev = mddev; - return conf; - - out: - printk(KERN_ERR "md/raid10:%s: couldn't allocate memory.\n", - mdname(mddev)); - if (conf) { - if (conf->r10bio_pool) - mempool_destroy(conf->r10bio_pool); - kfree(conf->mirrors); - safe_put_page(conf->tmppage); - kfree(conf); - } - return ERR_PTR(err); -} - -static int run(struct mddev *mddev) -{ - struct r10conf *conf; - int i, disk_idx, 
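/*
 * Editor's note: a worked example of the layout-word decoding in setup_conf()
 * above: bits 0-7 hold near_copies, bits 8-15 far_copies, bit 16 the far-offset
 * flag.  0x102 is the "near 2" layout that the raid0 takeover path further down
 * also installs.
 */
#include <stdio.h>

int main(void)
{
	int layout = 0x102;			/* (1 << 8) + 2 */
	int nc = layout & 255;			/* near copies  = 2 */
	int fc = (layout >> 8) & 255;		/* far copies   = 1 */
	int fo = layout & (1 << 16);		/* far offset   = 0 */

	printf("near=%d far=%d offset=%d copies=%d\n", nc, fc, !!fo, nc * fc);
	return 0;
}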
chunk_size; - struct mirror_info *disk; - struct md_rdev *rdev; - sector_t size; - - /* - * copy the already verified devices into our private RAID10 - * bookkeeping area. [whatever we allocate in run(), - * should be freed in stop()] - */ - - if (mddev->private == NULL) { - conf = setup_conf(mddev); - if (IS_ERR(conf)) - return PTR_ERR(conf); - mddev->private = conf; - } - conf = mddev->private; - if (!conf) - goto out; - - mddev->thread = conf->thread; - conf->thread = NULL; - - chunk_size = mddev->chunk_sectors << 9; - blk_queue_io_min(mddev->queue, chunk_size); - if (conf->raid_disks % conf->near_copies) - blk_queue_io_opt(mddev->queue, chunk_size * conf->raid_disks); - else - blk_queue_io_opt(mddev->queue, chunk_size * - (conf->raid_disks / conf->near_copies)); - - rdev_for_each(rdev, mddev) { - struct request_queue *q; - disk_idx = rdev->raid_disk; - if (disk_idx >= conf->raid_disks - || disk_idx < 0) - continue; - disk = conf->mirrors + disk_idx; - - if (test_bit(Replacement, &rdev->flags)) { - if (disk->replacement) - goto out_free_conf; - disk->replacement = rdev; - } else { - if (disk->rdev) - goto out_free_conf; - disk->rdev = rdev; - } - q = bdev_get_queue(rdev->bdev); - if (q->merge_bvec_fn) - mddev->merge_check_needed = 1; - - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - - disk->head_position = 0; - } - /* need to check that every block has at least one working mirror */ - if (!enough(conf, -1)) { - printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n", - mdname(mddev)); - goto out_free_conf; - } - - mddev->degraded = 0; - for (i = 0; i < conf->raid_disks; i++) { - - disk = conf->mirrors + i; - - if (!disk->rdev && disk->replacement) { - /* The replacement is all we have - use it */ - disk->rdev = disk->replacement; - disk->replacement = NULL; - clear_bit(Replacement, &disk->rdev->flags); - } - - if (!disk->rdev || - !test_bit(In_sync, &disk->rdev->flags)) { - disk->head_position = 0; - mddev->degraded++; - if (disk->rdev) - conf->fullsync = 1; - } - disk->recovery_disabled = mddev->recovery_disabled - 1; - } - - if (mddev->recovery_cp != MaxSector) - printk(KERN_NOTICE "md/raid10:%s: not clean" - " -- starting background reconstruction\n", - mdname(mddev)); - printk(KERN_INFO - "md/raid10:%s: active with %d out of %d devices\n", - mdname(mddev), conf->raid_disks - mddev->degraded, - conf->raid_disks); - /* - * Ok, everything is just fine now - */ - mddev->dev_sectors = conf->dev_sectors; - size = raid10_size(mddev, 0, 0); - md_set_array_sectors(mddev, size); - mddev->resync_max_sectors = size; - - mddev->queue->backing_dev_info.congested_fn = raid10_congested; - mddev->queue->backing_dev_info.congested_data = mddev; - - /* Calculate max read-ahead size. - * We need to readahead at least twice a whole stripe.... - * maybe... 
- */ - { - int stripe = conf->raid_disks * - ((mddev->chunk_sectors << 9) / PAGE_SIZE); - stripe /= conf->near_copies; - if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) - mddev->queue->backing_dev_info.ra_pages = 2* stripe; - } - - blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); - - if (md_integrity_register(mddev)) - goto out_free_conf; - - return 0; - -out_free_conf: - md_unregister_thread(&mddev->thread); - if (conf->r10bio_pool) - mempool_destroy(conf->r10bio_pool); - safe_put_page(conf->tmppage); - kfree(conf->mirrors); - kfree(conf); - mddev->private = NULL; -out: - return -EIO; -} - -static int stop(struct mddev *mddev) -{ - struct r10conf *conf = mddev->private; - - raise_barrier(conf, 0); - lower_barrier(conf); - - md_unregister_thread(&mddev->thread); - blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ - if (conf->r10bio_pool) - mempool_destroy(conf->r10bio_pool); - kfree(conf->mirrors); - kfree(conf); - mddev->private = NULL; - return 0; -} - -static void raid10_quiesce(struct mddev *mddev, int state) -{ - struct r10conf *conf = mddev->private; - - switch(state) { - case 1: - raise_barrier(conf, 0); - break; - case 0: - lower_barrier(conf); - break; - } -} - -static int raid10_resize(struct mddev *mddev, sector_t sectors) -{ - /* Resize of 'far' arrays is not supported. - * For 'near' and 'offset' arrays we can set the - * number of sectors used to be an appropriate multiple - * of the chunk size. - * For 'offset', this is far_copies*chunksize. - * For 'near' the multiplier is the LCM of - * near_copies and raid_disks. - * So if far_copies > 1 && !far_offset, fail. - * Else find LCM(raid_disks, near_copy)*far_copies and - * multiply by chunk_size. Then round to this number. - * This is mostly done by raid10_size() - */ - struct r10conf *conf = mddev->private; - sector_t oldsize, size; - - if (conf->far_copies > 1 && !conf->far_offset) - return -EINVAL; - - oldsize = raid10_size(mddev, 0, 0); - size = raid10_size(mddev, sectors, 0); - md_set_array_sectors(mddev, size); - if (mddev->array_sectors > size) - return -EINVAL; - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); - if (sectors > mddev->dev_sectors && - mddev->recovery_cp > oldsize) { - mddev->recovery_cp = oldsize; - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - } - calc_sectors(conf, sectors); - mddev->dev_sectors = conf->dev_sectors; - mddev->resync_max_sectors = size; - return 0; -} - -static void *raid10_takeover_raid0(struct mddev *mddev) -{ - struct md_rdev *rdev; - struct r10conf *conf; - - if (mddev->degraded > 0) { - printk(KERN_ERR "md/raid10:%s: Error: degraded raid0!\n", - mdname(mddev)); - return ERR_PTR(-EINVAL); - } - - /* Set new parameters */ - mddev->new_level = 10; - /* new layout: far_copies = 1, near_copies = 2 */ - mddev->new_layout = (1<<8) + 2; - mddev->new_chunk_sectors = mddev->chunk_sectors; - mddev->delta_disks = mddev->raid_disks; - mddev->raid_disks *= 2; - /* make sure it will be not marked as dirty */ - mddev->recovery_cp = MaxSector; - - conf = setup_conf(mddev); - if (!IS_ERR(conf)) { - rdev_for_each(rdev, mddev) - if (rdev->raid_disk >= 0) - rdev->new_raid_disk = rdev->raid_disk * 2; - conf->barrier = 1; - } - - return conf; -} - -static void *raid10_takeover(struct mddev *mddev) -{ - struct r0conf *raid0_conf; - - /* raid10 can take over: - * raid0 - providing it has only two drives - */ - if (mddev->level == 0) { - /* for raid0 takeover only one zone is supported */ - raid0_conf = mddev->private; - if 
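/*
 * Editor's note: a worked example of the read-ahead sizing in run() above: the
 * target is two full stripes' worth of pages, where a stripe spans
 * raid_disks/near_copies distinct chunks of data.  Example values assume a
 * 4-disk near-2 array with 512KiB chunks and 4KiB pages.
 */
#include <stdio.h>

int main(void)
{
	int raid_disks = 4, near_copies = 2;
	int chunk_sectors = 1024;			/* 512 KiB chunks */
	long page_size = 4096;

	long stripe = raid_disks * ((long)chunk_sectors * 512 / page_size);
	stripe /= near_copies;				/* unique data per stripe */
	printf("read-ahead target: %ld pages (%ld KiB)\n",
	       2 * stripe, 2 * stripe * page_size / 1024);
	/* 4 disks * 128 pages / 2 copies = 256 pages per stripe -> 512 pages (2048 KiB) */
	return 0;
}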
(raid0_conf->nr_strip_zones > 1) { - printk(KERN_ERR "md/raid10:%s: cannot takeover raid 0" - " with more than one zone.\n", - mdname(mddev)); - return ERR_PTR(-EINVAL); - } - return raid10_takeover_raid0(mddev); - } - return ERR_PTR(-EINVAL); -} - -static struct md_personality raid10_personality = -{ - .name = "raid10", - .level = 10, - .owner = THIS_MODULE, - .make_request = make_request, - .run = run, - .stop = stop, - .status = status, - .error_handler = error, - .hot_add_disk = raid10_add_disk, - .hot_remove_disk= raid10_remove_disk, - .spare_active = raid10_spare_active, - .sync_request = sync_request, - .quiesce = raid10_quiesce, - .size = raid10_size, - .resize = raid10_resize, - .takeover = raid10_takeover, -}; - -static int __init raid_init(void) -{ - return register_md_personality(&raid10_personality); -} - -static void raid_exit(void) -{ - unregister_md_personality(&raid10_personality); -} - -module_init(raid_init); -module_exit(raid_exit); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD"); -MODULE_ALIAS("md-personality-9"); /* RAID10 */ -MODULE_ALIAS("md-raid10"); -MODULE_ALIAS("md-level-10"); - -module_param(max_queued_requests, int, S_IRUGO|S_IWUSR); diff --git a/ANDROID_3.4.5/drivers/md/raid10.h b/ANDROID_3.4.5/drivers/md/raid10.h deleted file mode 100644 index 7c615613..00000000 --- a/ANDROID_3.4.5/drivers/md/raid10.h +++ /dev/null @@ -1,150 +0,0 @@ -#ifndef _RAID10_H -#define _RAID10_H - -struct mirror_info { - struct md_rdev *rdev, *replacement; - sector_t head_position; - int recovery_disabled; /* matches - * mddev->recovery_disabled - * when we shouldn't try - * recovering this device. - */ -}; - -struct r10conf { - struct mddev *mddev; - struct mirror_info *mirrors; - int raid_disks; - spinlock_t device_lock; - - /* geometry */ - int near_copies; /* number of copies laid out - * raid0 style */ - int far_copies; /* number of copies laid out - * at large strides across drives - */ - int far_offset; /* far_copies are offset by 1 - * stripe instead of many - */ - int copies; /* near_copies * far_copies. - * must be <= raid_disks - */ - sector_t stride; /* distance between far copies. - * This is size / far_copies unless - * far_offset, in which case it is - * 1 stripe. - */ - - sector_t dev_sectors; /* temp copy of - * mddev->dev_sectors */ - - int chunk_shift; /* shift from chunks to sectors */ - sector_t chunk_mask; - - struct list_head retry_list; - /* queue pending writes and submit them on unplug */ - struct bio_list pending_bio_list; - int pending_count; - - spinlock_t resync_lock; - int nr_pending; - int nr_waiting; - int nr_queued; - int barrier; - sector_t next_resync; - int fullsync; /* set to 1 if a full sync is needed, - * (fresh device added). - * Cleared when a sync completes. - */ - int have_replacement; /* There is at least one - * replacement device. - */ - wait_queue_head_t wait_barrier; - - mempool_t *r10bio_pool; - mempool_t *r10buf_pool; - struct page *tmppage; - - /* When taking over an array from a different personality, we store - * the new thread here until we fully activate the array. - */ - struct md_thread *thread; -}; - -/* - * this is our 'private' RAID10 bio. 
- * - * it contains information about what kind of IO operations were started - * for this RAID10 operation, and about their status: - */ - -struct r10bio { - atomic_t remaining; /* 'have we finished' count, - * used from IRQ handlers - */ - sector_t sector; /* virtual sector number */ - int sectors; - unsigned long state; - struct mddev *mddev; - /* - * original bio going to /dev/mdx - */ - struct bio *master_bio; - /* - * if the IO is in READ direction, then this is where we read - */ - int read_slot; - - struct list_head retry_list; - /* - * if the IO is in WRITE direction, then multiple bios are used, - * one for each copy. - * When resyncing we also use one for each copy. - * When reconstructing, we use 2 bios, one for read, one for write. - * We choose the number when they are allocated. - * We sometimes need an extra bio to write to the replacement. - */ - struct { - struct bio *bio; - union { - struct bio *repl_bio; /* used for resync and - * writes */ - struct md_rdev *rdev; /* used for reads - * (read_slot >= 0) */ - }; - sector_t addr; - int devnum; - } devs[0]; -}; - -/* when we get a read error on a read-only array, we redirect to another - * device without failing the first device, or trying to over-write to - * correct the read error. To keep track of bad blocks on a per-bio - * level, we store IO_BLOCKED in the appropriate 'bios' pointer - */ -#define IO_BLOCKED ((struct bio*)1) -/* When we successfully write to a known bad-block, we need to remove the - * bad-block marking which must be done from process context. So we record - * the success by setting devs[n].bio to IO_MADE_GOOD - */ -#define IO_MADE_GOOD ((struct bio *)2) - -#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) - -/* bits for r10bio.state */ -enum r10bio_state { - R10BIO_Uptodate, - R10BIO_IsSync, - R10BIO_IsRecover, - R10BIO_Degraded, -/* Set ReadError on bios that experience a read error - * so that raid10d knows what to do with them. - */ - R10BIO_ReadError, -/* If a write for this request means we can clear some - * known-bad-block records, we set this flag. - */ - R10BIO_MadeGood, - R10BIO_WriteError, -}; -#endif diff --git a/ANDROID_3.4.5/drivers/md/raid5.c b/ANDROID_3.4.5/drivers/md/raid5.c deleted file mode 100644 index 73a58007..00000000 --- a/ANDROID_3.4.5/drivers/md/raid5.c +++ /dev/null @@ -1,6050 +0,0 @@ -/* - * raid5.c : Multiple Devices driver for Linux - * Copyright (C) 1996, 1997 Ingo Molnar, Miguel de Icaza, Gadi Oxman - * Copyright (C) 1999, 2000 Ingo Molnar - * Copyright (C) 2002, 2003 H. Peter Anvin - * - * RAID-4/5/6 management functions. - * Thanks to Penguin Computing for making the RAID-6 development possible - * by donating a test server! - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * You should have received a copy of the GNU General Public License - * (for example /usr/src/linux/COPYING); if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * BITMAP UNPLUGGING: - * - * The sequencing for updating the bitmap reliably is a little - * subtle (and I got it wrong the first time) so it deserves some - * explanation. - * - * We group bitmap updates into batches. Each batch has a number. - * We may write out several batches at once, but that isn't very important. 
- * conf->seq_write is the number of the last batch successfully written. - * conf->seq_flush is the number of the last batch that was closed to - * new additions. - * When we discover that we will need to write to any block in a stripe - * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq - * the number of the batch it will be in. This is seq_flush+1. - * When we are ready to do a write, if that batch hasn't been written yet, - * we plug the array and queue the stripe for later. - * When an unplug happens, we increment bm_flush, thus closing the current - * batch. - * When we notice that bm_flush > bm_write, we write out all pending updates - * to the bitmap, and advance bm_write to where bm_flush was. - * This may occasionally write a bit out twice, but is sure never to - * miss any bits. - */ - -#include <linux/blkdev.h> -#include <linux/kthread.h> -#include <linux/raid/pq.h> -#include <linux/async_tx.h> -#include <linux/module.h> -#include <linux/async.h> -#include <linux/seq_file.h> -#include <linux/cpu.h> -#include <linux/slab.h> -#include <linux/ratelimit.h> -#include "md.h" -#include "raid5.h" -#include "raid0.h" -#include "bitmap.h" - -/* - * Stripe cache - */ - -#define NR_STRIPES 256 -#define STRIPE_SIZE PAGE_SIZE -#define STRIPE_SHIFT (PAGE_SHIFT - 9) -#define STRIPE_SECTORS (STRIPE_SIZE>>9) -#define IO_THRESHOLD 1 -#define BYPASS_THRESHOLD 1 -#define NR_HASH (PAGE_SIZE / sizeof(struct hlist_head)) -#define HASH_MASK (NR_HASH - 1) - -static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect) -{ - int hash = (sect >> STRIPE_SHIFT) & HASH_MASK; - return &conf->stripe_hashtbl[hash]; -} - -/* bio's attached to a stripe+device for I/O are linked together in bi_sector - * order without overlap. There may be several bio's per stripe+device, and - * a bio could span several devices. - * When walking this list for a particular stripe+device, we must never proceed - * beyond a bio that extends past this device, as the next bio might no longer - * be valid. 
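For a concrete feel of the stripe-cache hashing above: assuming 4 KiB pages and 64-bit pointers (the real values come from PAGE_SIZE and sizeof(struct hlist_head)), STRIPE_SHIFT is 3, STRIPE_SECTORS is 8, NR_HASH is 512 and HASH_MASK is 511, so stripe_hash() selects bits 3..11 of the sector number and all sectors of the same stripe land in the same bucket. A minimal user-space sketch of that arithmetic, with the assumed constants spelled out:

#include <stdio.h>
#include <stdint.h>

/* Assumed values for a 64-bit machine with 4 KiB pages (not taken from
 * kernel headers): PAGE_SIZE 4096, sizeof(struct hlist_head) 8. */
#define STRIPE_SHIFT    3                 /* PAGE_SHIFT - 9 */
#define NR_HASH         512               /* PAGE_SIZE / sizeof(struct hlist_head) */
#define HASH_MASK       (NR_HASH - 1)

static unsigned int stripe_hash_index(uint64_t sect)
{
        return (sect >> STRIPE_SHIFT) & HASH_MASK;   /* same arithmetic as stripe_hash() */
}

int main(void)
{
        /* sectors 123456 and 123463 share a stripe, 123464 starts the next one */
        printf("%u %u %u\n", stripe_hash_index(123456),
               stripe_hash_index(123463), stripe_hash_index(123464));
        return 0;
}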
- * This function is used to determine the 'next' bio in the list, given the sector - * of the current stripe+device - */ -static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector) -{ - int sectors = bio->bi_size >> 9; - if (bio->bi_sector + sectors < sector + STRIPE_SECTORS) - return bio->bi_next; - else - return NULL; -} - -/* - * We maintain a biased count of active stripes in the bottom 16 bits of - * bi_phys_segments, and a count of processed stripes in the upper 16 bits - */ -static inline int raid5_bi_phys_segments(struct bio *bio) -{ - return bio->bi_phys_segments & 0xffff; -} - -static inline int raid5_bi_hw_segments(struct bio *bio) -{ - return (bio->bi_phys_segments >> 16) & 0xffff; -} - -static inline int raid5_dec_bi_phys_segments(struct bio *bio) -{ - --bio->bi_phys_segments; - return raid5_bi_phys_segments(bio); -} - -static inline int raid5_dec_bi_hw_segments(struct bio *bio) -{ - unsigned short val = raid5_bi_hw_segments(bio); - - --val; - bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio); - return val; -} - -static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) -{ - bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16); -} - -/* Find first data disk in a raid6 stripe */ -static inline int raid6_d0(struct stripe_head *sh) -{ - if (sh->ddf_layout) - /* ddf always starts from the first device */ - return 0; - /* md starts just after Q block */ - if (sh->qd_idx == sh->disks - 1) - return 0; - else - return sh->qd_idx + 1; -} -static inline int raid6_next_disk(int disk, int raid_disks) -{ - disk++; - return (disk < raid_disks) ? disk : 0; -} - -/* When walking through the disks in a raid5, starting at raid6_d0, - * we need to map each disk to a 'slot', where the data disks are slot - * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk - * is raid_disks-1. This helper does that mapping. 
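To make the mapping just described concrete, the stand-alone sketch below replays it for a hypothetical 5-device stripe with pd_idx == 1 and qd_idx == 2 (non-DDF layout only, plain ints instead of struct stripe_head; purely illustrative): data devices fill slots 0..syndrome_disks-1 in the order the walk visits them, P always lands in slot syndrome_disks and Q in slot syndrome_disks+1.

#include <stdio.h>

/* Illustrative restatement of raid6_d0()/raid6_next_disk()/raid6_idx_to_slot()
 * for the non-DDF (md native) case. */
static int idx_to_slot(int idx, int pd_idx, int qd_idx, int *count, int syndrome_disks)
{
        int slot = *count;

        if (idx == pd_idx)
                return syndrome_disks;          /* P -> second-last slot */
        if (idx == qd_idx)
                return syndrome_disks + 1;      /* Q -> last slot */
        (*count)++;
        return slot;                            /* data -> slots 0..syndrome_disks-1 */
}

int main(void)
{
        int disks = 5, pd_idx = 1, qd_idx = 2;                  /* hypothetical stripe */
        int syndrome_disks = disks - 2;
        int d0 = (qd_idx == disks - 1) ? 0 : qd_idx + 1;        /* as in raid6_d0() */
        int count = 0, i = d0;

        do {
                printf("device %d -> slot %d\n",
                       i, idx_to_slot(i, pd_idx, qd_idx, &count, syndrome_disks));
                i = (i + 1 < disks) ? i + 1 : 0;                /* as in raid6_next_disk() */
        } while (i != d0);
        return 0;
}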
- */ -static int raid6_idx_to_slot(int idx, struct stripe_head *sh, - int *count, int syndrome_disks) -{ - int slot = *count; - - if (sh->ddf_layout) - (*count)++; - if (idx == sh->pd_idx) - return syndrome_disks; - if (idx == sh->qd_idx) - return syndrome_disks + 1; - if (!sh->ddf_layout) - (*count)++; - return slot; -} - -static void return_io(struct bio *return_bi) -{ - struct bio *bi = return_bi; - while (bi) { - - return_bi = bi->bi_next; - bi->bi_next = NULL; - bi->bi_size = 0; - bio_endio(bi, 0); - bi = return_bi; - } -} - -static void print_raid5_conf (struct r5conf *conf); - -static int stripe_operations_active(struct stripe_head *sh) -{ - return sh->check_state || sh->reconstruct_state || - test_bit(STRIPE_BIOFILL_RUN, &sh->state) || - test_bit(STRIPE_COMPUTE_RUN, &sh->state); -} - -static void __release_stripe(struct r5conf *conf, struct stripe_head *sh) -{ - if (atomic_dec_and_test(&sh->count)) { - BUG_ON(!list_empty(&sh->lru)); - BUG_ON(atomic_read(&conf->active_stripes)==0); - if (test_bit(STRIPE_HANDLE, &sh->state)) { - if (test_bit(STRIPE_DELAYED, &sh->state) && - !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) - list_add_tail(&sh->lru, &conf->delayed_list); - else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && - sh->bm_seq - conf->seq_write > 0) - list_add_tail(&sh->lru, &conf->bitmap_list); - else { - clear_bit(STRIPE_DELAYED, &sh->state); - clear_bit(STRIPE_BIT_DELAY, &sh->state); - list_add_tail(&sh->lru, &conf->handle_list); - } - md_wakeup_thread(conf->mddev->thread); - } else { - BUG_ON(stripe_operations_active(sh)); - if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) - if (atomic_dec_return(&conf->preread_active_stripes) - < IO_THRESHOLD) - md_wakeup_thread(conf->mddev->thread); - atomic_dec(&conf->active_stripes); - if (!test_bit(STRIPE_EXPANDING, &sh->state)) { - list_add_tail(&sh->lru, &conf->inactive_list); - wake_up(&conf->wait_for_stripe); - if (conf->retry_read_aligned) - md_wakeup_thread(conf->mddev->thread); - } - } - } -} - -static void release_stripe(struct stripe_head *sh) -{ - struct r5conf *conf = sh->raid_conf; - unsigned long flags; - - spin_lock_irqsave(&conf->device_lock, flags); - __release_stripe(conf, sh); - spin_unlock_irqrestore(&conf->device_lock, flags); -} - -static inline void remove_hash(struct stripe_head *sh) -{ - pr_debug("remove_hash(), stripe %llu\n", - (unsigned long long)sh->sector); - - hlist_del_init(&sh->hash); -} - -static inline void insert_hash(struct r5conf *conf, struct stripe_head *sh) -{ - struct hlist_head *hp = stripe_hash(conf, sh->sector); - - pr_debug("insert_hash(), stripe %llu\n", - (unsigned long long)sh->sector); - - hlist_add_head(&sh->hash, hp); -} - - -/* find an idle stripe, make sure it is unhashed, and return it. 
*/ -static struct stripe_head *get_free_stripe(struct r5conf *conf) -{ - struct stripe_head *sh = NULL; - struct list_head *first; - - if (list_empty(&conf->inactive_list)) - goto out; - first = conf->inactive_list.next; - sh = list_entry(first, struct stripe_head, lru); - list_del_init(first); - remove_hash(sh); - atomic_inc(&conf->active_stripes); -out: - return sh; -} - -static void shrink_buffers(struct stripe_head *sh) -{ - struct page *p; - int i; - int num = sh->raid_conf->pool_size; - - for (i = 0; i < num ; i++) { - p = sh->dev[i].page; - if (!p) - continue; - sh->dev[i].page = NULL; - put_page(p); - } -} - -static int grow_buffers(struct stripe_head *sh) -{ - int i; - int num = sh->raid_conf->pool_size; - - for (i = 0; i < num; i++) { - struct page *page; - - if (!(page = alloc_page(GFP_KERNEL))) { - return 1; - } - sh->dev[i].page = page; - } - return 0; -} - -static void raid5_build_block(struct stripe_head *sh, int i, int previous); -static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous, - struct stripe_head *sh); - -static void init_stripe(struct stripe_head *sh, sector_t sector, int previous) -{ - struct r5conf *conf = sh->raid_conf; - int i; - - BUG_ON(atomic_read(&sh->count) != 0); - BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); - BUG_ON(stripe_operations_active(sh)); - - pr_debug("init_stripe called, stripe %llu\n", - (unsigned long long)sh->sector); - - remove_hash(sh); - - sh->generation = conf->generation - previous; - sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks; - sh->sector = sector; - stripe_set_idx(sector, conf, previous, sh); - sh->state = 0; - - - for (i = sh->disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - - if (dev->toread || dev->read || dev->towrite || dev->written || - test_bit(R5_LOCKED, &dev->flags)) { - printk(KERN_ERR "sector=%llx i=%d %p %p %p %p %d\n", - (unsigned long long)sh->sector, i, dev->toread, - dev->read, dev->towrite, dev->written, - test_bit(R5_LOCKED, &dev->flags)); - WARN_ON(1); - } - dev->flags = 0; - raid5_build_block(sh, i, previous); - } - insert_hash(conf, sh); -} - -static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector, - short generation) -{ - struct stripe_head *sh; - struct hlist_node *hn; - - pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector); - hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash) - if (sh->sector == sector && sh->generation == generation) - return sh; - pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector); - return NULL; -} - -/* - * Need to check if array has failed when deciding whether to: - * - start an array - * - remove non-faulty devices - * - add a spare - * - allow a reshape - * This determination is simple when no reshape is happening. - * However if there is a reshape, we need to carefully check - * both the before and after sections. - * This is because some failed devices may only affect one - * of the two sections, and some non-in_sync devices may - * be insync in the section most affected by failed devices. - */ -static int calc_degraded(struct r5conf *conf) -{ - int degraded, degraded2; - int i; - - rcu_read_lock(); - degraded = 0; - for (i = 0; i < conf->previous_raid_disks; i++) { - struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); - if (!rdev || test_bit(Faulty, &rdev->flags)) - degraded++; - else if (test_bit(In_sync, &rdev->flags)) - ; - else - /* not in-sync or faulty. 
- * If the reshape increases the number of devices, - * this is being recovered by the reshape, so - * this 'previous' section is not in_sync. - * If the number of devices is being reduced however, - * the device can only be part of the array if - * we are reverting a reshape, so this section will - * be in-sync. - */ - if (conf->raid_disks >= conf->previous_raid_disks) - degraded++; - } - rcu_read_unlock(); - if (conf->raid_disks == conf->previous_raid_disks) - return degraded; - rcu_read_lock(); - degraded2 = 0; - for (i = 0; i < conf->raid_disks; i++) { - struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev); - if (!rdev || test_bit(Faulty, &rdev->flags)) - degraded2++; - else if (test_bit(In_sync, &rdev->flags)) - ; - else - /* not in-sync or faulty. - * If reshape increases the number of devices, this - * section has already been recovered, else it - * almost certainly hasn't. - */ - if (conf->raid_disks <= conf->previous_raid_disks) - degraded2++; - } - rcu_read_unlock(); - if (degraded2 > degraded) - return degraded2; - return degraded; -} - -static int has_failed(struct r5conf *conf) -{ - int degraded; - - if (conf->mddev->reshape_position == MaxSector) - return conf->mddev->degraded > conf->max_degraded; - - degraded = calc_degraded(conf); - if (degraded > conf->max_degraded) - return 1; - return 0; -} - -static struct stripe_head * -get_active_stripe(struct r5conf *conf, sector_t sector, - int previous, int noblock, int noquiesce) -{ - struct stripe_head *sh; - - pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector); - - spin_lock_irq(&conf->device_lock); - - do { - wait_event_lock_irq(conf->wait_for_stripe, - conf->quiesce == 0 || noquiesce, - conf->device_lock, /* nothing */); - sh = __find_stripe(conf, sector, conf->generation - previous); - if (!sh) { - if (!conf->inactive_blocked) - sh = get_free_stripe(conf); - if (noblock && sh == NULL) - break; - if (!sh) { - conf->inactive_blocked = 1; - wait_event_lock_irq(conf->wait_for_stripe, - !list_empty(&conf->inactive_list) && - (atomic_read(&conf->active_stripes) - < (conf->max_nr_stripes *3/4) - || !conf->inactive_blocked), - conf->device_lock, - ); - conf->inactive_blocked = 0; - } else - init_stripe(sh, sector, previous); - } else { - if (atomic_read(&sh->count)) { - BUG_ON(!list_empty(&sh->lru) - && !test_bit(STRIPE_EXPANDING, &sh->state)); - } else { - if (!test_bit(STRIPE_HANDLE, &sh->state)) - atomic_inc(&conf->active_stripes); - if (list_empty(&sh->lru) && - !test_bit(STRIPE_EXPANDING, &sh->state)) - BUG(); - list_del_init(&sh->lru); - } - } - } while (sh == NULL); - - if (sh) - atomic_inc(&sh->count); - - spin_unlock_irq(&conf->device_lock); - return sh; -} - -static void -raid5_end_read_request(struct bio *bi, int error); -static void -raid5_end_write_request(struct bio *bi, int error); - -static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) -{ - struct r5conf *conf = sh->raid_conf; - int i, disks = sh->disks; - - might_sleep(); - - for (i = disks; i--; ) { - int rw; - int replace_only = 0; - struct bio *bi, *rbi; - struct md_rdev *rdev, *rrdev = NULL; - if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) { - if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags)) - rw = WRITE_FUA; - else - rw = WRITE; - } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) - rw = READ; - else if (test_and_clear_bit(R5_WantReplace, - &sh->dev[i].flags)) { - rw = WRITE; - replace_only = 1; - } else - continue; - - bi = &sh->dev[i].req; - rbi = &sh->dev[i].rreq; /* For writing to 
replacement */ - - bi->bi_rw = rw; - rbi->bi_rw = rw; - if (rw & WRITE) { - bi->bi_end_io = raid5_end_write_request; - rbi->bi_end_io = raid5_end_write_request; - } else - bi->bi_end_io = raid5_end_read_request; - - rcu_read_lock(); - rrdev = rcu_dereference(conf->disks[i].replacement); - smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */ - rdev = rcu_dereference(conf->disks[i].rdev); - if (!rdev) { - rdev = rrdev; - rrdev = NULL; - } - if (rw & WRITE) { - if (replace_only) - rdev = NULL; - if (rdev == rrdev) - /* We raced and saw duplicates */ - rrdev = NULL; - } else { - if (test_bit(R5_ReadRepl, &sh->dev[i].flags) && rrdev) - rdev = rrdev; - rrdev = NULL; - } - - if (rdev && test_bit(Faulty, &rdev->flags)) - rdev = NULL; - if (rdev) - atomic_inc(&rdev->nr_pending); - if (rrdev && test_bit(Faulty, &rrdev->flags)) - rrdev = NULL; - if (rrdev) - atomic_inc(&rrdev->nr_pending); - rcu_read_unlock(); - - /* We have already checked bad blocks for reads. Now - * need to check for writes. We never accept write errors - * on the replacement, so we don't to check rrdev. - */ - while ((rw & WRITE) && rdev && - test_bit(WriteErrorSeen, &rdev->flags)) { - sector_t first_bad; - int bad_sectors; - int bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS, - &first_bad, &bad_sectors); - if (!bad) - break; - - if (bad < 0) { - set_bit(BlockedBadBlocks, &rdev->flags); - if (!conf->mddev->external && - conf->mddev->flags) { - /* It is very unlikely, but we might - * still need to write out the - * bad block log - better give it - * a chance*/ - md_check_recovery(conf->mddev); - } - /* - * Because md_wait_for_blocked_rdev - * will dec nr_pending, we must - * increment it first. - */ - atomic_inc(&rdev->nr_pending); - md_wait_for_blocked_rdev(rdev, conf->mddev); - } else { - /* Acknowledged bad block - skip the write */ - rdev_dec_pending(rdev, conf->mddev); - rdev = NULL; - } - } - - if (rdev) { - if (s->syncing || s->expanding || s->expanded - || s->replacing) - md_sync_acct(rdev->bdev, STRIPE_SECTORS); - - set_bit(STRIPE_IO_STARTED, &sh->state); - - bi->bi_bdev = rdev->bdev; - pr_debug("%s: for %llu schedule op %ld on disc %d\n", - __func__, (unsigned long long)sh->sector, - bi->bi_rw, i); - atomic_inc(&sh->count); - bi->bi_sector = sh->sector + rdev->data_offset; - bi->bi_flags = 1 << BIO_UPTODATE; - bi->bi_idx = 0; - bi->bi_io_vec[0].bv_len = STRIPE_SIZE; - bi->bi_io_vec[0].bv_offset = 0; - bi->bi_size = STRIPE_SIZE; - bi->bi_next = NULL; - if (rrdev) - set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); - generic_make_request(bi); - } - if (rrdev) { - if (s->syncing || s->expanding || s->expanded - || s->replacing) - md_sync_acct(rrdev->bdev, STRIPE_SECTORS); - - set_bit(STRIPE_IO_STARTED, &sh->state); - - rbi->bi_bdev = rrdev->bdev; - pr_debug("%s: for %llu schedule op %ld on " - "replacement disc %d\n", - __func__, (unsigned long long)sh->sector, - rbi->bi_rw, i); - atomic_inc(&sh->count); - rbi->bi_sector = sh->sector + rrdev->data_offset; - rbi->bi_flags = 1 << BIO_UPTODATE; - rbi->bi_idx = 0; - rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; - rbi->bi_io_vec[0].bv_offset = 0; - rbi->bi_size = STRIPE_SIZE; - rbi->bi_next = NULL; - generic_make_request(rbi); - } - if (!rdev && !rrdev) { - if (rw & WRITE) - set_bit(STRIPE_DEGRADED, &sh->state); - pr_debug("skip op %ld on disc %d for sector %llu\n", - bi->bi_rw, i, (unsigned long long)sh->sector); - clear_bit(R5_LOCKED, &sh->dev[i].flags); - set_bit(STRIPE_HANDLE, &sh->state); - } - } -} - -static struct dma_async_tx_descriptor * -async_copy_data(int frombio, 
struct bio *bio, struct page *page, - sector_t sector, struct dma_async_tx_descriptor *tx) -{ - struct bio_vec *bvl; - struct page *bio_page; - int i; - int page_offset; - struct async_submit_ctl submit; - enum async_tx_flags flags = 0; - - if (bio->bi_sector >= sector) - page_offset = (signed)(bio->bi_sector - sector) * 512; - else - page_offset = (signed)(sector - bio->bi_sector) * -512; - - if (frombio) - flags |= ASYNC_TX_FENCE; - init_async_submit(&submit, flags, tx, NULL, NULL, NULL); - - bio_for_each_segment(bvl, bio, i) { - int len = bvl->bv_len; - int clen; - int b_offset = 0; - - if (page_offset < 0) { - b_offset = -page_offset; - page_offset += b_offset; - len -= b_offset; - } - - if (len > 0 && page_offset + len > STRIPE_SIZE) - clen = STRIPE_SIZE - page_offset; - else - clen = len; - - if (clen > 0) { - b_offset += bvl->bv_offset; - bio_page = bvl->bv_page; - if (frombio) - tx = async_memcpy(page, bio_page, page_offset, - b_offset, clen, &submit); - else - tx = async_memcpy(bio_page, page, b_offset, - page_offset, clen, &submit); - } - /* chain the operations */ - submit.depend_tx = tx; - - if (clen < len) /* hit end of page */ - break; - page_offset += len; - } - - return tx; -} - -static void ops_complete_biofill(void *stripe_head_ref) -{ - struct stripe_head *sh = stripe_head_ref; - struct bio *return_bi = NULL; - struct r5conf *conf = sh->raid_conf; - int i; - - pr_debug("%s: stripe %llu\n", __func__, - (unsigned long long)sh->sector); - - /* clear completed biofills */ - spin_lock_irq(&conf->device_lock); - for (i = sh->disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - - /* acknowledge completion of a biofill operation */ - /* and check if we need to reply to a read request, - * new R5_Wantfill requests are held off until - * !STRIPE_BIOFILL_RUN - */ - if (test_and_clear_bit(R5_Wantfill, &dev->flags)) { - struct bio *rbi, *rbi2; - - BUG_ON(!dev->read); - rbi = dev->read; - dev->read = NULL; - while (rbi && rbi->bi_sector < - dev->sector + STRIPE_SECTORS) { - rbi2 = r5_next_bio(rbi, dev->sector); - if (!raid5_dec_bi_phys_segments(rbi)) { - rbi->bi_next = return_bi; - return_bi = rbi; - } - rbi = rbi2; - } - } - } - spin_unlock_irq(&conf->device_lock); - clear_bit(STRIPE_BIOFILL_RUN, &sh->state); - - return_io(return_bi); - - set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); -} - -static void ops_run_biofill(struct stripe_head *sh) -{ - struct dma_async_tx_descriptor *tx = NULL; - struct r5conf *conf = sh->raid_conf; - struct async_submit_ctl submit; - int i; - - pr_debug("%s: stripe %llu\n", __func__, - (unsigned long long)sh->sector); - - for (i = sh->disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - if (test_bit(R5_Wantfill, &dev->flags)) { - struct bio *rbi; - spin_lock_irq(&conf->device_lock); - dev->read = rbi = dev->toread; - dev->toread = NULL; - spin_unlock_irq(&conf->device_lock); - while (rbi && rbi->bi_sector < - dev->sector + STRIPE_SECTORS) { - tx = async_copy_data(0, rbi, dev->page, - dev->sector, tx); - rbi = r5_next_bio(rbi, dev->sector); - } - } - } - - atomic_inc(&sh->count); - init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL); - async_trigger_callback(&submit); -} - -static void mark_target_uptodate(struct stripe_head *sh, int target) -{ - struct r5dev *tgt; - - if (target < 0) - return; - - tgt = &sh->dev[target]; - set_bit(R5_UPTODATE, &tgt->flags); - BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); - clear_bit(R5_Wantcompute, &tgt->flags); -} - -static void ops_complete_compute(void *stripe_head_ref) -{ - struct 
stripe_head *sh = stripe_head_ref; - - pr_debug("%s: stripe %llu\n", __func__, - (unsigned long long)sh->sector); - - /* mark the computed target(s) as uptodate */ - mark_target_uptodate(sh, sh->ops.target); - mark_target_uptodate(sh, sh->ops.target2); - - clear_bit(STRIPE_COMPUTE_RUN, &sh->state); - if (sh->check_state == check_state_compute_run) - sh->check_state = check_state_compute_result; - set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); -} - -/* return a pointer to the address conversion region of the scribble buffer */ -static addr_conv_t *to_addr_conv(struct stripe_head *sh, - struct raid5_percpu *percpu) -{ - return percpu->scribble + sizeof(struct page *) * (sh->disks + 2); -} - -static struct dma_async_tx_descriptor * -ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) -{ - int disks = sh->disks; - struct page **xor_srcs = percpu->scribble; - int target = sh->ops.target; - struct r5dev *tgt = &sh->dev[target]; - struct page *xor_dest = tgt->page; - int count = 0; - struct dma_async_tx_descriptor *tx; - struct async_submit_ctl submit; - int i; - - pr_debug("%s: stripe %llu block: %d\n", - __func__, (unsigned long long)sh->sector, target); - BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); - - for (i = disks; i--; ) - if (i != target) - xor_srcs[count++] = sh->dev[i].page; - - atomic_inc(&sh->count); - - init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, - ops_complete_compute, sh, to_addr_conv(sh, percpu)); - if (unlikely(count == 1)) - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); - else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); - - return tx; -} - -/* set_syndrome_sources - populate source buffers for gen_syndrome - * @srcs - (struct page *) array of size sh->disks - * @sh - stripe_head to parse - * - * Populates srcs in proper layout order for the stripe and returns the - * 'count' of sources to be used in a call to async_gen_syndrome. The P - * destination buffer is recorded in srcs[count] and the Q destination - * is recorded in srcs[count+1]]. - */ -static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh) -{ - int disks = sh->disks; - int syndrome_disks = sh->ddf_layout ? 
disks : (disks - 2); - int d0_idx = raid6_d0(sh); - int count; - int i; - - for (i = 0; i < disks; i++) - srcs[i] = NULL; - - count = 0; - i = d0_idx; - do { - int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); - - srcs[slot] = sh->dev[i].page; - i = raid6_next_disk(i, disks); - } while (i != d0_idx); - - return syndrome_disks; -} - -static struct dma_async_tx_descriptor * -ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) -{ - int disks = sh->disks; - struct page **blocks = percpu->scribble; - int target; - int qd_idx = sh->qd_idx; - struct dma_async_tx_descriptor *tx; - struct async_submit_ctl submit; - struct r5dev *tgt; - struct page *dest; - int i; - int count; - - if (sh->ops.target < 0) - target = sh->ops.target2; - else if (sh->ops.target2 < 0) - target = sh->ops.target; - else - /* we should only have one valid target */ - BUG(); - BUG_ON(target < 0); - pr_debug("%s: stripe %llu block: %d\n", - __func__, (unsigned long long)sh->sector, target); - - tgt = &sh->dev[target]; - BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); - dest = tgt->page; - - atomic_inc(&sh->count); - - if (target == qd_idx) { - count = set_syndrome_sources(blocks, sh); - blocks[count] = NULL; /* regenerating p is not necessary */ - BUG_ON(blocks[count+1] != dest); /* q should already be set */ - init_async_submit(&submit, ASYNC_TX_FENCE, NULL, - ops_complete_compute, sh, - to_addr_conv(sh, percpu)); - tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); - } else { - /* Compute any data- or p-drive using XOR */ - count = 0; - for (i = disks; i-- ; ) { - if (i == target || i == qd_idx) - continue; - blocks[count++] = sh->dev[i].page; - } - - init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, - NULL, ops_complete_compute, sh, - to_addr_conv(sh, percpu)); - tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit); - } - - return tx; -} - -static struct dma_async_tx_descriptor * -ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) -{ - int i, count, disks = sh->disks; - int syndrome_disks = sh->ddf_layout ? 
disks : disks-2; - int d0_idx = raid6_d0(sh); - int faila = -1, failb = -1; - int target = sh->ops.target; - int target2 = sh->ops.target2; - struct r5dev *tgt = &sh->dev[target]; - struct r5dev *tgt2 = &sh->dev[target2]; - struct dma_async_tx_descriptor *tx; - struct page **blocks = percpu->scribble; - struct async_submit_ctl submit; - - pr_debug("%s: stripe %llu block1: %d block2: %d\n", - __func__, (unsigned long long)sh->sector, target, target2); - BUG_ON(target < 0 || target2 < 0); - BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); - BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags)); - - /* we need to open-code set_syndrome_sources to handle the - * slot number conversion for 'faila' and 'failb' - */ - for (i = 0; i < disks ; i++) - blocks[i] = NULL; - count = 0; - i = d0_idx; - do { - int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); - - blocks[slot] = sh->dev[i].page; - - if (i == target) - faila = slot; - if (i == target2) - failb = slot; - i = raid6_next_disk(i, disks); - } while (i != d0_idx); - - BUG_ON(faila == failb); - if (failb < faila) - swap(faila, failb); - pr_debug("%s: stripe: %llu faila: %d failb: %d\n", - __func__, (unsigned long long)sh->sector, faila, failb); - - atomic_inc(&sh->count); - - if (failb == syndrome_disks+1) { - /* Q disk is one of the missing disks */ - if (faila == syndrome_disks) { - /* Missing P+Q, just recompute */ - init_async_submit(&submit, ASYNC_TX_FENCE, NULL, - ops_complete_compute, sh, - to_addr_conv(sh, percpu)); - return async_gen_syndrome(blocks, 0, syndrome_disks+2, - STRIPE_SIZE, &submit); - } else { - struct page *dest; - int data_target; - int qd_idx = sh->qd_idx; - - /* Missing D+Q: recompute D from P, then recompute Q */ - if (target == qd_idx) - data_target = target2; - else - data_target = target; - - count = 0; - for (i = disks; i-- ; ) { - if (i == data_target || i == qd_idx) - continue; - blocks[count++] = sh->dev[i].page; - } - dest = sh->dev[data_target].page; - init_async_submit(&submit, - ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, - NULL, NULL, NULL, - to_addr_conv(sh, percpu)); - tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, - &submit); - - count = set_syndrome_sources(blocks, sh); - init_async_submit(&submit, ASYNC_TX_FENCE, tx, - ops_complete_compute, sh, - to_addr_conv(sh, percpu)); - return async_gen_syndrome(blocks, 0, count+2, - STRIPE_SIZE, &submit); - } - } else { - init_async_submit(&submit, ASYNC_TX_FENCE, NULL, - ops_complete_compute, sh, - to_addr_conv(sh, percpu)); - if (failb == syndrome_disks) { - /* We're missing D+P. */ - return async_raid6_datap_recov(syndrome_disks+2, - STRIPE_SIZE, faila, - blocks, &submit); - } else { - /* We're missing D+D. 
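The branch structure here picks one of four recovery paths depending on which slots failed. A compact, purely illustrative restatement (stand-alone; faila < failb assumed exactly as arranged by the swap() above, and slot numbering as produced by set_syndrome_sources(): data 0..syndrome_disks-1, P at syndrome_disks, Q at syndrome_disks+1):

#include <stdio.h>

/* Illustrative restatement of the case analysis in ops_run_compute6_2();
 * assumes faila < failb, as the code above guarantees. */
static const char *raid6_recovery_case(int faila, int failb, int syndrome_disks)
{
        if (failb == syndrome_disks + 1) {      /* Q is one of the missing slots */
                if (faila == syndrome_disks)
                        return "P+Q missing: regenerate both via gen_syndrome";
                return "D+Q missing: rebuild D by XOR, then regenerate Q";
        }
        if (failb == syndrome_disks)            /* P is the higher missing slot */
                return "D+P missing: datap recovery";
        return "D+D missing: two-data recovery";
}

int main(void)
{
        int syndrome_disks = 3;                 /* e.g. a 5-device RAID6 stripe */

        printf("%s\n", raid6_recovery_case(0, 1, syndrome_disks));  /* D+D */
        printf("%s\n", raid6_recovery_case(1, 3, syndrome_disks));  /* D+P */
        printf("%s\n", raid6_recovery_case(2, 4, syndrome_disks));  /* D+Q */
        printf("%s\n", raid6_recovery_case(3, 4, syndrome_disks));  /* P+Q */
        return 0;
}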
*/ - return async_raid6_2data_recov(syndrome_disks+2, - STRIPE_SIZE, faila, failb, - blocks, &submit); - } - } -} - - -static void ops_complete_prexor(void *stripe_head_ref) -{ - struct stripe_head *sh = stripe_head_ref; - - pr_debug("%s: stripe %llu\n", __func__, - (unsigned long long)sh->sector); -} - -static struct dma_async_tx_descriptor * -ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu, - struct dma_async_tx_descriptor *tx) -{ - int disks = sh->disks; - struct page **xor_srcs = percpu->scribble; - int count = 0, pd_idx = sh->pd_idx, i; - struct async_submit_ctl submit; - - /* existing parity data subtracted */ - struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; - - pr_debug("%s: stripe %llu\n", __func__, - (unsigned long long)sh->sector); - - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - /* Only process blocks that are known to be uptodate */ - if (test_bit(R5_Wantdrain, &dev->flags)) - xor_srcs[count++] = dev->page; - } - - init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, - ops_complete_prexor, sh, to_addr_conv(sh, percpu)); - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); - - return tx; -} - -static struct dma_async_tx_descriptor * -ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) -{ - int disks = sh->disks; - int i; - - pr_debug("%s: stripe %llu\n", __func__, - (unsigned long long)sh->sector); - - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - struct bio *chosen; - - if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) { - struct bio *wbi; - - spin_lock_irq(&sh->raid_conf->device_lock); - chosen = dev->towrite; - dev->towrite = NULL; - BUG_ON(dev->written); - wbi = dev->written = chosen; - spin_unlock_irq(&sh->raid_conf->device_lock); - - while (wbi && wbi->bi_sector < - dev->sector + STRIPE_SECTORS) { - if (wbi->bi_rw & REQ_FUA) - set_bit(R5_WantFUA, &dev->flags); - tx = async_copy_data(1, wbi, dev->page, - dev->sector, tx); - wbi = r5_next_bio(wbi, dev->sector); - } - } - } - - return tx; -} - -static void ops_complete_reconstruct(void *stripe_head_ref) -{ - struct stripe_head *sh = stripe_head_ref; - int disks = sh->disks; - int pd_idx = sh->pd_idx; - int qd_idx = sh->qd_idx; - int i; - bool fua = false; - - pr_debug("%s: stripe %llu\n", __func__, - (unsigned long long)sh->sector); - - for (i = disks; i--; ) - fua |= test_bit(R5_WantFUA, &sh->dev[i].flags); - - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - - if (dev->written || i == pd_idx || i == qd_idx) { - set_bit(R5_UPTODATE, &dev->flags); - if (fua) - set_bit(R5_WantFUA, &dev->flags); - } - } - - if (sh->reconstruct_state == reconstruct_state_drain_run) - sh->reconstruct_state = reconstruct_state_drain_result; - else if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) - sh->reconstruct_state = reconstruct_state_prexor_drain_result; - else { - BUG_ON(sh->reconstruct_state != reconstruct_state_run); - sh->reconstruct_state = reconstruct_state_result; - } - - set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); -} - -static void -ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu, - struct dma_async_tx_descriptor *tx) -{ - int disks = sh->disks; - struct page **xor_srcs = percpu->scribble; - struct async_submit_ctl submit; - int count = 0, pd_idx = sh->pd_idx, i; - struct page *xor_dest; - int prexor = 0; - unsigned long flags; - - pr_debug("%s: stripe %llu\n", __func__, - (unsigned long long)sh->sector); - - /* check if prexor is active 
which means only process blocks - * that are part of a read-modify-write (written) - */ - if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) { - prexor = 1; - xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - if (dev->written) - xor_srcs[count++] = dev->page; - } - } else { - xor_dest = sh->dev[pd_idx].page; - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - if (i != pd_idx) - xor_srcs[count++] = dev->page; - } - } - - /* 1/ if we prexor'd then the dest is reused as a source - * 2/ if we did not prexor then we are redoing the parity - * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST - * for the synchronous xor case - */ - flags = ASYNC_TX_ACK | - (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); - - atomic_inc(&sh->count); - - init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh, - to_addr_conv(sh, percpu)); - if (unlikely(count == 1)) - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); - else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); -} - -static void -ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu, - struct dma_async_tx_descriptor *tx) -{ - struct async_submit_ctl submit; - struct page **blocks = percpu->scribble; - int count; - - pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); - - count = set_syndrome_sources(blocks, sh); - - atomic_inc(&sh->count); - - init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct, - sh, to_addr_conv(sh, percpu)); - async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); -} - -static void ops_complete_check(void *stripe_head_ref) -{ - struct stripe_head *sh = stripe_head_ref; - - pr_debug("%s: stripe %llu\n", __func__, - (unsigned long long)sh->sector); - - sh->check_state = check_state_check_result; - set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); -} - -static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu) -{ - int disks = sh->disks; - int pd_idx = sh->pd_idx; - int qd_idx = sh->qd_idx; - struct page *xor_dest; - struct page **xor_srcs = percpu->scribble; - struct dma_async_tx_descriptor *tx; - struct async_submit_ctl submit; - int count; - int i; - - pr_debug("%s: stripe %llu\n", __func__, - (unsigned long long)sh->sector); - - count = 0; - xor_dest = sh->dev[pd_idx].page; - xor_srcs[count++] = xor_dest; - for (i = disks; i--; ) { - if (i == pd_idx || i == qd_idx) - continue; - xor_srcs[count++] = sh->dev[i].page; - } - - init_async_submit(&submit, 0, NULL, NULL, NULL, - to_addr_conv(sh, percpu)); - tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - &sh->ops.zero_sum_result, &submit); - - atomic_inc(&sh->count); - init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL); - tx = async_trigger_callback(&submit); -} - -static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp) -{ - struct page **srcs = percpu->scribble; - struct async_submit_ctl submit; - int count; - - pr_debug("%s: stripe %llu checkp: %d\n", __func__, - (unsigned long long)sh->sector, checkp); - - count = set_syndrome_sources(srcs, sh); - if (!checkp) - srcs[count] = NULL; - - atomic_inc(&sh->count); - init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check, - sh, to_addr_conv(sh, percpu)); - async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE, - &sh->ops.zero_sum_result, percpu->spare_page, &submit); -} - -static void __raid_run_ops(struct 
stripe_head *sh, unsigned long ops_request) -{ - int overlap_clear = 0, i, disks = sh->disks; - struct dma_async_tx_descriptor *tx = NULL; - struct r5conf *conf = sh->raid_conf; - int level = conf->level; - struct raid5_percpu *percpu; - unsigned long cpu; - - cpu = get_cpu(); - percpu = per_cpu_ptr(conf->percpu, cpu); - if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { - ops_run_biofill(sh); - overlap_clear++; - } - - if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) { - if (level < 6) - tx = ops_run_compute5(sh, percpu); - else { - if (sh->ops.target2 < 0 || sh->ops.target < 0) - tx = ops_run_compute6_1(sh, percpu); - else - tx = ops_run_compute6_2(sh, percpu); - } - /* terminate the chain if reconstruct is not set to be run */ - if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) - async_tx_ack(tx); - } - - if (test_bit(STRIPE_OP_PREXOR, &ops_request)) - tx = ops_run_prexor(sh, percpu, tx); - - if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) { - tx = ops_run_biodrain(sh, tx); - overlap_clear++; - } - - if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) { - if (level < 6) - ops_run_reconstruct5(sh, percpu, tx); - else - ops_run_reconstruct6(sh, percpu, tx); - } - - if (test_bit(STRIPE_OP_CHECK, &ops_request)) { - if (sh->check_state == check_state_run) - ops_run_check_p(sh, percpu); - else if (sh->check_state == check_state_run_q) - ops_run_check_pq(sh, percpu, 0); - else if (sh->check_state == check_state_run_pq) - ops_run_check_pq(sh, percpu, 1); - else - BUG(); - } - - if (overlap_clear) - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - if (test_and_clear_bit(R5_Overlap, &dev->flags)) - wake_up(&sh->raid_conf->wait_for_overlap); - } - put_cpu(); -} - -#ifdef CONFIG_MULTICORE_RAID456 -static void async_run_ops(void *param, async_cookie_t cookie) -{ - struct stripe_head *sh = param; - unsigned long ops_request = sh->ops.request; - - clear_bit_unlock(STRIPE_OPS_REQ_PENDING, &sh->state); - wake_up(&sh->ops.wait_for_ops); - - __raid_run_ops(sh, ops_request); - release_stripe(sh); -} - -static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) -{ - /* since handle_stripe can be called outside of raid5d context - * we need to ensure sh->ops.request is de-staged before another - * request arrives - */ - wait_event(sh->ops.wait_for_ops, - !test_and_set_bit_lock(STRIPE_OPS_REQ_PENDING, &sh->state)); - sh->ops.request = ops_request; - - atomic_inc(&sh->count); - async_schedule(async_run_ops, sh); -} -#else -#define raid_run_ops __raid_run_ops -#endif - -static int grow_one_stripe(struct r5conf *conf) -{ - struct stripe_head *sh; - sh = kmem_cache_zalloc(conf->slab_cache, GFP_KERNEL); - if (!sh) - return 0; - - sh->raid_conf = conf; - #ifdef CONFIG_MULTICORE_RAID456 - init_waitqueue_head(&sh->ops.wait_for_ops); - #endif - - if (grow_buffers(sh)) { - shrink_buffers(sh); - kmem_cache_free(conf->slab_cache, sh); - return 0; - } - /* we just created an active stripe so... 
*/ - atomic_set(&sh->count, 1); - atomic_inc(&conf->active_stripes); - INIT_LIST_HEAD(&sh->lru); - release_stripe(sh); - return 1; -} - -static int grow_stripes(struct r5conf *conf, int num) -{ - struct kmem_cache *sc; - int devs = max(conf->raid_disks, conf->previous_raid_disks); - - if (conf->mddev->gendisk) - sprintf(conf->cache_name[0], - "raid%d-%s", conf->level, mdname(conf->mddev)); - else - sprintf(conf->cache_name[0], - "raid%d-%p", conf->level, conf->mddev); - sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]); - - conf->active_name = 0; - sc = kmem_cache_create(conf->cache_name[conf->active_name], - sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), - 0, 0, NULL); - if (!sc) - return 1; - conf->slab_cache = sc; - conf->pool_size = devs; - while (num--) - if (!grow_one_stripe(conf)) - return 1; - return 0; -} - -/** - * scribble_len - return the required size of the scribble region - * @num - total number of disks in the array - * - * The size must be enough to contain: - * 1/ a struct page pointer for each device in the array +2 - * 2/ room to convert each entry in (1) to its corresponding dma - * (dma_map_page()) or page (page_address()) address. - * - * Note: the +2 is for the destination buffers of the ddf/raid6 case where we - * calculate over all devices (not just the data blocks), using zeros in place - * of the P and Q blocks. - */ -static size_t scribble_len(int num) -{ - size_t len; - - len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2); - - return len; -} - -static int resize_stripes(struct r5conf *conf, int newsize) -{ - /* Make all the stripes able to hold 'newsize' devices. - * New slots in each stripe get 'page' set to a new page. - * - * This happens in stages: - * 1/ create a new kmem_cache and allocate the required number of - * stripe_heads. - * 2/ gather all the old stripe_heads and transfer the pages across - * to the new stripe_heads. This will have the side effect of - * freezing the array as once all stripe_heads have been collected, - * no IO will be possible. Old stripe heads are freed once their - * pages have been transferred over, and the old kmem_cache is - * freed when all stripes are done. - * 3/ reallocate conf->disks to be suitably bigger. If this fails, - * we simply return a failure status - no need to clean anything up. - * 4/ allocate new pages for the new slots in the new stripe_heads. - * If this fails, we don't bother trying to shrink the - * stripe_heads down again, we just leave them as they are. - * As each stripe_head is processed the new one is released into - * active service. - * - * Once step 2 is started, we cannot afford to wait for a write, - * so we use GFP_NOIO allocations. 
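As a quick sanity check of the scribble_len() sizing above, assuming a 64-bit build where both struct page * and addr_conv_t occupy 8 bytes (the size of addr_conv_t is an assumption here; it depends on the kernel configuration): a 10-device array reserves (10+2)*8 bytes of page pointers plus (10+2)*8 bytes of address-conversion space, i.e. 192 bytes per CPU. A trivial stand-alone version of the same arithmetic:

#include <stdio.h>

/* Worked example of the scribble_len() formula above; the 8-byte
 * addr_conv_t is an assumption, not a kernel fact. */
int main(void)
{
        int num = 10;                           /* hypothetical 10-device array */
        size_t page_ptrs = sizeof(void *) * (num + 2);
        size_t addr_conv = 8 * (num + 2);

        printf("scribble region: %zu bytes per CPU\n", page_ptrs + addr_conv);  /* 192 on LP64 */
        return 0;
}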
- */ - struct stripe_head *osh, *nsh; - LIST_HEAD(newstripes); - struct disk_info *ndisks; - unsigned long cpu; - int err; - struct kmem_cache *sc; - int i; - - if (newsize <= conf->pool_size) - return 0; /* never bother to shrink */ - - err = md_allow_write(conf->mddev); - if (err) - return err; - - /* Step 1 */ - sc = kmem_cache_create(conf->cache_name[1-conf->active_name], - sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev), - 0, 0, NULL); - if (!sc) - return -ENOMEM; - - for (i = conf->max_nr_stripes; i; i--) { - nsh = kmem_cache_zalloc(sc, GFP_KERNEL); - if (!nsh) - break; - - nsh->raid_conf = conf; - #ifdef CONFIG_MULTICORE_RAID456 - init_waitqueue_head(&nsh->ops.wait_for_ops); - #endif - - list_add(&nsh->lru, &newstripes); - } - if (i) { - /* didn't get enough, give up */ - while (!list_empty(&newstripes)) { - nsh = list_entry(newstripes.next, struct stripe_head, lru); - list_del(&nsh->lru); - kmem_cache_free(sc, nsh); - } - kmem_cache_destroy(sc); - return -ENOMEM; - } - /* Step 2 - Must use GFP_NOIO now. - * OK, we have enough stripes, start collecting inactive - * stripes and copying them over - */ - list_for_each_entry(nsh, &newstripes, lru) { - spin_lock_irq(&conf->device_lock); - wait_event_lock_irq(conf->wait_for_stripe, - !list_empty(&conf->inactive_list), - conf->device_lock, - ); - osh = get_free_stripe(conf); - spin_unlock_irq(&conf->device_lock); - atomic_set(&nsh->count, 1); - for(i=0; i<conf->pool_size; i++) - nsh->dev[i].page = osh->dev[i].page; - for( ; i<newsize; i++) - nsh->dev[i].page = NULL; - kmem_cache_free(conf->slab_cache, osh); - } - kmem_cache_destroy(conf->slab_cache); - - /* Step 3. - * At this point, we are holding all the stripes so the array - * is completely stalled, so now is a good time to resize - * conf->disks and the scribble region - */ - ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); - if (ndisks) { - for (i=0; i<conf->raid_disks; i++) - ndisks[i] = conf->disks[i]; - kfree(conf->disks); - conf->disks = ndisks; - } else - err = -ENOMEM; - - get_online_cpus(); - conf->scribble_len = scribble_len(newsize); - for_each_present_cpu(cpu) { - struct raid5_percpu *percpu; - void *scribble; - - percpu = per_cpu_ptr(conf->percpu, cpu); - scribble = kmalloc(conf->scribble_len, GFP_NOIO); - - if (scribble) { - kfree(percpu->scribble); - percpu->scribble = scribble; - } else { - err = -ENOMEM; - break; - } - } - put_online_cpus(); - - /* Step 4, return new stripes to service */ - while(!list_empty(&newstripes)) { - nsh = list_entry(newstripes.next, struct stripe_head, lru); - list_del_init(&nsh->lru); - - for (i=conf->raid_disks; i < newsize; i++) - if (nsh->dev[i].page == NULL) { - struct page *p = alloc_page(GFP_NOIO); - nsh->dev[i].page = p; - if (!p) - err = -ENOMEM; - } - release_stripe(nsh); - } - /* critical section pass, GFP_NOIO no longer needed */ - - conf->slab_cache = sc; - conf->active_name = 1-conf->active_name; - conf->pool_size = newsize; - return err; -} - -static int drop_one_stripe(struct r5conf *conf) -{ - struct stripe_head *sh; - - spin_lock_irq(&conf->device_lock); - sh = get_free_stripe(conf); - spin_unlock_irq(&conf->device_lock); - if (!sh) - return 0; - BUG_ON(atomic_read(&sh->count)); - shrink_buffers(sh); - kmem_cache_free(conf->slab_cache, sh); - atomic_dec(&conf->active_stripes); - return 1; -} - -static void shrink_stripes(struct r5conf *conf) -{ - while (drop_one_stripe(conf)) - ; - - if (conf->slab_cache) - kmem_cache_destroy(conf->slab_cache); - conf->slab_cache = NULL; -} - -static void 
raid5_end_read_request(struct bio * bi, int error) -{ - struct stripe_head *sh = bi->bi_private; - struct r5conf *conf = sh->raid_conf; - int disks = sh->disks, i; - int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); - char b[BDEVNAME_SIZE]; - struct md_rdev *rdev = NULL; - - - for (i=0 ; i<disks; i++) - if (bi == &sh->dev[i].req) - break; - - pr_debug("end_read_request %llu/%d, count: %d, uptodate %d.\n", - (unsigned long long)sh->sector, i, atomic_read(&sh->count), - uptodate); - if (i == disks) { - BUG(); - return; - } - if (test_bit(R5_ReadRepl, &sh->dev[i].flags)) - /* If replacement finished while this request was outstanding, - * 'replacement' might be NULL already. - * In that case it moved down to 'rdev'. - * rdev is not removed until all requests are finished. - */ - rdev = conf->disks[i].replacement; - if (!rdev) - rdev = conf->disks[i].rdev; - - if (uptodate) { - set_bit(R5_UPTODATE, &sh->dev[i].flags); - if (test_bit(R5_ReadError, &sh->dev[i].flags)) { - /* Note that this cannot happen on a - * replacement device. We just fail those on - * any error - */ - printk_ratelimited( - KERN_INFO - "md/raid:%s: read error corrected" - " (%lu sectors at %llu on %s)\n", - mdname(conf->mddev), STRIPE_SECTORS, - (unsigned long long)(sh->sector - + rdev->data_offset), - bdevname(rdev->bdev, b)); - atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); - clear_bit(R5_ReadError, &sh->dev[i].flags); - clear_bit(R5_ReWrite, &sh->dev[i].flags); - } - if (atomic_read(&rdev->read_errors)) - atomic_set(&rdev->read_errors, 0); - } else { - const char *bdn = bdevname(rdev->bdev, b); - int retry = 0; - - clear_bit(R5_UPTODATE, &sh->dev[i].flags); - atomic_inc(&rdev->read_errors); - if (test_bit(R5_ReadRepl, &sh->dev[i].flags)) - printk_ratelimited( - KERN_WARNING - "md/raid:%s: read error on replacement device " - "(sector %llu on %s).\n", - mdname(conf->mddev), - (unsigned long long)(sh->sector - + rdev->data_offset), - bdn); - else if (conf->mddev->degraded >= conf->max_degraded) - printk_ratelimited( - KERN_WARNING - "md/raid:%s: read error not correctable " - "(sector %llu on %s).\n", - mdname(conf->mddev), - (unsigned long long)(sh->sector - + rdev->data_offset), - bdn); - else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) - /* Oh, no!!! */ - printk_ratelimited( - KERN_WARNING - "md/raid:%s: read error NOT corrected!! 
" - "(sector %llu on %s).\n", - mdname(conf->mddev), - (unsigned long long)(sh->sector - + rdev->data_offset), - bdn); - else if (atomic_read(&rdev->read_errors) - > conf->max_nr_stripes) - printk(KERN_WARNING - "md/raid:%s: Too many read errors, failing device %s.\n", - mdname(conf->mddev), bdn); - else - retry = 1; - if (retry) - set_bit(R5_ReadError, &sh->dev[i].flags); - else { - clear_bit(R5_ReadError, &sh->dev[i].flags); - clear_bit(R5_ReWrite, &sh->dev[i].flags); - md_error(conf->mddev, rdev); - } - } - rdev_dec_pending(rdev, conf->mddev); - clear_bit(R5_LOCKED, &sh->dev[i].flags); - set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); -} - -static void raid5_end_write_request(struct bio *bi, int error) -{ - struct stripe_head *sh = bi->bi_private; - struct r5conf *conf = sh->raid_conf; - int disks = sh->disks, i; - struct md_rdev *uninitialized_var(rdev); - int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); - sector_t first_bad; - int bad_sectors; - int replacement = 0; - - for (i = 0 ; i < disks; i++) { - if (bi == &sh->dev[i].req) { - rdev = conf->disks[i].rdev; - break; - } - if (bi == &sh->dev[i].rreq) { - rdev = conf->disks[i].replacement; - if (rdev) - replacement = 1; - else - /* rdev was removed and 'replacement' - * replaced it. rdev is not removed - * until all requests are finished. - */ - rdev = conf->disks[i].rdev; - break; - } - } - pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n", - (unsigned long long)sh->sector, i, atomic_read(&sh->count), - uptodate); - if (i == disks) { - BUG(); - return; - } - - if (replacement) { - if (!uptodate) - md_error(conf->mddev, rdev); - else if (is_badblock(rdev, sh->sector, - STRIPE_SECTORS, - &first_bad, &bad_sectors)) - set_bit(R5_MadeGoodRepl, &sh->dev[i].flags); - } else { - if (!uptodate) { - set_bit(WriteErrorSeen, &rdev->flags); - set_bit(R5_WriteError, &sh->dev[i].flags); - if (!test_and_set_bit(WantReplacement, &rdev->flags)) - set_bit(MD_RECOVERY_NEEDED, - &rdev->mddev->recovery); - } else if (is_badblock(rdev, sh->sector, - STRIPE_SECTORS, - &first_bad, &bad_sectors)) - set_bit(R5_MadeGood, &sh->dev[i].flags); - } - rdev_dec_pending(rdev, conf->mddev); - - if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) - clear_bit(R5_LOCKED, &sh->dev[i].flags); - set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); -} - -static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous); - -static void raid5_build_block(struct stripe_head *sh, int i, int previous) -{ - struct r5dev *dev = &sh->dev[i]; - - bio_init(&dev->req); - dev->req.bi_io_vec = &dev->vec; - dev->req.bi_vcnt++; - dev->req.bi_max_vecs++; - dev->req.bi_private = sh; - dev->vec.bv_page = dev->page; - - bio_init(&dev->rreq); - dev->rreq.bi_io_vec = &dev->rvec; - dev->rreq.bi_vcnt++; - dev->rreq.bi_max_vecs++; - dev->rreq.bi_private = sh; - dev->rvec.bv_page = dev->page; - - dev->flags = 0; - dev->sector = compute_blocknr(sh, i, previous); -} - -static void error(struct mddev *mddev, struct md_rdev *rdev) -{ - char b[BDEVNAME_SIZE]; - struct r5conf *conf = mddev->private; - unsigned long flags; - pr_debug("raid456: error called\n"); - - spin_lock_irqsave(&conf->device_lock, flags); - clear_bit(In_sync, &rdev->flags); - mddev->degraded = calc_degraded(conf); - spin_unlock_irqrestore(&conf->device_lock, flags); - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - - set_bit(Blocked, &rdev->flags); - set_bit(Faulty, &rdev->flags); - set_bit(MD_CHANGE_DEVS, &mddev->flags); - printk(KERN_ALERT - "md/raid:%s: Disk failure on %s, 
disabling device.\n" - "md/raid:%s: Operation continuing on %d devices.\n", - mdname(mddev), - bdevname(rdev->bdev, b), - mdname(mddev), - conf->raid_disks - mddev->degraded); -} - -/* - * Input: a 'big' sector number, - * Output: index of the data and parity disk, and the sector # in them. - */ -static sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector, - int previous, int *dd_idx, - struct stripe_head *sh) -{ - sector_t stripe, stripe2; - sector_t chunk_number; - unsigned int chunk_offset; - int pd_idx, qd_idx; - int ddf_layout = 0; - sector_t new_sector; - int algorithm = previous ? conf->prev_algo - : conf->algorithm; - int sectors_per_chunk = previous ? conf->prev_chunk_sectors - : conf->chunk_sectors; - int raid_disks = previous ? conf->previous_raid_disks - : conf->raid_disks; - int data_disks = raid_disks - conf->max_degraded; - - /* First compute the information on this sector */ - - /* - * Compute the chunk number and the sector offset inside the chunk - */ - chunk_offset = sector_div(r_sector, sectors_per_chunk); - chunk_number = r_sector; - - /* - * Compute the stripe number - */ - stripe = chunk_number; - *dd_idx = sector_div(stripe, data_disks); - stripe2 = stripe; - /* - * Select the parity disk based on the user selected algorithm. - */ - pd_idx = qd_idx = -1; - switch(conf->level) { - case 4: - pd_idx = data_disks; - break; - case 5: - switch (algorithm) { - case ALGORITHM_LEFT_ASYMMETRIC: - pd_idx = data_disks - sector_div(stripe2, raid_disks); - if (*dd_idx >= pd_idx) - (*dd_idx)++; - break; - case ALGORITHM_RIGHT_ASYMMETRIC: - pd_idx = sector_div(stripe2, raid_disks); - if (*dd_idx >= pd_idx) - (*dd_idx)++; - break; - case ALGORITHM_LEFT_SYMMETRIC: - pd_idx = data_disks - sector_div(stripe2, raid_disks); - *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; - break; - case ALGORITHM_RIGHT_SYMMETRIC: - pd_idx = sector_div(stripe2, raid_disks); - *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; - break; - case ALGORITHM_PARITY_0: - pd_idx = 0; - (*dd_idx)++; - break; - case ALGORITHM_PARITY_N: - pd_idx = data_disks; - break; - default: - BUG(); - } - break; - case 6: - - switch (algorithm) { - case ALGORITHM_LEFT_ASYMMETRIC: - pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); - qd_idx = pd_idx + 1; - if (pd_idx == raid_disks-1) { - (*dd_idx)++; /* Q D D D P */ - qd_idx = 0; - } else if (*dd_idx >= pd_idx) - (*dd_idx) += 2; /* D D P Q D */ - break; - case ALGORITHM_RIGHT_ASYMMETRIC: - pd_idx = sector_div(stripe2, raid_disks); - qd_idx = pd_idx + 1; - if (pd_idx == raid_disks-1) { - (*dd_idx)++; /* Q D D D P */ - qd_idx = 0; - } else if (*dd_idx >= pd_idx) - (*dd_idx) += 2; /* D D P Q D */ - break; - case ALGORITHM_LEFT_SYMMETRIC: - pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); - qd_idx = (pd_idx + 1) % raid_disks; - *dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks; - break; - case ALGORITHM_RIGHT_SYMMETRIC: - pd_idx = sector_div(stripe2, raid_disks); - qd_idx = (pd_idx + 1) % raid_disks; - *dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks; - break; - - case ALGORITHM_PARITY_0: - pd_idx = 0; - qd_idx = 1; - (*dd_idx) += 2; - break; - case ALGORITHM_PARITY_N: - pd_idx = data_disks; - qd_idx = data_disks + 1; - break; - - case ALGORITHM_ROTATING_ZERO_RESTART: - /* Exactly the same as RIGHT_ASYMMETRIC, but or - * of blocks for computing Q is different. 
- */ - pd_idx = sector_div(stripe2, raid_disks); - qd_idx = pd_idx + 1; - if (pd_idx == raid_disks-1) { - (*dd_idx)++; /* Q D D D P */ - qd_idx = 0; - } else if (*dd_idx >= pd_idx) - (*dd_idx) += 2; /* D D P Q D */ - ddf_layout = 1; - break; - - case ALGORITHM_ROTATING_N_RESTART: - /* Same as left_asymmetric, but the first stripe is - * D D D P Q rather than - * Q D D D P - */ - stripe2 += 1; - pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); - qd_idx = pd_idx + 1; - if (pd_idx == raid_disks-1) { - (*dd_idx)++; /* Q D D D P */ - qd_idx = 0; - } else if (*dd_idx >= pd_idx) - (*dd_idx) += 2; /* D D P Q D */ - ddf_layout = 1; - break; - - case ALGORITHM_ROTATING_N_CONTINUE: - /* Same as left_symmetric but Q is before P */ - pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks); - qd_idx = (pd_idx + raid_disks - 1) % raid_disks; - *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; - ddf_layout = 1; - break; - - case ALGORITHM_LEFT_ASYMMETRIC_6: - /* RAID5 left_asymmetric, with Q on last device */ - pd_idx = data_disks - sector_div(stripe2, raid_disks-1); - if (*dd_idx >= pd_idx) - (*dd_idx)++; - qd_idx = raid_disks - 1; - break; - - case ALGORITHM_RIGHT_ASYMMETRIC_6: - pd_idx = sector_div(stripe2, raid_disks-1); - if (*dd_idx >= pd_idx) - (*dd_idx)++; - qd_idx = raid_disks - 1; - break; - - case ALGORITHM_LEFT_SYMMETRIC_6: - pd_idx = data_disks - sector_div(stripe2, raid_disks-1); - *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1); - qd_idx = raid_disks - 1; - break; - - case ALGORITHM_RIGHT_SYMMETRIC_6: - pd_idx = sector_div(stripe2, raid_disks-1); - *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1); - qd_idx = raid_disks - 1; - break; - - case ALGORITHM_PARITY_0_6: - pd_idx = 0; - (*dd_idx)++; - qd_idx = raid_disks - 1; - break; - - default: - BUG(); - } - break; - } - - if (sh) { - sh->pd_idx = pd_idx; - sh->qd_idx = qd_idx; - sh->ddf_layout = ddf_layout; - } - /* - * Finally, compute the new sector number - */ - new_sector = (sector_t)stripe * sectors_per_chunk + chunk_offset; - return new_sector; -} - - -static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) -{ - struct r5conf *conf = sh->raid_conf; - int raid_disks = sh->disks; - int data_disks = raid_disks - conf->max_degraded; - sector_t new_sector = sh->sector, check; - int sectors_per_chunk = previous ? conf->prev_chunk_sectors - : conf->chunk_sectors; - int algorithm = previous ?
conf->prev_algo - : conf->algorithm; - sector_t stripe; - int chunk_offset; - sector_t chunk_number; - int dummy1, dd_idx = i; - sector_t r_sector; - struct stripe_head sh2; - - - chunk_offset = sector_div(new_sector, sectors_per_chunk); - stripe = new_sector; - - if (i == sh->pd_idx) - return 0; - switch(conf->level) { - case 4: break; - case 5: - switch (algorithm) { - case ALGORITHM_LEFT_ASYMMETRIC: - case ALGORITHM_RIGHT_ASYMMETRIC: - if (i > sh->pd_idx) - i--; - break; - case ALGORITHM_LEFT_SYMMETRIC: - case ALGORITHM_RIGHT_SYMMETRIC: - if (i < sh->pd_idx) - i += raid_disks; - i -= (sh->pd_idx + 1); - break; - case ALGORITHM_PARITY_0: - i -= 1; - break; - case ALGORITHM_PARITY_N: - break; - default: - BUG(); - } - break; - case 6: - if (i == sh->qd_idx) - return 0; /* It is the Q disk */ - switch (algorithm) { - case ALGORITHM_LEFT_ASYMMETRIC: - case ALGORITHM_RIGHT_ASYMMETRIC: - case ALGORITHM_ROTATING_ZERO_RESTART: - case ALGORITHM_ROTATING_N_RESTART: - if (sh->pd_idx == raid_disks-1) - i--; /* Q D D D P */ - else if (i > sh->pd_idx) - i -= 2; /* D D P Q D */ - break; - case ALGORITHM_LEFT_SYMMETRIC: - case ALGORITHM_RIGHT_SYMMETRIC: - if (sh->pd_idx == raid_disks-1) - i--; /* Q D D D P */ - else { - /* D D P Q D */ - if (i < sh->pd_idx) - i += raid_disks; - i -= (sh->pd_idx + 2); - } - break; - case ALGORITHM_PARITY_0: - i -= 2; - break; - case ALGORITHM_PARITY_N: - break; - case ALGORITHM_ROTATING_N_CONTINUE: - /* Like left_symmetric, but P is before Q */ - if (sh->pd_idx == 0) - i--; /* P D D D Q */ - else { - /* D D Q P D */ - if (i < sh->pd_idx) - i += raid_disks; - i -= (sh->pd_idx + 1); - } - break; - case ALGORITHM_LEFT_ASYMMETRIC_6: - case ALGORITHM_RIGHT_ASYMMETRIC_6: - if (i > sh->pd_idx) - i--; - break; - case ALGORITHM_LEFT_SYMMETRIC_6: - case ALGORITHM_RIGHT_SYMMETRIC_6: - if (i < sh->pd_idx) - i += data_disks + 1; - i -= (sh->pd_idx + 1); - break; - case ALGORITHM_PARITY_0_6: - i -= 1; - break; - default: - BUG(); - } - break; - } - - chunk_number = stripe * data_disks + i; - r_sector = chunk_number * sectors_per_chunk + chunk_offset; - - check = raid5_compute_sector(conf, r_sector, - previous, &dummy1, &sh2); - if (check != sh->sector || dummy1 != dd_idx || sh2.pd_idx != sh->pd_idx - || sh2.qd_idx != sh->qd_idx) { - printk(KERN_ERR "md/raid:%s: compute_blocknr: map not correct\n", - mdname(conf->mddev)); - return 0; - } - return r_sector; -} - - -static void -schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, - int rcw, int expand) -{ - int i, pd_idx = sh->pd_idx, disks = sh->disks; - struct r5conf *conf = sh->raid_conf; - int level = conf->level; - - if (rcw) { - /* if we are not expanding this is a proper write request, and - * there will be bios with new data to be drained into the - * stripe cache - */ - if (!expand) { - sh->reconstruct_state = reconstruct_state_drain_run; - set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); - } else - sh->reconstruct_state = reconstruct_state_run; - - set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); - - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - - if (dev->towrite) { - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantdrain, &dev->flags); - if (!expand) - clear_bit(R5_UPTODATE, &dev->flags); - s->locked++; - } - } - if (s->locked + conf->max_degraded == disks) - if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) - atomic_inc(&conf->pending_full_writes); - } else { - BUG_ON(level == 6); - BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || - test_bit(R5_Wantcompute, 
&sh->dev[pd_idx].flags))); - - sh->reconstruct_state = reconstruct_state_prexor_drain_run; - set_bit(STRIPE_OP_PREXOR, &s->ops_request); - set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); - set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); - - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - if (i == pd_idx) - continue; - - if (dev->towrite && - (test_bit(R5_UPTODATE, &dev->flags) || - test_bit(R5_Wantcompute, &dev->flags))) { - set_bit(R5_Wantdrain, &dev->flags); - set_bit(R5_LOCKED, &dev->flags); - clear_bit(R5_UPTODATE, &dev->flags); - s->locked++; - } - } - } - - /* keep the parity disk(s) locked while asynchronous operations - * are in flight - */ - set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); - clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); - s->locked++; - - if (level == 6) { - int qd_idx = sh->qd_idx; - struct r5dev *dev = &sh->dev[qd_idx]; - - set_bit(R5_LOCKED, &dev->flags); - clear_bit(R5_UPTODATE, &dev->flags); - s->locked++; - } - - pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n", - __func__, (unsigned long long)sh->sector, - s->locked, s->ops_request); -} - -/* - * Each stripe/dev can have one or more bion attached. - * toread/towrite point to the first in a chain. - * The bi_next chain must be in order. - */ -static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite) -{ - struct bio **bip; - struct r5conf *conf = sh->raid_conf; - int firstwrite=0; - - pr_debug("adding bi b#%llu to stripe s#%llu\n", - (unsigned long long)bi->bi_sector, - (unsigned long long)sh->sector); - - - spin_lock_irq(&conf->device_lock); - if (forwrite) { - bip = &sh->dev[dd_idx].towrite; - if (*bip == NULL && sh->dev[dd_idx].written == NULL) - firstwrite = 1; - } else - bip = &sh->dev[dd_idx].toread; - while (*bip && (*bip)->bi_sector < bi->bi_sector) { - if ((*bip)->bi_sector + ((*bip)->bi_size >> 9) > bi->bi_sector) - goto overlap; - bip = & (*bip)->bi_next; - } - if (*bip && (*bip)->bi_sector < bi->bi_sector + ((bi->bi_size)>>9)) - goto overlap; - - BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next); - if (*bip) - bi->bi_next = *bip; - *bip = bi; - bi->bi_phys_segments++; - - if (forwrite) { - /* check if page is covered */ - sector_t sector = sh->dev[dd_idx].sector; - for (bi=sh->dev[dd_idx].towrite; - sector < sh->dev[dd_idx].sector + STRIPE_SECTORS && - bi && bi->bi_sector <= sector; - bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) { - if (bi->bi_sector + (bi->bi_size>>9) >= sector) - sector = bi->bi_sector + (bi->bi_size>>9); - } - if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) - set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); - } - spin_unlock_irq(&conf->device_lock); - - pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", - (unsigned long long)(*bip)->bi_sector, - (unsigned long long)sh->sector, dd_idx); - - if (conf->mddev->bitmap && firstwrite) { - bitmap_startwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, 0); - sh->bm_seq = conf->seq_flush+1; - set_bit(STRIPE_BIT_DELAY, &sh->state); - } - return 1; - - overlap: - set_bit(R5_Overlap, &sh->dev[dd_idx].flags); - spin_unlock_irq(&conf->device_lock); - return 0; -} - -static void end_reshape(struct r5conf *conf); - -static void stripe_set_idx(sector_t stripe, struct r5conf *conf, int previous, - struct stripe_head *sh) -{ - int sectors_per_chunk = - previous ? conf->prev_chunk_sectors : conf->chunk_sectors; - int dd_idx; - int chunk_offset = sector_div(stripe, sectors_per_chunk); - int disks = previous ? 
conf->previous_raid_disks : conf->raid_disks; - - raid5_compute_sector(conf, - stripe * (disks - conf->max_degraded) - *sectors_per_chunk + chunk_offset, - previous, - &dd_idx, sh); -} - -static void -handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, - struct stripe_head_state *s, int disks, - struct bio **return_bi) -{ - int i; - for (i = disks; i--; ) { - struct bio *bi; - int bitmap_end = 0; - - if (test_bit(R5_ReadError, &sh->dev[i].flags)) { - struct md_rdev *rdev; - rcu_read_lock(); - rdev = rcu_dereference(conf->disks[i].rdev); - if (rdev && test_bit(In_sync, &rdev->flags)) - atomic_inc(&rdev->nr_pending); - else - rdev = NULL; - rcu_read_unlock(); - if (rdev) { - if (!rdev_set_badblocks( - rdev, - sh->sector, - STRIPE_SECTORS, 0)) - md_error(conf->mddev, rdev); - rdev_dec_pending(rdev, conf->mddev); - } - } - spin_lock_irq(&conf->device_lock); - /* fail all writes first */ - bi = sh->dev[i].towrite; - sh->dev[i].towrite = NULL; - if (bi) { - s->to_write--; - bitmap_end = 1; - } - - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) - wake_up(&conf->wait_for_overlap); - - while (bi && bi->bi_sector < - sh->dev[i].sector + STRIPE_SECTORS) { - struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); - clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (!raid5_dec_bi_phys_segments(bi)) { - md_write_end(conf->mddev); - bi->bi_next = *return_bi; - *return_bi = bi; - } - bi = nextbi; - } - /* and fail all 'written' */ - bi = sh->dev[i].written; - sh->dev[i].written = NULL; - if (bi) bitmap_end = 1; - while (bi && bi->bi_sector < - sh->dev[i].sector + STRIPE_SECTORS) { - struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); - clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (!raid5_dec_bi_phys_segments(bi)) { - md_write_end(conf->mddev); - bi->bi_next = *return_bi; - *return_bi = bi; - } - bi = bi2; - } - - /* fail any reads if this device is non-operational and - * the data has not reached the cache yet. - */ - if (!test_bit(R5_Wantfill, &sh->dev[i].flags) && - (!test_bit(R5_Insync, &sh->dev[i].flags) || - test_bit(R5_ReadError, &sh->dev[i].flags))) { - bi = sh->dev[i].toread; - sh->dev[i].toread = NULL; - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) - wake_up(&conf->wait_for_overlap); - if (bi) s->to_read--; - while (bi && bi->bi_sector < - sh->dev[i].sector + STRIPE_SECTORS) { - struct bio *nextbi = - r5_next_bio(bi, sh->dev[i].sector); - clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (!raid5_dec_bi_phys_segments(bi)) { - bi->bi_next = *return_bi; - *return_bi = bi; - } - bi = nextbi; - } - } - spin_unlock_irq(&conf->device_lock); - if (bitmap_end) - bitmap_endwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, 0, 0); - /* If we were in the middle of a write the parity block might - * still be locked - so just clear all R5_LOCKED flags - */ - clear_bit(R5_LOCKED, &sh->dev[i].flags); - } - - if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) - if (atomic_dec_and_test(&conf->pending_full_writes)) - md_wakeup_thread(conf->mddev->thread); -} - -static void -handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, - struct stripe_head_state *s) -{ - int abort = 0; - int i; - - clear_bit(STRIPE_SYNCING, &sh->state); - s->syncing = 0; - s->replacing = 0; - /* There is nothing more to do for sync/check/repair. - * Don't even need to abort as that is handled elsewhere - * if needed, and not always wanted e.g. if there is a known - * bad block here. 
- * For recover/replace we need to record a bad block on all - * non-sync devices, or abort the recovery - */ - if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) { - /* During recovery devices cannot be removed, so - * locking and refcounting of rdevs is not needed - */ - for (i = 0; i < conf->raid_disks; i++) { - struct md_rdev *rdev = conf->disks[i].rdev; - if (rdev - && !test_bit(Faulty, &rdev->flags) - && !test_bit(In_sync, &rdev->flags) - && !rdev_set_badblocks(rdev, sh->sector, - STRIPE_SECTORS, 0)) - abort = 1; - rdev = conf->disks[i].replacement; - if (rdev - && !test_bit(Faulty, &rdev->flags) - && !test_bit(In_sync, &rdev->flags) - && !rdev_set_badblocks(rdev, sh->sector, - STRIPE_SECTORS, 0)) - abort = 1; - } - if (abort) - conf->recovery_disabled = - conf->mddev->recovery_disabled; - } - md_done_sync(conf->mddev, STRIPE_SECTORS, !abort); -} - -static int want_replace(struct stripe_head *sh, int disk_idx) -{ - struct md_rdev *rdev; - int rv = 0; - /* Doing recovery so rcu locking not required */ - rdev = sh->raid_conf->disks[disk_idx].replacement; - if (rdev - && !test_bit(Faulty, &rdev->flags) - && !test_bit(In_sync, &rdev->flags) - && (rdev->recovery_offset <= sh->sector - || rdev->mddev->recovery_cp <= sh->sector)) - rv = 1; - - return rv; -} - -/* fetch_block - checks the given member device to see if its data needs - * to be read or computed to satisfy a request. - * - * Returns 1 when no more member devices need to be checked, otherwise returns - * 0 to tell the loop in handle_stripe_fill to continue - */ -static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, - int disk_idx, int disks) -{ - struct r5dev *dev = &sh->dev[disk_idx]; - struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]], - &sh->dev[s->failed_num[1]] }; - - /* is the data in this block needed, and can we get it? */ - if (!test_bit(R5_LOCKED, &dev->flags) && - !test_bit(R5_UPTODATE, &dev->flags) && - (dev->toread || - (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || - s->syncing || s->expanding || - (s->replacing && want_replace(sh, disk_idx)) || - (s->failed >= 1 && fdev[0]->toread) || - (s->failed >= 2 && fdev[1]->toread) || - (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite && - !test_bit(R5_OVERWRITE, &fdev[0]->flags)) || - (sh->raid_conf->level == 6 && s->failed && s->to_write))) { - /* we would like to get this block, possibly by computing it, - * otherwise read it if the backing disk is insync - */ - BUG_ON(test_bit(R5_Wantcompute, &dev->flags)); - BUG_ON(test_bit(R5_Wantread, &dev->flags)); - if ((s->uptodate == disks - 1) && - (s->failed && (disk_idx == s->failed_num[0] || - disk_idx == s->failed_num[1]))) { - /* have disk failed, and we're requested to fetch it; - * do compute it - */ - pr_debug("Computing stripe %llu block %d\n", - (unsigned long long)sh->sector, disk_idx); - set_bit(STRIPE_COMPUTE_RUN, &sh->state); - set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); - set_bit(R5_Wantcompute, &dev->flags); - sh->ops.target = disk_idx; - sh->ops.target2 = -1; /* no 2nd target */ - s->req_compute = 1; - /* Careful: from this point on 'uptodate' is in the eye - * of raid_run_ops which services 'compute' operations - * before writes. R5_Wantcompute flags a block that will - * be R5_UPTODATE by the time it is needed for a - * subsequent operation. 
- */ - s->uptodate++; - return 1; - } else if (s->uptodate == disks-2 && s->failed >= 2) { - /* Computing 2-failure is *very* expensive; only - * do it if failed >= 2 - */ - int other; - for (other = disks; other--; ) { - if (other == disk_idx) - continue; - if (!test_bit(R5_UPTODATE, - &sh->dev[other].flags)) - break; - } - BUG_ON(other < 0); - pr_debug("Computing stripe %llu blocks %d,%d\n", - (unsigned long long)sh->sector, - disk_idx, other); - set_bit(STRIPE_COMPUTE_RUN, &sh->state); - set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); - set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags); - set_bit(R5_Wantcompute, &sh->dev[other].flags); - sh->ops.target = disk_idx; - sh->ops.target2 = other; - s->uptodate += 2; - s->req_compute = 1; - return 1; - } else if (test_bit(R5_Insync, &dev->flags)) { - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantread, &dev->flags); - s->locked++; - pr_debug("Reading block %d (sync=%d)\n", - disk_idx, s->syncing); - } - } - - return 0; -} - -/** - * handle_stripe_fill - read or compute data to satisfy pending requests. - */ -static void handle_stripe_fill(struct stripe_head *sh, - struct stripe_head_state *s, - int disks) -{ - int i; - - /* look for blocks to read/compute, skip this if a compute - * is already in flight, or if the stripe contents are in the - * midst of changing due to a write - */ - if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && - !sh->reconstruct_state) - for (i = disks; i--; ) - if (fetch_block(sh, s, i, disks)) - break; - set_bit(STRIPE_HANDLE, &sh->state); -} - - -/* handle_stripe_clean_event - * any written block on an uptodate or failed drive can be returned. - * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but - * never LOCKED, so we don't need to test 'failed' directly. 
- */ -static void handle_stripe_clean_event(struct r5conf *conf, - struct stripe_head *sh, int disks, struct bio **return_bi) -{ - int i; - struct r5dev *dev; - - for (i = disks; i--; ) - if (sh->dev[i].written) { - dev = &sh->dev[i]; - if (!test_bit(R5_LOCKED, &dev->flags) && - test_bit(R5_UPTODATE, &dev->flags)) { - /* We can return any write requests */ - struct bio *wbi, *wbi2; - int bitmap_end = 0; - pr_debug("Return write for disc %d\n", i); - spin_lock_irq(&conf->device_lock); - wbi = dev->written; - dev->written = NULL; - while (wbi && wbi->bi_sector < - dev->sector + STRIPE_SECTORS) { - wbi2 = r5_next_bio(wbi, dev->sector); - if (!raid5_dec_bi_phys_segments(wbi)) { - md_write_end(conf->mddev); - wbi->bi_next = *return_bi; - *return_bi = wbi; - } - wbi = wbi2; - } - if (dev->towrite == NULL) - bitmap_end = 1; - spin_unlock_irq(&conf->device_lock); - if (bitmap_end) - bitmap_endwrite(conf->mddev->bitmap, - sh->sector, - STRIPE_SECTORS, - !test_bit(STRIPE_DEGRADED, &sh->state), - 0); - } - } - - if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) - if (atomic_dec_and_test(&conf->pending_full_writes)) - md_wakeup_thread(conf->mddev->thread); -} - -static void handle_stripe_dirtying(struct r5conf *conf, - struct stripe_head *sh, - struct stripe_head_state *s, - int disks) -{ - int rmw = 0, rcw = 0, i; - if (conf->max_degraded == 2) { - /* RAID6 requires 'rcw' in current implementation - * Calculate the real rcw later - for now fake it - * look like rcw is cheaper - */ - rcw = 1; rmw = 2; - } else for (i = disks; i--; ) { - /* would I have to read this buffer for read_modify_write */ - struct r5dev *dev = &sh->dev[i]; - if ((dev->towrite || i == sh->pd_idx) && - !test_bit(R5_LOCKED, &dev->flags) && - !(test_bit(R5_UPTODATE, &dev->flags) || - test_bit(R5_Wantcompute, &dev->flags))) { - if (test_bit(R5_Insync, &dev->flags)) - rmw++; - else - rmw += 2*disks; /* cannot read it */ - } - /* Would I have to read this buffer for reconstruct_write */ - if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx && - !test_bit(R5_LOCKED, &dev->flags) && - !(test_bit(R5_UPTODATE, &dev->flags) || - test_bit(R5_Wantcompute, &dev->flags))) { - if (test_bit(R5_Insync, &dev->flags)) rcw++; - else - rcw += 2*disks; - } - } - pr_debug("for sector %llu, rmw=%d rcw=%d\n", - (unsigned long long)sh->sector, rmw, rcw); - set_bit(STRIPE_HANDLE, &sh->state); - if (rmw < rcw && rmw > 0) - /* prefer read-modify-write, but need to get some data */ - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - if ((dev->towrite || i == sh->pd_idx) && - !test_bit(R5_LOCKED, &dev->flags) && - !(test_bit(R5_UPTODATE, &dev->flags) || - test_bit(R5_Wantcompute, &dev->flags)) && - test_bit(R5_Insync, &dev->flags)) { - if ( - test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { - pr_debug("Read_old block " - "%d for r-m-w\n", i); - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantread, &dev->flags); - s->locked++; - } else { - set_bit(STRIPE_DELAYED, &sh->state); - set_bit(STRIPE_HANDLE, &sh->state); - } - } - } - if (rcw <= rmw && rcw > 0) { - /* want reconstruct write, but need to get some data */ - rcw = 0; - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - if (!test_bit(R5_OVERWRITE, &dev->flags) && - i != sh->pd_idx && i != sh->qd_idx && - !test_bit(R5_LOCKED, &dev->flags) && - !(test_bit(R5_UPTODATE, &dev->flags) || - test_bit(R5_Wantcompute, &dev->flags))) { - rcw++; - if (!test_bit(R5_Insync, &dev->flags)) - continue; /* it's a failed drive */ - if ( - test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { - 
pr_debug("Read_old block " - "%d for Reconstruct\n", i); - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantread, &dev->flags); - s->locked++; - } else { - set_bit(STRIPE_DELAYED, &sh->state); - set_bit(STRIPE_HANDLE, &sh->state); - } - } - } - } - /* now if nothing is locked, and if we have enough data, - * we can start a write request - */ - /* since handle_stripe can be called at any time we need to handle the - * case where a compute block operation has been submitted and then a - * subsequent call wants to start a write request. raid_run_ops only - * handles the case where compute block and reconstruct are requested - * simultaneously. If this is not the case then new writes need to be - * held off until the compute completes. - */ - if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && - (s->locked == 0 && (rcw == 0 || rmw == 0) && - !test_bit(STRIPE_BIT_DELAY, &sh->state))) - schedule_reconstruction(sh, s, rcw == 0, 0); -} - -static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh, - struct stripe_head_state *s, int disks) -{ - struct r5dev *dev = NULL; - - set_bit(STRIPE_HANDLE, &sh->state); - - switch (sh->check_state) { - case check_state_idle: - /* start a new check operation if there are no failures */ - if (s->failed == 0) { - BUG_ON(s->uptodate != disks); - sh->check_state = check_state_run; - set_bit(STRIPE_OP_CHECK, &s->ops_request); - clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); - s->uptodate--; - break; - } - dev = &sh->dev[s->failed_num[0]]; - /* fall through */ - case check_state_compute_result: - sh->check_state = check_state_idle; - if (!dev) - dev = &sh->dev[sh->pd_idx]; - - /* check that a write has not made the stripe insync */ - if (test_bit(STRIPE_INSYNC, &sh->state)) - break; - - /* either failed parity check, or recovery is happening */ - BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); - BUG_ON(s->uptodate != disks); - - set_bit(R5_LOCKED, &dev->flags); - s->locked++; - set_bit(R5_Wantwrite, &dev->flags); - - clear_bit(STRIPE_DEGRADED, &sh->state); - set_bit(STRIPE_INSYNC, &sh->state); - break; - case check_state_run: - break; /* we will be called again upon completion */ - case check_state_check_result: - sh->check_state = check_state_idle; - - /* if a failure occurred during the check operation, leave - * STRIPE_INSYNC not set and let the stripe be handled again - */ - if (s->failed) - break; - - /* handle a successful check operation, if parity is correct - * we are done. Otherwise update the mismatch count and repair - * parity if !MD_RECOVERY_CHECK - */ - if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0) - /* parity is correct (on disc, - * not in buffer any more) - */ - set_bit(STRIPE_INSYNC, &sh->state); - else { - conf->mddev->resync_mismatches += STRIPE_SECTORS; - if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) - /* don't try to repair!! 
*/ - set_bit(STRIPE_INSYNC, &sh->state); - else { - sh->check_state = check_state_compute_run; - set_bit(STRIPE_COMPUTE_RUN, &sh->state); - set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); - set_bit(R5_Wantcompute, - &sh->dev[sh->pd_idx].flags); - sh->ops.target = sh->pd_idx; - sh->ops.target2 = -1; - s->uptodate++; - } - } - break; - case check_state_compute_run: - break; - default: - printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n", - __func__, sh->check_state, - (unsigned long long) sh->sector); - BUG(); - } -} - - -static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, - struct stripe_head_state *s, - int disks) -{ - int pd_idx = sh->pd_idx; - int qd_idx = sh->qd_idx; - struct r5dev *dev; - - set_bit(STRIPE_HANDLE, &sh->state); - - BUG_ON(s->failed > 2); - - /* Want to check and possibly repair P and Q. - * However there could be one 'failed' device, in which - * case we can only check one of them, possibly using the - * other to generate missing data - */ - - switch (sh->check_state) { - case check_state_idle: - /* start a new check operation if there are < 2 failures */ - if (s->failed == s->q_failed) { - /* The only possible failed device holds Q, so it - * makes sense to check P (If anything else were failed, - * we would have used P to recreate it). - */ - sh->check_state = check_state_run; - } - if (!s->q_failed && s->failed < 2) { - /* Q is not failed, and we didn't use it to generate - * anything, so it makes sense to check it - */ - if (sh->check_state == check_state_run) - sh->check_state = check_state_run_pq; - else - sh->check_state = check_state_run_q; - } - - /* discard potentially stale zero_sum_result */ - sh->ops.zero_sum_result = 0; - - if (sh->check_state == check_state_run) { - /* async_xor_zero_sum destroys the contents of P */ - clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); - s->uptodate--; - } - if (sh->check_state >= check_state_run && - sh->check_state <= check_state_run_pq) { - /* async_syndrome_zero_sum preserves P and Q, so - * no need to mark them !uptodate here - */ - set_bit(STRIPE_OP_CHECK, &s->ops_request); - break; - } - - /* we have 2-disk failure */ - BUG_ON(s->failed != 2); - /* fall through */ - case check_state_compute_result: - sh->check_state = check_state_idle; - - /* check that a write has not made the stripe insync */ - if (test_bit(STRIPE_INSYNC, &sh->state)) - break; - - /* now write out any block on a failed drive, - * or P or Q if they were recomputed - */ - BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */ - if (s->failed == 2) { - dev = &sh->dev[s->failed_num[1]]; - s->locked++; - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantwrite, &dev->flags); - } - if (s->failed >= 1) { - dev = &sh->dev[s->failed_num[0]]; - s->locked++; - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantwrite, &dev->flags); - } - if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { - dev = &sh->dev[pd_idx]; - s->locked++; - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantwrite, &dev->flags); - } - if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { - dev = &sh->dev[qd_idx]; - s->locked++; - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantwrite, &dev->flags); - } - clear_bit(STRIPE_DEGRADED, &sh->state); - - set_bit(STRIPE_INSYNC, &sh->state); - break; - case check_state_run: - case check_state_run_q: - case check_state_run_pq: - break; /* we will be called again upon completion */ - case check_state_check_result: - sh->check_state = check_state_idle; - - /* handle a successful check operation, if parity 
is correct - * we are done. Otherwise update the mismatch count and repair - * parity if !MD_RECOVERY_CHECK - */ - if (sh->ops.zero_sum_result == 0) { - /* both parities are correct */ - if (!s->failed) - set_bit(STRIPE_INSYNC, &sh->state); - else { - /* in contrast to the raid5 case we can validate - * parity, but still have a failure to write - * back - */ - sh->check_state = check_state_compute_result; - /* Returning at this point means that we may go - * off and bring p and/or q uptodate again so - * we make sure to check zero_sum_result again - * to verify if p or q need writeback - */ - } - } else { - conf->mddev->resync_mismatches += STRIPE_SECTORS; - if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) - /* don't try to repair!! */ - set_bit(STRIPE_INSYNC, &sh->state); - else { - int *target = &sh->ops.target; - - sh->ops.target = -1; - sh->ops.target2 = -1; - sh->check_state = check_state_compute_run; - set_bit(STRIPE_COMPUTE_RUN, &sh->state); - set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); - if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { - set_bit(R5_Wantcompute, - &sh->dev[pd_idx].flags); - *target = pd_idx; - target = &sh->ops.target2; - s->uptodate++; - } - if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { - set_bit(R5_Wantcompute, - &sh->dev[qd_idx].flags); - *target = qd_idx; - s->uptodate++; - } - } - } - break; - case check_state_compute_run: - break; - default: - printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n", - __func__, sh->check_state, - (unsigned long long) sh->sector); - BUG(); - } -} - -static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh) -{ - int i; - - /* We have read all the blocks in this stripe and now we need to - * copy some of them into a target stripe for expand. - */ - struct dma_async_tx_descriptor *tx = NULL; - clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); - for (i = 0; i < sh->disks; i++) - if (i != sh->pd_idx && i != sh->qd_idx) { - int dd_idx, j; - struct stripe_head *sh2; - struct async_submit_ctl submit; - - sector_t bn = compute_blocknr(sh, i, 1); - sector_t s = raid5_compute_sector(conf, bn, 0, - &dd_idx, NULL); - sh2 = get_active_stripe(conf, s, 0, 1, 1); - if (sh2 == NULL) - /* so far only the early blocks of this stripe - * have been requested. When later blocks - * get requested, we will try again - */ - continue; - if (!test_bit(STRIPE_EXPANDING, &sh2->state) || - test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) { - /* must have already done this block */ - release_stripe(sh2); - continue; - } - - /* place all the copies on one channel */ - init_async_submit(&submit, 0, tx, NULL, NULL, NULL); - tx = async_memcpy(sh2->dev[dd_idx].page, - sh->dev[i].page, 0, 0, STRIPE_SIZE, - &submit); - - set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); - set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); - for (j = 0; j < conf->raid_disks; j++) - if (j != sh2->pd_idx && - j != sh2->qd_idx && - !test_bit(R5_Expanded, &sh2->dev[j].flags)) - break; - if (j == conf->raid_disks) { - set_bit(STRIPE_EXPAND_READY, &sh2->state); - set_bit(STRIPE_HANDLE, &sh2->state); - } - release_stripe(sh2); - - } - /* done submitting copies, wait for them to complete */ - if (tx) { - async_tx_ack(tx); - dma_wait_for_async_tx(tx); - } -} - -/* - * handle_stripe - do things to a stripe. - * - * We lock the stripe by setting STRIPE_ACTIVE and then examine the - * state of various bits to see what needs to be done. 
- * Possible results: - * return some read requests which now have data - * return some write requests which are safely on storage - * schedule a read on some buffers - * schedule a write of some buffers - * return confirmation of parity correctness - * - */ - -static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) -{ - struct r5conf *conf = sh->raid_conf; - int disks = sh->disks; - struct r5dev *dev; - int i; - int do_recovery = 0; - - memset(s, 0, sizeof(*s)); - - s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); - s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); - s->failed_num[0] = -1; - s->failed_num[1] = -1; - - /* Now to look around and see what can be done */ - rcu_read_lock(); - spin_lock_irq(&conf->device_lock); - for (i=disks; i--; ) { - struct md_rdev *rdev; - sector_t first_bad; - int bad_sectors; - int is_bad = 0; - - dev = &sh->dev[i]; - - pr_debug("check %d: state 0x%lx read %p write %p written %p\n", - i, dev->flags, - dev->toread, dev->towrite, dev->written); - /* maybe we can reply to a read - * - * new wantfill requests are only permitted while - * ops_complete_biofill is guaranteed to be inactive - */ - if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && - !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) - set_bit(R5_Wantfill, &dev->flags); - - /* now count some things */ - if (test_bit(R5_LOCKED, &dev->flags)) - s->locked++; - if (test_bit(R5_UPTODATE, &dev->flags)) - s->uptodate++; - if (test_bit(R5_Wantcompute, &dev->flags)) { - s->compute++; - BUG_ON(s->compute > 2); - } - - if (test_bit(R5_Wantfill, &dev->flags)) - s->to_fill++; - else if (dev->toread) - s->to_read++; - if (dev->towrite) { - s->to_write++; - if (!test_bit(R5_OVERWRITE, &dev->flags)) - s->non_overwrite++; - } - if (dev->written) - s->written++; - /* Prefer to use the replacement for reads, but only - * if it is recovered enough and has no bad blocks. - */ - rdev = rcu_dereference(conf->disks[i].replacement); - if (rdev && !test_bit(Faulty, &rdev->flags) && - rdev->recovery_offset >= sh->sector + STRIPE_SECTORS && - !is_badblock(rdev, sh->sector, STRIPE_SECTORS, - &first_bad, &bad_sectors)) - set_bit(R5_ReadRepl, &dev->flags); - else { - if (rdev) - set_bit(R5_NeedReplace, &dev->flags); - rdev = rcu_dereference(conf->disks[i].rdev); - clear_bit(R5_ReadRepl, &dev->flags); - } - if (rdev && test_bit(Faulty, &rdev->flags)) - rdev = NULL; - if (rdev) { - is_bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS, - &first_bad, &bad_sectors); - if (s->blocked_rdev == NULL - && (test_bit(Blocked, &rdev->flags) - || is_bad < 0)) { - if (is_bad < 0) - set_bit(BlockedBadBlocks, - &rdev->flags); - s->blocked_rdev = rdev; - atomic_inc(&rdev->nr_pending); - } - } - clear_bit(R5_Insync, &dev->flags); - if (!rdev) - /* Not in-sync */; - else if (is_bad) { - /* also not in-sync */ - if (!test_bit(WriteErrorSeen, &rdev->flags) && - test_bit(R5_UPTODATE, &dev->flags)) { - /* treat as in-sync, but with a read error - * which we can now try to correct - */ - set_bit(R5_Insync, &dev->flags); - set_bit(R5_ReadError, &dev->flags); - } - } else if (test_bit(In_sync, &rdev->flags)) - set_bit(R5_Insync, &dev->flags); - else if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset) - /* in sync if before recovery_offset */ - set_bit(R5_Insync, &dev->flags); - else if (test_bit(R5_UPTODATE, &dev->flags) && - test_bit(R5_Expanded, &dev->flags)) - /* If we've reshaped into here, we assume it is Insync. - * We will shortly update recovery_offset to make - * it official. 
- */ - set_bit(R5_Insync, &dev->flags); - - if (rdev && test_bit(R5_WriteError, &dev->flags)) { - /* This flag does not apply to '.replacement' - * only to .rdev, so make sure to check that*/ - struct md_rdev *rdev2 = rcu_dereference( - conf->disks[i].rdev); - if (rdev2 == rdev) - clear_bit(R5_Insync, &dev->flags); - if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { - s->handle_bad_blocks = 1; - atomic_inc(&rdev2->nr_pending); - } else - clear_bit(R5_WriteError, &dev->flags); - } - if (rdev && test_bit(R5_MadeGood, &dev->flags)) { - /* This flag does not apply to '.replacement' - * only to .rdev, so make sure to check that*/ - struct md_rdev *rdev2 = rcu_dereference( - conf->disks[i].rdev); - if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { - s->handle_bad_blocks = 1; - atomic_inc(&rdev2->nr_pending); - } else - clear_bit(R5_MadeGood, &dev->flags); - } - if (test_bit(R5_MadeGoodRepl, &dev->flags)) { - struct md_rdev *rdev2 = rcu_dereference( - conf->disks[i].replacement); - if (rdev2 && !test_bit(Faulty, &rdev2->flags)) { - s->handle_bad_blocks = 1; - atomic_inc(&rdev2->nr_pending); - } else - clear_bit(R5_MadeGoodRepl, &dev->flags); - } - if (!test_bit(R5_Insync, &dev->flags)) { - /* The ReadError flag will just be confusing now */ - clear_bit(R5_ReadError, &dev->flags); - clear_bit(R5_ReWrite, &dev->flags); - } - if (test_bit(R5_ReadError, &dev->flags)) - clear_bit(R5_Insync, &dev->flags); - if (!test_bit(R5_Insync, &dev->flags)) { - if (s->failed < 2) - s->failed_num[s->failed] = i; - s->failed++; - if (rdev && !test_bit(Faulty, &rdev->flags)) - do_recovery = 1; - } - } - spin_unlock_irq(&conf->device_lock); - if (test_bit(STRIPE_SYNCING, &sh->state)) { - /* If there is a failed device being replaced, - * we must be recovering. - * else if we are after recovery_cp, we must be syncing - * else if MD_RECOVERY_REQUESTED is set, we also are syncing. - * else we can only be replacing - * sync and recovery both need to read all devices, and so - * use the same flag. 
- */ - if (do_recovery || - sh->sector >= conf->mddev->recovery_cp || - test_bit(MD_RECOVERY_REQUESTED, &(conf->mddev->recovery))) - s->syncing = 1; - else - s->replacing = 1; - } - rcu_read_unlock(); -} - -static void handle_stripe(struct stripe_head *sh) -{ - struct stripe_head_state s; - struct r5conf *conf = sh->raid_conf; - int i; - int prexor; - int disks = sh->disks; - struct r5dev *pdev, *qdev; - - clear_bit(STRIPE_HANDLE, &sh->state); - if (test_and_set_bit_lock(STRIPE_ACTIVE, &sh->state)) { - /* already being handled, ensure it gets handled - * again when current action finishes */ - set_bit(STRIPE_HANDLE, &sh->state); - return; - } - - if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { - set_bit(STRIPE_SYNCING, &sh->state); - clear_bit(STRIPE_INSYNC, &sh->state); - } - clear_bit(STRIPE_DELAYED, &sh->state); - - pr_debug("handling stripe %llu, state=%#lx cnt=%d, " - "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n", - (unsigned long long)sh->sector, sh->state, - atomic_read(&sh->count), sh->pd_idx, sh->qd_idx, - sh->check_state, sh->reconstruct_state); - - analyse_stripe(sh, &s); - - if (s.handle_bad_blocks) { - set_bit(STRIPE_HANDLE, &sh->state); - goto finish; - } - - if (unlikely(s.blocked_rdev)) { - if (s.syncing || s.expanding || s.expanded || - s.replacing || s.to_write || s.written) { - set_bit(STRIPE_HANDLE, &sh->state); - goto finish; - } - /* There is nothing for the blocked_rdev to block */ - rdev_dec_pending(s.blocked_rdev, conf->mddev); - s.blocked_rdev = NULL; - } - - if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { - set_bit(STRIPE_OP_BIOFILL, &s.ops_request); - set_bit(STRIPE_BIOFILL_RUN, &sh->state); - } - - pr_debug("locked=%d uptodate=%d to_read=%d" - " to_write=%d failed=%d failed_num=%d,%d\n", - s.locked, s.uptodate, s.to_read, s.to_write, s.failed, - s.failed_num[0], s.failed_num[1]); - /* check if the array has lost more than max_degraded devices and, - * if so, some requests might need to be failed. - */ - if (s.failed > conf->max_degraded) { - sh->check_state = 0; - sh->reconstruct_state = 0; - if (s.to_read+s.to_write+s.written) - handle_failed_stripe(conf, sh, &s, disks, &s.return_bi); - if (s.syncing + s.replacing) - handle_failed_sync(conf, sh, &s); - } - - /* - * might be able to return some write requests if the parity blocks - * are safe, or on a failed drive - */ - pdev = &sh->dev[sh->pd_idx]; - s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx) - || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx); - qdev = &sh->dev[sh->qd_idx]; - s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx) - || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx) - || conf->level < 6; - - if (s.written && - (s.p_failed || ((test_bit(R5_Insync, &pdev->flags) - && !test_bit(R5_LOCKED, &pdev->flags) - && test_bit(R5_UPTODATE, &pdev->flags)))) && - (s.q_failed || ((test_bit(R5_Insync, &qdev->flags) - && !test_bit(R5_LOCKED, &qdev->flags) - && test_bit(R5_UPTODATE, &qdev->flags))))) - handle_stripe_clean_event(conf, sh, disks, &s.return_bi); - - /* Now we might consider reading some blocks, either to check/generate - * parity, or to satisfy requests - * or to load a block that is being partially written. 
- */ - if (s.to_read || s.non_overwrite - || (conf->level == 6 && s.to_write && s.failed) - || (s.syncing && (s.uptodate + s.compute < disks)) - || s.replacing - || s.expanding) - handle_stripe_fill(sh, &s, disks); - - /* Now we check to see if any write operations have recently - * completed - */ - prexor = 0; - if (sh->reconstruct_state == reconstruct_state_prexor_drain_result) - prexor = 1; - if (sh->reconstruct_state == reconstruct_state_drain_result || - sh->reconstruct_state == reconstruct_state_prexor_drain_result) { - sh->reconstruct_state = reconstruct_state_idle; - - /* All the 'written' buffers and the parity block are ready to - * be written back to disk - */ - BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags)); - BUG_ON(sh->qd_idx >= 0 && - !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags)); - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - if (test_bit(R5_LOCKED, &dev->flags) && - (i == sh->pd_idx || i == sh->qd_idx || - dev->written)) { - pr_debug("Writing block %d\n", i); - set_bit(R5_Wantwrite, &dev->flags); - if (prexor) - continue; - if (!test_bit(R5_Insync, &dev->flags) || - ((i == sh->pd_idx || i == sh->qd_idx) && - s.failed == 0)) - set_bit(STRIPE_INSYNC, &sh->state); - } - } - if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) - s.dec_preread_active = 1; - } - - /* Now to consider new write requests and what else, if anything - * should be read. We do not handle new writes when: - * 1/ A 'write' operation (copy+xor) is already in flight. - * 2/ A 'check' operation is in flight, as it may clobber the parity - * block. - */ - if (s.to_write && !sh->reconstruct_state && !sh->check_state) - handle_stripe_dirtying(conf, sh, &s, disks); - - /* maybe we need to check and possibly fix the parity for this stripe - * Any reads will already have been scheduled, so we just see if enough - * data is available. The parity check is held off while parity - * dependent operations are in flight. 
- */ - if (sh->check_state || - (s.syncing && s.locked == 0 && - !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && - !test_bit(STRIPE_INSYNC, &sh->state))) { - if (conf->level == 6) - handle_parity_checks6(conf, sh, &s, disks); - else - handle_parity_checks5(conf, sh, &s, disks); - } - - if (s.replacing && s.locked == 0 - && !test_bit(STRIPE_INSYNC, &sh->state)) { - /* Write out to replacement devices where possible */ - for (i = 0; i < conf->raid_disks; i++) - if (test_bit(R5_UPTODATE, &sh->dev[i].flags) && - test_bit(R5_NeedReplace, &sh->dev[i].flags)) { - set_bit(R5_WantReplace, &sh->dev[i].flags); - set_bit(R5_LOCKED, &sh->dev[i].flags); - s.locked++; - } - set_bit(STRIPE_INSYNC, &sh->state); - } - if ((s.syncing || s.replacing) && s.locked == 0 && - test_bit(STRIPE_INSYNC, &sh->state)) { - md_done_sync(conf->mddev, STRIPE_SECTORS, 1); - clear_bit(STRIPE_SYNCING, &sh->state); - } - - /* If the failed drives are just a ReadError, then we might need - * to progress the repair/check process - */ - if (s.failed <= conf->max_degraded && !conf->mddev->ro) - for (i = 0; i < s.failed; i++) { - struct r5dev *dev = &sh->dev[s.failed_num[i]]; - if (test_bit(R5_ReadError, &dev->flags) - && !test_bit(R5_LOCKED, &dev->flags) - && test_bit(R5_UPTODATE, &dev->flags) - ) { - if (!test_bit(R5_ReWrite, &dev->flags)) { - set_bit(R5_Wantwrite, &dev->flags); - set_bit(R5_ReWrite, &dev->flags); - set_bit(R5_LOCKED, &dev->flags); - s.locked++; - } else { - /* let's read it back */ - set_bit(R5_Wantread, &dev->flags); - set_bit(R5_LOCKED, &dev->flags); - s.locked++; - } - } - } - - - /* Finish reconstruct operations initiated by the expansion process */ - if (sh->reconstruct_state == reconstruct_state_result) { - struct stripe_head *sh_src - = get_active_stripe(conf, sh->sector, 1, 1, 1); - if (sh_src && test_bit(STRIPE_EXPAND_SOURCE, &sh_src->state)) { - /* sh cannot be written until sh_src has been read. 
- * so arrange for sh to be delayed a little - */ - set_bit(STRIPE_DELAYED, &sh->state); - set_bit(STRIPE_HANDLE, &sh->state); - if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, - &sh_src->state)) - atomic_inc(&conf->preread_active_stripes); - release_stripe(sh_src); - goto finish; - } - if (sh_src) - release_stripe(sh_src); - - sh->reconstruct_state = reconstruct_state_idle; - clear_bit(STRIPE_EXPANDING, &sh->state); - for (i = conf->raid_disks; i--; ) { - set_bit(R5_Wantwrite, &sh->dev[i].flags); - set_bit(R5_LOCKED, &sh->dev[i].flags); - s.locked++; - } - } - - if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && - !sh->reconstruct_state) { - /* Need to write out all blocks after computing parity */ - sh->disks = conf->raid_disks; - stripe_set_idx(sh->sector, conf, 0, sh); - schedule_reconstruction(sh, &s, 1, 1); - } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { - clear_bit(STRIPE_EXPAND_READY, &sh->state); - atomic_dec(&conf->reshape_stripes); - wake_up(&conf->wait_for_overlap); - md_done_sync(conf->mddev, STRIPE_SECTORS, 1); - } - - if (s.expanding && s.locked == 0 && - !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) - handle_stripe_expansion(conf, sh); - -finish: - /* wait for this device to become unblocked */ - if (conf->mddev->external && unlikely(s.blocked_rdev)) - md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev); - - if (s.handle_bad_blocks) - for (i = disks; i--; ) { - struct md_rdev *rdev; - struct r5dev *dev = &sh->dev[i]; - if (test_and_clear_bit(R5_WriteError, &dev->flags)) { - /* We own a safe reference to the rdev */ - rdev = conf->disks[i].rdev; - if (!rdev_set_badblocks(rdev, sh->sector, - STRIPE_SECTORS, 0)) - md_error(conf->mddev, rdev); - rdev_dec_pending(rdev, conf->mddev); - } - if (test_and_clear_bit(R5_MadeGood, &dev->flags)) { - rdev = conf->disks[i].rdev; - rdev_clear_badblocks(rdev, sh->sector, - STRIPE_SECTORS); - rdev_dec_pending(rdev, conf->mddev); - } - if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) { - rdev = conf->disks[i].replacement; - if (!rdev) - /* rdev have been moved down */ - rdev = conf->disks[i].rdev; - rdev_clear_badblocks(rdev, sh->sector, - STRIPE_SECTORS); - rdev_dec_pending(rdev, conf->mddev); - } - } - - if (s.ops_request) - raid_run_ops(sh, s.ops_request); - - ops_run_io(sh, &s); - - if (s.dec_preread_active) { - /* We delay this until after ops_run_io so that if make_request - * is waiting on a flush, it won't continue until the writes - * have actually been submitted. 
- */ - atomic_dec(&conf->preread_active_stripes); - if (atomic_read(&conf->preread_active_stripes) < - IO_THRESHOLD) - md_wakeup_thread(conf->mddev->thread); - } - - return_io(s.return_bi); - - clear_bit_unlock(STRIPE_ACTIVE, &sh->state); -} - -static void raid5_activate_delayed(struct r5conf *conf) -{ - if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { - while (!list_empty(&conf->delayed_list)) { - struct list_head *l = conf->delayed_list.next; - struct stripe_head *sh; - sh = list_entry(l, struct stripe_head, lru); - list_del_init(l); - clear_bit(STRIPE_DELAYED, &sh->state); - if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) - atomic_inc(&conf->preread_active_stripes); - list_add_tail(&sh->lru, &conf->hold_list); - } - } -} - -static void activate_bit_delay(struct r5conf *conf) -{ - /* device_lock is held */ - struct list_head head; - list_add(&head, &conf->bitmap_list); - list_del_init(&conf->bitmap_list); - while (!list_empty(&head)) { - struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru); - list_del_init(&sh->lru); - atomic_inc(&sh->count); - __release_stripe(conf, sh); - } -} - -int md_raid5_congested(struct mddev *mddev, int bits) -{ - struct r5conf *conf = mddev->private; - - /* No difference between reads and writes. Just check - * how busy the stripe_cache is - */ - - if (conf->inactive_blocked) - return 1; - if (conf->quiesce) - return 1; - if (list_empty_careful(&conf->inactive_list)) - return 1; - - return 0; -} -EXPORT_SYMBOL_GPL(md_raid5_congested); - -static int raid5_congested(void *data, int bits) -{ - struct mddev *mddev = data; - - return mddev_congested(mddev, bits) || - md_raid5_congested(mddev, bits); -} - -/* We want read requests to align with chunks where possible, - * but write requests don't need to. - */ -static int raid5_mergeable_bvec(struct request_queue *q, - struct bvec_merge_data *bvm, - struct bio_vec *biovec) -{ - struct mddev *mddev = q->queuedata; - sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); - int max; - unsigned int chunk_sectors = mddev->chunk_sectors; - unsigned int bio_sectors = bvm->bi_size >> 9; - - if ((bvm->bi_rw & 1) == WRITE) - return biovec->bv_len; /* always allow writes to be mergeable */ - - if (mddev->new_chunk_sectors < mddev->chunk_sectors) - chunk_sectors = mddev->new_chunk_sectors; - max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; - if (max < 0) max = 0; - if (max <= biovec->bv_len && bio_sectors == 0) - return biovec->bv_len; - else - return max; -} - - -static int in_chunk_boundary(struct mddev *mddev, struct bio *bio) -{ - sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); - unsigned int chunk_sectors = mddev->chunk_sectors; - unsigned int bio_sectors = bio->bi_size >> 9; - - if (mddev->new_chunk_sectors < mddev->chunk_sectors) - chunk_sectors = mddev->new_chunk_sectors; - return chunk_sectors >= - ((sector & (chunk_sectors - 1)) + bio_sectors); -} - -/* - * add bio to the retry LIFO ( in O(1) ... we are in interrupt ) - * later sampled by raid5d. 
- */ -static void add_bio_to_retry(struct bio *bi,struct r5conf *conf) -{ - unsigned long flags; - - spin_lock_irqsave(&conf->device_lock, flags); - - bi->bi_next = conf->retry_read_aligned_list; - conf->retry_read_aligned_list = bi; - - spin_unlock_irqrestore(&conf->device_lock, flags); - md_wakeup_thread(conf->mddev->thread); -} - - -static struct bio *remove_bio_from_retry(struct r5conf *conf) -{ - struct bio *bi; - - bi = conf->retry_read_aligned; - if (bi) { - conf->retry_read_aligned = NULL; - return bi; - } - bi = conf->retry_read_aligned_list; - if(bi) { - conf->retry_read_aligned_list = bi->bi_next; - bi->bi_next = NULL; - /* - * this sets the active strip count to 1 and the processed - * strip count to zero (upper 8 bits) - */ - bi->bi_phys_segments = 1; /* biased count of active stripes */ - } - - return bi; -} - - -/* - * The "raid5_align_endio" should check if the read succeeded and if it - * did, call bio_endio on the original bio (having bio_put the new bio - * first). - * If the read failed.. - */ -static void raid5_align_endio(struct bio *bi, int error) -{ - struct bio* raid_bi = bi->bi_private; - struct mddev *mddev; - struct r5conf *conf; - int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); - struct md_rdev *rdev; - - bio_put(bi); - - rdev = (void*)raid_bi->bi_next; - raid_bi->bi_next = NULL; - mddev = rdev->mddev; - conf = mddev->private; - - rdev_dec_pending(rdev, conf->mddev); - - if (!error && uptodate) { - bio_endio(raid_bi, 0); - if (atomic_dec_and_test(&conf->active_aligned_reads)) - wake_up(&conf->wait_for_stripe); - return; - } - - - pr_debug("raid5_align_endio : io error...handing IO for a retry\n"); - - add_bio_to_retry(raid_bi, conf); -} - -static int bio_fits_rdev(struct bio *bi) -{ - struct request_queue *q = bdev_get_queue(bi->bi_bdev); - - if ((bi->bi_size>>9) > queue_max_sectors(q)) - return 0; - blk_recount_segments(q, bi); - if (bi->bi_phys_segments > queue_max_segments(q)) - return 0; - - if (q->merge_bvec_fn) - /* it's too hard to apply the merge_bvec_fn at this stage, - * just just give up - */ - return 0; - - return 1; -} - - -static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) -{ - struct r5conf *conf = mddev->private; - int dd_idx; - struct bio* align_bi; - struct md_rdev *rdev; - sector_t end_sector; - - if (!in_chunk_boundary(mddev, raid_bio)) { - pr_debug("chunk_aligned_read : non aligned\n"); - return 0; - } - /* - * use bio_clone_mddev to make a copy of the bio - */ - align_bi = bio_clone_mddev(raid_bio, GFP_NOIO, mddev); - if (!align_bi) - return 0; - /* - * set bi_end_io to a new function, and set bi_private to the - * original bio. 
- */ - align_bi->bi_end_io = raid5_align_endio; - align_bi->bi_private = raid_bio; - /* - * compute position - */ - align_bi->bi_sector = raid5_compute_sector(conf, raid_bio->bi_sector, - 0, - &dd_idx, NULL); - - end_sector = align_bi->bi_sector + (align_bi->bi_size >> 9); - rcu_read_lock(); - rdev = rcu_dereference(conf->disks[dd_idx].replacement); - if (!rdev || test_bit(Faulty, &rdev->flags) || - rdev->recovery_offset < end_sector) { - rdev = rcu_dereference(conf->disks[dd_idx].rdev); - if (rdev && - (test_bit(Faulty, &rdev->flags) || - !(test_bit(In_sync, &rdev->flags) || - rdev->recovery_offset >= end_sector))) - rdev = NULL; - } - if (rdev) { - sector_t first_bad; - int bad_sectors; - - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - raid_bio->bi_next = (void*)rdev; - align_bi->bi_bdev = rdev->bdev; - align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); - - if (!bio_fits_rdev(align_bi) || - is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9, - &first_bad, &bad_sectors)) { - /* too big in some way, or has a known bad block */ - bio_put(align_bi); - rdev_dec_pending(rdev, mddev); - return 0; - } - - /* No reshape active, so we can trust rdev->data_offset */ - align_bi->bi_sector += rdev->data_offset; - - spin_lock_irq(&conf->device_lock); - wait_event_lock_irq(conf->wait_for_stripe, - conf->quiesce == 0, - conf->device_lock, /* nothing */); - atomic_inc(&conf->active_aligned_reads); - spin_unlock_irq(&conf->device_lock); - - generic_make_request(align_bi); - return 1; - } else { - rcu_read_unlock(); - bio_put(align_bi); - return 0; - } -} - -/* __get_priority_stripe - get the next stripe to process - * - * Full stripe writes are allowed to pass preread active stripes up until - * the bypass_threshold is exceeded. In general the bypass_count - * increments when the handle_list is handled before the hold_list; however, it - * will not be incremented when STRIPE_IO_STARTED is sampled set signifying a - * stripe with in flight i/o. The bypass_count will be reset when the - * head of the hold_list has changed, i.e. the head was promoted to the - * handle_list. - */ -static struct stripe_head *__get_priority_stripe(struct r5conf *conf) -{ - struct stripe_head *sh; - - pr_debug("%s: handle: %s hold: %s full_writes: %d bypass_count: %d\n", - __func__, - list_empty(&conf->handle_list) ? "empty" : "busy", - list_empty(&conf->hold_list) ? 
"empty" : "busy", - atomic_read(&conf->pending_full_writes), conf->bypass_count); - - if (!list_empty(&conf->handle_list)) { - sh = list_entry(conf->handle_list.next, typeof(*sh), lru); - - if (list_empty(&conf->hold_list)) - conf->bypass_count = 0; - else if (!test_bit(STRIPE_IO_STARTED, &sh->state)) { - if (conf->hold_list.next == conf->last_hold) - conf->bypass_count++; - else { - conf->last_hold = conf->hold_list.next; - conf->bypass_count -= conf->bypass_threshold; - if (conf->bypass_count < 0) - conf->bypass_count = 0; - } - } - } else if (!list_empty(&conf->hold_list) && - ((conf->bypass_threshold && - conf->bypass_count > conf->bypass_threshold) || - atomic_read(&conf->pending_full_writes) == 0)) { - sh = list_entry(conf->hold_list.next, - typeof(*sh), lru); - conf->bypass_count -= conf->bypass_threshold; - if (conf->bypass_count < 0) - conf->bypass_count = 0; - } else - return NULL; - - list_del_init(&sh->lru); - atomic_inc(&sh->count); - BUG_ON(atomic_read(&sh->count) != 1); - return sh; -} - -static void make_request(struct mddev *mddev, struct bio * bi) -{ - struct r5conf *conf = mddev->private; - int dd_idx; - sector_t new_sector; - sector_t logical_sector, last_sector; - struct stripe_head *sh; - const int rw = bio_data_dir(bi); - int remaining; - int plugged; - - if (unlikely(bi->bi_rw & REQ_FLUSH)) { - md_flush_request(mddev, bi); - return; - } - - md_write_start(mddev, bi); - - if (rw == READ && - mddev->reshape_position == MaxSector && - chunk_aligned_read(mddev,bi)) - return; - - logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); - last_sector = bi->bi_sector + (bi->bi_size>>9); - bi->bi_next = NULL; - bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ - - plugged = mddev_check_plugged(mddev); - for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { - DEFINE_WAIT(w); - int disks, data_disks; - int previous; - - retry: - previous = 0; - disks = conf->raid_disks; - prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); - if (unlikely(conf->reshape_progress != MaxSector)) { - /* spinlock is needed as reshape_progress may be - * 64bit on a 32bit platform, and so it might be - * possible to see a half-updated value - * Of course reshape_progress could change after - * the lock is dropped, so once we get a reference - * to the stripe that we think it is, we will have - * to check again. - */ - spin_lock_irq(&conf->device_lock); - if (mddev->delta_disks < 0 - ? logical_sector < conf->reshape_progress - : logical_sector >= conf->reshape_progress) { - disks = conf->previous_raid_disks; - previous = 1; - } else { - if (mddev->delta_disks < 0 - ? logical_sector < conf->reshape_safe - : logical_sector >= conf->reshape_safe) { - spin_unlock_irq(&conf->device_lock); - schedule(); - goto retry; - } - } - spin_unlock_irq(&conf->device_lock); - } - data_disks = disks - conf->max_degraded; - - new_sector = raid5_compute_sector(conf, logical_sector, - previous, - &dd_idx, NULL); - pr_debug("raid456: make_request, sector %llu logical %llu\n", - (unsigned long long)new_sector, - (unsigned long long)logical_sector); - - sh = get_active_stripe(conf, new_sector, previous, - (bi->bi_rw&RWA_MASK), 0); - if (sh) { - if (unlikely(previous)) { - /* expansion might have moved on while waiting for a - * stripe, so we must do the range check again. 
- * Expansion could still move past after this - * test, but as we are holding a reference to - * 'sh', we know that if that happens, - * STRIPE_EXPANDING will get set and the expansion - * won't proceed until we finish with the stripe. - */ - int must_retry = 0; - spin_lock_irq(&conf->device_lock); - if (mddev->delta_disks < 0 - ? logical_sector >= conf->reshape_progress - : logical_sector < conf->reshape_progress) - /* mismatch, need to try again */ - must_retry = 1; - spin_unlock_irq(&conf->device_lock); - if (must_retry) { - release_stripe(sh); - schedule(); - goto retry; - } - } - - if (rw == WRITE && - logical_sector >= mddev->suspend_lo && - logical_sector < mddev->suspend_hi) { - release_stripe(sh); - /* As the suspend_* range is controlled by - * userspace, we want an interruptible - * wait. - */ - flush_signals(current); - prepare_to_wait(&conf->wait_for_overlap, - &w, TASK_INTERRUPTIBLE); - if (logical_sector >= mddev->suspend_lo && - logical_sector < mddev->suspend_hi) - schedule(); - goto retry; - } - - if (test_bit(STRIPE_EXPANDING, &sh->state) || - !add_stripe_bio(sh, bi, dd_idx, rw)) { - /* Stripe is busy expanding or - * add failed due to overlap. Flush everything - * and wait a while - */ - md_wakeup_thread(mddev->thread); - release_stripe(sh); - schedule(); - goto retry; - } - finish_wait(&conf->wait_for_overlap, &w); - set_bit(STRIPE_HANDLE, &sh->state); - clear_bit(STRIPE_DELAYED, &sh->state); - if ((bi->bi_rw & REQ_SYNC) && - !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) - atomic_inc(&conf->preread_active_stripes); - release_stripe(sh); - } else { - /* cannot get stripe for read-ahead, just give-up */ - clear_bit(BIO_UPTODATE, &bi->bi_flags); - finish_wait(&conf->wait_for_overlap, &w); - break; - } - - } - if (!plugged) - md_wakeup_thread(mddev->thread); - - spin_lock_irq(&conf->device_lock); - remaining = raid5_dec_bi_phys_segments(bi); - spin_unlock_irq(&conf->device_lock); - if (remaining == 0) { - - if ( rw == WRITE ) - md_write_end(mddev); - - bio_endio(bi, 0); - } -} - -static sector_t raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks); - -static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *skipped) -{ - /* reshaping is quite different to recovery/resync so it is - * handled quite separately ... here. - * - * On each call to sync_request, we gather one chunk worth of - * destination stripes and flag them as expanding. - * Then we find all the source stripes and request reads. - * As the reads complete, handle_stripe will copy the data - * into the destination stripe and release that stripe. 
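To make the reshape description above concrete: each call works on one chunk's worth of destination stripes, so with an assumed 64K chunk and 4K stripe pages a pass expands 16 stripe_heads, and reshape_progress then advances by reshape_sectors multiplied by the new data-disk count. The following standalone sketch only restates that arithmetic; all geometry values are assumptions chosen for illustration.

#include <stdio.h>

int main(void)
{
    unsigned int stripe_sectors = 8;        /* one 4K stripe page in 512B sectors */
    unsigned int chunk_sectors = 128;       /* assumed 64K chunk                   */
    int new_data_disks = 5;                 /* raid_disks - max_degraded           */

    /* reshape works on the larger of the old and new chunk sizes;
     * here both are 64K, so one pass covers one chunk per device. */
    unsigned int reshape_sectors = chunk_sectors;

    printf("destination stripe_heads per pass: %u\n",
           reshape_sectors / stripe_sectors);
    printf("array sectors reshaped per pass:  %u\n",
           reshape_sectors * new_data_disks);
    return 0;
}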
- */ - struct r5conf *conf = mddev->private; - struct stripe_head *sh; - sector_t first_sector, last_sector; - int raid_disks = conf->previous_raid_disks; - int data_disks = raid_disks - conf->max_degraded; - int new_data_disks = conf->raid_disks - conf->max_degraded; - int i; - int dd_idx; - sector_t writepos, readpos, safepos; - sector_t stripe_addr; - int reshape_sectors; - struct list_head stripes; - - if (sector_nr == 0) { - /* If restarting in the middle, skip the initial sectors */ - if (mddev->delta_disks < 0 && - conf->reshape_progress < raid5_size(mddev, 0, 0)) { - sector_nr = raid5_size(mddev, 0, 0) - - conf->reshape_progress; - } else if (mddev->delta_disks >= 0 && - conf->reshape_progress > 0) - sector_nr = conf->reshape_progress; - sector_div(sector_nr, new_data_disks); - if (sector_nr) { - mddev->curr_resync_completed = sector_nr; - sysfs_notify(&mddev->kobj, NULL, "sync_completed"); - *skipped = 1; - return sector_nr; - } - } - - /* We need to process a full chunk at a time. - * If old and new chunk sizes differ, we need to process the - * largest of these - */ - if (mddev->new_chunk_sectors > mddev->chunk_sectors) - reshape_sectors = mddev->new_chunk_sectors; - else - reshape_sectors = mddev->chunk_sectors; - - /* we update the metadata when there is more than 3Meg - * in the block range (that is rather arbitrary, should - * probably be time based) or when the data about to be - * copied would over-write the source of the data at - * the front of the range. - * i.e. one new_stripe along from reshape_progress new_maps - * to after where reshape_safe old_maps to - */ - writepos = conf->reshape_progress; - sector_div(writepos, new_data_disks); - readpos = conf->reshape_progress; - sector_div(readpos, data_disks); - safepos = conf->reshape_safe; - sector_div(safepos, data_disks); - if (mddev->delta_disks < 0) { - writepos -= min_t(sector_t, reshape_sectors, writepos); - readpos += reshape_sectors; - safepos += reshape_sectors; - } else { - writepos += reshape_sectors; - readpos -= min_t(sector_t, reshape_sectors, readpos); - safepos -= min_t(sector_t, reshape_sectors, safepos); - } - - /* 'writepos' is the most advanced device address we might write. - * 'readpos' is the least advanced device address we might read. - * 'safepos' is the least address recorded in the metadata as having - * been reshaped. - * If 'readpos' is behind 'writepos', then there is no way that we can - * ensure safety in the face of a crash - that must be done by userspace - * making a backup of the data. So in that case there is no particular - * rush to update metadata. - * Otherwise if 'safepos' is behind 'writepos', then we really need to - * update the metadata to advance 'safepos' to match 'readpos' so that - * we can be safe in the event of a crash. - * So we insist on updating metadata if safepos is behind writepos and - * readpos is beyond writepos. - * In any case, update the metadata every 10 seconds. - * Maybe that number should be configurable, but I'm not sure it is - * worth it.... maybe it could be a multiple of safemode_delay??? - */ - if ((mddev->delta_disks < 0 - ? (safepos > writepos && readpos < writepos) - : (safepos < writepos && readpos > writepos)) || - time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { - /* Cannot proceed until we've updated the superblock... 
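The checkpoint rule reasoned through above can be read in isolation: the superblock is updated (by the code that follows) when safepos has fallen behind writepos while readpos has moved past it, with the comparison direction depending on whether the array is growing or shrinking, or simply on a roughly 10-second timer. Below is a minimal sketch of just that predicate; plain integers stand in for the sector_t fields, the names mirror the code, and the jiffies timeout is reduced to a flag.

#include <stdio.h>

/* Decide whether reshape_request() must checkpoint the metadata before
 * copying more data. 'growing' corresponds to delta_disks >= 0; the
 * 10-second jiffies test is represented by 'timer_expired'. */
static int need_checkpoint(int growing,
                           long long writepos, long long readpos,
                           long long safepos, int timer_expired)
{
    int unsafe;

    if (growing)
        /* growing: data is copied towards higher device addresses */
        unsafe = (safepos < writepos && readpos > writepos);
    else
        /* shrinking: data is copied towards lower device addresses */
        unsafe = (safepos > writepos && readpos < writepos);

    return unsafe || timer_expired;
}

int main(void)
{
    /* About to overwrite data the recorded 'safe' position does not yet
     * cover, and the read side is already past it: must checkpoint. */
    printf("%d\n", need_checkpoint(1, 2048, 2304, 1792, 0)); /* prints 1 */
    /* readpos still behind writepos: a crash is covered by the
     * user-space backup, so no urgent metadata update. */
    printf("%d\n", need_checkpoint(1, 2048, 1792, 1792, 0)); /* prints 0 */
    return 0;
}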
*/ - wait_event(conf->wait_for_overlap, - atomic_read(&conf->reshape_stripes)==0); - mddev->reshape_position = conf->reshape_progress; - mddev->curr_resync_completed = sector_nr; - conf->reshape_checkpoint = jiffies; - set_bit(MD_CHANGE_DEVS, &mddev->flags); - md_wakeup_thread(mddev->thread); - wait_event(mddev->sb_wait, mddev->flags == 0 || - kthread_should_stop()); - spin_lock_irq(&conf->device_lock); - conf->reshape_safe = mddev->reshape_position; - spin_unlock_irq(&conf->device_lock); - wake_up(&conf->wait_for_overlap); - sysfs_notify(&mddev->kobj, NULL, "sync_completed"); - } - - if (mddev->delta_disks < 0) { - BUG_ON(conf->reshape_progress == 0); - stripe_addr = writepos; - BUG_ON((mddev->dev_sectors & - ~((sector_t)reshape_sectors - 1)) - - reshape_sectors - stripe_addr - != sector_nr); - } else { - BUG_ON(writepos != sector_nr + reshape_sectors); - stripe_addr = sector_nr; - } - INIT_LIST_HEAD(&stripes); - for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) { - int j; - int skipped_disk = 0; - sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1); - set_bit(STRIPE_EXPANDING, &sh->state); - atomic_inc(&conf->reshape_stripes); - /* If any of this stripe is beyond the end of the old - * array, then we need to zero those blocks - */ - for (j=sh->disks; j--;) { - sector_t s; - if (j == sh->pd_idx) - continue; - if (conf->level == 6 && - j == sh->qd_idx) - continue; - s = compute_blocknr(sh, j, 0); - if (s < raid5_size(mddev, 0, 0)) { - skipped_disk = 1; - continue; - } - memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE); - set_bit(R5_Expanded, &sh->dev[j].flags); - set_bit(R5_UPTODATE, &sh->dev[j].flags); - } - if (!skipped_disk) { - set_bit(STRIPE_EXPAND_READY, &sh->state); - set_bit(STRIPE_HANDLE, &sh->state); - } - list_add(&sh->lru, &stripes); - } - spin_lock_irq(&conf->device_lock); - if (mddev->delta_disks < 0) - conf->reshape_progress -= reshape_sectors * new_data_disks; - else - conf->reshape_progress += reshape_sectors * new_data_disks; - spin_unlock_irq(&conf->device_lock); - /* Ok, those stripe are ready. We can start scheduling - * reads on the source stripes. - * The source stripes are determined by mapping the first and last - * block on the destination stripes. - */ - first_sector = - raid5_compute_sector(conf, stripe_addr*(new_data_disks), - 1, &dd_idx, NULL); - last_sector = - raid5_compute_sector(conf, ((stripe_addr+reshape_sectors) - * new_data_disks - 1), - 1, &dd_idx, NULL); - if (last_sector >= mddev->dev_sectors) - last_sector = mddev->dev_sectors - 1; - while (first_sector <= last_sector) { - sh = get_active_stripe(conf, first_sector, 1, 0, 1); - set_bit(STRIPE_EXPAND_SOURCE, &sh->state); - set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); - first_sector += STRIPE_SECTORS; - } - /* Now that the sources are clearly marked, we can release - * the destination stripes - */ - while (!list_empty(&stripes)) { - sh = list_entry(stripes.next, struct stripe_head, lru); - list_del_init(&sh->lru); - release_stripe(sh); - } - /* If this takes us to the resync_max point where we have to pause, - * then we need to write out the superblock. - */ - sector_nr += reshape_sectors; - if ((sector_nr - mddev->curr_resync_completed) * 2 - >= mddev->resync_max - mddev->curr_resync_completed) { - /* Cannot proceed until we've updated the superblock... 
*/ - wait_event(conf->wait_for_overlap, - atomic_read(&conf->reshape_stripes) == 0); - mddev->reshape_position = conf->reshape_progress; - mddev->curr_resync_completed = sector_nr; - conf->reshape_checkpoint = jiffies; - set_bit(MD_CHANGE_DEVS, &mddev->flags); - md_wakeup_thread(mddev->thread); - wait_event(mddev->sb_wait, - !test_bit(MD_CHANGE_DEVS, &mddev->flags) - || kthread_should_stop()); - spin_lock_irq(&conf->device_lock); - conf->reshape_safe = mddev->reshape_position; - spin_unlock_irq(&conf->device_lock); - wake_up(&conf->wait_for_overlap); - sysfs_notify(&mddev->kobj, NULL, "sync_completed"); - } - return reshape_sectors; -} - -/* FIXME go_faster isn't used */ -static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped, int go_faster) -{ - struct r5conf *conf = mddev->private; - struct stripe_head *sh; - sector_t max_sector = mddev->dev_sectors; - sector_t sync_blocks; - int still_degraded = 0; - int i; - - if (sector_nr >= max_sector) { - /* just being told to finish up .. nothing much to do */ - - if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) { - end_reshape(conf); - return 0; - } - - if (mddev->curr_resync < max_sector) /* aborted */ - bitmap_end_sync(mddev->bitmap, mddev->curr_resync, - &sync_blocks, 1); - else /* completed sync */ - conf->fullsync = 0; - bitmap_close_sync(mddev->bitmap); - - return 0; - } - - /* Allow raid5_quiesce to complete */ - wait_event(conf->wait_for_overlap, conf->quiesce != 2); - - if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) - return reshape_request(mddev, sector_nr, skipped); - - /* No need to check resync_max as we never do more than one - * stripe, and as resync_max will always be on a chunk boundary, - * if the check in md_do_sync didn't fire, there is no chance - * of overstepping resync_max here - */ - - /* if there is too many failed drives and we are trying - * to resync, then assert that we are finished, because there is - * nothing we can do. - */ - if (mddev->degraded >= conf->max_degraded && - test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { - sector_t rv = mddev->dev_sectors - sector_nr; - *skipped = 1; - return rv; - } - if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && - !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && - !conf->fullsync && sync_blocks >= STRIPE_SECTORS) { - /* we can skip this block, and probably more */ - sync_blocks /= STRIPE_SECTORS; - *skipped = 1; - return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */ - } - - bitmap_cond_end_sync(mddev->bitmap, sector_nr); - - sh = get_active_stripe(conf, sector_nr, 0, 1, 0); - if (sh == NULL) { - sh = get_active_stripe(conf, sector_nr, 0, 0, 0); - /* make sure we don't swamp the stripe cache if someone else - * is trying to get access - */ - schedule_timeout_uninterruptible(1); - } - /* Need to check if array will still be degraded after recovery/resync - * We don't need to check the 'failed' flag as when that gets set, - * recovery aborts. - */ - for (i = 0; i < conf->raid_disks; i++) - if (conf->disks[i].rdev == NULL) - still_degraded = 1; - - bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded); - - set_bit(STRIPE_SYNC_REQUESTED, &sh->state); - - handle_stripe(sh); - release_stripe(sh); - - return STRIPE_SECTORS; -} - -static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) -{ - /* We may not be able to submit a whole bio at once as there - * may not be enough stripe_heads available. 
- * We cannot pre-allocate enough stripe_heads as we may need - * more than exist in the cache (if we allow ever large chunks). - * So we do one stripe head at a time and record in - * ->bi_hw_segments how many have been done. - * - * We *know* that this entire raid_bio is in one chunk, so - * it will be only one 'dd_idx' and only need one call to raid5_compute_sector. - */ - struct stripe_head *sh; - int dd_idx; - sector_t sector, logical_sector, last_sector; - int scnt = 0; - int remaining; - int handled = 0; - - logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1); - sector = raid5_compute_sector(conf, logical_sector, - 0, &dd_idx, NULL); - last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9); - - for (; logical_sector < last_sector; - logical_sector += STRIPE_SECTORS, - sector += STRIPE_SECTORS, - scnt++) { - - if (scnt < raid5_bi_hw_segments(raid_bio)) - /* already done this stripe */ - continue; - - sh = get_active_stripe(conf, sector, 0, 1, 0); - - if (!sh) { - /* failed to get a stripe - must wait */ - raid5_set_bi_hw_segments(raid_bio, scnt); - conf->retry_read_aligned = raid_bio; - return handled; - } - - if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { - release_stripe(sh); - raid5_set_bi_hw_segments(raid_bio, scnt); - conf->retry_read_aligned = raid_bio; - return handled; - } - - handle_stripe(sh); - release_stripe(sh); - handled++; - } - spin_lock_irq(&conf->device_lock); - remaining = raid5_dec_bi_phys_segments(raid_bio); - spin_unlock_irq(&conf->device_lock); - if (remaining == 0) - bio_endio(raid_bio, 0); - if (atomic_dec_and_test(&conf->active_aligned_reads)) - wake_up(&conf->wait_for_stripe); - return handled; -} - - -/* - * This is our raid5 kernel thread. - * - * We scan the hash table for stripes which can be handled now. - * During the scan, completed stripes are saved for us by the interrupt - * handler, so that they will not have to wait for our next wakeup. 
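retry_aligned_read() above resumes an interrupted bio by recording, in the over-loaded ->bi_hw_segments field, how many stripes have already been handled, and returning early when a stripe_head cannot be obtained. The toy sketch below shows only that cursor-plus-early-return pattern; try_handle_unit, process and the budget counter are invented stand-ins, not part of the driver.

#include <stdio.h>

#define NUNITS 6

/* A resource shortage stops us after 'budget' units, the way a missing
 * stripe_head stops retry_aligned_read(). */
static int try_handle_unit(int unit, int *budget)
{
    if (*budget == 0)
        return 0;               /* "failed to get a stripe - must wait" */
    (*budget)--;
    printf("handled unit %d\n", unit);
    return 1;
}

/* Process units starting from *cursor; on failure, record where to
 * resume (the ->bi_hw_segments role) and report how many were done. */
static int process(int *cursor, int *budget)
{
    int handled = 0;
    int i;

    for (i = 0; i < NUNITS; i++) {
        if (i < *cursor)
            continue;           /* already done on an earlier pass */
        if (!try_handle_unit(i, budget)) {
            *cursor = i;        /* resume point for the next pass */
            return handled;
        }
        handled++;
    }
    *cursor = NUNITS;
    return handled;
}

int main(void)
{
    int cursor = 0, budget = 4;

    process(&cursor, &budget);  /* handles units 0-3, stops at 4 */
    budget = 4;                 /* resources available again      */
    process(&cursor, &budget);  /* resumes at unit 4              */
    return 0;
}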
- */ -static void raid5d(struct mddev *mddev) -{ - struct stripe_head *sh; - struct r5conf *conf = mddev->private; - int handled; - struct blk_plug plug; - - pr_debug("+++ raid5d active\n"); - - md_check_recovery(mddev); - - blk_start_plug(&plug); - handled = 0; - spin_lock_irq(&conf->device_lock); - while (1) { - struct bio *bio; - - if (atomic_read(&mddev->plug_cnt) == 0 && - !list_empty(&conf->bitmap_list)) { - /* Now is a good time to flush some bitmap updates */ - conf->seq_flush++; - spin_unlock_irq(&conf->device_lock); - bitmap_unplug(mddev->bitmap); - spin_lock_irq(&conf->device_lock); - conf->seq_write = conf->seq_flush; - activate_bit_delay(conf); - } - if (atomic_read(&mddev->plug_cnt) == 0) - raid5_activate_delayed(conf); - - while ((bio = remove_bio_from_retry(conf))) { - int ok; - spin_unlock_irq(&conf->device_lock); - ok = retry_aligned_read(conf, bio); - spin_lock_irq(&conf->device_lock); - if (!ok) - break; - handled++; - } - - sh = __get_priority_stripe(conf); - - if (!sh) - break; - spin_unlock_irq(&conf->device_lock); - - handled++; - handle_stripe(sh); - release_stripe(sh); - cond_resched(); - - if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) - md_check_recovery(mddev); - - spin_lock_irq(&conf->device_lock); - } - pr_debug("%d stripes handled\n", handled); - - spin_unlock_irq(&conf->device_lock); - - async_tx_issue_pending_all(); - blk_finish_plug(&plug); - - pr_debug("--- raid5d inactive\n"); -} - -static ssize_t -raid5_show_stripe_cache_size(struct mddev *mddev, char *page) -{ - struct r5conf *conf = mddev->private; - if (conf) - return sprintf(page, "%d\n", conf->max_nr_stripes); - else - return 0; -} - -int -raid5_set_cache_size(struct mddev *mddev, int size) -{ - struct r5conf *conf = mddev->private; - int err; - - if (size <= 16 || size > 32768) - return -EINVAL; - while (size < conf->max_nr_stripes) { - if (drop_one_stripe(conf)) - conf->max_nr_stripes--; - else - break; - } - err = md_allow_write(mddev); - if (err) - return err; - while (size > conf->max_nr_stripes) { - if (grow_one_stripe(conf)) - conf->max_nr_stripes++; - else break; - } - return 0; -} -EXPORT_SYMBOL(raid5_set_cache_size); - -static ssize_t -raid5_store_stripe_cache_size(struct mddev *mddev, const char *page, size_t len) -{ - struct r5conf *conf = mddev->private; - unsigned long new; - int err; - - if (len >= PAGE_SIZE) - return -EINVAL; - if (!conf) - return -ENODEV; - - if (strict_strtoul(page, 10, &new)) - return -EINVAL; - err = raid5_set_cache_size(mddev, new); - if (err) - return err; - return len; -} - -static struct md_sysfs_entry -raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR, - raid5_show_stripe_cache_size, - raid5_store_stripe_cache_size); - -static ssize_t -raid5_show_preread_threshold(struct mddev *mddev, char *page) -{ - struct r5conf *conf = mddev->private; - if (conf) - return sprintf(page, "%d\n", conf->bypass_threshold); - else - return 0; -} - -static ssize_t -raid5_store_preread_threshold(struct mddev *mddev, const char *page, size_t len) -{ - struct r5conf *conf = mddev->private; - unsigned long new; - if (len >= PAGE_SIZE) - return -EINVAL; - if (!conf) - return -ENODEV; - - if (strict_strtoul(page, 10, &new)) - return -EINVAL; - if (new > conf->max_nr_stripes) - return -EINVAL; - conf->bypass_threshold = new; - return len; -} - -static struct md_sysfs_entry -raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold, - S_IRUGO | S_IWUSR, - raid5_show_preread_threshold, - raid5_store_preread_threshold); - -static ssize_t 
-stripe_cache_active_show(struct mddev *mddev, char *page) -{ - struct r5conf *conf = mddev->private; - if (conf) - return sprintf(page, "%d\n", atomic_read(&conf->active_stripes)); - else - return 0; -} - -static struct md_sysfs_entry -raid5_stripecache_active = __ATTR_RO(stripe_cache_active); - -static struct attribute *raid5_attrs[] = { - &raid5_stripecache_size.attr, - &raid5_stripecache_active.attr, - &raid5_preread_bypass_threshold.attr, - NULL, -}; -static struct attribute_group raid5_attrs_group = { - .name = NULL, - .attrs = raid5_attrs, -}; - -static sector_t -raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks) -{ - struct r5conf *conf = mddev->private; - - if (!sectors) - sectors = mddev->dev_sectors; - if (!raid_disks) - /* size is defined by the smallest of previous and new size */ - raid_disks = min(conf->raid_disks, conf->previous_raid_disks); - - sectors &= ~((sector_t)mddev->chunk_sectors - 1); - sectors &= ~((sector_t)mddev->new_chunk_sectors - 1); - return sectors * (raid_disks - conf->max_degraded); -} - -static void raid5_free_percpu(struct r5conf *conf) -{ - struct raid5_percpu *percpu; - unsigned long cpu; - - if (!conf->percpu) - return; - - get_online_cpus(); - for_each_possible_cpu(cpu) { - percpu = per_cpu_ptr(conf->percpu, cpu); - safe_put_page(percpu->spare_page); - kfree(percpu->scribble); - } -#ifdef CONFIG_HOTPLUG_CPU - unregister_cpu_notifier(&conf->cpu_notify); -#endif - put_online_cpus(); - - free_percpu(conf->percpu); -} - -static void free_conf(struct r5conf *conf) -{ - shrink_stripes(conf); - raid5_free_percpu(conf); - kfree(conf->disks); - kfree(conf->stripe_hashtbl); - kfree(conf); -} - -#ifdef CONFIG_HOTPLUG_CPU -static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action, - void *hcpu) -{ - struct r5conf *conf = container_of(nfb, struct r5conf, cpu_notify); - long cpu = (long)hcpu; - struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu); - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - if (conf->level == 6 && !percpu->spare_page) - percpu->spare_page = alloc_page(GFP_KERNEL); - if (!percpu->scribble) - percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL); - - if (!percpu->scribble || - (conf->level == 6 && !percpu->spare_page)) { - safe_put_page(percpu->spare_page); - kfree(percpu->scribble); - pr_err("%s: failed memory allocation for cpu%ld\n", - __func__, cpu); - return notifier_from_errno(-ENOMEM); - } - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - safe_put_page(percpu->spare_page); - kfree(percpu->scribble); - percpu->spare_page = NULL; - percpu->scribble = NULL; - break; - default: - break; - } - return NOTIFY_OK; -} -#endif - -static int raid5_alloc_percpu(struct r5conf *conf) -{ - unsigned long cpu; - struct page *spare_page; - struct raid5_percpu __percpu *allcpus; - void *scribble; - int err; - - allcpus = alloc_percpu(struct raid5_percpu); - if (!allcpus) - return -ENOMEM; - conf->percpu = allcpus; - - get_online_cpus(); - err = 0; - for_each_present_cpu(cpu) { - if (conf->level == 6) { - spare_page = alloc_page(GFP_KERNEL); - if (!spare_page) { - err = -ENOMEM; - break; - } - per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page; - } - scribble = kmalloc(conf->scribble_len, GFP_KERNEL); - if (!scribble) { - err = -ENOMEM; - break; - } - per_cpu_ptr(conf->percpu, cpu)->scribble = scribble; - } -#ifdef CONFIG_HOTPLUG_CPU - conf->cpu_notify.notifier_call = raid456_cpu_notify; - conf->cpu_notify.priority = 0; - if (err == 0) - err = 
register_cpu_notifier(&conf->cpu_notify); -#endif - put_online_cpus(); - - return err; -} - -static struct r5conf *setup_conf(struct mddev *mddev) -{ - struct r5conf *conf; - int raid_disk, memory, max_disks; - struct md_rdev *rdev; - struct disk_info *disk; - - if (mddev->new_level != 5 - && mddev->new_level != 4 - && mddev->new_level != 6) { - printk(KERN_ERR "md/raid:%s: raid level not set to 4/5/6 (%d)\n", - mdname(mddev), mddev->new_level); - return ERR_PTR(-EIO); - } - if ((mddev->new_level == 5 - && !algorithm_valid_raid5(mddev->new_layout)) || - (mddev->new_level == 6 - && !algorithm_valid_raid6(mddev->new_layout))) { - printk(KERN_ERR "md/raid:%s: layout %d not supported\n", - mdname(mddev), mddev->new_layout); - return ERR_PTR(-EIO); - } - if (mddev->new_level == 6 && mddev->raid_disks < 4) { - printk(KERN_ERR "md/raid:%s: not enough configured devices (%d, minimum 4)\n", - mdname(mddev), mddev->raid_disks); - return ERR_PTR(-EINVAL); - } - - if (!mddev->new_chunk_sectors || - (mddev->new_chunk_sectors << 9) % PAGE_SIZE || - !is_power_of_2(mddev->new_chunk_sectors)) { - printk(KERN_ERR "md/raid:%s: invalid chunk size %d\n", - mdname(mddev), mddev->new_chunk_sectors << 9); - return ERR_PTR(-EINVAL); - } - - conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL); - if (conf == NULL) - goto abort; - spin_lock_init(&conf->device_lock); - init_waitqueue_head(&conf->wait_for_stripe); - init_waitqueue_head(&conf->wait_for_overlap); - INIT_LIST_HEAD(&conf->handle_list); - INIT_LIST_HEAD(&conf->hold_list); - INIT_LIST_HEAD(&conf->delayed_list); - INIT_LIST_HEAD(&conf->bitmap_list); - INIT_LIST_HEAD(&conf->inactive_list); - atomic_set(&conf->active_stripes, 0); - atomic_set(&conf->preread_active_stripes, 0); - atomic_set(&conf->active_aligned_reads, 0); - conf->bypass_threshold = BYPASS_THRESHOLD; - conf->recovery_disabled = mddev->recovery_disabled - 1; - - conf->raid_disks = mddev->raid_disks; - if (mddev->reshape_position == MaxSector) - conf->previous_raid_disks = mddev->raid_disks; - else - conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; - max_disks = max(conf->raid_disks, conf->previous_raid_disks); - conf->scribble_len = scribble_len(max_disks); - - conf->disks = kzalloc(max_disks * sizeof(struct disk_info), - GFP_KERNEL); - if (!conf->disks) - goto abort; - - conf->mddev = mddev; - - if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) - goto abort; - - conf->level = mddev->new_level; - if (raid5_alloc_percpu(conf) != 0) - goto abort; - - pr_debug("raid456: run(%s) called.\n", mdname(mddev)); - - rdev_for_each(rdev, mddev) { - raid_disk = rdev->raid_disk; - if (raid_disk >= max_disks - || raid_disk < 0) - continue; - disk = conf->disks + raid_disk; - - if (test_bit(Replacement, &rdev->flags)) { - if (disk->replacement) - goto abort; - disk->replacement = rdev; - } else { - if (disk->rdev) - goto abort; - disk->rdev = rdev; - } - - if (test_bit(In_sync, &rdev->flags)) { - char b[BDEVNAME_SIZE]; - printk(KERN_INFO "md/raid:%s: device %s operational as raid" - " disk %d\n", - mdname(mddev), bdevname(rdev->bdev, b), raid_disk); - } else if (rdev->saved_raid_disk != raid_disk) - /* Cannot rely on bitmap to complete recovery */ - conf->fullsync = 1; - } - - conf->chunk_sectors = mddev->new_chunk_sectors; - conf->level = mddev->new_level; - if (conf->level == 6) - conf->max_degraded = 2; - else - conf->max_degraded = 1; - conf->algorithm = mddev->new_layout; - conf->max_nr_stripes = NR_STRIPES; - conf->reshape_progress = mddev->reshape_position; - if 
(conf->reshape_progress != MaxSector) { - conf->prev_chunk_sectors = mddev->chunk_sectors; - conf->prev_algo = mddev->layout; - } - - memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + - max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; - if (grow_stripes(conf, conf->max_nr_stripes)) { - printk(KERN_ERR - "md/raid:%s: couldn't allocate %dkB for buffers\n", - mdname(mddev), memory); - goto abort; - } else - printk(KERN_INFO "md/raid:%s: allocated %dkB\n", - mdname(mddev), memory); - - conf->thread = md_register_thread(raid5d, mddev, NULL); - if (!conf->thread) { - printk(KERN_ERR - "md/raid:%s: couldn't allocate thread.\n", - mdname(mddev)); - goto abort; - } - - return conf; - - abort: - if (conf) { - free_conf(conf); - return ERR_PTR(-EIO); - } else - return ERR_PTR(-ENOMEM); -} - - -static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded) -{ - switch (algo) { - case ALGORITHM_PARITY_0: - if (raid_disk < max_degraded) - return 1; - break; - case ALGORITHM_PARITY_N: - if (raid_disk >= raid_disks - max_degraded) - return 1; - break; - case ALGORITHM_PARITY_0_6: - if (raid_disk == 0 || - raid_disk == raid_disks - 1) - return 1; - break; - case ALGORITHM_LEFT_ASYMMETRIC_6: - case ALGORITHM_RIGHT_ASYMMETRIC_6: - case ALGORITHM_LEFT_SYMMETRIC_6: - case ALGORITHM_RIGHT_SYMMETRIC_6: - if (raid_disk == raid_disks - 1) - return 1; - } - return 0; -} - -static int run(struct mddev *mddev) -{ - struct r5conf *conf; - int working_disks = 0; - int dirty_parity_disks = 0; - struct md_rdev *rdev; - sector_t reshape_offset = 0; - int i; - - if (mddev->recovery_cp != MaxSector) - printk(KERN_NOTICE "md/raid:%s: not clean" - " -- starting background reconstruction\n", - mdname(mddev)); - if (mddev->reshape_position != MaxSector) { - /* Check that we can continue the reshape. - * Currently only disks can change, it must - * increase, and we must be past the point where - * a stripe over-writes itself - */ - sector_t here_new, here_old; - int old_disks; - int max_degraded = (mddev->level == 6 ? 2 : 1); - - if (mddev->new_level != mddev->level) { - printk(KERN_ERR "md/raid:%s: unsupported reshape " - "required - aborting.\n", - mdname(mddev)); - return -EINVAL; - } - old_disks = mddev->raid_disks - mddev->delta_disks; - /* reshape_position must be on a new-stripe boundary, and one - * further up in new geometry must map after here in old - * geometry. - */ - here_new = mddev->reshape_position; - if (sector_div(here_new, mddev->new_chunk_sectors * - (mddev->raid_disks - max_degraded))) { - printk(KERN_ERR "md/raid:%s: reshape_position not " - "on a stripe boundary\n", mdname(mddev)); - return -EINVAL; - } - reshape_offset = here_new * mddev->new_chunk_sectors; - /* here_new is the stripe we will write to */ - here_old = mddev->reshape_position; - sector_div(here_old, mddev->chunk_sectors * - (old_disks-max_degraded)); - /* here_old is the first stripe that we might need to read - * from */ - if (mddev->delta_disks == 0) { - /* We cannot be sure it is safe to start an in-place - * reshape. It is only safe if user-space if monitoring - * and taking constant backups. - * mdadm always starts a situation like this in - * readonly mode so it can take control before - * allowing any writes. So just check for that. 
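The boundary arithmetic run() applies just above can be worked through with made-up geometry. The sketch below assumes a 5-disk raid5 growing to 6 disks with 64K chunks; all numbers are illustrative, and the safety comparison restates the "reading from the same stripe as writing to" check that follows in the code.

#include <stdio.h>

int main(void)
{
    /* Hypothetical reshape: 5-disk raid5 growing to 6 disks, 128-sector
     * (64K) chunks, chunk size unchanged. */
    unsigned long long reshape_position = 7680;   /* from the superblock */
    unsigned int chunk_sectors = 128, new_chunk_sectors = 128;
    int raid_disks = 6, old_disks = 5, max_degraded = 1;

    unsigned long long new_stripe = new_chunk_sectors *
                    (unsigned long long)(raid_disks - max_degraded);
    unsigned long long old_stripe = chunk_sectors *
                    (unsigned long long)(old_disks - max_degraded);

    /* reshape_position must sit exactly on a new-geometry stripe boundary */
    if (reshape_position % new_stripe) {
        printf("not on a stripe boundary - cannot continue reshape\n");
        return 1;
    }
    unsigned long long here_new = reshape_position / new_stripe; /* stripe we write to   */
    unsigned long long here_old = reshape_position / old_stripe; /* first stripe we read */

    printf("here_new = %llu stripes (new layout), here_old = %llu stripes (old layout)\n",
           here_new, here_old);
    /* Growing: the new-layout write offset must still be below the
     * old-layout read offset, or writes would clobber unread data. */
    printf("safe to auto-continue: %s\n",
           here_new * new_chunk_sectors < here_old * chunk_sectors ? "yes" : "no");
    return 0;
}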
- */ - if ((here_new * mddev->new_chunk_sectors != - here_old * mddev->chunk_sectors) || - mddev->ro == 0) { - printk(KERN_ERR "md/raid:%s: in-place reshape must be started" - " in read-only mode - aborting\n", - mdname(mddev)); - return -EINVAL; - } - } else if (mddev->delta_disks < 0 - ? (here_new * mddev->new_chunk_sectors <= - here_old * mddev->chunk_sectors) - : (here_new * mddev->new_chunk_sectors >= - here_old * mddev->chunk_sectors)) { - /* Reading from the same stripe as writing to - bad */ - printk(KERN_ERR "md/raid:%s: reshape_position too early for " - "auto-recovery - aborting.\n", - mdname(mddev)); - return -EINVAL; - } - printk(KERN_INFO "md/raid:%s: reshape will continue\n", - mdname(mddev)); - /* OK, we should be able to continue; */ - } else { - BUG_ON(mddev->level != mddev->new_level); - BUG_ON(mddev->layout != mddev->new_layout); - BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors); - BUG_ON(mddev->delta_disks != 0); - } - - if (mddev->private == NULL) - conf = setup_conf(mddev); - else - conf = mddev->private; - - if (IS_ERR(conf)) - return PTR_ERR(conf); - - mddev->thread = conf->thread; - conf->thread = NULL; - mddev->private = conf; - - for (i = 0; i < conf->raid_disks && conf->previous_raid_disks; - i++) { - rdev = conf->disks[i].rdev; - if (!rdev && conf->disks[i].replacement) { - /* The replacement is all we have yet */ - rdev = conf->disks[i].replacement; - conf->disks[i].replacement = NULL; - clear_bit(Replacement, &rdev->flags); - conf->disks[i].rdev = rdev; - } - if (!rdev) - continue; - if (conf->disks[i].replacement && - conf->reshape_progress != MaxSector) { - /* replacements and reshape simply do not mix. */ - printk(KERN_ERR "md: cannot handle concurrent " - "replacement and reshape.\n"); - goto abort; - } - if (test_bit(In_sync, &rdev->flags)) { - working_disks++; - continue; - } - /* This disc is not fully in-sync. However if it - * just stored parity (beyond the recovery_offset), - * when we don't need to be concerned about the - * array being dirty. - * When reshape goes 'backwards', we never have - * partially completed devices, so we only need - * to worry about reshape going forwards. - */ - /* Hack because v0.91 doesn't store recovery_offset properly. */ - if (mddev->major_version == 0 && - mddev->minor_version > 90) - rdev->recovery_offset = reshape_offset; - - if (rdev->recovery_offset < reshape_offset) { - /* We need to check old and new layout */ - if (!only_parity(rdev->raid_disk, - conf->algorithm, - conf->raid_disks, - conf->max_degraded)) - continue; - } - if (!only_parity(rdev->raid_disk, - conf->prev_algo, - conf->previous_raid_disks, - conf->max_degraded)) - continue; - dirty_parity_disks++; - } - - /* - * 0 for a fully functional array, 1 or 2 for a degraded array. 
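The comment above ("0 for a fully functional array, 1 or 2 for a degraded array"), together with the max_degraded values set in setup_conf(), gives the start-up rule checked just below: the set can run only while the number of missing or out-of-sync members stays within max_degraded. The toy restatement that follows is not the kernel's calc_degraded()/has_failed(), which additionally account for an in-progress reshape.

#include <stdio.h>

/* Simplified start-up rule: raid4/5 survive one missing member, raid6
 * survives two. The real calc_degraded()/has_failed() also consider
 * both geometries while a reshape is running. */
static int can_run(int level, int degraded)
{
    int max_degraded = (level == 6) ? 2 : 1;
    return degraded <= max_degraded;
}

int main(void)
{
    printf("raid5, 1 missing disk:  %s\n", can_run(5, 1) ? "runs (degraded)" : "fails");
    printf("raid5, 2 missing disks: %s\n", can_run(5, 2) ? "runs (degraded)" : "fails");
    printf("raid6, 2 missing disks: %s\n", can_run(6, 2) ? "runs (degraded)" : "fails");
    return 0;
}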
- */ - mddev->degraded = calc_degraded(conf); - - if (has_failed(conf)) { - printk(KERN_ERR "md/raid:%s: not enough operational devices" - " (%d/%d failed)\n", - mdname(mddev), mddev->degraded, conf->raid_disks); - goto abort; - } - - /* device size must be a multiple of chunk size */ - mddev->dev_sectors &= ~(mddev->chunk_sectors - 1); - mddev->resync_max_sectors = mddev->dev_sectors; - - if (mddev->degraded > dirty_parity_disks && - mddev->recovery_cp != MaxSector) { - if (mddev->ok_start_degraded) - printk(KERN_WARNING - "md/raid:%s: starting dirty degraded array" - " - data corruption possible.\n", - mdname(mddev)); - else { - printk(KERN_ERR - "md/raid:%s: cannot start dirty degraded array.\n", - mdname(mddev)); - goto abort; - } - } - - if (mddev->degraded == 0) - printk(KERN_INFO "md/raid:%s: raid level %d active with %d out of %d" - " devices, algorithm %d\n", mdname(mddev), conf->level, - mddev->raid_disks-mddev->degraded, mddev->raid_disks, - mddev->new_layout); - else - printk(KERN_ALERT "md/raid:%s: raid level %d active with %d" - " out of %d devices, algorithm %d\n", - mdname(mddev), conf->level, - mddev->raid_disks - mddev->degraded, - mddev->raid_disks, mddev->new_layout); - - print_raid5_conf(conf); - - if (conf->reshape_progress != MaxSector) { - conf->reshape_safe = conf->reshape_progress; - atomic_set(&conf->reshape_stripes, 0); - clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); - clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); - set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - mddev->sync_thread = md_register_thread(md_do_sync, mddev, - "reshape"); - } - - - /* Ok, everything is just fine now */ - if (mddev->to_remove == &raid5_attrs_group) - mddev->to_remove = NULL; - else if (mddev->kobj.sd && - sysfs_create_group(&mddev->kobj, &raid5_attrs_group)) - printk(KERN_WARNING - "raid5: failed to create sysfs attributes for %s\n", - mdname(mddev)); - md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); - - if (mddev->queue) { - int chunk_size; - /* read-ahead size must cover two whole stripes, which - * is 2 * (datadisks) * chunksize where 'n' is the - * number of raid devices - */ - int data_disks = conf->previous_raid_disks - conf->max_degraded; - int stripe = data_disks * - ((mddev->chunk_sectors << 9) / PAGE_SIZE); - if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) - mddev->queue->backing_dev_info.ra_pages = 2 * stripe; - - blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); - - mddev->queue->backing_dev_info.congested_data = mddev; - mddev->queue->backing_dev_info.congested_fn = raid5_congested; - - chunk_size = mddev->chunk_sectors << 9; - blk_queue_io_min(mddev->queue, chunk_size); - blk_queue_io_opt(mddev->queue, chunk_size * - (conf->raid_disks - conf->max_degraded)); - - rdev_for_each(rdev, mddev) - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - } - - return 0; -abort: - md_unregister_thread(&mddev->thread); - print_raid5_conf(conf); - free_conf(conf); - mddev->private = NULL; - printk(KERN_ALERT "md/raid:%s: failed to run raid set.\n", mdname(mddev)); - return -EIO; -} - -static int stop(struct mddev *mddev) -{ - struct r5conf *conf = mddev->private; - - md_unregister_thread(&mddev->thread); - if (mddev->queue) - mddev->queue->backing_dev_info.congested_fn = NULL; - free_conf(conf); - mddev->private = NULL; - mddev->to_remove = &raid5_attrs_group; - return 0; -} - -static void status(struct seq_file *seq, struct mddev *mddev) -{ - struct r5conf *conf = mddev->private; - 
int i; - - seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level, - mddev->chunk_sectors / 2, mddev->layout); - seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded); - for (i = 0; i < conf->raid_disks; i++) - seq_printf (seq, "%s", - conf->disks[i].rdev && - test_bit(In_sync, &conf->disks[i].rdev->flags) ? "U" : "_"); - seq_printf (seq, "]"); -} - -static void print_raid5_conf (struct r5conf *conf) -{ - int i; - struct disk_info *tmp; - - printk(KERN_DEBUG "RAID conf printout:\n"); - if (!conf) { - printk("(conf==NULL)\n"); - return; - } - printk(KERN_DEBUG " --- level:%d rd:%d wd:%d\n", conf->level, - conf->raid_disks, - conf->raid_disks - conf->mddev->degraded); - - for (i = 0; i < conf->raid_disks; i++) { - char b[BDEVNAME_SIZE]; - tmp = conf->disks + i; - if (tmp->rdev) - printk(KERN_DEBUG " disk %d, o:%d, dev:%s\n", - i, !test_bit(Faulty, &tmp->rdev->flags), - bdevname(tmp->rdev->bdev, b)); - } -} - -static int raid5_spare_active(struct mddev *mddev) -{ - int i; - struct r5conf *conf = mddev->private; - struct disk_info *tmp; - int count = 0; - unsigned long flags; - - for (i = 0; i < conf->raid_disks; i++) { - tmp = conf->disks + i; - if (tmp->replacement - && tmp->replacement->recovery_offset == MaxSector - && !test_bit(Faulty, &tmp->replacement->flags) - && !test_and_set_bit(In_sync, &tmp->replacement->flags)) { - /* Replacement has just become active. */ - if (!tmp->rdev - || !test_and_clear_bit(In_sync, &tmp->rdev->flags)) - count++; - if (tmp->rdev) { - /* Replaced device not technically faulty, - * but we need to be sure it gets removed - * and never re-added. - */ - set_bit(Faulty, &tmp->rdev->flags); - sysfs_notify_dirent_safe( - tmp->rdev->sysfs_state); - } - sysfs_notify_dirent_safe(tmp->replacement->sysfs_state); - } else if (tmp->rdev - && tmp->rdev->recovery_offset == MaxSector - && !test_bit(Faulty, &tmp->rdev->flags) - && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { - count++; - sysfs_notify_dirent_safe(tmp->rdev->sysfs_state); - } - } - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded = calc_degraded(conf); - spin_unlock_irqrestore(&conf->device_lock, flags); - print_raid5_conf(conf); - return count; -} - -static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev) -{ - struct r5conf *conf = mddev->private; - int err = 0; - int number = rdev->raid_disk; - struct md_rdev **rdevp; - struct disk_info *p = conf->disks + number; - - print_raid5_conf(conf); - if (rdev == p->rdev) - rdevp = &p->rdev; - else if (rdev == p->replacement) - rdevp = &p->replacement; - else - return 0; - - if (number >= conf->raid_disks && - conf->reshape_progress == MaxSector) - clear_bit(In_sync, &rdev->flags); - - if (test_bit(In_sync, &rdev->flags) || - atomic_read(&rdev->nr_pending)) { - err = -EBUSY; - goto abort; - } - /* Only remove non-faulty devices if recovery - * isn't possible. 
- */ - if (!test_bit(Faulty, &rdev->flags) && - mddev->recovery_disabled != conf->recovery_disabled && - !has_failed(conf) && - (!p->replacement || p->replacement == rdev) && - number < conf->raid_disks) { - err = -EBUSY; - goto abort; - } - *rdevp = NULL; - synchronize_rcu(); - if (atomic_read(&rdev->nr_pending)) { - /* lost the race, try later */ - err = -EBUSY; - *rdevp = rdev; - } else if (p->replacement) { - /* We must have just cleared 'rdev' */ - p->rdev = p->replacement; - clear_bit(Replacement, &p->replacement->flags); - smp_mb(); /* Make sure other CPUs may see both as identical - * but will never see neither - if they are careful - */ - p->replacement = NULL; - clear_bit(WantReplacement, &rdev->flags); - } else - /* We might have just removed the Replacement as faulty- - * clear the bit just in case - */ - clear_bit(WantReplacement, &rdev->flags); -abort: - - print_raid5_conf(conf); - return err; -} - -static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) -{ - struct r5conf *conf = mddev->private; - int err = -EEXIST; - int disk; - struct disk_info *p; - int first = 0; - int last = conf->raid_disks - 1; - - if (mddev->recovery_disabled == conf->recovery_disabled) - return -EBUSY; - - if (rdev->saved_raid_disk < 0 && has_failed(conf)) - /* no point adding a device */ - return -EINVAL; - - if (rdev->raid_disk >= 0) - first = last = rdev->raid_disk; - - /* - * find the disk ... but prefer rdev->saved_raid_disk - * if possible. - */ - if (rdev->saved_raid_disk >= 0 && - rdev->saved_raid_disk >= first && - conf->disks[rdev->saved_raid_disk].rdev == NULL) - disk = rdev->saved_raid_disk; - else - disk = first; - for ( ; disk <= last ; disk++) { - p = conf->disks + disk; - if (p->rdev == NULL) { - clear_bit(In_sync, &rdev->flags); - rdev->raid_disk = disk; - err = 0; - if (rdev->saved_raid_disk != disk) - conf->fullsync = 1; - rcu_assign_pointer(p->rdev, rdev); - break; - } - if (test_bit(WantReplacement, &p->rdev->flags) && - p->replacement == NULL) { - clear_bit(In_sync, &rdev->flags); - set_bit(Replacement, &rdev->flags); - rdev->raid_disk = disk; - err = 0; - conf->fullsync = 1; - rcu_assign_pointer(p->replacement, rdev); - break; - } - } - print_raid5_conf(conf); - return err; -} - -static int raid5_resize(struct mddev *mddev, sector_t sectors) -{ - /* no resync is happening, and there is enough space - * on all devices, so we can resize. - * We need to make sure resync covers any new space. - * If the array is shrinking we should possibly wait until - * any io in the removed space completes, but it hardly seems - * worth it. - */ - sectors &= ~((sector_t)mddev->chunk_sectors - 1); - md_set_array_sectors(mddev, raid5_size(mddev, sectors, - mddev->raid_disks)); - if (mddev->array_sectors > - raid5_size(mddev, sectors, mddev->raid_disks)) - return -EINVAL; - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); - if (sectors > mddev->dev_sectors && - mddev->recovery_cp > mddev->dev_sectors) { - mddev->recovery_cp = mddev->dev_sectors; - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - } - mddev->dev_sectors = sectors; - mddev->resync_max_sectors = sectors; - return 0; -} - -static int check_stripe_cache(struct mddev *mddev) -{ - /* Can only proceed if there are plenty of stripe_heads. - * We need a minimum of one full stripe,, and for sensible progress - * it is best to have about 4 times that. - * If we require 4 times, then the default 256 4K stripe_heads will - * allow for chunk sizes up to 256K, which is probably OK. 
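The "about 4 times one full stripe" requirement above works out as follows for the defaults the comment itself mentions (256 stripe_heads, 4K STRIPE_SIZE). The sketch only restates that arithmetic; the 256K candidate chunk is the comment's own example.

#include <stdio.h>

int main(void)
{
    unsigned int stripe_size = 4096;     /* STRIPE_SIZE: one page     */
    unsigned int max_nr_stripes = 256;   /* default stripe cache size */
    unsigned int chunk_sectors = 512;    /* candidate chunk: 256K     */

    /* stripe_heads needed = 4 * (chunk bytes / STRIPE_SIZE) */
    unsigned int needed = ((chunk_sectors << 9) / stripe_size) * 4;

    printf("a %uK chunk needs %u stripe_heads (cache holds %u): %s\n",
           (chunk_sectors << 9) / 1024, needed, max_nr_stripes,
           needed <= max_nr_stripes ? "ok" : "grow stripe_cache_size first");
    return 0;
}

With these defaults the 256K chunk needs exactly 256 stripe_heads, which is why the comment calls 256K the largest chunk size the default cache comfortably supports.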
- * If the chunk size is greater, user-space should request more - * stripe_heads first. - */ - struct r5conf *conf = mddev->private; - if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4 - > conf->max_nr_stripes || - ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4 - > conf->max_nr_stripes) { - printk(KERN_WARNING "md/raid:%s: reshape: not enough stripes. Needed %lu\n", - mdname(mddev), - ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9) - / STRIPE_SIZE)*4); - return 0; - } - return 1; -} - -static int check_reshape(struct mddev *mddev) -{ - struct r5conf *conf = mddev->private; - - if (mddev->delta_disks == 0 && - mddev->new_layout == mddev->layout && - mddev->new_chunk_sectors == mddev->chunk_sectors) - return 0; /* nothing to do */ - if (mddev->bitmap) - /* Cannot grow a bitmap yet */ - return -EBUSY; - if (has_failed(conf)) - return -EINVAL; - if (mddev->delta_disks < 0) { - /* We might be able to shrink, but the devices must - * be made bigger first. - * For raid6, 4 is the minimum size. - * Otherwise 2 is the minimum - */ - int min = 2; - if (mddev->level == 6) - min = 4; - if (mddev->raid_disks + mddev->delta_disks < min) - return -EINVAL; - } - - if (!check_stripe_cache(mddev)) - return -ENOSPC; - - return resize_stripes(conf, conf->raid_disks + mddev->delta_disks); -} - -static int raid5_start_reshape(struct mddev *mddev) -{ - struct r5conf *conf = mddev->private; - struct md_rdev *rdev; - int spares = 0; - unsigned long flags; - - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) - return -EBUSY; - - if (!check_stripe_cache(mddev)) - return -ENOSPC; - - rdev_for_each(rdev, mddev) - if (!test_bit(In_sync, &rdev->flags) - && !test_bit(Faulty, &rdev->flags)) - spares++; - - if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded) - /* Not enough devices even to make a degraded array - * of that size - */ - return -EINVAL; - - /* Refuse to reduce size of the array. Any reductions in - * array size must be through explicit setting of array_size - * attribute. - */ - if (raid5_size(mddev, 0, conf->raid_disks + mddev->delta_disks) - < mddev->array_sectors) { - printk(KERN_ERR "md/raid:%s: array size must be reduced " - "before number of disks\n", mdname(mddev)); - return -EINVAL; - } - - atomic_set(&conf->reshape_stripes, 0); - spin_lock_irq(&conf->device_lock); - conf->previous_raid_disks = conf->raid_disks; - conf->raid_disks += mddev->delta_disks; - conf->prev_chunk_sectors = conf->chunk_sectors; - conf->chunk_sectors = mddev->new_chunk_sectors; - conf->prev_algo = conf->algorithm; - conf->algorithm = mddev->new_layout; - if (mddev->delta_disks < 0) - conf->reshape_progress = raid5_size(mddev, 0, 0); - else - conf->reshape_progress = 0; - conf->reshape_safe = conf->reshape_progress; - conf->generation++; - spin_unlock_irq(&conf->device_lock); - - /* Add some new drives, as many as will fit. - * We know there are enough to make the newly sized array work. - * Don't add devices if we are reducing the number of - * devices in the array. This is because it is not possible - * to correctly record the "partially reconstructed" state of - * such devices during the reshape and confusion could result. 
- */ - if (mddev->delta_disks >= 0) { - rdev_for_each(rdev, mddev) - if (rdev->raid_disk < 0 && - !test_bit(Faulty, &rdev->flags)) { - if (raid5_add_disk(mddev, rdev) == 0) { - if (rdev->raid_disk - >= conf->previous_raid_disks) - set_bit(In_sync, &rdev->flags); - else - rdev->recovery_offset = 0; - - if (sysfs_link_rdev(mddev, rdev)) - /* Failure here is OK */; - } - } else if (rdev->raid_disk >= conf->previous_raid_disks - && !test_bit(Faulty, &rdev->flags)) { - /* This is a spare that was manually added */ - set_bit(In_sync, &rdev->flags); - } - - /* When a reshape changes the number of devices, - * ->degraded is measured against the larger of the - * pre and post number of devices. - */ - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded = calc_degraded(conf); - spin_unlock_irqrestore(&conf->device_lock, flags); - } - mddev->raid_disks = conf->raid_disks; - mddev->reshape_position = conf->reshape_progress; - set_bit(MD_CHANGE_DEVS, &mddev->flags); - - clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); - clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); - set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); - set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); - mddev->sync_thread = md_register_thread(md_do_sync, mddev, - "reshape"); - if (!mddev->sync_thread) { - mddev->recovery = 0; - spin_lock_irq(&conf->device_lock); - mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; - conf->reshape_progress = MaxSector; - mddev->reshape_position = MaxSector; - spin_unlock_irq(&conf->device_lock); - return -EAGAIN; - } - conf->reshape_checkpoint = jiffies; - md_wakeup_thread(mddev->sync_thread); - md_new_event(mddev); - return 0; -} - -/* This is called from the reshape thread and should make any - * changes needed in 'conf' - */ -static void end_reshape(struct r5conf *conf) -{ - - if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { - - spin_lock_irq(&conf->device_lock); - conf->previous_raid_disks = conf->raid_disks; - conf->reshape_progress = MaxSector; - spin_unlock_irq(&conf->device_lock); - wake_up(&conf->wait_for_overlap); - - /* read-ahead size must cover two whole stripes, which is - * 2 * (datadisks) * chunksize where 'n' is the number of raid devices - */ - if (conf->mddev->queue) { - int data_disks = conf->raid_disks - conf->max_degraded; - int stripe = data_disks * ((conf->chunk_sectors << 9) - / PAGE_SIZE); - if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe) - conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe; - } - } -} - -/* This is called from the raid5d thread with mddev_lock held. - * It makes config changes to the device. 
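Both run() and end_reshape() above raise the queue's read-ahead window so it covers two whole stripes of data. As a worked example with assumed geometry (6 data disks, 64K chunks, 4K pages):

#include <stdio.h>

int main(void)
{
    unsigned int page_size = 4096;
    unsigned int chunk_sectors = 128;   /* assumed 64K chunk          */
    int data_disks = 6;                 /* raid_disks - max_degraded  */

    /* pages covered by one full stripe of data */
    unsigned int stripe_pages = data_disks * ((chunk_sectors << 9) / page_size);

    /* ra_pages is raised so read-ahead spans two whole stripes */
    printf("stripe = %u pages, read-ahead >= %u pages (%u KiB)\n",
           stripe_pages, 2 * stripe_pages,
           2 * stripe_pages * (page_size / 1024));
    return 0;
}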
- */ -static void raid5_finish_reshape(struct mddev *mddev) -{ - struct r5conf *conf = mddev->private; - - if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { - - if (mddev->delta_disks > 0) { - md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); - } else { - int d; - spin_lock_irq(&conf->device_lock); - mddev->degraded = calc_degraded(conf); - spin_unlock_irq(&conf->device_lock); - for (d = conf->raid_disks ; - d < conf->raid_disks - mddev->delta_disks; - d++) { - struct md_rdev *rdev = conf->disks[d].rdev; - if (rdev && - raid5_remove_disk(mddev, rdev) == 0) { - sysfs_unlink_rdev(mddev, rdev); - rdev->raid_disk = -1; - } - } - } - mddev->layout = conf->algorithm; - mddev->chunk_sectors = conf->chunk_sectors; - mddev->reshape_position = MaxSector; - mddev->delta_disks = 0; - } -} - -static void raid5_quiesce(struct mddev *mddev, int state) -{ - struct r5conf *conf = mddev->private; - - switch(state) { - case 2: /* resume for a suspend */ - wake_up(&conf->wait_for_overlap); - break; - - case 1: /* stop all writes */ - spin_lock_irq(&conf->device_lock); - /* '2' tells resync/reshape to pause so that all - * active stripes can drain - */ - conf->quiesce = 2; - wait_event_lock_irq(conf->wait_for_stripe, - atomic_read(&conf->active_stripes) == 0 && - atomic_read(&conf->active_aligned_reads) == 0, - conf->device_lock, /* nothing */); - conf->quiesce = 1; - spin_unlock_irq(&conf->device_lock); - /* allow reshape to continue */ - wake_up(&conf->wait_for_overlap); - break; - - case 0: /* re-enable writes */ - spin_lock_irq(&conf->device_lock); - conf->quiesce = 0; - wake_up(&conf->wait_for_stripe); - wake_up(&conf->wait_for_overlap); - spin_unlock_irq(&conf->device_lock); - break; - } -} - - -static void *raid45_takeover_raid0(struct mddev *mddev, int level) -{ - struct r0conf *raid0_conf = mddev->private; - sector_t sectors; - - /* for raid0 takeover only one zone is supported */ - if (raid0_conf->nr_strip_zones > 1) { - printk(KERN_ERR "md/raid:%s: cannot takeover raid0 with more than one zone.\n", - mdname(mddev)); - return ERR_PTR(-EINVAL); - } - - sectors = raid0_conf->strip_zone[0].zone_end; - sector_div(sectors, raid0_conf->strip_zone[0].nb_dev); - mddev->dev_sectors = sectors; - mddev->new_level = level; - mddev->new_layout = ALGORITHM_PARITY_N; - mddev->new_chunk_sectors = mddev->chunk_sectors; - mddev->raid_disks += 1; - mddev->delta_disks = 1; - /* make sure it will be not marked as dirty */ - mddev->recovery_cp = MaxSector; - - return setup_conf(mddev); -} - - -static void *raid5_takeover_raid1(struct mddev *mddev) -{ - int chunksect; - - if (mddev->raid_disks != 2 || - mddev->degraded > 1) - return ERR_PTR(-EINVAL); - - /* Should check if there are write-behind devices? 
*/ - - chunksect = 64*2; /* 64K by default */ - - /* The array must be an exact multiple of chunksize */ - while (chunksect && (mddev->array_sectors & (chunksect-1))) - chunksect >>= 1; - - if ((chunksect<<9) < STRIPE_SIZE) - /* array size does not allow a suitable chunk size */ - return ERR_PTR(-EINVAL); - - mddev->new_level = 5; - mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC; - mddev->new_chunk_sectors = chunksect; - - return setup_conf(mddev); -} - -static void *raid5_takeover_raid6(struct mddev *mddev) -{ - int new_layout; - - switch (mddev->layout) { - case ALGORITHM_LEFT_ASYMMETRIC_6: - new_layout = ALGORITHM_LEFT_ASYMMETRIC; - break; - case ALGORITHM_RIGHT_ASYMMETRIC_6: - new_layout = ALGORITHM_RIGHT_ASYMMETRIC; - break; - case ALGORITHM_LEFT_SYMMETRIC_6: - new_layout = ALGORITHM_LEFT_SYMMETRIC; - break; - case ALGORITHM_RIGHT_SYMMETRIC_6: - new_layout = ALGORITHM_RIGHT_SYMMETRIC; - break; - case ALGORITHM_PARITY_0_6: - new_layout = ALGORITHM_PARITY_0; - break; - case ALGORITHM_PARITY_N: - new_layout = ALGORITHM_PARITY_N; - break; - default: - return ERR_PTR(-EINVAL); - } - mddev->new_level = 5; - mddev->new_layout = new_layout; - mddev->delta_disks = -1; - mddev->raid_disks -= 1; - return setup_conf(mddev); -} - - -static int raid5_check_reshape(struct mddev *mddev) -{ - /* For a 2-drive array, the layout and chunk size can be changed - * immediately as not restriping is needed. - * For larger arrays we record the new value - after validation - * to be used by a reshape pass. - */ - struct r5conf *conf = mddev->private; - int new_chunk = mddev->new_chunk_sectors; - - if (mddev->new_layout >= 0 && !algorithm_valid_raid5(mddev->new_layout)) - return -EINVAL; - if (new_chunk > 0) { - if (!is_power_of_2(new_chunk)) - return -EINVAL; - if (new_chunk < (PAGE_SIZE>>9)) - return -EINVAL; - if (mddev->array_sectors & (new_chunk-1)) - /* not factor of array size */ - return -EINVAL; - } - - /* They look valid */ - - if (mddev->raid_disks == 2) { - /* can make the change immediately */ - if (mddev->new_layout >= 0) { - conf->algorithm = mddev->new_layout; - mddev->layout = mddev->new_layout; - } - if (new_chunk > 0) { - conf->chunk_sectors = new_chunk ; - mddev->chunk_sectors = new_chunk; - } - set_bit(MD_CHANGE_DEVS, &mddev->flags); - md_wakeup_thread(mddev->thread); - } - return check_reshape(mddev); -} - -static int raid6_check_reshape(struct mddev *mddev) -{ - int new_chunk = mddev->new_chunk_sectors; - - if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout)) - return -EINVAL; - if (new_chunk > 0) { - if (!is_power_of_2(new_chunk)) - return -EINVAL; - if (new_chunk < (PAGE_SIZE >> 9)) - return -EINVAL; - if (mddev->array_sectors & (new_chunk-1)) - /* not factor of array size */ - return -EINVAL; - } - - /* They look valid */ - return check_reshape(mddev); -} - -static void *raid5_takeover(struct mddev *mddev) -{ - /* raid5 can take over: - * raid0 - if there is only one strip zone - make it a raid4 layout - * raid1 - if there are two drives. We need to know the chunk size - * raid4 - trivial - just use a raid4 layout. 
- * raid6 - Providing it is a *_6 layout - */ - if (mddev->level == 0) - return raid45_takeover_raid0(mddev, 5); - if (mddev->level == 1) - return raid5_takeover_raid1(mddev); - if (mddev->level == 4) { - mddev->new_layout = ALGORITHM_PARITY_N; - mddev->new_level = 5; - return setup_conf(mddev); - } - if (mddev->level == 6) - return raid5_takeover_raid6(mddev); - - return ERR_PTR(-EINVAL); -} - -static void *raid4_takeover(struct mddev *mddev) -{ - /* raid4 can take over: - * raid0 - if there is only one strip zone - * raid5 - if layout is right - */ - if (mddev->level == 0) - return raid45_takeover_raid0(mddev, 4); - if (mddev->level == 5 && - mddev->layout == ALGORITHM_PARITY_N) { - mddev->new_layout = 0; - mddev->new_level = 4; - return setup_conf(mddev); - } - return ERR_PTR(-EINVAL); -} - -static struct md_personality raid5_personality; - -static void *raid6_takeover(struct mddev *mddev) -{ - /* Currently can only take over a raid5. We map the - * personality to an equivalent raid6 personality - * with the Q block at the end. - */ - int new_layout; - - if (mddev->pers != &raid5_personality) - return ERR_PTR(-EINVAL); - if (mddev->degraded > 1) - return ERR_PTR(-EINVAL); - if (mddev->raid_disks > 253) - return ERR_PTR(-EINVAL); - if (mddev->raid_disks < 3) - return ERR_PTR(-EINVAL); - - switch (mddev->layout) { - case ALGORITHM_LEFT_ASYMMETRIC: - new_layout = ALGORITHM_LEFT_ASYMMETRIC_6; - break; - case ALGORITHM_RIGHT_ASYMMETRIC: - new_layout = ALGORITHM_RIGHT_ASYMMETRIC_6; - break; - case ALGORITHM_LEFT_SYMMETRIC: - new_layout = ALGORITHM_LEFT_SYMMETRIC_6; - break; - case ALGORITHM_RIGHT_SYMMETRIC: - new_layout = ALGORITHM_RIGHT_SYMMETRIC_6; - break; - case ALGORITHM_PARITY_0: - new_layout = ALGORITHM_PARITY_0_6; - break; - case ALGORITHM_PARITY_N: - new_layout = ALGORITHM_PARITY_N; - break; - default: - return ERR_PTR(-EINVAL); - } - mddev->new_level = 6; - mddev->new_layout = new_layout; - mddev->delta_disks = 1; - mddev->raid_disks += 1; - return setup_conf(mddev); -} - - -static struct md_personality raid6_personality = -{ - .name = "raid6", - .level = 6, - .owner = THIS_MODULE, - .make_request = make_request, - .run = run, - .stop = stop, - .status = status, - .error_handler = error, - .hot_add_disk = raid5_add_disk, - .hot_remove_disk= raid5_remove_disk, - .spare_active = raid5_spare_active, - .sync_request = sync_request, - .resize = raid5_resize, - .size = raid5_size, - .check_reshape = raid6_check_reshape, - .start_reshape = raid5_start_reshape, - .finish_reshape = raid5_finish_reshape, - .quiesce = raid5_quiesce, - .takeover = raid6_takeover, -}; -static struct md_personality raid5_personality = -{ - .name = "raid5", - .level = 5, - .owner = THIS_MODULE, - .make_request = make_request, - .run = run, - .stop = stop, - .status = status, - .error_handler = error, - .hot_add_disk = raid5_add_disk, - .hot_remove_disk= raid5_remove_disk, - .spare_active = raid5_spare_active, - .sync_request = sync_request, - .resize = raid5_resize, - .size = raid5_size, - .check_reshape = raid5_check_reshape, - .start_reshape = raid5_start_reshape, - .finish_reshape = raid5_finish_reshape, - .quiesce = raid5_quiesce, - .takeover = raid5_takeover, -}; - -static struct md_personality raid4_personality = -{ - .name = "raid4", - .level = 4, - .owner = THIS_MODULE, - .make_request = make_request, - .run = run, - .stop = stop, - .status = status, - .error_handler = error, - .hot_add_disk = raid5_add_disk, - .hot_remove_disk= raid5_remove_disk, - .spare_active = raid5_spare_active, - .sync_request = 
sync_request, - .resize = raid5_resize, - .size = raid5_size, - .check_reshape = raid5_check_reshape, - .start_reshape = raid5_start_reshape, - .finish_reshape = raid5_finish_reshape, - .quiesce = raid5_quiesce, - .takeover = raid4_takeover, -}; - -static int __init raid5_init(void) -{ - register_md_personality(&raid6_personality); - register_md_personality(&raid5_personality); - register_md_personality(&raid4_personality); - return 0; -} - -static void raid5_exit(void) -{ - unregister_md_personality(&raid6_personality); - unregister_md_personality(&raid5_personality); - unregister_md_personality(&raid4_personality); -} - -module_init(raid5_init); -module_exit(raid5_exit); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("RAID4/5/6 (striping with parity) personality for MD"); -MODULE_ALIAS("md-personality-4"); /* RAID5 */ -MODULE_ALIAS("md-raid5"); -MODULE_ALIAS("md-raid4"); -MODULE_ALIAS("md-level-5"); -MODULE_ALIAS("md-level-4"); -MODULE_ALIAS("md-personality-8"); /* RAID6 */ -MODULE_ALIAS("md-raid6"); -MODULE_ALIAS("md-level-6"); - -/* This used to be two separate modules, they were: */ -MODULE_ALIAS("raid5"); -MODULE_ALIAS("raid6"); diff --git a/ANDROID_3.4.5/drivers/md/raid5.h b/ANDROID_3.4.5/drivers/md/raid5.h deleted file mode 100644 index 8d8e1393..00000000 --- a/ANDROID_3.4.5/drivers/md/raid5.h +++ /dev/null @@ -1,519 +0,0 @@ -#ifndef _RAID5_H -#define _RAID5_H - -#include <linux/raid/xor.h> -#include <linux/dmaengine.h> - -/* - * - * Each stripe contains one buffer per device. Each buffer can be in - * one of a number of states stored in "flags". Changes between - * these states happen *almost* exclusively under the protection of the - * STRIPE_ACTIVE flag. Some very specific changes can happen in bi_end_io, and - * these are not protected by STRIPE_ACTIVE. - * - * The flag bits that are used to represent these states are: - * R5_UPTODATE and R5_LOCKED - * - * State Empty == !UPTODATE, !LOCK - * We have no data, and there is no active request - * State Want == !UPTODATE, LOCK - * A read request is being submitted for this block - * State Dirty == UPTODATE, LOCK - * Some new data is in this buffer, and it is being written out - * State Clean == UPTODATE, !LOCK - * We have valid data which is the same as on disc - * - * The possible state transitions are: - * - * Empty -> Want - on read or write to get old data for parity calc - * Empty -> Dirty - on compute_parity to satisfy write/sync request. - * Empty -> Clean - on compute_block when computing a block for failed drive - * Want -> Empty - on failed read - * Want -> Clean - on successful completion of read request - * Dirty -> Clean - on successful completion of write request - * Dirty -> Clean - on failed write - * Clean -> Dirty - on compute_parity to satisfy write/sync (RECONSTRUCT or RMW) - * - * The Want->Empty, Want->Clean, Dirty->Clean, transitions - * all happen in b_end_io at interrupt time. - * Each sets the Uptodate bit before releasing the Lock bit. - * This leaves one multi-stage transition: - * Want->Dirty->Clean - * This is safe because thinking that a Clean buffer is actually dirty - * will at worst delay some action, and the stripe will be scheduled - * for attention after the transition is complete. - * - * There is one possibility that is not covered by these states. That - * is if one drive has failed and there is a spare being rebuilt. 
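The four buffer states named above are simply the two flag bits read together. The small decoder below makes the mapping explicit; the flag values are stand-ins, not the kernel's enum constants.

#include <stdio.h>

#define R5_UPTODATE (1u << 0)   /* stand-in bit values for illustration */
#define R5_LOCKED   (1u << 1)

/* Map the two flag bits onto the states named in the comment above. */
static const char *buffer_state(unsigned int flags)
{
    int uptodate = !!(flags & R5_UPTODATE);
    int locked   = !!(flags & R5_LOCKED);

    if (!uptodate && !locked) return "Empty";  /* no data, no request        */
    if (!uptodate &&  locked) return "Want";   /* read has been submitted    */
    if ( uptodate &&  locked) return "Dirty";  /* new data being written out */
    return "Clean";                            /* matches what is on disc    */
}

int main(void)
{
    unsigned int states[] = { 0, R5_LOCKED, R5_UPTODATE | R5_LOCKED, R5_UPTODATE };
    for (int i = 0; i < 4; i++)
        printf("flags=%u -> %s\n", states[i], buffer_state(states[i]));
    return 0;
}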
We - * can't distinguish between a clean block that has been generated - * from parity calculations, and a clean block that has been - * successfully written to the spare (or to parity when resyncing). - * To distinguish these states we have a stripe bit STRIPE_INSYNC that - * is set whenever a write is scheduled to the spare, or to the parity - * disc if there is no spare. A sync request clears this bit, and - * when we find it set with no buffers locked, we know the sync is - * complete. - * - * Buffers for the md device that arrive via make_request are attached - * to the appropriate stripe in one of two lists linked on b_reqnext. - * One list (bh_read) for read requests, one (bh_write) for write. - * There should never be more than one buffer on the two lists - * together, but we are not guaranteed of that, so we allow for more. - * - * If a buffer is on the read list when the associated cache buffer is - * Uptodate, the data is copied into the read buffer and its b_end_io - * routine is called. This may happen in the end_request routine only - * if the buffer has just successfully been read. end_request should - * remove the buffers from the list and then set the Uptodate bit on - * the buffer. Other threads may do this only if they first check - * that the Uptodate bit is set. Once they have checked that, they may - * take buffers off the read queue. - * - * When a buffer on the write list is committed for write, it is copied - * into the cache buffer, which is then marked dirty, and moved onto a - * third list, the written list (bh_written). Once both the parity - * block and the cached buffer are successfully written, any buffer on - * a written list can be returned with b_end_io. - * - * The write list and read list both act as fifos. The read list, - * write list and written list are protected by the device_lock. - * The device_lock is only for list manipulations and will only be - * held for a very short time. It can be claimed from interrupts. - * - * - * Stripes in the stripe cache can be on one of two lists (or on - * neither). The "inactive_list" contains stripes which are not - * currently being used for any request. They can freely be reused - * for another stripe. The "handle_list" contains stripes that need - * to be handled in some way. Both of these are fifo queues. Each - * stripe is also (potentially) linked to a hash bucket in the hash - * table so that it can be found by sector number. Stripes that are - * not hashed must be on the inactive_list, and will normally be at - * the front. All stripes start life this way. - * - * The inactive_list, handle_list and hash bucket lists are all protected by the - * device_lock. - * - stripes have a reference counter. If count==0, they are on a list. - * - If a stripe might need handling, STRIPE_HANDLE is set. - * - When refcount reaches zero, then if STRIPE_HANDLE it is put on - * handle_list else inactive_list - * - * This, combined with the fact that STRIPE_HANDLE is only ever - * cleared while a stripe has a non-zero count means that if the - * refcount is 0 and STRIPE_HANDLE is set, then it is on the - * handle_list and if refcount is 0 and STRIPE_HANDLE is not set, then - * the stripe is on inactive_list. 
- * - * The possible transitions are: - * activate an unhashed/inactive stripe (get_active_stripe()) - * lockdev check-hash unlink-stripe cnt++ clean-stripe hash-stripe unlockdev - * activate a hashed, possibly active stripe (get_active_stripe()) - * lockdev check-hash if(!cnt++)unlink-stripe unlockdev - * attach a request to an active stripe (add_stripe_bh()) - * lockdev attach-buffer unlockdev - * handle a stripe (handle_stripe()) - * setSTRIPE_ACTIVE, clrSTRIPE_HANDLE ... - * (lockdev check-buffers unlockdev) .. - * change-state .. - * record io/ops needed clearSTRIPE_ACTIVE schedule io/ops - * release an active stripe (release_stripe()) - * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev - * - * The refcount counts each thread that has activated the stripe, - * plus raid5d if it is handling it, plus one for each active request - * on a cached buffer, and plus one if the stripe is undergoing stripe - * operations. - * - * The stripe operations are: - * -copying data between the stripe cache and user application buffers - * -computing blocks to save a disk access, or to recover a missing block - * -updating the parity on a write operation (reconstruct write and - * read-modify-write) - * -checking parity correctness - * -running i/o to disk - * These operations are carried out by raid5_run_ops which uses the async_tx - * api to (optionally) offload operations to dedicated hardware engines. - * When requesting an operation, handle_stripe sets the pending bit for the - * operation and increments the count. raid5_run_ops is then run whenever - * the count is non-zero. - * There are some critical dependencies between the operations that prevent some - * from being requested while another is in flight. - * 1/ Parity check operations destroy the in cache version of the parity block, - * so we prevent parity dependent operations like writes and compute_blocks - * from starting while a check is in progress. Some dma engines can perform - * the check without damaging the parity block; in these cases the parity - * block is re-marked up to date (assuming the check was successful) and is - * not re-read from disk. - * 2/ When a write operation is requested we immediately lock the affected - * blocks, and mark them as not up to date. This causes new read requests - * to be held off, as well as parity checks and compute block operations. - * 3/ Once a compute block operation has been requested, handle_stripe treats - * that block as if it is up to date. raid5_run_ops guarantees that any - * operation that is dependent on the compute block result is initiated after - * the compute block completes. - */ - -/* - * Operations state - intermediate states that are visible outside of - * STRIPE_ACTIVE. - * In general _idle indicates nothing is running, _run indicates a data - * processing operation is active, and _result means the data processing result - * is stable and can be acted upon. 
For simple operations like biofill and - * compute that only have an _idle and _run state, they are indicated with - * sh->state flags (STRIPE_BIOFILL_RUN and STRIPE_COMPUTE_RUN) - */ -/** - * enum check_states - handles syncing / repairing a stripe - * @check_state_idle - check operations are quiesced - * @check_state_run - check operation is running - * @check_state_check_result - set outside lock when check result is valid - * @check_state_compute_run - check failed and we are repairing - * @check_state_compute_result - set outside lock when compute result is valid - */ -enum check_states { - check_state_idle = 0, - check_state_run, /* xor parity check */ - check_state_run_q, /* q-parity check */ - check_state_run_pq, /* pq dual parity check */ - check_state_check_result, - check_state_compute_run, /* parity repair */ - check_state_compute_result, -}; - -/** - * enum reconstruct_states - handles writing or expanding a stripe - */ -enum reconstruct_states { - reconstruct_state_idle = 0, - reconstruct_state_prexor_drain_run, /* prexor-write */ - reconstruct_state_drain_run, /* write */ - reconstruct_state_run, /* expand */ - reconstruct_state_prexor_drain_result, - reconstruct_state_drain_result, - reconstruct_state_result, -}; - -struct stripe_head { - struct hlist_node hash; - struct list_head lru; /* inactive_list or handle_list */ - struct r5conf *raid_conf; - short generation; /* increments with every - * reshape */ - sector_t sector; /* sector of this row */ - short pd_idx; /* parity disk index */ - short qd_idx; /* 'Q' disk index for raid6 */ - short ddf_layout;/* use DDF ordering to calculate Q */ - unsigned long state; /* state flags */ - atomic_t count; /* nr of active thread/requests */ - int bm_seq; /* sequence number for bitmap flushes */ - int disks; /* disks in stripe */ - enum check_states check_state; - enum reconstruct_states reconstruct_state; - /** - * struct stripe_operations - * @target - STRIPE_OP_COMPUTE_BLK target - * @target2 - 2nd compute target in the raid6 case - * @zero_sum_result - P and Q verification flags - * @request - async service request flags for raid_run_ops - */ - struct stripe_operations { - int target, target2; - enum sum_check_flags zero_sum_result; - #ifdef CONFIG_MULTICORE_RAID456 - unsigned long request; - wait_queue_head_t wait_for_ops; - #endif - } ops; - struct r5dev { - /* rreq and rvec are used for the replacement device when - * writing data to both devices. - */ - struct bio req, rreq; - struct bio_vec vec, rvec; - struct page *page; - struct bio *toread, *read, *towrite, *written; - sector_t sector; /* sector of this page */ - unsigned long flags; - } dev[1]; /* allocated with extra space depending on RAID geometry */ -}; - -/* stripe_head_state - collects and tracks the dynamic state of a stripe_head - * for handle_stripe. - */ -struct stripe_head_state { - /* 'syncing' means that we need to read all devices, either - * to check/correct parity, or to reconstruct a missing device. - * 'replacing' means we are replacing one or more drives and - * the source is valid at this point so we don't need to - * read all devices, just the replacement targets. 
- */ - int syncing, expanding, expanded, replacing; - int locked, uptodate, to_read, to_write, failed, written; - int to_fill, compute, req_compute, non_overwrite; - int failed_num[2]; - int p_failed, q_failed; - int dec_preread_active; - unsigned long ops_request; - - struct bio *return_bi; - struct md_rdev *blocked_rdev; - int handle_bad_blocks; -}; - -/* Flags for struct r5dev.flags */ -enum r5dev_flags { - R5_UPTODATE, /* page contains current data */ - R5_LOCKED, /* IO has been submitted on "req" */ - R5_DOUBLE_LOCKED,/* Cannot clear R5_LOCKED until 2 writes complete */ - R5_OVERWRITE, /* towrite covers whole page */ -/* and some that are internal to handle_stripe */ - R5_Insync, /* rdev && rdev->in_sync at start */ - R5_Wantread, /* want to schedule a read */ - R5_Wantwrite, - R5_Overlap, /* There is a pending overlapping request - * on this block */ - R5_ReadError, /* seen a read error here recently */ - R5_ReWrite, /* have tried to over-write the readerror */ - - R5_Expanded, /* This block now has post-expand data */ - R5_Wantcompute, /* compute_block in progress treat as - * uptodate - */ - R5_Wantfill, /* dev->toread contains a bio that needs - * filling - */ - R5_Wantdrain, /* dev->towrite needs to be drained */ - R5_WantFUA, /* Write should be FUA */ - R5_WriteError, /* got a write error - need to record it */ - R5_MadeGood, /* A bad block has been fixed by writing to it */ - R5_ReadRepl, /* Will/did read from replacement rather than orig */ - R5_MadeGoodRepl,/* A bad block on the replacement device has been - * fixed by writing to it */ - R5_NeedReplace, /* This device has a replacement which is not - * up-to-date at this stripe. */ - R5_WantReplace, /* We need to update the replacement, we have read - * data in, and now is a good time to write it out. - */ -}; - -/* - * Stripe state - */ -enum { - STRIPE_ACTIVE, - STRIPE_HANDLE, - STRIPE_SYNC_REQUESTED, - STRIPE_SYNCING, - STRIPE_INSYNC, - STRIPE_PREREAD_ACTIVE, - STRIPE_DELAYED, - STRIPE_DEGRADED, - STRIPE_BIT_DELAY, - STRIPE_EXPANDING, - STRIPE_EXPAND_SOURCE, - STRIPE_EXPAND_READY, - STRIPE_IO_STARTED, /* do not count towards 'bypass_count' */ - STRIPE_FULL_WRITE, /* all blocks are set to be overwritten */ - STRIPE_BIOFILL_RUN, - STRIPE_COMPUTE_RUN, - STRIPE_OPS_REQ_PENDING, -}; - -/* - * Operation request flags - */ -enum { - STRIPE_OP_BIOFILL, - STRIPE_OP_COMPUTE_BLK, - STRIPE_OP_PREXOR, - STRIPE_OP_BIODRAIN, - STRIPE_OP_RECONSTRUCT, - STRIPE_OP_CHECK, -}; -/* - * Plugging: - * - * To improve write throughput, we need to delay the handling of some - * stripes until there has been a chance that several write requests - * for the one stripe have all been collected. - * In particular, any write request that would require pre-reading - * is put on a "delayed" queue until there are no stripes currently - * in a pre-read phase. Further, if the "delayed" queue is empty when - * a stripe is put on it then we "plug" the queue and do not process it - * until an unplug call is made (the unplug_io_fn() is called). - * - * When preread is initiated on a stripe, we set PREREAD_ACTIVE and add - * it to the count of prereading stripes. - * When write is initiated, or the stripe refcnt == 0 (just in case), we - * clear the PREREAD_ACTIVE flag and decrement the count. - * Whenever the 'handle' queue is empty and the device is not plugged, we - * move any stripes from delayed to handle and clear the DELAYED flag and set - * PREREAD_ACTIVE. 
- * In stripe_handle, if we find pre-reading is necessary, we do it if - * PREREAD_ACTIVE is set, else we set DELAYED, which will send it to the delayed queue. - * HANDLE gets cleared if stripe_handle leaves nothing locked. - */ - - -struct disk_info { - struct md_rdev *rdev, *replacement; -}; - -struct r5conf { - struct hlist_head *stripe_hashtbl; - struct mddev *mddev; - int chunk_sectors; - int level, algorithm; - int max_degraded; - int raid_disks; - int max_nr_stripes; - - /* reshape_progress is the leading edge of a 'reshape' - * It has value MaxSector when no reshape is happening - * If delta_disks < 0, it is the last sector we started work on, - * else it is the next sector to work on. - */ - sector_t reshape_progress; - /* reshape_safe is the trailing edge of a reshape. We know that - * before (or after) this address, all reshape has completed. - */ - sector_t reshape_safe; - int previous_raid_disks; - int prev_chunk_sectors; - int prev_algo; - short generation; /* increments with every reshape */ - unsigned long reshape_checkpoint; /* Time we last updated - * metadata */ - - struct list_head handle_list; /* stripes needing handling */ - struct list_head hold_list; /* preread ready stripes */ - struct list_head delayed_list; /* stripes that have plugged requests */ - struct list_head bitmap_list; /* stripes delayed awaiting bitmap update */ - struct bio *retry_read_aligned; /* currently retrying aligned bios */ - struct bio *retry_read_aligned_list; /* aligned bios retry list */ - atomic_t preread_active_stripes; /* stripes with scheduled io */ - atomic_t active_aligned_reads; - atomic_t pending_full_writes; /* full write backlog */ - int bypass_count; /* bypassed prereads */ - int bypass_threshold; /* preread nice */ - struct list_head *last_hold; /* detect hold_list promotions */ - - atomic_t reshape_stripes; /* stripes with pending writes for reshape */ - /* unfortunately we need two cache names as we temporarily have - * two caches. - */ - int active_name; - char cache_name[2][32]; - struct kmem_cache *slab_cache; /* for allocating stripes */ - - int seq_flush, seq_write; - int quiesce; - - int fullsync; /* set to 1 if a full sync is needed, - * (fresh device added). - * Cleared when a sync completes. - */ - int recovery_disabled; - /* per cpu variables */ - struct raid5_percpu { - struct page *spare_page; /* Used when checking P/Q in raid6 */ - void *scribble; /* space for constructing buffer - * lists and performing address - * conversions - */ - } __percpu *percpu; - size_t scribble_len; /* size of scribble region must be - * associated with conf to handle - * cpu hotplug while reshaping - */ -#ifdef CONFIG_HOTPLUG_CPU - struct notifier_block cpu_notify; -#endif - - /* - * Free stripes pool - */ - atomic_t active_stripes; - struct list_head inactive_list; - wait_queue_head_t wait_for_stripe; - wait_queue_head_t wait_for_overlap; - int inactive_blocked; /* release of inactive stripes blocked, - * waiting for 25% to be free - */ - int pool_size; /* number of disks in stripeheads in pool */ - spinlock_t device_lock; - struct disk_info *disks; - - /* When taking over an array from a different personality, we store - * the new thread here until we fully activate the array. 
- */ - struct md_thread *thread; -}; - -/* - * Our supported algorithms - */ -#define ALGORITHM_LEFT_ASYMMETRIC 0 /* Rotating Parity N with Data Restart */ -#define ALGORITHM_RIGHT_ASYMMETRIC 1 /* Rotating Parity 0 with Data Restart */ -#define ALGORITHM_LEFT_SYMMETRIC 2 /* Rotating Parity N with Data Continuation */ -#define ALGORITHM_RIGHT_SYMMETRIC 3 /* Rotating Parity 0 with Data Continuation */ - -/* Define non-rotating (raid4) algorithms. These allow - * conversion of raid4 to raid5. - */ -#define ALGORITHM_PARITY_0 4 /* P or P,Q are initial devices */ -#define ALGORITHM_PARITY_N 5 /* P or P,Q are final devices. */ - -/* DDF RAID6 layouts differ from md/raid6 layouts in two ways. - * Firstly, the exact positioning of the parity block is slightly - * different between the 'LEFT_*' modes of md and the "_N_*" modes - * of DDF. - * Secondly, the order of data blocks over which the Q syndrome is computed - * is different. - * Consequently we have different layouts for DDF/raid6 than md/raid6. - * These layouts are from the DDFv1.2 spec. - * Interestingly DDFv1.2-Errata-A does not specify N_CONTINUE but - * leaves RLQ=3 as 'Vendor Specific' - */ - -#define ALGORITHM_ROTATING_ZERO_RESTART 8 /* DDF PRL=6 RLQ=1 */ -#define ALGORITHM_ROTATING_N_RESTART 9 /* DDF PRL=6 RLQ=2 */ -#define ALGORITHM_ROTATING_N_CONTINUE 10 /* DDF PRL=6 RLQ=3 */ - - -/* For every RAID5 algorithm we define a RAID6 algorithm - * with exactly the same layout for data and parity, and - * with the Q block always on the last device (N-1). - * This allows trivial conversion from RAID5 to RAID6. - */ -#define ALGORITHM_LEFT_ASYMMETRIC_6 16 -#define ALGORITHM_RIGHT_ASYMMETRIC_6 17 -#define ALGORITHM_LEFT_SYMMETRIC_6 18 -#define ALGORITHM_RIGHT_SYMMETRIC_6 19 -#define ALGORITHM_PARITY_0_6 20 -#define ALGORITHM_PARITY_N_6 ALGORITHM_PARITY_N - -static inline int algorithm_valid_raid5(int layout) -{ - return (layout >= 0) && - (layout <= 5); -} -static inline int algorithm_valid_raid6(int layout) -{ - return (layout >= 0 && layout <= 5) - || - (layout >= 8 && layout <= 10) - || - (layout >= 16 && layout <= 20); -} - -static inline int algorithm_is_DDF(int layout) -{ - return layout >= 8 && layout <= 10; -} - -extern int md_raid5_congested(struct mddev *mddev, int bits); -extern void md_raid5_kick_device(struct r5conf *conf); -extern int raid5_set_cache_size(struct mddev *mddev, int size);
-#endif
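
Editorial note: the ALGORITHM_* numbering above and the raid5-to-raid6 layout mapping applied by raid6_takeover() can be exercised outside the kernel. The sketch below is illustrative only and is not part of the driver: the raid5_to_raid6_layout() helper, main() and printf scaffolding are additions for this note, while the constants, the switch mapping and the algorithm_valid_raid6() ranges are copied from the code shown in this diff.

/*
 * Standalone user-space illustration (not kernel code).
 * Mirrors the ALGORITHM_* constants from raid5.h and the raid5 -> raid6
 * layout mapping used by raid6_takeover(), so the numbering can be
 * sanity-checked in isolation.
 */
#include <stdio.h>

#define ALGORITHM_LEFT_ASYMMETRIC	0
#define ALGORITHM_RIGHT_ASYMMETRIC	1
#define ALGORITHM_LEFT_SYMMETRIC	2
#define ALGORITHM_RIGHT_SYMMETRIC	3
#define ALGORITHM_PARITY_0		4
#define ALGORITHM_PARITY_N		5

#define ALGORITHM_LEFT_ASYMMETRIC_6	16
#define ALGORITHM_RIGHT_ASYMMETRIC_6	17
#define ALGORITHM_LEFT_SYMMETRIC_6	18
#define ALGORITHM_RIGHT_SYMMETRIC_6	19
#define ALGORITHM_PARITY_0_6		20
#define ALGORITHM_PARITY_N_6		ALGORITHM_PARITY_N

/* Same mapping as the switch in raid6_takeover(); -1 means "no takeover". */
static int raid5_to_raid6_layout(int layout)
{
	switch (layout) {
	case ALGORITHM_LEFT_ASYMMETRIC:	 return ALGORITHM_LEFT_ASYMMETRIC_6;
	case ALGORITHM_RIGHT_ASYMMETRIC: return ALGORITHM_RIGHT_ASYMMETRIC_6;
	case ALGORITHM_LEFT_SYMMETRIC:	 return ALGORITHM_LEFT_SYMMETRIC_6;
	case ALGORITHM_RIGHT_SYMMETRIC:	 return ALGORITHM_RIGHT_SYMMETRIC_6;
	case ALGORITHM_PARITY_0:	 return ALGORITHM_PARITY_0_6;
	case ALGORITHM_PARITY_N:	 return ALGORITHM_PARITY_N;
	default:			 return -1;
	}
}

/* Same ranges as algorithm_valid_raid6() in raid5.h. */
static int layout_valid_raid6(int layout)
{
	return (layout >= 0 && layout <= 5) ||
	       (layout >= 8 && layout <= 10) ||
	       (layout >= 16 && layout <= 20);
}

int main(void)
{
	int layout;

	for (layout = 0; layout <= 5; layout++) {
		int l6 = raid5_to_raid6_layout(layout);

		printf("raid5 layout %d -> raid6 layout %d (valid for raid6: %s)\n",
		       layout, l6, layout_valid_raid6(l6) ? "yes" : "no");
	}
	return 0;
}

Each *_6 layout keeps the raid5 data and parity placement and puts the Q block on the last device, which is why raid6_takeover() only needs to pick the corresponding layout number, set new_level to 6 and grow raid_disks by one.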