21 files changed, 0 insertions, 17485 deletions
diff --git a/ANDROID_3.4.5/fs/ceph/Kconfig b/ANDROID_3.4.5/fs/ceph/Kconfig
deleted file mode 100644
index 9eb134ea..00000000
--- a/ANDROID_3.4.5/fs/ceph/Kconfig
+++ /dev/null
@@ -1,18 +0,0 @@
-config CEPH_FS
-        tristate "Ceph distributed file system (EXPERIMENTAL)"
-	depends on INET && EXPERIMENTAL
-	select CEPH_LIB
-	select LIBCRC32C
-	select CRYPTO_AES
-	select CRYPTO
-	default n
-	help
-	  Choose Y or M here to include support for mounting the
-	  experimental Ceph distributed file system.  Ceph is an extremely
-	  scalable file system designed to provide high performance,
-	  reliable access to petabytes of storage.
-
-	  More information at http://ceph.newdream.net/.
-
-	  If unsure, say N.
-
diff --git a/ANDROID_3.4.5/fs/ceph/Makefile b/ANDROID_3.4.5/fs/ceph/Makefile
deleted file mode 100644
index bd352125..00000000
--- a/ANDROID_3.4.5/fs/ceph/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-#
-# Makefile for CEPH filesystem.
-#
-
-obj-$(CONFIG_CEPH_FS) += ceph.o
-
-ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
-	export.o caps.o snap.o xattr.o \
-	mds_client.o mdsmap.o strings.o ceph_frag.o \
-	debugfs.o
-
diff --git a/ANDROID_3.4.5/fs/ceph/addr.c b/ANDROID_3.4.5/fs/ceph/addr.c
deleted file mode 100644
index 173b1d22..00000000
--- a/ANDROID_3.4.5/fs/ceph/addr.c
+++ /dev/null
@@ -1,1234 +0,0 @@
-#include <linux/ceph/ceph_debug.h>
-
-#include <linux/backing-dev.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/writeback.h>	/* generic_writepages */
-#include <linux/slab.h>
-#include <linux/pagevec.h>
-#include <linux/task_io_accounting_ops.h>
-
-#include "super.h"
-#include "mds_client.h"
-#include <linux/ceph/osd_client.h>
-
-/*
- * Ceph address space ops.
- *
- * There are a few funny things going on here.
- *
- * The page->private field is used to reference a struct
- * ceph_snap_context for _every_ dirty page.  This indicates which
- * snapshot the page was logically dirtied in, and thus which snap
- * context needs to be associated with the osd write during writeback.
- *
- * Similarly, struct ceph_inode_info maintains a set of counters to
- * count dirty pages on the inode.  In the absence of snapshots,
- * i_wrbuffer_ref == i_wrbuffer_ref_head == the dirty page count.
- *
- * When a snapshot is taken (that is, when the client receives
- * notification that a snapshot was taken), each inode with caps and
- * with dirty pages (dirty pages implies there is a cap) gets a new
- * ceph_cap_snap in the i_cap_snaps list (which is sorted in ascending
- * order, new snaps go to the tail).  The i_wrbuffer_ref_head count is
- * moved to capsnap->dirty. (Unless a sync write is currently in
- * progress.  In that case, the capsnap is said to be "pending", new
- * writes cannot start, and the capsnap isn't "finalized" until the
- * write completes (or fails) and a final size/mtime for the inode for
- * that snap can be settled upon.)  i_wrbuffer_ref_head is reset to 0.
- *
- * On writeback, we must submit writes to the osd IN SNAP ORDER.  So,
- * we look for the first capsnap in i_cap_snaps and write out pages in
- * that snap context _only_.  Then we move on to the next capsnap,
- * eventually reaching the "live" or "head" context (i.e., pages that
- * are not yet snapped) and are writing the most recently dirtied
- * pages.
- *
- * Invalidate and so forth must take care to ensure the dirty page
- * accounting is preserved.
- */
-
-#define CONGESTION_ON_THRESH(congestion_kb) (congestion_kb >> (PAGE_SHIFT-10))
-#define CONGESTION_OFF_THRESH(congestion_kb)				\
-	(CONGESTION_ON_THRESH(congestion_kb) -				\
-	 (CONGESTION_ON_THRESH(congestion_kb) >> 2))
-
-
-
-/*
- * Dirty a page.  Optimistically adjust accounting, on the assumption
- * that we won't race with invalidate.  If we do, readjust.
- */
-static int ceph_set_page_dirty(struct page *page)
-{
-	struct address_space *mapping = page->mapping;
-	struct inode *inode;
-	struct ceph_inode_info *ci;
-	int undo = 0;
-	struct ceph_snap_context *snapc;
-
-	if (unlikely(!mapping))
-		return !TestSetPageDirty(page);
-
-	if (TestSetPageDirty(page)) {
-		dout("%p set_page_dirty %p idx %lu -- already dirty\n",
-		     mapping->host, page, page->index);
-		return 0;
-	}
-
-	inode = mapping->host;
-	ci = ceph_inode(inode);
-
-	/*
-	 * Note that we're grabbing a snapc ref here without holding
-	 * any locks!
-	 */
-	snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
-
-	/* dirty the head */
-	spin_lock(&ci->i_ceph_lock);
-	if (ci->i_head_snapc == NULL)
-		ci->i_head_snapc = ceph_get_snap_context(snapc);
-	++ci->i_wrbuffer_ref_head;
-	if (ci->i_wrbuffer_ref == 0)
-		ihold(inode);
-	++ci->i_wrbuffer_ref;
-	dout("%p set_page_dirty %p idx %lu head %d/%d -> %d/%d "
-	     "snapc %p seq %lld (%d snaps)\n",
-	     mapping->host, page, page->index,
-	     ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1,
-	     ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
-	     snapc, snapc->seq, snapc->num_snaps);
-	spin_unlock(&ci->i_ceph_lock);
-
-	/* now adjust page */
-	spin_lock_irq(&mapping->tree_lock);
-	if (page->mapping) {	/* Race with truncate? */
-		WARN_ON_ONCE(!PageUptodate(page));
-		account_page_dirtied(page, page->mapping);
-		radix_tree_tag_set(&mapping->page_tree,
-				page_index(page), PAGECACHE_TAG_DIRTY);
-
-		/*
-		 * Reference snap context in page->private.  Also set
-		 * PagePrivate so that we get invalidatepage callback.
-		 */
-		page->private = (unsigned long)snapc;
-		SetPagePrivate(page);
-	} else {
-		dout("ANON set_page_dirty %p (raced truncate?)\n", page);
-		undo = 1;
-	}
-
-	spin_unlock_irq(&mapping->tree_lock);
-
-	if (undo)
-		/* whoops, we failed to dirty the page */
-		ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
-
-	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-
-	BUG_ON(!PageDirty(page));
-	return 1;
-}
-
-/*
- * If we are truncating the full page (i.e. offset == 0), adjust the
- * dirty page counters appropriately.  Only called if there is private
- * data on the page.
- */
-static void ceph_invalidatepage(struct page *page, unsigned long offset)
-{
-	struct inode *inode;
-	struct ceph_inode_info *ci;
-	struct ceph_snap_context *snapc = (void *)page->private;
-
-	BUG_ON(!PageLocked(page));
-	BUG_ON(!page->private);
-	BUG_ON(!PagePrivate(page));
-	BUG_ON(!page->mapping);
-
-	inode = page->mapping->host;
-
-	/*
-	 * We can get non-dirty pages here due to races between
-	 * set_page_dirty and truncate_complete_page; just spit out a
-	 * warning, in case we end up with accounting problems later.
-	 */
-	if (!PageDirty(page))
-		pr_err("%p invalidatepage %p page not dirty\n", inode, page);
-
-	if (offset == 0)
-		ClearPageChecked(page);
-
-	ci = ceph_inode(inode);
-	if (offset == 0) {
-		dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
-		     inode, page, page->index, offset);
-		ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
-		ceph_put_snap_context(snapc);
-		page->private = 0;
-		ClearPagePrivate(page);
-	} else {
-		dout("%p invalidatepage %p idx %lu partial dirty page\n",
-		     inode, page, page->index);
-	}
-}
-
-/* just a sanity check */
-static int ceph_releasepage(struct page *page, gfp_t g)
-{
-	struct inode *inode = page->mapping ? page->mapping->host : NULL;
-	dout("%p releasepage %p idx %lu\n", inode, page, page->index);
-	WARN_ON(PageDirty(page));
-	WARN_ON(page->private);
-	WARN_ON(PagePrivate(page));
-	return 0;
-}
-
-/*
- * read a single page, without unlocking it.
- */
-static int readpage_nounlock(struct file *filp, struct page *page)
-{
-	struct inode *inode = filp->f_dentry->d_inode;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_osd_client *osdc = 
-		&ceph_inode_to_client(inode)->client->osdc;
-	int err = 0;
-	u64 len = PAGE_CACHE_SIZE;
-
-	dout("readpage inode %p file %p page %p index %lu\n",
-	     inode, filp, page, page->index);
-	err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
-				  page->index << PAGE_CACHE_SHIFT, &len,
-				  ci->i_truncate_seq, ci->i_truncate_size,
-				  &page, 1, 0);
-	if (err == -ENOENT)
-		err = 0;
-	if (err < 0) {
-		SetPageError(page);
-		goto out;
-	} else if (err < PAGE_CACHE_SIZE) {
-		/* zero fill remainder of page */
-		zero_user_segment(page, err, PAGE_CACHE_SIZE);
-	}
-	SetPageUptodate(page);
-
-out:
-	return err < 0 ? err : 0;
-}
-
-static int ceph_readpage(struct file *filp, struct page *page)
-{
-	int r = readpage_nounlock(filp, page);
-	unlock_page(page);
-	return r;
-}
-
-/*
- * Finish an async read(ahead) op.
- */
-static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
-{
-	struct inode *inode = req->r_inode;
-	struct ceph_osd_reply_head *replyhead;
-	int rc, bytes;
-	int i;
-
-	/* parse reply */
-	replyhead = msg->front.iov_base;
-	WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
-	rc = le32_to_cpu(replyhead->result);
-	bytes = le32_to_cpu(msg->hdr.data_len);
-
-	dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
-
-	/* unlock all pages, zeroing any data we didn't read */
-	for (i = 0; i < req->r_num_pages; i++, bytes -= PAGE_CACHE_SIZE) {
-		struct page *page = req->r_pages[i];
-
-		if (bytes < (int)PAGE_CACHE_SIZE) {
-			/* zero (remainder of) page */
-			int s = bytes < 0 ? 0 : bytes;
-			zero_user_segment(page, s, PAGE_CACHE_SIZE);
-		}
- 		dout("finish_read %p uptodate %p idx %lu\n", inode, page,
-		     page->index);
-		flush_dcache_page(page);
-		SetPageUptodate(page);
-		unlock_page(page);
-		page_cache_release(page);
-	}
-	kfree(req->r_pages);
-}
-
-/*
- * start an async read(ahead) operation.  return nr_pages we submitted
- * a read for on success, or negative error code.
- */
-static int start_read(struct inode *inode, struct list_head *page_list, int max)
-{
-	struct ceph_osd_client *osdc =
-		&ceph_inode_to_client(inode)->client->osdc;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct page *page = list_entry(page_list->prev, struct page, lru);
-	struct ceph_osd_request *req;
-	u64 off;
-	u64 len;
-	int i;
-	struct page **pages;
-	pgoff_t next_index;
-	int nr_pages = 0;
-	int ret;
-
-	off = page->index << PAGE_CACHE_SHIFT;
-
-	/* count pages */
-	next_index = page->index;
-	list_for_each_entry_reverse(page, page_list, lru) {
-		if (page->index != next_index)
-			break;
-		nr_pages++;
-		next_index++;
-		if (max && nr_pages == max)
-			break;
-	}
-	len = nr_pages << PAGE_CACHE_SHIFT;
-	dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages,
-	     off, len);
-
-	req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode),
-				    off, &len,
-				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
-				    NULL, 0,
-				    ci->i_truncate_seq, ci->i_truncate_size,
-				    NULL, false, 1, 0);
-	if (!req)
-		return -ENOMEM;
-
-	/* build page vector */
-	nr_pages = len >> PAGE_CACHE_SHIFT;
-	pages = kmalloc(sizeof(*pages) * nr_pages, GFP_NOFS);
-	ret = -ENOMEM;
-	if (!pages)
-		goto out;
-	for (i = 0; i < nr_pages; ++i) {
-		page = list_entry(page_list->prev, struct page, lru);
-		BUG_ON(PageLocked(page));
-		list_del(&page->lru);
-		
- 		dout("start_read %p adding %p idx %lu\n", inode, page,
-		     page->index);
-		if (add_to_page_cache_lru(page, &inode->i_data, page->index,
-					  GFP_NOFS)) {
-			page_cache_release(page);
-			dout("start_read %p add_to_page_cache failed %p\n",
-			     inode, page);
-			nr_pages = i;
-			goto out_pages;
-		}
-		pages[i] = page;
-	}
-	req->r_pages = pages;
-	req->r_num_pages = nr_pages;
-	req->r_callback = finish_read;
-	req->r_inode = inode;
-
-	dout("start_read %p starting %p %lld~%lld\n", inode, req, off, len);
-	ret = ceph_osdc_start_request(osdc, req, false);
-	if (ret < 0)
-		goto out_pages;
-	ceph_osdc_put_request(req);
-	return nr_pages;
-
-out_pages:
-	ceph_release_page_vector(pages, nr_pages);
-out:
-	ceph_osdc_put_request(req);
-	return ret;
-}
-
-
-/*
- * Read multiple pages.  Leave pages we don't read + unlock in page_list;
- * the caller (VM) cleans them up.
- */
-static int ceph_readpages(struct file *file, struct address_space *mapping,
-			  struct list_head *page_list, unsigned nr_pages)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-	int rc = 0;
-	int max = 0;
-
-	if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
-		max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
-			>> PAGE_SHIFT;
-
-	dout("readpages %p file %p nr_pages %d max %d\n", inode, file, nr_pages,
-	     max);
-	while (!list_empty(page_list)) {
-		rc = start_read(inode, page_list, max);
-		if (rc < 0)
-			goto out;
-		BUG_ON(rc == 0);
-	}
-out:
-	dout("readpages %p file %p ret %d\n", inode, file, rc);
-	return rc;
-}
-
-/*
- * Get ref for the oldest snapc for an inode with dirty data... that is, the
- * only snap context we are allowed to write back.
- */
-static struct ceph_snap_context *get_oldest_context(struct inode *inode,
-						    u64 *snap_size)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_snap_context *snapc = NULL;
-	struct ceph_cap_snap *capsnap = NULL;
-
-	spin_lock(&ci->i_ceph_lock);
-	list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
-		dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
-		     capsnap->context, capsnap->dirty_pages);
-		if (capsnap->dirty_pages) {
-			snapc = ceph_get_snap_context(capsnap->context);
-			if (snap_size)
-				*snap_size = capsnap->size;
-			break;
-		}
-	}
-	if (!snapc && ci->i_wrbuffer_ref_head) {
-		snapc = ceph_get_snap_context(ci->i_head_snapc);
-		dout(" head snapc %p has %d dirty pages\n",
-		     snapc, ci->i_wrbuffer_ref_head);
-	}
-	spin_unlock(&ci->i_ceph_lock);
-	return snapc;
-}
-
-/*
- * Write a single page, but leave the page locked.
- *
- * If we get a write error, set the page error bit, but still adjust the
- * dirty page accounting (i.e., page is no longer dirty).
- */
-static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
-{
-	struct inode *inode;
-	struct ceph_inode_info *ci;
-	struct ceph_fs_client *fsc;
-	struct ceph_osd_client *osdc;
-	loff_t page_off = page->index << PAGE_CACHE_SHIFT;
-	int len = PAGE_CACHE_SIZE;
-	loff_t i_size;
-	int err = 0;
-	struct ceph_snap_context *snapc, *oldest;
-	u64 snap_size = 0;
-	long writeback_stat;
-
-	dout("writepage %p idx %lu\n", page, page->index);
-
-	if (!page->mapping || !page->mapping->host) {
-		dout("writepage %p - no mapping\n", page);
-		return -EFAULT;
-	}
-	inode = page->mapping->host;
-	ci = ceph_inode(inode);
-	fsc = ceph_inode_to_client(inode);
-	osdc = &fsc->client->osdc;
-
-	/* verify this is a writeable snap context */
-	snapc = (void *)page->private;
-	if (snapc == NULL) {
-		dout("writepage %p page %p not dirty?\n", inode, page);
-		goto out;
-	}
-	oldest = get_oldest_context(inode, &snap_size);
-	if (snapc->seq > oldest->seq) {
-		dout("writepage %p page %p snapc %p not writeable - noop\n",
-		     inode, page, (void *)page->private);
-		/* we should only noop if called by kswapd */
-		WARN_ON((current->flags & PF_MEMALLOC) == 0);
-		ceph_put_snap_context(oldest);
-		goto out;
-	}
-	ceph_put_snap_context(oldest);
-
-	/* is this a partial page at end of file? */
-	if (snap_size)
-		i_size = snap_size;
-	else
-		i_size = i_size_read(inode);
-	if (i_size < page_off + len)
-		len = i_size - page_off;
-
-	dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
-	     inode, page, page->index, page_off, len, snapc);
-
-	writeback_stat = atomic_long_inc_return(&fsc->writeback_count);
-	if (writeback_stat >
-	    CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
-		set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
-
-	set_page_writeback(page);
-	err = ceph_osdc_writepages(osdc, ceph_vino(inode),
-				   &ci->i_layout, snapc,
-				   page_off, len,
-				   ci->i_truncate_seq, ci->i_truncate_size,
-				   &inode->i_mtime,
-				   &page, 1, 0, 0, true);
-	if (err < 0) {
-		dout("writepage setting page/mapping error %d %p\n", err, page);
-		SetPageError(page);
-		mapping_set_error(&inode->i_data, err);
-		if (wbc)
-			wbc->pages_skipped++;
-	} else {
-		dout("writepage cleaned page %p\n", page);
-		err = 0;  /* vfs expects us to return 0 */
-	}
-	page->private = 0;
-	ClearPagePrivate(page);
-	end_page_writeback(page);
-	ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
-	ceph_put_snap_context(snapc);  /* page's reference */
-out:
-	return err;
-}
-
-static int ceph_writepage(struct page *page, struct writeback_control *wbc)
-{
-	int err;
-	struct inode *inode = page->mapping->host;
-	BUG_ON(!inode);
-	ihold(inode);
-	err = writepage_nounlock(page, wbc);
-	unlock_page(page);
-	iput(inode);
-	return err;
-}
-
-
-/*
- * lame release_pages helper.  release_pages() isn't exported to
- * modules.
- */
-static void ceph_release_pages(struct page **pages, int num)
-{
-	struct pagevec pvec;
-	int i;
-
-	pagevec_init(&pvec, 0);
-	for (i = 0; i < num; i++) {
-		if (pagevec_add(&pvec, pages[i]) == 0)
-			pagevec_release(&pvec);
-	}
-	pagevec_release(&pvec);
-}
-
-
-/*
- * async writeback completion handler.
- *
- * If we get an error, set the mapping error bit, but not the individual
- * page error bits.
- */
-static void writepages_finish(struct ceph_osd_request *req,
-			      struct ceph_msg *msg)
-{
-	struct inode *inode = req->r_inode;
-	struct ceph_osd_reply_head *replyhead;
-	struct ceph_osd_op *op;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	unsigned wrote;
-	struct page *page;
-	int i;
-	struct ceph_snap_context *snapc = req->r_snapc;
-	struct address_space *mapping = inode->i_mapping;
-	__s32 rc = -EIO;
-	u64 bytes = 0;
-	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-	long writeback_stat;
-	unsigned issued = ceph_caps_issued(ci);
-
-	/* parse reply */
-	replyhead = msg->front.iov_base;
-	WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
-	op = (void *)(replyhead + 1);
-	rc = le32_to_cpu(replyhead->result);
-	bytes = le64_to_cpu(op->extent.length);
-
-	if (rc >= 0) {
-		/*
-		 * Assume we wrote the pages we originally sent.  The
-		 * osd might reply with fewer pages if our writeback
-		 * raced with a truncation and was adjusted at the osd,
-		 * so don't believe the reply.
-		 */
-		wrote = req->r_num_pages;
-	} else {
-		wrote = 0;
-		mapping_set_error(mapping, rc);
-	}
-	dout("writepages_finish %p rc %d bytes %llu wrote %d (pages)\n",
-	     inode, rc, bytes, wrote);
-
-	/* clean all pages */
-	for (i = 0; i < req->r_num_pages; i++) {
-		page = req->r_pages[i];
-		BUG_ON(!page);
-		WARN_ON(!PageUptodate(page));
-
-		writeback_stat =
-			atomic_long_dec_return(&fsc->writeback_count);
-		if (writeback_stat <
-		    CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
-			clear_bdi_congested(&fsc->backing_dev_info,
-					    BLK_RW_ASYNC);
-
-		ceph_put_snap_context((void *)page->private);
-		page->private = 0;
-		ClearPagePrivate(page);
-		dout("unlocking %d %p\n", i, page);
-		end_page_writeback(page);
-
-		/*
-		 * We lost the cache cap, need to truncate the page before
-		 * it is unlocked, otherwise we'd truncate it later in the
-		 * page truncation thread, possibly losing some data that
-		 * raced its way in
-		 */
-		if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
-			generic_error_remove_page(inode->i_mapping, page);
-
-		unlock_page(page);
-	}
-	dout("%p wrote+cleaned %d pages\n", inode, wrote);
-	ceph_put_wrbuffer_cap_refs(ci, req->r_num_pages, snapc);
-
-	ceph_release_pages(req->r_pages, req->r_num_pages);
-	if (req->r_pages_from_pool)
-		mempool_free(req->r_pages,
-			     ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool);
-	else
-		kfree(req->r_pages);
-	ceph_osdc_put_request(req);
-}
-
-/*
- * allocate a page vec, either directly, or if necessary, via a the
- * mempool.  we avoid the mempool if we can because req->r_num_pages
- * may be less than the maximum write size.
- */
-static void alloc_page_vec(struct ceph_fs_client *fsc,
-			   struct ceph_osd_request *req)
-{
-	req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages,
-			       GFP_NOFS);
-	if (!req->r_pages) {
-		req->r_pages = mempool_alloc(fsc->wb_pagevec_pool, GFP_NOFS);
-		req->r_pages_from_pool = 1;
-		WARN_ON(!req->r_pages);
-	}
-}
-
-/*
- * initiate async writeback
- */
-static int ceph_writepages_start(struct address_space *mapping,
-				 struct writeback_control *wbc)
-{
-	struct inode *inode = mapping->host;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_fs_client *fsc;
-	pgoff_t index, start, end;
-	int range_whole = 0;
-	int should_loop = 1;
-	pgoff_t max_pages = 0, max_pages_ever = 0;
-	struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
-	struct pagevec pvec;
-	int done = 0;
-	int rc = 0;
-	unsigned wsize = 1 << inode->i_blkbits;
-	struct ceph_osd_request *req = NULL;
-	int do_sync;
-	u64 snap_size = 0;
-
-	/*
-	 * Include a 'sync' in the OSD request if this is a data
-	 * integrity write (e.g., O_SYNC write or fsync()), or if our
-	 * cap is being revoked.
-	 */
-	do_sync = wbc->sync_mode == WB_SYNC_ALL;
-	if (ceph_caps_revoking(ci, CEPH_CAP_FILE_BUFFER))
-		do_sync = 1;
-	dout("writepages_start %p dosync=%d (mode=%s)\n",
-	     inode, do_sync,
-	     wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
-	     (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
-
-	fsc = ceph_inode_to_client(inode);
-	if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
-		pr_warning("writepage_start %p on forced umount\n", inode);
-		return -EIO; /* we're in a forced umount, don't write! */
-	}
-	if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize)
-		wsize = fsc->mount_options->wsize;
-	if (wsize < PAGE_CACHE_SIZE)
-		wsize = PAGE_CACHE_SIZE;
-	max_pages_ever = wsize >> PAGE_CACHE_SHIFT;
-
-	pagevec_init(&pvec, 0);
-
-	/* where to start/end? */
-	if (wbc->range_cyclic) {
-		start = mapping->writeback_index; /* Start from prev offset */
-		end = -1;
-		dout(" cyclic, start at %lu\n", start);
-	} else {
-		start = wbc->range_start >> PAGE_CACHE_SHIFT;
-		end = wbc->range_end >> PAGE_CACHE_SHIFT;
-		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-			range_whole = 1;
-		should_loop = 0;
-		dout(" not cyclic, %lu to %lu\n", start, end);
-	}
-	index = start;
-
-retry:
-	/* find oldest snap context with dirty data */
-	ceph_put_snap_context(snapc);
-	snapc = get_oldest_context(inode, &snap_size);
-	if (!snapc) {
-		/* hmm, why does writepages get called when there
-		   is no dirty data? */
-		dout(" no snap context with dirty data?\n");
-		goto out;
-	}
-	dout(" oldest snapc is %p seq %lld (%d snaps)\n",
-	     snapc, snapc->seq, snapc->num_snaps);
-	if (last_snapc && snapc != last_snapc) {
-		/* if we switched to a newer snapc, restart our scan at the
-		 * start of the original file range. */
-		dout("  snapc differs from last pass, restarting at %lu\n",
-		     index);
-		index = start;
-	}
-	last_snapc = snapc;
-
-	while (!done && index <= end) {
-		unsigned i;
-		int first;
-		pgoff_t next;
-		int pvec_pages, locked_pages;
-		struct page *page;
-		int want;
-		u64 offset, len;
-		struct ceph_osd_request_head *reqhead;
-		struct ceph_osd_op *op;
-		long writeback_stat;
-
-		next = 0;
-		locked_pages = 0;
-		max_pages = max_pages_ever;
-
-get_more_pages:
-		first = -1;
-		want = min(end - index,
-			   min((pgoff_t)PAGEVEC_SIZE,
-			       max_pages - (pgoff_t)locked_pages) - 1)
-			+ 1;
-		pvec_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-						PAGECACHE_TAG_DIRTY,
-						want);
-		dout("pagevec_lookup_tag got %d\n", pvec_pages);
-		if (!pvec_pages && !locked_pages)
-			break;
-		for (i = 0; i < pvec_pages && locked_pages < max_pages; i++) {
-			page = pvec.pages[i];
-			dout("? %p idx %lu\n", page, page->index);
-			if (locked_pages == 0)
-				lock_page(page);  /* first page */
-			else if (!trylock_page(page))
-				break;
-
-			/* only dirty pages, or our accounting breaks */
-			if (unlikely(!PageDirty(page)) ||
-			    unlikely(page->mapping != mapping)) {
-				dout("!dirty or !mapping %p\n", page);
-				unlock_page(page);
-				break;
-			}
-			if (!wbc->range_cyclic && page->index > end) {
-				dout("end of range %p\n", page);
-				done = 1;
-				unlock_page(page);
-				break;
-			}
-			if (next && (page->index != next)) {
-				dout("not consecutive %p\n", page);
-				unlock_page(page);
-				break;
-			}
-			if (wbc->sync_mode != WB_SYNC_NONE) {
-				dout("waiting on writeback %p\n", page);
-				wait_on_page_writeback(page);
-			}
-			if ((snap_size && page_offset(page) > snap_size) ||
-			    (!snap_size &&
-			     page_offset(page) > i_size_read(inode))) {
-				dout("%p page eof %llu\n", page, snap_size ?
-				     snap_size : i_size_read(inode));
-				done = 1;
-				unlock_page(page);
-				break;
-			}
-			if (PageWriteback(page)) {
-				dout("%p under writeback\n", page);
-				unlock_page(page);
-				break;
-			}
-
-			/* only if matching snap context */
-			pgsnapc = (void *)page->private;
-			if (pgsnapc->seq > snapc->seq) {
-				dout("page snapc %p %lld > oldest %p %lld\n",
-				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
-				unlock_page(page);
-				if (!locked_pages)
-					continue; /* keep looking for snap */
-				break;
-			}
-
-			if (!clear_page_dirty_for_io(page)) {
-				dout("%p !clear_page_dirty_for_io\n", page);
-				unlock_page(page);
-				break;
-			}
-
-			/* ok */
-			if (locked_pages == 0) {
-				/* prepare async write request */
-				offset = (unsigned long long)page->index
-					<< PAGE_CACHE_SHIFT;
-				len = wsize;
-				req = ceph_osdc_new_request(&fsc->client->osdc,
-					    &ci->i_layout,
-					    ceph_vino(inode),
-					    offset, &len,
-					    CEPH_OSD_OP_WRITE,
-					    CEPH_OSD_FLAG_WRITE |
-						    CEPH_OSD_FLAG_ONDISK,
-					    snapc, do_sync,
-					    ci->i_truncate_seq,
-					    ci->i_truncate_size,
-					    &inode->i_mtime, true, 1, 0);
-
-				if (!req) {
-					rc = -ENOMEM;
-					unlock_page(page);
-					break;
-				}
-
-				max_pages = req->r_num_pages;
-
-				alloc_page_vec(fsc, req);
-				req->r_callback = writepages_finish;
-				req->r_inode = inode;
-			}
-
-			/* note position of first page in pvec */
-			if (first < 0)
-				first = i;
-			dout("%p will write page %p idx %lu\n",
-			     inode, page, page->index);
-
-			writeback_stat =
-			       atomic_long_inc_return(&fsc->writeback_count);
-			if (writeback_stat > CONGESTION_ON_THRESH(
-				    fsc->mount_options->congestion_kb)) {
-				set_bdi_congested(&fsc->backing_dev_info,
-						  BLK_RW_ASYNC);
-			}
-
-			set_page_writeback(page);
-			req->r_pages[locked_pages] = page;
-			locked_pages++;
-			next = page->index + 1;
-		}
-
-		/* did we get anything? */
-		if (!locked_pages)
-			goto release_pvec_pages;
-		if (i) {
-			int j;
-			BUG_ON(!locked_pages || first < 0);
-
-			if (pvec_pages && i == pvec_pages &&
-			    locked_pages < max_pages) {
-				dout("reached end pvec, trying for more\n");
-				pagevec_reinit(&pvec);
-				goto get_more_pages;
-			}
-
-			/* shift unused pages over in the pvec...  we
-			 * will need to release them below. */
-			for (j = i; j < pvec_pages; j++) {
-				dout(" pvec leftover page %p\n",
-				     pvec.pages[j]);
-				pvec.pages[j-i+first] = pvec.pages[j];
-			}
-			pvec.nr -= i-first;
-		}
-
-		/* submit the write */
-		offset = req->r_pages[0]->index << PAGE_CACHE_SHIFT;
-		len = min((snap_size ? snap_size : i_size_read(inode)) - offset,
-			  (u64)locked_pages << PAGE_CACHE_SHIFT);
-		dout("writepages got %d pages at %llu~%llu\n",
-		     locked_pages, offset, len);
-
-		/* revise final length, page count */
-		req->r_num_pages = locked_pages;
-		reqhead = req->r_request->front.iov_base;
-		op = (void *)(reqhead + 1);
-		op->extent.length = cpu_to_le64(len);
-		op->payload_len = cpu_to_le32(len);
-		req->r_request->hdr.data_len = cpu_to_le32(len);
-
-		rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
-		BUG_ON(rc);
-		req = NULL;
-
-		/* continue? */
-		index = next;
-		wbc->nr_to_write -= locked_pages;
-		if (wbc->nr_to_write <= 0)
-			done = 1;
-
-release_pvec_pages:
-		dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
-		     pvec.nr ? pvec.pages[0] : NULL);
-		pagevec_release(&pvec);
-
-		if (locked_pages && !done)
-			goto retry;
-	}
-
-	if (should_loop && !done) {
-		/* more to do; loop back to beginning of file */
-		dout("writepages looping back to beginning of file\n");
-		should_loop = 0;
-		index = 0;
-		goto retry;
-	}
-
-	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
-		mapping->writeback_index = index;
-
-out:
-	if (req)
-		ceph_osdc_put_request(req);
-	ceph_put_snap_context(snapc);
-	dout("writepages done, rc = %d\n", rc);
-	return rc;
-}
-
-
-
-/*
- * See if a given @snapc is either writeable, or already written.
- */
-static int context_is_writeable_or_written(struct inode *inode,
-					   struct ceph_snap_context *snapc)
-{
-	struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
-	int ret = !oldest || snapc->seq <= oldest->seq;
-
-	ceph_put_snap_context(oldest);
-	return ret;
-}
-
-/*
- * We are only allowed to write into/dirty the page if the page is
- * clean, or already dirty within the same snap context.
- *
- * called with page locked.
- * return success with page locked,
- * or any failure (incl -EAGAIN) with page unlocked.
- */
-static int ceph_update_writeable_page(struct file *file,
-			    loff_t pos, unsigned len,
-			    struct page *page)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
-	loff_t page_off = pos & PAGE_CACHE_MASK;
-	int pos_in_page = pos & ~PAGE_CACHE_MASK;
-	int end_in_page = pos_in_page + len;
-	loff_t i_size;
-	int r;
-	struct ceph_snap_context *snapc, *oldest;
-
-retry_locked:
-	/* writepages currently holds page lock, but if we change that later, */
-	wait_on_page_writeback(page);
-
-	/* check snap context */
-	BUG_ON(!ci->i_snap_realm);
-	down_read(&mdsc->snap_rwsem);
-	BUG_ON(!ci->i_snap_realm->cached_context);
-	snapc = (void *)page->private;
-	if (snapc && snapc != ci->i_head_snapc) {
-		/*
-		 * this page is already dirty in another (older) snap
-		 * context!  is it writeable now?
-		 */
-		oldest = get_oldest_context(inode, NULL);
-		up_read(&mdsc->snap_rwsem);
-
-		if (snapc->seq > oldest->seq) {
-			ceph_put_snap_context(oldest);
-			dout(" page %p snapc %p not current or oldest\n",
-			     page, snapc);
-			/*
-			 * queue for writeback, and wait for snapc to
-			 * be writeable or written
-			 */
-			snapc = ceph_get_snap_context(snapc);
-			unlock_page(page);
-			ceph_queue_writeback(inode);
-			r = wait_event_interruptible(ci->i_cap_wq,
-			       context_is_writeable_or_written(inode, snapc));
-			ceph_put_snap_context(snapc);
-			if (r == -ERESTARTSYS)
-				return r;
-			return -EAGAIN;
-		}
-		ceph_put_snap_context(oldest);
-
-		/* yay, writeable, do it now (without dropping page lock) */
-		dout(" page %p snapc %p not current, but oldest\n",
-		     page, snapc);
-		if (!clear_page_dirty_for_io(page))
-			goto retry_locked;
-		r = writepage_nounlock(page, NULL);
-		if (r < 0)
-			goto fail_nosnap;
-		goto retry_locked;
-	}
-
-	if (PageUptodate(page)) {
-		dout(" page %p already uptodate\n", page);
-		return 0;
-	}
-
-	/* full page? */
-	if (pos_in_page == 0 && len == PAGE_CACHE_SIZE)
-		return 0;
-
-	/* past end of file? */
-	i_size = inode->i_size;   /* caller holds i_mutex */
-
-	if (i_size + len > inode->i_sb->s_maxbytes) {
-		/* file is too big */
-		r = -EINVAL;
-		goto fail;
-	}
-
-	if (page_off >= i_size ||
-	    (pos_in_page == 0 && (pos+len) >= i_size &&
-	     end_in_page - pos_in_page != PAGE_CACHE_SIZE)) {
-		dout(" zeroing %p 0 - %d and %d - %d\n",
-		     page, pos_in_page, end_in_page, (int)PAGE_CACHE_SIZE);
-		zero_user_segments(page,
-				   0, pos_in_page,
-				   end_in_page, PAGE_CACHE_SIZE);
-		return 0;
-	}
-
-	/* we need to read it. */
-	up_read(&mdsc->snap_rwsem);
-	r = readpage_nounlock(file, page);
-	if (r < 0)
-		goto fail_nosnap;
-	goto retry_locked;
-
-fail:
-	up_read(&mdsc->snap_rwsem);
-fail_nosnap:
-	unlock_page(page);
-	return r;
-}
-
-/*
- * We are only allowed to write into/dirty the page if the page is
- * clean, or already dirty within the same snap context.
- */
-static int ceph_write_begin(struct file *file, struct address_space *mapping,
-			    loff_t pos, unsigned len, unsigned flags,
-			    struct page **pagep, void **fsdata)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct page *page;
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	int r;
-
-	do {
-		/* get a page */
-		page = grab_cache_page_write_begin(mapping, index, 0);
-		if (!page)
-			return -ENOMEM;
-		*pagep = page;
-
-		dout("write_begin file %p inode %p page %p %d~%d\n", file,
-		     inode, page, (int)pos, (int)len);
-
-		r = ceph_update_writeable_page(file, pos, len, page);
-	} while (r == -EAGAIN);
-
-	return r;
-}
-
-/*
- * we don't do anything in here that simple_write_end doesn't do
- * except adjust dirty page accounting and drop read lock on
- * mdsc->snap_rwsem.
- */
-static int ceph_write_end(struct file *file, struct address_space *mapping,
-			  loff_t pos, unsigned len, unsigned copied,
-			  struct page *page, void *fsdata)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
-	int check_cap = 0;
-
-	dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
-	     inode, page, (int)pos, (int)copied, (int)len);
-
-	/* zero the stale part of the page if we did a short copy */
-	if (copied < len)
-		zero_user_segment(page, from+copied, len);
-
-	/* did file size increase? */
-	/* (no need for i_size_read(); we caller holds i_mutex */
-	if (pos+copied > inode->i_size)
-		check_cap = ceph_inode_set_size(inode, pos+copied);
-
-	if (!PageUptodate(page))
-		SetPageUptodate(page);
-
-	set_page_dirty(page);
-
-	unlock_page(page);
-	up_read(&mdsc->snap_rwsem);
-	page_cache_release(page);
-
-	if (check_cap)
-		ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
-
-	return copied;
-}
-
-/*
- * we set .direct_IO to indicate direct io is supported, but since we
- * intercept O_DIRECT reads and writes early, this function should
- * never get called.
- */
-static ssize_t ceph_direct_io(int rw, struct kiocb *iocb,
-			      const struct iovec *iov,
-			      loff_t pos, unsigned long nr_segs)
-{
-	WARN_ON(1);
-	return -EINVAL;
-}
-
-const struct address_space_operations ceph_aops = {
-	.readpage = ceph_readpage,
-	.readpages = ceph_readpages,
-	.writepage = ceph_writepage,
-	.writepages = ceph_writepages_start,
-	.write_begin = ceph_write_begin,
-	.write_end = ceph_write_end,
-	.set_page_dirty = ceph_set_page_dirty,
-	.invalidatepage = ceph_invalidatepage,
-	.releasepage = ceph_releasepage,
-	.direct_IO = ceph_direct_io,
-};
-
-
-/*
- * vm ops
- */
-
-/*
- * Reuse write_begin here for simplicity.
- */
-static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	struct inode *inode = vma->vm_file->f_dentry->d_inode;
-	struct page *page = vmf->page;
-	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
-	loff_t off = page->index << PAGE_CACHE_SHIFT;
-	loff_t size, len;
-	int ret;
-
-	size = i_size_read(inode);
-	if (off + PAGE_CACHE_SIZE <= size)
-		len = PAGE_CACHE_SIZE;
-	else
-		len = size & ~PAGE_CACHE_MASK;
-
-	dout("page_mkwrite %p %llu~%llu page %p idx %lu\n", inode,
-	     off, len, page, page->index);
-
-	lock_page(page);
-
-	ret = VM_FAULT_NOPAGE;
-	if ((off > size) ||
-	    (page->mapping != inode->i_mapping))
-		goto out;
-
-	ret = ceph_update_writeable_page(vma->vm_file, off, len, page);
-	if (ret == 0) {
-		/* success.  we'll keep the page locked. */
-		set_page_dirty(page);
-		up_read(&mdsc->snap_rwsem);
-		ret = VM_FAULT_LOCKED;
-	} else {
-		if (ret == -ENOMEM)
-			ret = VM_FAULT_OOM;
-		else
-			ret = VM_FAULT_SIGBUS;
-	}
-out:
-	dout("page_mkwrite %p %llu~%llu = %d\n", inode, off, len, ret);
-	if (ret != VM_FAULT_LOCKED)
-		unlock_page(page);
-	return ret;
-}
-
-static struct vm_operations_struct ceph_vmops = {
-	.fault		= filemap_fault,
-	.page_mkwrite	= ceph_page_mkwrite,
-};
-
-int ceph_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	struct address_space *mapping = file->f_mapping;
-
-	if (!mapping->a_ops->readpage)
-		return -ENOEXEC;
-	file_accessed(file);
-	vma->vm_ops = &ceph_vmops;
-	vma->vm_flags |= VM_CAN_NONLINEAR;
-	return 0;
-}
diff --git a/ANDROID_3.4.5/fs/ceph/caps.c b/ANDROID_3.4.5/fs/ceph/caps.c
deleted file mode 100644
index 620daad2..00000000
--- a/ANDROID_3.4.5/fs/ceph/caps.c
+++ /dev/null
@@ -1,3088 +0,0 @@
-#include <linux/ceph/ceph_debug.h>
-
-#include <linux/fs.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/wait.h>
-#include <linux/writeback.h>
-
-#include "super.h"
-#include "mds_client.h"
-#include <linux/ceph/decode.h>
-#include <linux/ceph/messenger.h>
-
-/*
- * Capability management
- *
- * The Ceph metadata servers control client access to inode metadata
- * and file data by issuing capabilities, granting clients permission
- * to read and/or write both inode field and file data to OSDs
- * (storage nodes).  Each capability consists of a set of bits
- * indicating which operations are allowed.
- *
- * If the client holds a *_SHARED cap, the client has a coherent value
- * that can be safely read from the cached inode.
- *
- * In the case of a *_EXCL (exclusive) or FILE_WR capabilities, the
- * client is allowed to change inode attributes (e.g., file size,
- * mtime), note its dirty state in the ceph_cap, and asynchronously
- * flush that metadata change to the MDS.
- *
- * In the event of a conflicting operation (perhaps by another
- * client), the MDS will revoke the conflicting client capabilities.
- *
- * In order for a client to cache an inode, it must hold a capability
- * with at least one MDS server.  When inodes are released, release
- * notifications are batched and periodically sent en masse to the MDS
- * cluster to release server state.
- */
-
-
-/*
- * Generate readable cap strings for debugging output.
- */
-#define MAX_CAP_STR 20
-static char cap_str[MAX_CAP_STR][40];
-static DEFINE_SPINLOCK(cap_str_lock);
-static int last_cap_str;
-
-static char *gcap_string(char *s, int c)
-{
-	if (c & CEPH_CAP_GSHARED)
-		*s++ = 's';
-	if (c & CEPH_CAP_GEXCL)
-		*s++ = 'x';
-	if (c & CEPH_CAP_GCACHE)
-		*s++ = 'c';
-	if (c & CEPH_CAP_GRD)
-		*s++ = 'r';
-	if (c & CEPH_CAP_GWR)
-		*s++ = 'w';
-	if (c & CEPH_CAP_GBUFFER)
-		*s++ = 'b';
-	if (c & CEPH_CAP_GLAZYIO)
-		*s++ = 'l';
-	return s;
-}
-
-const char *ceph_cap_string(int caps)
-{
-	int i;
-	char *s;
-	int c;
-
-	spin_lock(&cap_str_lock);
-	i = last_cap_str++;
-	if (last_cap_str == MAX_CAP_STR)
-		last_cap_str = 0;
-	spin_unlock(&cap_str_lock);
-
-	s = cap_str[i];
-
-	if (caps & CEPH_CAP_PIN)
-		*s++ = 'p';
-
-	c = (caps >> CEPH_CAP_SAUTH) & 3;
-	if (c) {
-		*s++ = 'A';
-		s = gcap_string(s, c);
-	}
-
-	c = (caps >> CEPH_CAP_SLINK) & 3;
-	if (c) {
-		*s++ = 'L';
-		s = gcap_string(s, c);
-	}
-
-	c = (caps >> CEPH_CAP_SXATTR) & 3;
-	if (c) {
-		*s++ = 'X';
-		s = gcap_string(s, c);
-	}
-
-	c = caps >> CEPH_CAP_SFILE;
-	if (c) {
-		*s++ = 'F';
-		s = gcap_string(s, c);
-	}
-
-	if (s == cap_str[i])
-		*s++ = '-';
-	*s = 0;
-	return cap_str[i];
-}
-
-void ceph_caps_init(struct ceph_mds_client *mdsc)
-{
-	INIT_LIST_HEAD(&mdsc->caps_list);
-	spin_lock_init(&mdsc->caps_list_lock);
-}
-
-void ceph_caps_finalize(struct ceph_mds_client *mdsc)
-{
-	struct ceph_cap *cap;
-
-	spin_lock(&mdsc->caps_list_lock);
-	while (!list_empty(&mdsc->caps_list)) {
-		cap = list_first_entry(&mdsc->caps_list,
-				       struct ceph_cap, caps_item);
-		list_del(&cap->caps_item);
-		kmem_cache_free(ceph_cap_cachep, cap);
-	}
-	mdsc->caps_total_count = 0;
-	mdsc->caps_avail_count = 0;
-	mdsc->caps_use_count = 0;
-	mdsc->caps_reserve_count = 0;
-	mdsc->caps_min_count = 0;
-	spin_unlock(&mdsc->caps_list_lock);
-}
-
-void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta)
-{
-	spin_lock(&mdsc->caps_list_lock);
-	mdsc->caps_min_count += delta;
-	BUG_ON(mdsc->caps_min_count < 0);
-	spin_unlock(&mdsc->caps_list_lock);
-}
-
-int ceph_reserve_caps(struct ceph_mds_client *mdsc,
-		      struct ceph_cap_reservation *ctx, int need)
-{
-	int i;
-	struct ceph_cap *cap;
-	int have;
-	int alloc = 0;
-	LIST_HEAD(newcaps);
-	int ret = 0;
-
-	dout("reserve caps ctx=%p need=%d\n", ctx, need);
-
-	/* first reserve any caps that are already allocated */
-	spin_lock(&mdsc->caps_list_lock);
-	if (mdsc->caps_avail_count >= need)
-		have = need;
-	else
-		have = mdsc->caps_avail_count;
-	mdsc->caps_avail_count -= have;
-	mdsc->caps_reserve_count += have;
-	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
-					 mdsc->caps_reserve_count +
-					 mdsc->caps_avail_count);
-	spin_unlock(&mdsc->caps_list_lock);
-
-	for (i = have; i < need; i++) {
-		cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
-		if (!cap) {
-			ret = -ENOMEM;
-			goto out_alloc_count;
-		}
-		list_add(&cap->caps_item, &newcaps);
-		alloc++;
-	}
-	BUG_ON(have + alloc != need);
-
-	spin_lock(&mdsc->caps_list_lock);
-	mdsc->caps_total_count += alloc;
-	mdsc->caps_reserve_count += alloc;
-	list_splice(&newcaps, &mdsc->caps_list);
-
-	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
-					 mdsc->caps_reserve_count +
-					 mdsc->caps_avail_count);
-	spin_unlock(&mdsc->caps_list_lock);
-
-	ctx->count = need;
-	dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n",
-	     ctx, mdsc->caps_total_count, mdsc->caps_use_count,
-	     mdsc->caps_reserve_count, mdsc->caps_avail_count);
-	return 0;
-
-out_alloc_count:
-	/* we didn't manage to reserve as much as we needed */
-	pr_warning("reserve caps ctx=%p ENOMEM need=%d got=%d\n",
-		   ctx, need, have);
-	return ret;
-}
-
-int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
-			struct ceph_cap_reservation *ctx)
-{
-	dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
-	if (ctx->count) {
-		spin_lock(&mdsc->caps_list_lock);
-		BUG_ON(mdsc->caps_reserve_count < ctx->count);
-		mdsc->caps_reserve_count -= ctx->count;
-		mdsc->caps_avail_count += ctx->count;
-		ctx->count = 0;
-		dout("unreserve caps %d = %d used + %d resv + %d avail\n",
-		     mdsc->caps_total_count, mdsc->caps_use_count,
-		     mdsc->caps_reserve_count, mdsc->caps_avail_count);
-		BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
-						 mdsc->caps_reserve_count +
-						 mdsc->caps_avail_count);
-		spin_unlock(&mdsc->caps_list_lock);
-	}
-	return 0;
-}
-
-static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc,
-				struct ceph_cap_reservation *ctx)
-{
-	struct ceph_cap *cap = NULL;
-
-	/* temporary, until we do something about cap import/export */
-	if (!ctx) {
-		cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
-		if (cap) {
-			mdsc->caps_use_count++;
-			mdsc->caps_total_count++;
-		}
-		return cap;
-	}
-
-	spin_lock(&mdsc->caps_list_lock);
-	dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n",
-	     ctx, ctx->count, mdsc->caps_total_count, mdsc->caps_use_count,
-	     mdsc->caps_reserve_count, mdsc->caps_avail_count);
-	BUG_ON(!ctx->count);
-	BUG_ON(ctx->count > mdsc->caps_reserve_count);
-	BUG_ON(list_empty(&mdsc->caps_list));
-
-	ctx->count--;
-	mdsc->caps_reserve_count--;
-	mdsc->caps_use_count++;
-
-	cap = list_first_entry(&mdsc->caps_list, struct ceph_cap, caps_item);
-	list_del(&cap->caps_item);
-
-	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
-	       mdsc->caps_reserve_count + mdsc->caps_avail_count);
-	spin_unlock(&mdsc->caps_list_lock);
-	return cap;
-}
-
-void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap)
-{
-	spin_lock(&mdsc->caps_list_lock);
-	dout("put_cap %p %d = %d used + %d resv + %d avail\n",
-	     cap, mdsc->caps_total_count, mdsc->caps_use_count,
-	     mdsc->caps_reserve_count, mdsc->caps_avail_count);
-	mdsc->caps_use_count--;
-	/*
-	 * Keep some preallocated caps around (ceph_min_count), to
-	 * avoid lots of free/alloc churn.
-	 */
-	if (mdsc->caps_avail_count >= mdsc->caps_reserve_count +
-				      mdsc->caps_min_count) {
-		mdsc->caps_total_count--;
-		kmem_cache_free(ceph_cap_cachep, cap);
-	} else {
-		mdsc->caps_avail_count++;
-		list_add(&cap->caps_item, &mdsc->caps_list);
-	}
-
-	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
-	       mdsc->caps_reserve_count + mdsc->caps_avail_count);
-	spin_unlock(&mdsc->caps_list_lock);
-}
-
-void ceph_reservation_status(struct ceph_fs_client *fsc,
-			     int *total, int *avail, int *used, int *reserved,
-			     int *min)
-{
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-
-	if (total)
-		*total = mdsc->caps_total_count;
-	if (avail)
-		*avail = mdsc->caps_avail_count;
-	if (used)
-		*used = mdsc->caps_use_count;
-	if (reserved)
-		*reserved = mdsc->caps_reserve_count;
-	if (min)
-		*min = mdsc->caps_min_count;
-}
-
-/*
- * Find ceph_cap for given mds, if any.
- *
- * Called with i_ceph_lock held.
- */
-static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
-{
-	struct ceph_cap *cap;
-	struct rb_node *n = ci->i_caps.rb_node;
-
-	while (n) {
-		cap = rb_entry(n, struct ceph_cap, ci_node);
-		if (mds < cap->mds)
-			n = n->rb_left;
-		else if (mds > cap->mds)
-			n = n->rb_right;
-		else
-			return cap;
-	}
-	return NULL;
-}
-
-struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
-{
-	struct ceph_cap *cap;
-
-	spin_lock(&ci->i_ceph_lock);
-	cap = __get_cap_for_mds(ci, mds);
-	spin_unlock(&ci->i_ceph_lock);
-	return cap;
-}
-
-/*
- * Return id of any MDS with a cap, preferably FILE_WR|BUFFER|EXCL, else -1.
- */
-static int __ceph_get_cap_mds(struct ceph_inode_info *ci)
-{
-	struct ceph_cap *cap;
-	int mds = -1;
-	struct rb_node *p;
-
-	/* prefer mds with WR|BUFFER|EXCL caps */
-	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
-		cap = rb_entry(p, struct ceph_cap, ci_node);
-		mds = cap->mds;
-		if (cap->issued & (CEPH_CAP_FILE_WR |
-				   CEPH_CAP_FILE_BUFFER |
-				   CEPH_CAP_FILE_EXCL))
-			break;
-	}
-	return mds;
-}
-
-int ceph_get_cap_mds(struct inode *inode)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int mds;
-	spin_lock(&ci->i_ceph_lock);
-	mds = __ceph_get_cap_mds(ceph_inode(inode));
-	spin_unlock(&ci->i_ceph_lock);
-	return mds;
-}
-
-/*
- * Called under i_ceph_lock.
- */
-static void __insert_cap_node(struct ceph_inode_info *ci,
-			      struct ceph_cap *new)
-{
-	struct rb_node **p = &ci->i_caps.rb_node;
-	struct rb_node *parent = NULL;
-	struct ceph_cap *cap = NULL;
-
-	while (*p) {
-		parent = *p;
-		cap = rb_entry(parent, struct ceph_cap, ci_node);
-		if (new->mds < cap->mds)
-			p = &(*p)->rb_left;
-		else if (new->mds > cap->mds)
-			p = &(*p)->rb_right;
-		else
-			BUG();
-	}
-
-	rb_link_node(&new->ci_node, parent, p);
-	rb_insert_color(&new->ci_node, &ci->i_caps);
-}
-
-/*
- * (re)set cap hold timeouts, which control the delayed release
- * of unused caps back to the MDS.  Should be called on cap use.
- */
-static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
-			       struct ceph_inode_info *ci)
-{
-	struct ceph_mount_options *ma = mdsc->fsc->mount_options;
-
-	ci->i_hold_caps_min = round_jiffies(jiffies +
-					    ma->caps_wanted_delay_min * HZ);
-	ci->i_hold_caps_max = round_jiffies(jiffies +
-					    ma->caps_wanted_delay_max * HZ);
-	dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode,
-	     ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies);
-}
-
-/*
- * (Re)queue cap at the end of the delayed cap release list.
- *
- * If I_FLUSH is set, leave the inode at the front of the list.
- *
- * Caller holds i_ceph_lock
- *    -> we take mdsc->cap_delay_lock
- */
-static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
-				struct ceph_inode_info *ci)
-{
-	__cap_set_timeouts(mdsc, ci);
-	dout("__cap_delay_requeue %p flags %d at %lu\n", &ci->vfs_inode,
-	     ci->i_ceph_flags, ci->i_hold_caps_max);
-	if (!mdsc->stopping) {
-		spin_lock(&mdsc->cap_delay_lock);
-		if (!list_empty(&ci->i_cap_delay_list)) {
-			if (ci->i_ceph_flags & CEPH_I_FLUSH)
-				goto no_change;
-			list_del_init(&ci->i_cap_delay_list);
-		}
-		list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
-no_change:
-		spin_unlock(&mdsc->cap_delay_lock);
-	}
-}
-
-/*
- * Queue an inode for immediate writeback.  Mark inode with I_FLUSH,
- * indicating we should send a cap message to flush dirty metadata
- * asap, and move to the front of the delayed cap list.
- */
-static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
-				      struct ceph_inode_info *ci)
-{
-	dout("__cap_delay_requeue_front %p\n", &ci->vfs_inode);
-	spin_lock(&mdsc->cap_delay_lock);
-	ci->i_ceph_flags |= CEPH_I_FLUSH;
-	if (!list_empty(&ci->i_cap_delay_list))
-		list_del_init(&ci->i_cap_delay_list);
-	list_add(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
-	spin_unlock(&mdsc->cap_delay_lock);
-}
-
-/*
- * Cancel delayed work on cap.
- *
- * Caller must hold i_ceph_lock.
- */
-static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
-			       struct ceph_inode_info *ci)
-{
-	dout("__cap_delay_cancel %p\n", &ci->vfs_inode);
-	if (list_empty(&ci->i_cap_delay_list))
-		return;
-	spin_lock(&mdsc->cap_delay_lock);
-	list_del_init(&ci->i_cap_delay_list);
-	spin_unlock(&mdsc->cap_delay_lock);
-}
-
-/*
- * Common issue checks for add_cap, handle_cap_grant.
- */
-static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
-			      unsigned issued)
-{
-	unsigned had = __ceph_caps_issued(ci, NULL);
-
-	/*
-	 * Each time we receive FILE_CACHE anew, we increment
-	 * i_rdcache_gen.
-	 */
-	if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
-	    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
-		ci->i_rdcache_gen++;
-
-	/*
-	 * if we are newly issued FILE_SHARED, clear D_COMPLETE; we
-	 * don't know what happened to this directory while we didn't
-	 * have the cap.
-	 */
-	if ((issued & CEPH_CAP_FILE_SHARED) &&
-	    (had & CEPH_CAP_FILE_SHARED) == 0) {
-		ci->i_shared_gen++;
-		if (S_ISDIR(ci->vfs_inode.i_mode))
-			ceph_dir_clear_complete(&ci->vfs_inode);
-	}
-}
-
-/*
- * Add a capability under the given MDS session.
- *
- * Caller should hold session snap_rwsem (read) and s_mutex.
- *
- * @fmode is the open file mode, if we are opening a file, otherwise
- * it is < 0.  (This is so we can atomically add the cap and add an
- * open file reference to it.)
- */
-int ceph_add_cap(struct inode *inode,
-		 struct ceph_mds_session *session, u64 cap_id,
-		 int fmode, unsigned issued, unsigned wanted,
-		 unsigned seq, unsigned mseq, u64 realmino, int flags,
-		 struct ceph_cap_reservation *caps_reservation)
-{
-	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_cap *new_cap = NULL;
-	struct ceph_cap *cap;
-	int mds = session->s_mds;
-	int actual_wanted;
-
-	dout("add_cap %p mds%d cap %llx %s seq %d\n", inode,
-	     session->s_mds, cap_id, ceph_cap_string(issued), seq);
-
-	/*
-	 * If we are opening the file, include file mode wanted bits
-	 * in wanted.
-	 */
-	if (fmode >= 0)
-		wanted |= ceph_caps_for_mode(fmode);
-
-retry:
-	spin_lock(&ci->i_ceph_lock);
-	cap = __get_cap_for_mds(ci, mds);
-	if (!cap) {
-		if (new_cap) {
-			cap = new_cap;
-			new_cap = NULL;
-		} else {
-			spin_unlock(&ci->i_ceph_lock);
-			new_cap = get_cap(mdsc, caps_reservation);
-			if (new_cap == NULL)
-				return -ENOMEM;
-			goto retry;
-		}
-
-		cap->issued = 0;
-		cap->implemented = 0;
-		cap->mds = mds;
-		cap->mds_wanted = 0;
-
-		cap->ci = ci;
-		__insert_cap_node(ci, cap);
-
-		/* clear out old exporting info?  (i.e. on cap import) */
-		if (ci->i_cap_exporting_mds == mds) {
-			ci->i_cap_exporting_issued = 0;
-			ci->i_cap_exporting_mseq = 0;
-			ci->i_cap_exporting_mds = -1;
-		}
-
-		/* add to session cap list */
-		cap->session = session;
-		spin_lock(&session->s_cap_lock);
-		list_add_tail(&cap->session_caps, &session->s_caps);
-		session->s_nr_caps++;
-		spin_unlock(&session->s_cap_lock);
-	} else if (new_cap)
-		ceph_put_cap(mdsc, new_cap);
-
-	if (!ci->i_snap_realm) {
-		/*
-		 * add this inode to the appropriate snap realm
-		 */
-		struct ceph_snap_realm *realm = ceph_lookup_snap_realm(mdsc,
-							       realmino);
-		if (realm) {
-			ceph_get_snap_realm(mdsc, realm);
-			spin_lock(&realm->inodes_with_caps_lock);
-			ci->i_snap_realm = realm;
-			list_add(&ci->i_snap_realm_item,
-				 &realm->inodes_with_caps);
-			spin_unlock(&realm->inodes_with_caps_lock);
-		} else {
-			pr_err("ceph_add_cap: couldn't find snap realm %llx\n",
-			       realmino);
-			WARN_ON(!realm);
-		}
-	}
-
-	__check_cap_issue(ci, cap, issued);
-
-	/*
-	 * If we are issued caps we don't want, or the mds' wanted
-	 * value appears to be off, queue a check so we'll release
-	 * later and/or update the mds wanted value.
-	 */
-	actual_wanted = __ceph_caps_wanted(ci);
-	if ((wanted & ~actual_wanted) ||
-	    (issued & ~actual_wanted & CEPH_CAP_ANY_WR)) {
-		dout(" issued %s, mds wanted %s, actual %s, queueing\n",
-		     ceph_cap_string(issued), ceph_cap_string(wanted),
-		     ceph_cap_string(actual_wanted));
-		__cap_delay_requeue(mdsc, ci);
-	}
-
-	if (flags & CEPH_CAP_FLAG_AUTH)
-		ci->i_auth_cap = cap;
-	else if (ci->i_auth_cap == cap)
-		ci->i_auth_cap = NULL;
-
-	dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n",
-	     inode, ceph_vinop(inode), cap, ceph_cap_string(issued),
-	     ceph_cap_string(issued|cap->issued), seq, mds);
-	cap->cap_id = cap_id;
-	cap->issued = issued;
-	cap->implemented |= issued;
-	cap->mds_wanted |= wanted;
-	cap->seq = seq;
-	cap->issue_seq = seq;
-	cap->mseq = mseq;
-	cap->cap_gen = session->s_cap_gen;
-
-	if (fmode >= 0)
-		__ceph_get_fmode(ci, fmode);
-	spin_unlock(&ci->i_ceph_lock);
-	wake_up_all(&ci->i_cap_wq);
-	return 0;
-}
-
-/*
- * Return true if cap has not timed out and belongs to the current
- * generation of the MDS session (i.e. has not gone 'stale' due to
- * us losing touch with the mds).
- */
-static int __cap_is_valid(struct ceph_cap *cap)
-{
-	unsigned long ttl;
-	u32 gen;
-
-	spin_lock(&cap->session->s_gen_ttl_lock);
-	gen = cap->session->s_cap_gen;
-	ttl = cap->session->s_cap_ttl;
-	spin_unlock(&cap->session->s_gen_ttl_lock);
-
-	if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) {
-		dout("__cap_is_valid %p cap %p issued %s "
-		     "but STALE (gen %u vs %u)\n", &cap->ci->vfs_inode,
-		     cap, ceph_cap_string(cap->issued), cap->cap_gen, gen);
-		return 0;
-	}
-
-	return 1;
-}
-
-/*
- * Return set of valid cap bits issued to us.  Note that caps time
- * out, and may be invalidated in bulk if the client session times out
- * and session->s_cap_gen is bumped.
- */
-int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
-{
-	int have = ci->i_snap_caps | ci->i_cap_exporting_issued;
-	struct ceph_cap *cap;
-	struct rb_node *p;
-
-	if (implemented)
-		*implemented = 0;
-	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
-		cap = rb_entry(p, struct ceph_cap, ci_node);
-		if (!__cap_is_valid(cap))
-			continue;
-		dout("__ceph_caps_issued %p cap %p issued %s\n",
-		     &ci->vfs_inode, cap, ceph_cap_string(cap->issued));
-		have |= cap->issued;
-		if (implemented)
-			*implemented |= cap->implemented;
-	}
-	return have;
-}
-
-/*
- * Get cap bits issued by caps other than @ocap
- */
-int __ceph_caps_issued_other(struct ceph_inode_info *ci, struct ceph_cap *ocap)
-{
-	int have = ci->i_snap_caps;
-	struct ceph_cap *cap;
-	struct rb_node *p;
-
-	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
-		cap = rb_entry(p, struct ceph_cap, ci_node);
-		if (cap == ocap)
-			continue;
-		if (!__cap_is_valid(cap))
-			continue;
-		have |= cap->issued;
-	}
-	return have;
-}
-
-/*
- * Move a cap to the end of the LRU (oldest caps at list head, newest
- * at list tail).
- */
-static void __touch_cap(struct ceph_cap *cap)
-{
-	struct ceph_mds_session *s = cap->session;
-
-	spin_lock(&s->s_cap_lock);
-	if (s->s_cap_iterator == NULL) {
-		dout("__touch_cap %p cap %p mds%d\n", &cap->ci->vfs_inode, cap,
-		     s->s_mds);
-		list_move_tail(&cap->session_caps, &s->s_caps);
-	} else {
-		dout("__touch_cap %p cap %p mds%d NOP, iterating over caps\n",
-		     &cap->ci->vfs_inode, cap, s->s_mds);
-	}
-	spin_unlock(&s->s_cap_lock);
-}
-
-/*
- * Check if we hold the given mask.  If so, move the cap(s) to the
- * front of their respective LRUs.  (This is the preferred way for
- * callers to check for caps they want.)
- */
-int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
-{
-	struct ceph_cap *cap;
-	struct rb_node *p;
-	int have = ci->i_snap_caps;
-
-	if ((have & mask) == mask) {
-		dout("__ceph_caps_issued_mask %p snap issued %s"
-		     " (mask %s)\n", &ci->vfs_inode,
-		     ceph_cap_string(have),
-		     ceph_cap_string(mask));
-		return 1;
-	}
-
-	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
-		cap = rb_entry(p, struct ceph_cap, ci_node);
-		if (!__cap_is_valid(cap))
-			continue;
-		if ((cap->issued & mask) == mask) {
-			dout("__ceph_caps_issued_mask %p cap %p issued %s"
-			     " (mask %s)\n", &ci->vfs_inode, cap,
-			     ceph_cap_string(cap->issued),
-			     ceph_cap_string(mask));
-			if (touch)
-				__touch_cap(cap);
-			return 1;
-		}
-
-		/* does a combination of caps satisfy mask? */
-		have |= cap->issued;
-		if ((have & mask) == mask) {
-			dout("__ceph_caps_issued_mask %p combo issued %s"
-			     " (mask %s)\n", &ci->vfs_inode,
-			     ceph_cap_string(cap->issued),
-			     ceph_cap_string(mask));
-			if (touch) {
-				struct rb_node *q;
-
-				/* touch this + preceding caps */
-				__touch_cap(cap);
-				for (q = rb_first(&ci->i_caps); q != p;
-				     q = rb_next(q)) {
-					cap = rb_entry(q, struct ceph_cap,
-						       ci_node);
-					if (!__cap_is_valid(cap))
-						continue;
-					__touch_cap(cap);
-				}
-			}
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-/*
- * Return true if mask caps are currently being revoked by an MDS.
- */
-int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
-{
-	struct inode *inode = &ci->vfs_inode;
-	struct ceph_cap *cap;
-	struct rb_node *p;
-	int ret = 0;
-
-	spin_lock(&ci->i_ceph_lock);
-	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
-		cap = rb_entry(p, struct ceph_cap, ci_node);
-		if (__cap_is_valid(cap) &&
-		    (cap->implemented & ~cap->issued & mask)) {
-			ret = 1;
-			break;
-		}
-	}
-	spin_unlock(&ci->i_ceph_lock);
-	dout("ceph_caps_revoking %p %s = %d\n", inode,
-	     ceph_cap_string(mask), ret);
-	return ret;
-}
-
-int __ceph_caps_used(struct ceph_inode_info *ci)
-{
-	int used = 0;
-	if (ci->i_pin_ref)
-		used |= CEPH_CAP_PIN;
-	if (ci->i_rd_ref)
-		used |= CEPH_CAP_FILE_RD;
-	if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages)
-		used |= CEPH_CAP_FILE_CACHE;
-	if (ci->i_wr_ref)
-		used |= CEPH_CAP_FILE_WR;
-	if (ci->i_wb_ref || ci->i_wrbuffer_ref)
-		used |= CEPH_CAP_FILE_BUFFER;
-	return used;
-}
-
-/*
- * wanted, by virtue of open file modes
- */
-int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
-{
-	int want = 0;
-	int mode;
-	for (mode = 0; mode < CEPH_FILE_MODE_NUM; mode++)
-		if (ci->i_nr_by_mode[mode])
-			want |= ceph_caps_for_mode(mode);
-	return want;
-}
-
-/*
- * Return caps we have registered with the MDS(s) as 'wanted'.
- */
-int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
-{
-	struct ceph_cap *cap;
-	struct rb_node *p;
-	int mds_wanted = 0;
-
-	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
-		cap = rb_entry(p, struct ceph_cap, ci_node);
-		if (!__cap_is_valid(cap))
-			continue;
-		mds_wanted |= cap->mds_wanted;
-	}
-	return mds_wanted;
-}
-
-/*
- * called under i_ceph_lock
- */
-static int __ceph_is_any_caps(struct ceph_inode_info *ci)
-{
-	return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_mds >= 0;
-}
-
-/*
- * Remove a cap.  Take steps to deal with a racing iterate_session_caps.
- *
- * caller should hold i_ceph_lock.
- * caller will not hold session s_mutex if called from destroy_inode.
- */
-void __ceph_remove_cap(struct ceph_cap *cap)
-{
-	struct ceph_mds_session *session = cap->session;
-	struct ceph_inode_info *ci = cap->ci;
-	struct ceph_mds_client *mdsc =
-		ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
-	int removed = 0;
-
-	dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
-
-	/* remove from session list */
-	spin_lock(&session->s_cap_lock);
-	if (session->s_cap_iterator == cap) {
-		/* not yet, we are iterating over this very cap */
-		dout("__ceph_remove_cap  delaying %p removal from session %p\n",
-		     cap, cap->session);
-	} else {
-		list_del_init(&cap->session_caps);
-		session->s_nr_caps--;
-		cap->session = NULL;
-		removed = 1;
-	}
-	/* protect backpointer with s_cap_lock: see iterate_session_caps */
-	cap->ci = NULL;
-	spin_unlock(&session->s_cap_lock);
-
-	/* remove from inode list */
-	rb_erase(&cap->ci_node, &ci->i_caps);
-	if (ci->i_auth_cap == cap)
-		ci->i_auth_cap = NULL;
-
-	if (removed)
-		ceph_put_cap(mdsc, cap);
-
-	if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
-		struct ceph_snap_realm *realm = ci->i_snap_realm;
-		spin_lock(&realm->inodes_with_caps_lock);
-		list_del_init(&ci->i_snap_realm_item);
-		ci->i_snap_realm_counter++;
-		ci->i_snap_realm = NULL;
-		spin_unlock(&realm->inodes_with_caps_lock);
-		ceph_put_snap_realm(mdsc, realm);
-	}
-	if (!__ceph_is_any_real_caps(ci))
-		__cap_delay_cancel(mdsc, ci);
-}
-
-/*
- * Build and send a cap message to the given MDS.
- *
- * Caller should be holding s_mutex.
- */
-static int send_cap_msg(struct ceph_mds_session *session,
-			u64 ino, u64 cid, int op,
-			int caps, int wanted, int dirty,
-			u32 seq, u64 flush_tid, u32 issue_seq, u32 mseq,
-			u64 size, u64 max_size,
-			struct timespec *mtime, struct timespec *atime,
-			u64 time_warp_seq,
-			uid_t uid, gid_t gid, umode_t mode,
-			u64 xattr_version,
-			struct ceph_buffer *xattrs_buf,
-			u64 follows)
-{
-	struct ceph_mds_caps *fc;
-	struct ceph_msg *msg;
-
-	dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
-	     " seq %u/%u mseq %u follows %lld size %llu/%llu"
-	     " xattr_ver %llu xattr_len %d\n", ceph_cap_op_name(op),
-	     cid, ino, ceph_cap_string(caps), ceph_cap_string(wanted),
-	     ceph_cap_string(dirty),
-	     seq, issue_seq, mseq, follows, size, max_size,
-	     xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);
-
-	msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS, false);
-	if (!msg)
-		return -ENOMEM;
-
-	msg->hdr.tid = cpu_to_le64(flush_tid);
-
-	fc = msg->front.iov_base;
-	memset(fc, 0, sizeof(*fc));
-
-	fc->cap_id = cpu_to_le64(cid);
-	fc->op = cpu_to_le32(op);
-	fc->seq = cpu_to_le32(seq);
-	fc->issue_seq = cpu_to_le32(issue_seq);
-	fc->migrate_seq = cpu_to_le32(mseq);
-	fc->caps = cpu_to_le32(caps);
-	fc->wanted = cpu_to_le32(wanted);
-	fc->dirty = cpu_to_le32(dirty);
-	fc->ino = cpu_to_le64(ino);
-	fc->snap_follows = cpu_to_le64(follows);
-
-	fc->size = cpu_to_le64(size);
-	fc->max_size = cpu_to_le64(max_size);
-	if (mtime)
-		ceph_encode_timespec(&fc->mtime, mtime);
-	if (atime)
-		ceph_encode_timespec(&fc->atime, atime);
-	fc->time_warp_seq = cpu_to_le32(time_warp_seq);
-
-	fc->uid = cpu_to_le32(uid);
-	fc->gid = cpu_to_le32(gid);
-	fc->mode = cpu_to_le32(mode);
-
-	fc->xattr_version = cpu_to_le64(xattr_version);
-	if (xattrs_buf) {
-		msg->middle = ceph_buffer_get(xattrs_buf);
-		fc->xattr_len = cpu_to_le32(xattrs_buf->vec.iov_len);
-		msg->hdr.middle_len = cpu_to_le32(xattrs_buf->vec.iov_len);
-	}
-
-	ceph_con_send(&session->s_con, msg);
-	return 0;
-}
-
-static void __queue_cap_release(struct ceph_mds_session *session,
-				u64 ino, u64 cap_id, u32 migrate_seq,
-				u32 issue_seq)
-{
-	struct ceph_msg *msg;
-	struct ceph_mds_cap_release *head;
-	struct ceph_mds_cap_item *item;
-
-	spin_lock(&session->s_cap_lock);
-	BUG_ON(!session->s_num_cap_releases);
-	msg = list_first_entry(&session->s_cap_releases,
-			       struct ceph_msg, list_head);
-
-	dout(" adding %llx release to mds%d msg %p (%d left)\n",
-	     ino, session->s_mds, msg, session->s_num_cap_releases);
-
-	BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
-	head = msg->front.iov_base;
-	head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
-	item = msg->front.iov_base + msg->front.iov_len;
-	item->ino = cpu_to_le64(ino);
-	item->cap_id = cpu_to_le64(cap_id);
-	item->migrate_seq = cpu_to_le32(migrate_seq);
-	item->seq = cpu_to_le32(issue_seq);
-
-	session->s_num_cap_releases--;
-
-	msg->front.iov_len += sizeof(*item);
-	if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
-		dout(" release msg %p full\n", msg);
-		list_move_tail(&msg->list_head, &session->s_cap_releases_done);
-	} else {
-		dout(" release msg %p at %d/%d (%d)\n", msg,
-		     (int)le32_to_cpu(head->num),
-		     (int)CEPH_CAPS_PER_RELEASE,
-		     (int)msg->front.iov_len);
-	}
-	spin_unlock(&session->s_cap_lock);
-}
-
-/*
- * Queue cap releases when an inode is dropped from our cache.  Since
- * inode is about to be destroyed, there is no need for i_ceph_lock.
- */
-void ceph_queue_caps_release(struct inode *inode)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct rb_node *p;
-
-	p = rb_first(&ci->i_caps);
-	while (p) {
-		struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
-		struct ceph_mds_session *session = cap->session;
-
-		__queue_cap_release(session, ceph_ino(inode), cap->cap_id,
-				    cap->mseq, cap->issue_seq);
-		p = rb_next(p);
-		__ceph_remove_cap(cap);
-	}
-}
-
-/*
- * Send a cap msg on the given inode.  Update our caps state, then
- * drop i_ceph_lock and send the message.
- *
- * Make note of max_size reported/requested from mds, revoked caps
- * that have now been implemented.
- *
- * Make half-hearted attempt ot to invalidate page cache if we are
- * dropping RDCACHE.  Note that this will leave behind locked pages
- * that we'll then need to deal with elsewhere.
- *
- * Return non-zero if delayed release, or we experienced an error
- * such that the caller should requeue + retry later.
- *
- * called with i_ceph_lock, then drops it.
- * caller should hold snap_rwsem (read), s_mutex.
- */
-static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
-		      int op, int used, int want, int retain, int flushing,
-		      unsigned *pflush_tid)
-	__releases(cap->ci->i_ceph_lock)
-{
-	struct ceph_inode_info *ci = cap->ci;
-	struct inode *inode = &ci->vfs_inode;
-	u64 cap_id = cap->cap_id;
-	int held, revoking, dropping, keep;
-	u64 seq, issue_seq, mseq, time_warp_seq, follows;
-	u64 size, max_size;
-	struct timespec mtime, atime;
-	int wake = 0;
-	umode_t mode;
-	uid_t uid;
-	gid_t gid;
-	struct ceph_mds_session *session;
-	u64 xattr_version = 0;
-	struct ceph_buffer *xattr_blob = NULL;
-	int delayed = 0;
-	u64 flush_tid = 0;
-	int i;
-	int ret;
-
-	held = cap->issued | cap->implemented;
-	revoking = cap->implemented & ~cap->issued;
-	retain &= ~revoking;
-	dropping = cap->issued & ~retain;
-
-	dout("__send_cap %p cap %p session %p %s -> %s (revoking %s)\n",
-	     inode, cap, cap->session,
-	     ceph_cap_string(held), ceph_cap_string(held & retain),
-	     ceph_cap_string(revoking));
-	BUG_ON((retain & CEPH_CAP_PIN) == 0);
-
-	session = cap->session;
-
-	/* don't release wanted unless we've waited a bit. */
-	if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0 &&
-	    time_before(jiffies, ci->i_hold_caps_min)) {
-		dout(" delaying issued %s -> %s, wanted %s -> %s on send\n",
-		     ceph_cap_string(cap->issued),
-		     ceph_cap_string(cap->issued & retain),
-		     ceph_cap_string(cap->mds_wanted),
-		     ceph_cap_string(want));
-		want |= cap->mds_wanted;
-		retain |= cap->issued;
-		delayed = 1;
-	}
-	ci->i_ceph_flags &= ~(CEPH_I_NODELAY | CEPH_I_FLUSH);
-
-	cap->issued &= retain;  /* drop bits we don't want */
-	if (cap->implemented & ~cap->issued) {
-		/*
-		 * Wake up any waiters on wanted -> needed transition.
-		 * This is due to the weird transition from buffered
-		 * to sync IO... we need to flush dirty pages _before_
-		 * allowing sync writes to avoid reordering.
-		 */
-		wake = 1;
-	}
-	cap->implemented &= cap->issued | used;
-	cap->mds_wanted = want;
-
-	if (flushing) {
-		/*
-		 * assign a tid for flush operations so we can avoid
-		 * flush1 -> dirty1 -> flush2 -> flushack1 -> mark
-		 * clean type races.  track latest tid for every bit
-		 * so we can handle flush AxFw, flush Fw, and have the
-		 * first ack clean Ax.
-		 */
-		flush_tid = ++ci->i_cap_flush_last_tid;
-		if (pflush_tid)
-			*pflush_tid = flush_tid;
-		dout(" cap_flush_tid %d\n", (int)flush_tid);
-		for (i = 0; i < CEPH_CAP_BITS; i++)
-			if (flushing & (1 << i))
-				ci->i_cap_flush_tid[i] = flush_tid;
-
-		follows = ci->i_head_snapc->seq;
-	} else {
-		follows = 0;
-	}
-
-	keep = cap->implemented;
-	seq = cap->seq;
-	issue_seq = cap->issue_seq;
-	mseq = cap->mseq;
-	size = inode->i_size;
-	ci->i_reported_size = size;
-	max_size = ci->i_wanted_max_size;
-	ci->i_requested_max_size = max_size;
-	mtime = inode->i_mtime;
-	atime = inode->i_atime;
-	time_warp_seq = ci->i_time_warp_seq;
-	uid = inode->i_uid;
-	gid = inode->i_gid;
-	mode = inode->i_mode;
-
-	if (flushing & CEPH_CAP_XATTR_EXCL) {
-		__ceph_build_xattrs_blob(ci);
-		xattr_blob = ci->i_xattrs.blob;
-		xattr_version = ci->i_xattrs.version;
-	}
-
-	spin_unlock(&ci->i_ceph_lock);
-
-	ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
-		op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
-		size, max_size, &mtime, &atime, time_warp_seq,
-		uid, gid, mode, xattr_version, xattr_blob,
-		follows);
-	if (ret < 0) {
-		dout("error sending cap msg, must requeue %p\n", inode);
-		delayed = 1;
-	}
-
-	if (wake)
-		wake_up_all(&ci->i_cap_wq);
-
-	return delayed;
-}
-
-/*
- * When a snapshot is taken, clients accumulate dirty metadata on
- * inodes with capabilities in ceph_cap_snaps to describe the file
- * state at the time the snapshot was taken.  This must be flushed
- * asynchronously back to the MDS once sync writes complete and dirty
- * data is written out.
- *
- * Unless @again is true, skip cap_snaps that were already sent to
- * the MDS (i.e., during this session).
- *
- * Called under i_ceph_lock.  Takes s_mutex as needed.
- */
-void __ceph_flush_snaps(struct ceph_inode_info *ci,
-			struct ceph_mds_session **psession,
-			int again)
-		__releases(ci->i_ceph_lock)
-		__acquires(ci->i_ceph_lock)
-{
-	struct inode *inode = &ci->vfs_inode;
-	int mds;
-	struct ceph_cap_snap *capsnap;
-	u32 mseq;
-	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
-	struct ceph_mds_session *session = NULL; /* if session != NULL, we hold
-						    session->s_mutex */
-	u64 next_follows = 0;  /* keep track of how far we've gotten through the
-			     i_cap_snaps list, and skip these entries next time
-			     around to avoid an infinite loop */
-
-	if (psession)
-		session = *psession;
-
-	dout("__flush_snaps %p\n", inode);
-retry:
-	list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
-		/* avoid an infiniute loop after retry */
-		if (capsnap->follows < next_follows)
-			continue;
-		/*
-		 * we need to wait for sync writes to complete and for dirty
-		 * pages to be written out.
-		 */
-		if (capsnap->dirty_pages || capsnap->writing)
-			break;
-
-		/*
-		 * if cap writeback already occurred, we should have dropped
-		 * the capsnap in ceph_put_wrbuffer_cap_refs.
-		 */
-		BUG_ON(capsnap->dirty == 0);
-
-		/* pick mds, take s_mutex */
-		if (ci->i_auth_cap == NULL) {
-			dout("no auth cap (migrating?), doing nothing\n");
-			goto out;
-		}
-
-		/* only flush each capsnap once */
-		if (!again && !list_empty(&capsnap->flushing_item)) {
-			dout("already flushed %p, skipping\n", capsnap);
-			continue;
-		}
-
-		mds = ci->i_auth_cap->session->s_mds;
-		mseq = ci->i_auth_cap->mseq;
-
-		if (session && session->s_mds != mds) {
-			dout("oops, wrong session %p mutex\n", session);
-			mutex_unlock(&session->s_mutex);
-			ceph_put_mds_session(session);
-			session = NULL;
-		}
-		if (!session) {
-			spin_unlock(&ci->i_ceph_lock);
-			mutex_lock(&mdsc->mutex);
-			session = __ceph_lookup_mds_session(mdsc, mds);
-			mutex_unlock(&mdsc->mutex);
-			if (session) {
-				dout("inverting session/ino locks on %p\n",
-				     session);
-				mutex_lock(&session->s_mutex);
-			}
-			/*
-			 * if session == NULL, we raced against a cap
-			 * deletion or migration.  retry, and we'll
-			 * get a better @mds value next time.
-			 */
-			spin_lock(&ci->i_ceph_lock);
-			goto retry;
-		}
-
-		capsnap->flush_tid = ++ci->i_cap_flush_last_tid;
-		atomic_inc(&capsnap->nref);
-		if (!list_empty(&capsnap->flushing_item))
-			list_del_init(&capsnap->flushing_item);
-		list_add_tail(&capsnap->flushing_item,
-			      &session->s_cap_snaps_flushing);
-		spin_unlock(&ci->i_ceph_lock);
-
-		dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
-		     inode, capsnap, capsnap->follows, capsnap->flush_tid);
-		send_cap_msg(session, ceph_vino(inode).ino, 0,
-			     CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
-			     capsnap->dirty, 0, capsnap->flush_tid, 0, mseq,
-			     capsnap->size, 0,
-			     &capsnap->mtime, &capsnap->atime,
-			     capsnap->time_warp_seq,
-			     capsnap->uid, capsnap->gid, capsnap->mode,
-			     capsnap->xattr_version, capsnap->xattr_blob,
-			     capsnap->follows);
-
-		next_follows = capsnap->follows + 1;
-		ceph_put_cap_snap(capsnap);
-
-		spin_lock(&ci->i_ceph_lock);
-		goto retry;
-	}
-
-	/* we flushed them all; remove this inode from the queue */
-	spin_lock(&mdsc->snap_flush_lock);
-	list_del_init(&ci->i_snap_flush_item);
-	spin_unlock(&mdsc->snap_flush_lock);
-
-out:
-	if (psession)
-		*psession = session;
-	else if (session) {
-		mutex_unlock(&session->s_mutex);
-		ceph_put_mds_session(session);
-	}
-}
-
-static void ceph_flush_snaps(struct ceph_inode_info *ci)
-{
-	spin_lock(&ci->i_ceph_lock);
-	__ceph_flush_snaps(ci, NULL, 0);
-	spin_unlock(&ci->i_ceph_lock);
-}
-
-/*
- * Mark caps dirty.  If inode is newly dirty, return the dirty flags.
- * Caller is then responsible for calling __mark_inode_dirty with the
- * returned flags value.
- */
-int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
-{
-	struct ceph_mds_client *mdsc =
-		ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
-	struct inode *inode = &ci->vfs_inode;
-	int was = ci->i_dirty_caps;
-	int dirty = 0;
-
-	dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
-	     ceph_cap_string(mask), ceph_cap_string(was),
-	     ceph_cap_string(was | mask));
-	ci->i_dirty_caps |= mask;
-	if (was == 0) {
-		if (!ci->i_head_snapc)
-			ci->i_head_snapc = ceph_get_snap_context(
-				ci->i_snap_realm->cached_context);
-		dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode,
-			ci->i_head_snapc);
-		BUG_ON(!list_empty(&ci->i_dirty_item));
-		spin_lock(&mdsc->cap_dirty_lock);
-		list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
-		spin_unlock(&mdsc->cap_dirty_lock);
-		if (ci->i_flushing_caps == 0) {
-			ihold(inode);
-			dirty |= I_DIRTY_SYNC;
-		}
-	}
-	BUG_ON(list_empty(&ci->i_dirty_item));
-	if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
-	    (mask & CEPH_CAP_FILE_BUFFER))
-		dirty |= I_DIRTY_DATASYNC;
-	__cap_delay_requeue(mdsc, ci);
-	return dirty;
-}
-
-/*
- * Add dirty inode to the flushing list.  Assigned a seq number so we
- * can wait for caps to flush without starving.
- *
- * Called under i_ceph_lock.
- */
-static int __mark_caps_flushing(struct inode *inode,
-				 struct ceph_mds_session *session)
-{
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int flushing;
-
-	BUG_ON(ci->i_dirty_caps == 0);
-	BUG_ON(list_empty(&ci->i_dirty_item));
-
-	flushing = ci->i_dirty_caps;
-	dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n",
-	     ceph_cap_string(flushing),
-	     ceph_cap_string(ci->i_flushing_caps),
-	     ceph_cap_string(ci->i_flushing_caps | flushing));
-	ci->i_flushing_caps |= flushing;
-	ci->i_dirty_caps = 0;
-	dout(" inode %p now !dirty\n", inode);
-
-	spin_lock(&mdsc->cap_dirty_lock);
-	list_del_init(&ci->i_dirty_item);
-
-	ci->i_cap_flush_seq = ++mdsc->cap_flush_seq;
-	if (list_empty(&ci->i_flushing_item)) {
-		list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing);
-		mdsc->num_cap_flushing++;
-		dout(" inode %p now flushing seq %lld\n", inode,
-		     ci->i_cap_flush_seq);
-	} else {
-		list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing);
-		dout(" inode %p now flushing (more) seq %lld\n", inode,
-		     ci->i_cap_flush_seq);
-	}
-	spin_unlock(&mdsc->cap_dirty_lock);
-
-	return flushing;
-}
-
-/*
- * try to invalidate mapping pages without blocking.
- */
-static int try_nonblocking_invalidate(struct inode *inode)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	u32 invalidating_gen = ci->i_rdcache_gen;
-
-	spin_unlock(&ci->i_ceph_lock);
-	invalidate_mapping_pages(&inode->i_data, 0, -1);
-	spin_lock(&ci->i_ceph_lock);
-
-	if (inode->i_data.nrpages == 0 &&
-	    invalidating_gen == ci->i_rdcache_gen) {
-		/* success. */
-		dout("try_nonblocking_invalidate %p success\n", inode);
-		/* save any racing async invalidate some trouble */
-		ci->i_rdcache_revoking = ci->i_rdcache_gen - 1;
-		return 0;
-	}
-	dout("try_nonblocking_invalidate %p failed\n", inode);
-	return -1;
-}
-
-/*
- * Swiss army knife function to examine currently used and wanted
- * versus held caps.  Release, flush, ack revoked caps to mds as
- * appropriate.
- *
- *  CHECK_CAPS_NODELAY - caller is delayed work and we should not delay
- *    cap release further.
- *  CHECK_CAPS_AUTHONLY - we should only check the auth cap
- *  CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
- *    further delay.
- */
-void ceph_check_caps(struct ceph_inode_info *ci, int flags,
-		     struct ceph_mds_session *session)
-{
-	struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct inode *inode = &ci->vfs_inode;
-	struct ceph_cap *cap;
-	int file_wanted, used;
-	int took_snap_rwsem = 0;             /* true if mdsc->snap_rwsem held */
-	int issued, implemented, want, retain, revoking, flushing = 0;
-	int mds = -1;   /* keep track of how far we've gone through i_caps list
-			   to avoid an infinite loop on retry */
-	struct rb_node *p;
-	int tried_invalidate = 0;
-	int delayed = 0, sent = 0, force_requeue = 0, num;
-	int queue_invalidate = 0;
-	int is_delayed = flags & CHECK_CAPS_NODELAY;
-
-	/* if we are unmounting, flush any unused caps immediately. */
-	if (mdsc->stopping)
-		is_delayed = 1;
-
-	spin_lock(&ci->i_ceph_lock);
-
-	if (ci->i_ceph_flags & CEPH_I_FLUSH)
-		flags |= CHECK_CAPS_FLUSH;
-
-	/* flush snaps first time around only */
-	if (!list_empty(&ci->i_cap_snaps))
-		__ceph_flush_snaps(ci, &session, 0);
-	goto retry_locked;
-retry:
-	spin_lock(&ci->i_ceph_lock);
-retry_locked:
-	file_wanted = __ceph_caps_file_wanted(ci);
-	used = __ceph_caps_used(ci);
-	want = file_wanted | used;
-	issued = __ceph_caps_issued(ci, &implemented);
-	revoking = implemented & ~issued;
-
-	retain = want | CEPH_CAP_PIN;
-	if (!mdsc->stopping && inode->i_nlink > 0) {
-		if (want) {
-			retain |= CEPH_CAP_ANY;       /* be greedy */
-		} else {
-			retain |= CEPH_CAP_ANY_SHARED;
-			/*
-			 * keep RD only if we didn't have the file open RW,
-			 * because then the mds would revoke it anyway to
-			 * journal max_size=0.
-			 */
-			if (ci->i_max_size == 0)
-				retain |= CEPH_CAP_ANY_RD;
-		}
-	}
-
-	dout("check_caps %p file_want %s used %s dirty %s flushing %s"
-	     " issued %s revoking %s retain %s %s%s%s\n", inode,
-	     ceph_cap_string(file_wanted),
-	     ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
-	     ceph_cap_string(ci->i_flushing_caps),
-	     ceph_cap_string(issued), ceph_cap_string(revoking),
-	     ceph_cap_string(retain),
-	     (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
-	     (flags & CHECK_CAPS_NODELAY) ? " NODELAY" : "",
-	     (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "");
-
-	/*
-	 * If we no longer need to hold onto old our caps, and we may
-	 * have cached pages, but don't want them, then try to invalidate.
-	 * If we fail, it's because pages are locked.... try again later.
-	 */
-	if ((!is_delayed || mdsc->stopping) &&
-	    ci->i_wrbuffer_ref == 0 &&               /* no dirty pages... */
-	    inode->i_data.nrpages &&                 /* have cached pages */
-	    (file_wanted == 0 ||                     /* no open files */
-	     (revoking & (CEPH_CAP_FILE_CACHE|
-			  CEPH_CAP_FILE_LAZYIO))) && /*  or revoking cache */
-	    !tried_invalidate) {
-		dout("check_caps trying to invalidate on %p\n", inode);
-		if (try_nonblocking_invalidate(inode) < 0) {
-			if (revoking & (CEPH_CAP_FILE_CACHE|
-					CEPH_CAP_FILE_LAZYIO)) {
-				dout("check_caps queuing invalidate\n");
-				queue_invalidate = 1;
-				ci->i_rdcache_revoking = ci->i_rdcache_gen;
-			} else {
-				dout("check_caps failed to invalidate pages\n");
-				/* we failed to invalidate pages.  check these
-				   caps again later. */
-				force_requeue = 1;
-				__cap_set_timeouts(mdsc, ci);
-			}
-		}
-		tried_invalidate = 1;
-		goto retry_locked;
-	}
-
-	num = 0;
-	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
-		cap = rb_entry(p, struct ceph_cap, ci_node);
-		num++;
-
-		/* avoid looping forever */
-		if (mds >= cap->mds ||
-		    ((flags & CHECK_CAPS_AUTHONLY) && cap != ci->i_auth_cap))
-			continue;
-
-		/* NOTE: no side-effects allowed, until we take s_mutex */
-
-		revoking = cap->implemented & ~cap->issued;
-		dout(" mds%d cap %p issued %s implemented %s revoking %s\n",
-		     cap->mds, cap, ceph_cap_string(cap->issued),
-		     ceph_cap_string(cap->implemented),
-		     ceph_cap_string(revoking));
-
-		if (cap == ci->i_auth_cap &&
-		    (cap->issued & CEPH_CAP_FILE_WR)) {
-			/* request larger max_size from MDS? */
-			if (ci->i_wanted_max_size > ci->i_max_size &&
-			    ci->i_wanted_max_size > ci->i_requested_max_size) {
-				dout("requesting new max_size\n");
-				goto ack;
-			}
-
-			/* approaching file_max? */
-			if ((inode->i_size << 1) >= ci->i_max_size &&
-			    (ci->i_reported_size << 1) < ci->i_max_size) {
-				dout("i_size approaching max_size\n");
-				goto ack;
-			}
-		}
-		/* flush anything dirty? */
-		if (cap == ci->i_auth_cap && (flags & CHECK_CAPS_FLUSH) &&
-		    ci->i_dirty_caps) {
-			dout("flushing dirty caps\n");
-			goto ack;
-		}
-
-		/* completed revocation? going down and there are no caps? */
-		if (revoking && (revoking & used) == 0) {
-			dout("completed revocation of %s\n",
-			     ceph_cap_string(cap->implemented & ~cap->issued));
-			goto ack;
-		}
-
-		/* want more caps from mds? */
-		if (want & ~(cap->mds_wanted | cap->issued))
-			goto ack;
-
-		/* things we might delay */
-		if ((cap->issued & ~retain) == 0 &&
-		    cap->mds_wanted == want)
-			continue;     /* nope, all good */
-
-		if (is_delayed)
-			goto ack;
-
-		/* delay? */
-		if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0 &&
-		    time_before(jiffies, ci->i_hold_caps_max)) {
-			dout(" delaying issued %s -> %s, wanted %s -> %s\n",
-			     ceph_cap_string(cap->issued),
-			     ceph_cap_string(cap->issued & retain),
-			     ceph_cap_string(cap->mds_wanted),
-			     ceph_cap_string(want));
-			delayed++;
-			continue;
-		}
-
-ack:
-		if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
-			dout(" skipping %p I_NOFLUSH set\n", inode);
-			continue;
-		}
-
-		if (session && session != cap->session) {
-			dout("oops, wrong session %p mutex\n", session);
-			mutex_unlock(&session->s_mutex);
-			session = NULL;
-		}
-		if (!session) {
-			session = cap->session;
-			if (mutex_trylock(&session->s_mutex) == 0) {
-				dout("inverting session/ino locks on %p\n",
-				     session);
-				spin_unlock(&ci->i_ceph_lock);
-				if (took_snap_rwsem) {
-					up_read(&mdsc->snap_rwsem);
-					took_snap_rwsem = 0;
-				}
-				mutex_lock(&session->s_mutex);
-				goto retry;
-			}
-		}
-		/* take snap_rwsem after session mutex */
-		if (!took_snap_rwsem) {
-			if (down_read_trylock(&mdsc->snap_rwsem) == 0) {
-				dout("inverting snap/in locks on %p\n",
-				     inode);
-				spin_unlock(&ci->i_ceph_lock);
-				down_read(&mdsc->snap_rwsem);
-				took_snap_rwsem = 1;
-				goto retry;
-			}
-			took_snap_rwsem = 1;
-		}
-
-		if (cap == ci->i_auth_cap && ci->i_dirty_caps)
-			flushing = __mark_caps_flushing(inode, session);
-		else
-			flushing = 0;
-
-		mds = cap->mds;  /* remember mds, so we don't repeat */
-		sent++;
-
-		/* __send_cap drops i_ceph_lock */
-		delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want,
-				      retain, flushing, NULL);
-		goto retry; /* retake i_ceph_lock and restart our cap scan. */
-	}
-
-	/*
-	 * Reschedule delayed caps release if we delayed anything,
-	 * otherwise cancel.
-	 */
-	if (delayed && is_delayed)
-		force_requeue = 1;   /* __send_cap delayed release; requeue */
-	if (!delayed && !is_delayed)
-		__cap_delay_cancel(mdsc, ci);
-	else if (!is_delayed || force_requeue)
-		__cap_delay_requeue(mdsc, ci);
-
-	spin_unlock(&ci->i_ceph_lock);
-
-	if (queue_invalidate)
-		ceph_queue_invalidate(inode);
-
-	if (session)
-		mutex_unlock(&session->s_mutex);
-	if (took_snap_rwsem)
-		up_read(&mdsc->snap_rwsem);
-}
-
-/*
- * Try to flush dirty caps back to the auth mds.
- */
-static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
-			  unsigned *flush_tid)
-{
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int unlock_session = session ? 0 : 1;
-	int flushing = 0;
-
-retry:
-	spin_lock(&ci->i_ceph_lock);
-	if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
-		dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
-		goto out;
-	}
-	if (ci->i_dirty_caps && ci->i_auth_cap) {
-		struct ceph_cap *cap = ci->i_auth_cap;
-		int used = __ceph_caps_used(ci);
-		int want = __ceph_caps_wanted(ci);
-		int delayed;
-
-		if (!session) {
-			spin_unlock(&ci->i_ceph_lock);
-			session = cap->session;
-			mutex_lock(&session->s_mutex);
-			goto retry;
-		}
-		BUG_ON(session != cap->session);
-		if (cap->session->s_state < CEPH_MDS_SESSION_OPEN)
-			goto out;
-
-		flushing = __mark_caps_flushing(inode, session);
-
-		/* __send_cap drops i_ceph_lock */
-		delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want,
-				     cap->issued | cap->implemented, flushing,
-				     flush_tid);
-		if (!delayed)
-			goto out_unlocked;
-
-		spin_lock(&ci->i_ceph_lock);
-		__cap_delay_requeue(mdsc, ci);
-	}
-out:
-	spin_unlock(&ci->i_ceph_lock);
-out_unlocked:
-	if (session && unlock_session)
-		mutex_unlock(&session->s_mutex);
-	return flushing;
-}
-
-/*
- * Return true if we've flushed caps through the given flush_tid.
- */
-static int caps_are_flushed(struct inode *inode, unsigned tid)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int i, ret = 1;
-
-	spin_lock(&ci->i_ceph_lock);
-	for (i = 0; i < CEPH_CAP_BITS; i++)
-		if ((ci->i_flushing_caps & (1 << i)) &&
-		    ci->i_cap_flush_tid[i] <= tid) {
-			/* still flushing this bit */
-			ret = 0;
-			break;
-		}
-	spin_unlock(&ci->i_ceph_lock);
-	return ret;
-}
-
-/*
- * Wait on any unsafe replies for the given inode.  First wait on the
- * newest request, and make that the upper bound.  Then, if there are
- * more requests, keep waiting on the oldest as long as it is still older
- * than the original request.
- */
-static void sync_write_wait(struct inode *inode)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct list_head *head = &ci->i_unsafe_writes;
-	struct ceph_osd_request *req;
-	u64 last_tid;
-
-	spin_lock(&ci->i_unsafe_lock);
-	if (list_empty(head))
-		goto out;
-
-	/* set upper bound as _last_ entry in chain */
-	req = list_entry(head->prev, struct ceph_osd_request,
-			 r_unsafe_item);
-	last_tid = req->r_tid;
-
-	do {
-		ceph_osdc_get_request(req);
-		spin_unlock(&ci->i_unsafe_lock);
-		dout("sync_write_wait on tid %llu (until %llu)\n",
-		     req->r_tid, last_tid);
-		wait_for_completion(&req->r_safe_completion);
-		spin_lock(&ci->i_unsafe_lock);
-		ceph_osdc_put_request(req);
-
-		/*
-		 * from here on look at first entry in chain, since we
-		 * only want to wait for anything older than last_tid
-		 */
-		if (list_empty(head))
-			break;
-		req = list_entry(head->next, struct ceph_osd_request,
-				 r_unsafe_item);
-	} while (req->r_tid < last_tid);
-out:
-	spin_unlock(&ci->i_unsafe_lock);
-}
-
-int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
-{
-	struct inode *inode = file->f_mapping->host;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	unsigned flush_tid;
-	int ret;
-	int dirty;
-
-	dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
-	sync_write_wait(inode);
-
-	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
-	if (ret < 0)
-		return ret;
-	mutex_lock(&inode->i_mutex);
-
-	dirty = try_flush_caps(inode, NULL, &flush_tid);
-	dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
-
-	/*
-	 * only wait on non-file metadata writeback (the mds
-	 * can recover size and mtime, so we don't need to
-	 * wait for that)
-	 */
-	if (!datasync && (dirty & ~CEPH_CAP_ANY_FILE_WR)) {
-		dout("fsync waiting for flush_tid %u\n", flush_tid);
-		ret = wait_event_interruptible(ci->i_cap_wq,
-				       caps_are_flushed(inode, flush_tid));
-	}
-
-	dout("fsync %p%s done\n", inode, datasync ? " datasync" : "");
-	mutex_unlock(&inode->i_mutex);
-	return ret;
-}
-
-/*
- * Flush any dirty caps back to the mds.  If we aren't asked to wait,
- * queue inode for flush but don't do so immediately, because we can
- * get by with fewer MDS messages if we wait for data writeback to
- * complete first.
- */
-int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	unsigned flush_tid;
-	int err = 0;
-	int dirty;
-	int wait = wbc->sync_mode == WB_SYNC_ALL;
-
-	dout("write_inode %p wait=%d\n", inode, wait);
-	if (wait) {
-		dirty = try_flush_caps(inode, NULL, &flush_tid);
-		if (dirty)
-			err = wait_event_interruptible(ci->i_cap_wq,
-				       caps_are_flushed(inode, flush_tid));
-	} else {
-		struct ceph_mds_client *mdsc =
-			ceph_sb_to_client(inode->i_sb)->mdsc;
-
-		spin_lock(&ci->i_ceph_lock);
-		if (__ceph_caps_dirty(ci))
-			__cap_delay_requeue_front(mdsc, ci);
-		spin_unlock(&ci->i_ceph_lock);
-	}
-	return err;
-}
-
-/*
- * After a recovering MDS goes active, we need to resend any caps
- * we were flushing.
- *
- * Caller holds session->s_mutex.
- */
-static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
-				   struct ceph_mds_session *session)
-{
-	struct ceph_cap_snap *capsnap;
-
-	dout("kick_flushing_capsnaps mds%d\n", session->s_mds);
-	list_for_each_entry(capsnap, &session->s_cap_snaps_flushing,
-			    flushing_item) {
-		struct ceph_inode_info *ci = capsnap->ci;
-		struct inode *inode = &ci->vfs_inode;
-		struct ceph_cap *cap;
-
-		spin_lock(&ci->i_ceph_lock);
-		cap = ci->i_auth_cap;
-		if (cap && cap->session == session) {
-			dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
-			     cap, capsnap);
-			__ceph_flush_snaps(ci, &session, 1);
-		} else {
-			pr_err("%p auth cap %p not mds%d ???\n", inode,
-			       cap, session->s_mds);
-		}
-		spin_unlock(&ci->i_ceph_lock);
-	}
-}
-
-void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
-			     struct ceph_mds_session *session)
-{
-	struct ceph_inode_info *ci;
-
-	kick_flushing_capsnaps(mdsc, session);
-
-	dout("kick_flushing_caps mds%d\n", session->s_mds);
-	list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
-		struct inode *inode = &ci->vfs_inode;
-		struct ceph_cap *cap;
-		int delayed = 0;
-
-		spin_lock(&ci->i_ceph_lock);
-		cap = ci->i_auth_cap;
-		if (cap && cap->session == session) {
-			dout("kick_flushing_caps %p cap %p %s\n", inode,
-			     cap, ceph_cap_string(ci->i_flushing_caps));
-			delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
-					     __ceph_caps_used(ci),
-					     __ceph_caps_wanted(ci),
-					     cap->issued | cap->implemented,
-					     ci->i_flushing_caps, NULL);
-			if (delayed) {
-				spin_lock(&ci->i_ceph_lock);
-				__cap_delay_requeue(mdsc, ci);
-				spin_unlock(&ci->i_ceph_lock);
-			}
-		} else {
-			pr_err("%p auth cap %p not mds%d ???\n", inode,
-			       cap, session->s_mds);
-			spin_unlock(&ci->i_ceph_lock);
-		}
-	}
-}
-
-static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
-				     struct ceph_mds_session *session,
-				     struct inode *inode)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_cap *cap;
-	int delayed = 0;
-
-	spin_lock(&ci->i_ceph_lock);
-	cap = ci->i_auth_cap;
-	dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
-	     ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
-	__ceph_flush_snaps(ci, &session, 1);
-	if (ci->i_flushing_caps) {
-		delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
-				     __ceph_caps_used(ci),
-				     __ceph_caps_wanted(ci),
-				     cap->issued | cap->implemented,
-				     ci->i_flushing_caps, NULL);
-		if (delayed) {
-			spin_lock(&ci->i_ceph_lock);
-			__cap_delay_requeue(mdsc, ci);
-			spin_unlock(&ci->i_ceph_lock);
-		}
-	} else {
-		spin_unlock(&ci->i_ceph_lock);
-	}
-}
-
-
-/*
- * Take references to capabilities we hold, so that we don't release
- * them to the MDS prematurely.
- *
- * Protected by i_ceph_lock.
- */
-static void __take_cap_refs(struct ceph_inode_info *ci, int got)
-{
-	if (got & CEPH_CAP_PIN)
-		ci->i_pin_ref++;
-	if (got & CEPH_CAP_FILE_RD)
-		ci->i_rd_ref++;
-	if (got & CEPH_CAP_FILE_CACHE)
-		ci->i_rdcache_ref++;
-	if (got & CEPH_CAP_FILE_WR)
-		ci->i_wr_ref++;
-	if (got & CEPH_CAP_FILE_BUFFER) {
-		if (ci->i_wb_ref == 0)
-			ihold(&ci->vfs_inode);
-		ci->i_wb_ref++;
-		dout("__take_cap_refs %p wb %d -> %d (?)\n",
-		     &ci->vfs_inode, ci->i_wb_ref-1, ci->i_wb_ref);
-	}
-}
-
-/*
- * Try to grab cap references.  Specify those refs we @want, and the
- * minimal set we @need.  Also include the larger offset we are writing
- * to (when applicable), and check against max_size here as well.
- * Note that caller is responsible for ensuring max_size increases are
- * requested from the MDS.
- */
-static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
-			    int *got, loff_t endoff, int *check_max, int *err)
-{
-	struct inode *inode = &ci->vfs_inode;
-	int ret = 0;
-	int have, implemented;
-	int file_wanted;
-
-	dout("get_cap_refs %p need %s want %s\n", inode,
-	     ceph_cap_string(need), ceph_cap_string(want));
-	spin_lock(&ci->i_ceph_lock);
-
-	/* make sure file is actually open */
-	file_wanted = __ceph_caps_file_wanted(ci);
-	if ((file_wanted & need) == 0) {
-		dout("try_get_cap_refs need %s file_wanted %s, EBADF\n",
-		     ceph_cap_string(need), ceph_cap_string(file_wanted));
-		*err = -EBADF;
-		ret = 1;
-		goto out;
-	}
-
-	if (need & CEPH_CAP_FILE_WR) {
-		if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
-			dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
-			     inode, endoff, ci->i_max_size);
-			if (endoff > ci->i_wanted_max_size) {
-				*check_max = 1;
-				ret = 1;
-			}
-			goto out;
-		}
-		/*
-		 * If a sync write is in progress, we must wait, so that we
-		 * can get a final snapshot value for size+mtime.
-		 */
-		if (__ceph_have_pending_cap_snap(ci)) {
-			dout("get_cap_refs %p cap_snap_pending\n", inode);
-			goto out;
-		}
-	}
-	have = __ceph_caps_issued(ci, &implemented);
-
-	/*
-	 * disallow writes while a truncate is pending
-	 */
-	if (ci->i_truncate_pending)
-		have &= ~CEPH_CAP_FILE_WR;
-
-	if ((have & need) == need) {
-		/*
-		 * Look at (implemented & ~have & not) so that we keep waiting
-		 * on transition from wanted -> needed caps.  This is needed
-		 * for WRBUFFER|WR -> WR to avoid a new WR sync write from
-		 * going before a prior buffered writeback happens.
-		 */
-		int not = want & ~(have & need);
-		int revoking = implemented & ~have;
-		dout("get_cap_refs %p have %s but not %s (revoking %s)\n",
-		     inode, ceph_cap_string(have), ceph_cap_string(not),
-		     ceph_cap_string(revoking));
-		if ((revoking & not) == 0) {
-			*got = need | (have & want);
-			__take_cap_refs(ci, *got);
-			ret = 1;
-		}
-	} else {
-		dout("get_cap_refs %p have %s needed %s\n", inode,
-		     ceph_cap_string(have), ceph_cap_string(need));
-	}
-out:
-	spin_unlock(&ci->i_ceph_lock);
-	dout("get_cap_refs %p ret %d got %s\n", inode,
-	     ret, ceph_cap_string(*got));
-	return ret;
-}
-
-/*
- * Check the offset we are writing up to against our current
- * max_size.  If necessary, tell the MDS we want to write to
- * a larger offset.
- */
-static void check_max_size(struct inode *inode, loff_t endoff)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int check = 0;
-
-	/* do we need to explicitly request a larger max_size? */
-	spin_lock(&ci->i_ceph_lock);
-	if ((endoff >= ci->i_max_size ||
-	     endoff > (inode->i_size << 1)) &&
-	    endoff > ci->i_wanted_max_size) {
-		dout("write %p at large endoff %llu, req max_size\n",
-		     inode, endoff);
-		ci->i_wanted_max_size = endoff;
-		check = 1;
-	}
-	spin_unlock(&ci->i_ceph_lock);
-	if (check)
-		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
-}
-
-/*
- * Wait for caps, and take cap references.  If we can't get a WR cap
- * due to a small max_size, make sure we check_max_size (and possibly
- * ask the mds) so we don't get hung up indefinitely.
- */
-int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got,
-		  loff_t endoff)
-{
-	int check_max, ret, err;
-
-retry:
-	if (endoff > 0)
-		check_max_size(&ci->vfs_inode, endoff);
-	check_max = 0;
-	err = 0;
-	ret = wait_event_interruptible(ci->i_cap_wq,
-				       try_get_cap_refs(ci, need, want,
-							got, endoff,
-							&check_max, &err));
-	if (err)
-		ret = err;
-	if (check_max)
-		goto retry;
-	return ret;
-}
-
-/*
- * Take cap refs.  Caller must already know we hold at least one ref
- * on the caps in question or we don't know this is safe.
- */
-void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps)
-{
-	spin_lock(&ci->i_ceph_lock);
-	__take_cap_refs(ci, caps);
-	spin_unlock(&ci->i_ceph_lock);
-}
-
-/*
- * Release cap refs.
- *
- * If we released the last ref on any given cap, call ceph_check_caps
- * to release (or schedule a release).
- *
- * If we are releasing a WR cap (from a sync write), finalize any affected
- * cap_snap, and wake up any waiters.
- */
-void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
-{
-	struct inode *inode = &ci->vfs_inode;
-	int last = 0, put = 0, flushsnaps = 0, wake = 0;
-	struct ceph_cap_snap *capsnap;
-
-	spin_lock(&ci->i_ceph_lock);
-	if (had & CEPH_CAP_PIN)
-		--ci->i_pin_ref;
-	if (had & CEPH_CAP_FILE_RD)
-		if (--ci->i_rd_ref == 0)
-			last++;
-	if (had & CEPH_CAP_FILE_CACHE)
-		if (--ci->i_rdcache_ref == 0)
-			last++;
-	if (had & CEPH_CAP_FILE_BUFFER) {
-		if (--ci->i_wb_ref == 0) {
-			last++;
-			put++;
-		}
-		dout("put_cap_refs %p wb %d -> %d (?)\n",
-		     inode, ci->i_wb_ref+1, ci->i_wb_ref);
-	}
-	if (had & CEPH_CAP_FILE_WR)
-		if (--ci->i_wr_ref == 0) {
-			last++;
-			if (!list_empty(&ci->i_cap_snaps)) {
-				capsnap = list_first_entry(&ci->i_cap_snaps,
-						     struct ceph_cap_snap,
-						     ci_item);
-				if (capsnap->writing) {
-					capsnap->writing = 0;
-					flushsnaps =
-						__ceph_finish_cap_snap(ci,
-								       capsnap);
-					wake = 1;
-				}
-			}
-		}
-	spin_unlock(&ci->i_ceph_lock);
-
-	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
-	     last ? " last" : "", put ? " put" : "");
-
-	if (last && !flushsnaps)
-		ceph_check_caps(ci, 0, NULL);
-	else if (flushsnaps)
-		ceph_flush_snaps(ci);
-	if (wake)
-		wake_up_all(&ci->i_cap_wq);
-	if (put)
-		iput(inode);
-}
-
-/*
- * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
- * context.  Adjust per-snap dirty page accounting as appropriate.
- * Once all dirty data for a cap_snap is flushed, flush snapped file
- * metadata back to the MDS.  If we dropped the last ref, call
- * ceph_check_caps.
- */
-void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
-				struct ceph_snap_context *snapc)
-{
-	struct inode *inode = &ci->vfs_inode;
-	int last = 0;
-	int complete_capsnap = 0;
-	int drop_capsnap = 0;
-	int found = 0;
-	struct ceph_cap_snap *capsnap = NULL;
-
-	spin_lock(&ci->i_ceph_lock);
-	ci->i_wrbuffer_ref -= nr;
-	last = !ci->i_wrbuffer_ref;
-
-	if (ci->i_head_snapc == snapc) {
-		ci->i_wrbuffer_ref_head -= nr;
-		if (ci->i_wrbuffer_ref_head == 0 &&
-		    ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) {
-			BUG_ON(!ci->i_head_snapc);
-			ceph_put_snap_context(ci->i_head_snapc);
-			ci->i_head_snapc = NULL;
-		}
-		dout("put_wrbuffer_cap_refs on %p head %d/%d -> %d/%d %s\n",
-		     inode,
-		     ci->i_wrbuffer_ref+nr, ci->i_wrbuffer_ref_head+nr,
-		     ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
-		     last ? " LAST" : "");
-	} else {
-		list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
-			if (capsnap->context == snapc) {
-				found = 1;
-				break;
-			}
-		}
-		BUG_ON(!found);
-		capsnap->dirty_pages -= nr;
-		if (capsnap->dirty_pages == 0) {
-			complete_capsnap = 1;
-			if (capsnap->dirty == 0)
-				/* cap writeback completed before we created
-				 * the cap_snap; no FLUSHSNAP is needed */
-				drop_capsnap = 1;
-		}
-		dout("put_wrbuffer_cap_refs on %p cap_snap %p "
-		     " snap %lld %d/%d -> %d/%d %s%s%s\n",
-		     inode, capsnap, capsnap->context->seq,
-		     ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
-		     ci->i_wrbuffer_ref, capsnap->dirty_pages,
-		     last ? " (wrbuffer last)" : "",
-		     complete_capsnap ? " (complete capsnap)" : "",
-		     drop_capsnap ? " (drop capsnap)" : "");
-		if (drop_capsnap) {
-			ceph_put_snap_context(capsnap->context);
-			list_del(&capsnap->ci_item);
-			list_del(&capsnap->flushing_item);
-			ceph_put_cap_snap(capsnap);
-		}
-	}
-
-	spin_unlock(&ci->i_ceph_lock);
-
-	if (last) {
-		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
-		iput(inode);
-	} else if (complete_capsnap) {
-		ceph_flush_snaps(ci);
-		wake_up_all(&ci->i_cap_wq);
-	}
-	if (drop_capsnap)
-		iput(inode);
-}
-
-/*
- * Handle a cap GRANT message from the MDS.  (Note that a GRANT may
- * actually be a revocation if it specifies a smaller cap set.)
- *
- * caller holds s_mutex and i_ceph_lock, we drop both.
- *
- * return value:
- *  0 - ok
- *  1 - check_caps on auth cap only (writeback)
- *  2 - check_caps (ack revoke)
- */
-static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
-			     struct ceph_mds_session *session,
-			     struct ceph_cap *cap,
-			     struct ceph_buffer *xattr_buf)
-		__releases(ci->i_ceph_lock)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int mds = session->s_mds;
-	int seq = le32_to_cpu(grant->seq);
-	int newcaps = le32_to_cpu(grant->caps);
-	int issued, implemented, used, wanted, dirty;
-	u64 size = le64_to_cpu(grant->size);
-	u64 max_size = le64_to_cpu(grant->max_size);
-	struct timespec mtime, atime, ctime;
-	int check_caps = 0;
-	int wake = 0;
-	int writeback = 0;
-	int revoked_rdcache = 0;
-	int queue_invalidate = 0;
-
-	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
-	     inode, cap, mds, seq, ceph_cap_string(newcaps));
-	dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
-		inode->i_size);
-
-	/*
-	 * If CACHE is being revoked, and we have no dirty buffers,
-	 * try to invalidate (once).  (If there are dirty buffers, we
-	 * will invalidate _after_ writeback.)
-	 */
-	if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
-	    (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
-	    !ci->i_wrbuffer_ref) {
-		if (try_nonblocking_invalidate(inode) == 0) {
-			revoked_rdcache = 1;
-		} else {
-			/* there were locked pages.. invalidate later
-			   in a separate thread. */
-			if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
-				queue_invalidate = 1;
-				ci->i_rdcache_revoking = ci->i_rdcache_gen;
-			}
-		}
-	}
-
-	/* side effects now are allowed */
-
-	issued = __ceph_caps_issued(ci, &implemented);
-	issued |= implemented | __ceph_caps_dirty(ci);
-
-	cap->cap_gen = session->s_cap_gen;
-
-	__check_cap_issue(ci, cap, newcaps);
-
-	if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
-		inode->i_mode = le32_to_cpu(grant->mode);
-		inode->i_uid = le32_to_cpu(grant->uid);
-		inode->i_gid = le32_to_cpu(grant->gid);
-		dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
-		     inode->i_uid, inode->i_gid);
-	}
-
-	if ((issued & CEPH_CAP_LINK_EXCL) == 0)
-		set_nlink(inode, le32_to_cpu(grant->nlink));
-
-	if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
-		int len = le32_to_cpu(grant->xattr_len);
-		u64 version = le64_to_cpu(grant->xattr_version);
-
-		if (version > ci->i_xattrs.version) {
-			dout(" got new xattrs v%llu on %p len %d\n",
-			     version, inode, len);
-			if (ci->i_xattrs.blob)
-				ceph_buffer_put(ci->i_xattrs.blob);
-			ci->i_xattrs.blob = ceph_buffer_get(xattr_buf);
-			ci->i_xattrs.version = version;
-		}
-	}
-
-	/* size/ctime/mtime/atime? */
-	ceph_fill_file_size(inode, issued,
-			    le32_to_cpu(grant->truncate_seq),
-			    le64_to_cpu(grant->truncate_size), size);
-	ceph_decode_timespec(&mtime, &grant->mtime);
-	ceph_decode_timespec(&atime, &grant->atime);
-	ceph_decode_timespec(&ctime, &grant->ctime);
-	ceph_fill_file_time(inode, issued,
-			    le32_to_cpu(grant->time_warp_seq), &ctime, &mtime,
-			    &atime);
-
-	/* max size increase? */
-	if (max_size != ci->i_max_size) {
-		dout("max_size %lld -> %llu\n", ci->i_max_size, max_size);
-		ci->i_max_size = max_size;
-		if (max_size >= ci->i_wanted_max_size) {
-			ci->i_wanted_max_size = 0;  /* reset */
-			ci->i_requested_max_size = 0;
-		}
-		wake = 1;
-	}
-
-	/* check cap bits */
-	wanted = __ceph_caps_wanted(ci);
-	used = __ceph_caps_used(ci);
-	dirty = __ceph_caps_dirty(ci);
-	dout(" my wanted = %s, used = %s, dirty %s\n",
-	     ceph_cap_string(wanted),
-	     ceph_cap_string(used),
-	     ceph_cap_string(dirty));
-	if (wanted != le32_to_cpu(grant->wanted)) {
-		dout("mds wanted %s -> %s\n",
-		     ceph_cap_string(le32_to_cpu(grant->wanted)),
-		     ceph_cap_string(wanted));
-		grant->wanted = cpu_to_le32(wanted);
-	}
-
-	cap->seq = seq;
-
-	/* file layout may have changed */
-	ci->i_layout = grant->layout;
-
-	/* revocation, grant, or no-op? */
-	if (cap->issued & ~newcaps) {
-		int revoking = cap->issued & ~newcaps;
-
-		dout("revocation: %s -> %s (revoking %s)\n",
-		     ceph_cap_string(cap->issued),
-		     ceph_cap_string(newcaps),
-		     ceph_cap_string(revoking));
-		if (revoking & used & CEPH_CAP_FILE_BUFFER)
-			writeback = 1;  /* initiate writeback; will delay ack */
-		else if (revoking == CEPH_CAP_FILE_CACHE &&
-			 (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
-			 queue_invalidate)
-			; /* do nothing yet, invalidation will be queued */
-		else if (cap == ci->i_auth_cap)
-			check_caps = 1; /* check auth cap only */
-		else
-			check_caps = 2; /* check all caps */
-		cap->issued = newcaps;
-		cap->implemented |= newcaps;
-	} else if (cap->issued == newcaps) {
-		dout("caps unchanged: %s -> %s\n",
-		     ceph_cap_string(cap->issued), ceph_cap_string(newcaps));
-	} else {
-		dout("grant: %s -> %s\n", ceph_cap_string(cap->issued),
-		     ceph_cap_string(newcaps));
-		cap->issued = newcaps;
-		cap->implemented |= newcaps; /* add bits only, to
-					      * avoid stepping on a
-					      * pending revocation */
-		wake = 1;
-	}
-	BUG_ON(cap->issued & ~cap->implemented);
-
-	spin_unlock(&ci->i_ceph_lock);
-	if (writeback)
-		/*
-		 * queue inode for writeback: we can't actually call
-		 * filemap_write_and_wait, etc. from message handler
-		 * context.
-		 */
-		ceph_queue_writeback(inode);
-	if (queue_invalidate)
-		ceph_queue_invalidate(inode);
-	if (wake)
-		wake_up_all(&ci->i_cap_wq);
-
-	if (check_caps == 1)
-		ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
-				session);
-	else if (check_caps == 2)
-		ceph_check_caps(ci, CHECK_CAPS_NODELAY, session);
-	else
-		mutex_unlock(&session->s_mutex);
-}
-
-/*
- * Handle FLUSH_ACK from MDS, indicating that metadata we sent to the
- * MDS has been safely committed.
- */
-static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
-				 struct ceph_mds_caps *m,
-				 struct ceph_mds_session *session,
-				 struct ceph_cap *cap)
-	__releases(ci->i_ceph_lock)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
-	unsigned seq = le32_to_cpu(m->seq);
-	int dirty = le32_to_cpu(m->dirty);
-	int cleaned = 0;
-	int drop = 0;
-	int i;
-
-	for (i = 0; i < CEPH_CAP_BITS; i++)
-		if ((dirty & (1 << i)) &&
-		    flush_tid == ci->i_cap_flush_tid[i])
-			cleaned |= 1 << i;
-
-	dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s,"
-	     " flushing %s -> %s\n",
-	     inode, session->s_mds, seq, ceph_cap_string(dirty),
-	     ceph_cap_string(cleaned), ceph_cap_string(ci->i_flushing_caps),
-	     ceph_cap_string(ci->i_flushing_caps & ~cleaned));
-
-	if (ci->i_flushing_caps == (ci->i_flushing_caps & ~cleaned))
-		goto out;
-
-	ci->i_flushing_caps &= ~cleaned;
-
-	spin_lock(&mdsc->cap_dirty_lock);
-	if (ci->i_flushing_caps == 0) {
-		list_del_init(&ci->i_flushing_item);
-		if (!list_empty(&session->s_cap_flushing))
-			dout(" mds%d still flushing cap on %p\n",
-			     session->s_mds,
-			     &list_entry(session->s_cap_flushing.next,
-					 struct ceph_inode_info,
-					 i_flushing_item)->vfs_inode);
-		mdsc->num_cap_flushing--;
-		wake_up_all(&mdsc->cap_flushing_wq);
-		dout(" inode %p now !flushing\n", inode);
-
-		if (ci->i_dirty_caps == 0) {
-			dout(" inode %p now clean\n", inode);
-			BUG_ON(!list_empty(&ci->i_dirty_item));
-			drop = 1;
-			if (ci->i_wrbuffer_ref_head == 0) {
-				BUG_ON(!ci->i_head_snapc);
-				ceph_put_snap_context(ci->i_head_snapc);
-				ci->i_head_snapc = NULL;
-			}
-		} else {
-			BUG_ON(list_empty(&ci->i_dirty_item));
-		}
-	}
-	spin_unlock(&mdsc->cap_dirty_lock);
-	wake_up_all(&ci->i_cap_wq);
-
-out:
-	spin_unlock(&ci->i_ceph_lock);
-	if (drop)
-		iput(inode);
-}
-
-/*
- * Handle FLUSHSNAP_ACK.  MDS has flushed snap data to disk and we can
- * throw away our cap_snap.
- *
- * Caller hold s_mutex.
- */
-static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
-				     struct ceph_mds_caps *m,
-				     struct ceph_mds_session *session)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	u64 follows = le64_to_cpu(m->snap_follows);
-	struct ceph_cap_snap *capsnap;
-	int drop = 0;
-
-	dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
-	     inode, ci, session->s_mds, follows);
-
-	spin_lock(&ci->i_ceph_lock);
-	list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
-		if (capsnap->follows == follows) {
-			if (capsnap->flush_tid != flush_tid) {
-				dout(" cap_snap %p follows %lld tid %lld !="
-				     " %lld\n", capsnap, follows,
-				     flush_tid, capsnap->flush_tid);
-				break;
-			}
-			WARN_ON(capsnap->dirty_pages || capsnap->writing);
-			dout(" removing %p cap_snap %p follows %lld\n",
-			     inode, capsnap, follows);
-			ceph_put_snap_context(capsnap->context);
-			list_del(&capsnap->ci_item);
-			list_del(&capsnap->flushing_item);
-			ceph_put_cap_snap(capsnap);
-			drop = 1;
-			break;
-		} else {
-			dout(" skipping cap_snap %p follows %lld\n",
-			     capsnap, capsnap->follows);
-		}
-	}
-	spin_unlock(&ci->i_ceph_lock);
-	if (drop)
-		iput(inode);
-}
-
-/*
- * Handle TRUNC from MDS, indicating file truncation.
- *
- * caller hold s_mutex.
- */
-static void handle_cap_trunc(struct inode *inode,
-			     struct ceph_mds_caps *trunc,
-			     struct ceph_mds_session *session)
-	__releases(ci->i_ceph_lock)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int mds = session->s_mds;
-	int seq = le32_to_cpu(trunc->seq);
-	u32 truncate_seq = le32_to_cpu(trunc->truncate_seq);
-	u64 truncate_size = le64_to_cpu(trunc->truncate_size);
-	u64 size = le64_to_cpu(trunc->size);
-	int implemented = 0;
-	int dirty = __ceph_caps_dirty(ci);
-	int issued = __ceph_caps_issued(ceph_inode(inode), &implemented);
-	int queue_trunc = 0;
-
-	issued |= implemented | dirty;
-
-	dout("handle_cap_trunc inode %p mds%d seq %d to %lld seq %d\n",
-	     inode, mds, seq, truncate_size, truncate_seq);
-	queue_trunc = ceph_fill_file_size(inode, issued,
-					  truncate_seq, truncate_size, size);
-	spin_unlock(&ci->i_ceph_lock);
-
-	if (queue_trunc)
-		ceph_queue_vmtruncate(inode);
-}
-
-/*
- * Handle EXPORT from MDS.  Cap is being migrated _from_ this mds to a
- * different one.  If we are the most recent migration we've seen (as
- * indicated by mseq), make note of the migrating cap bits for the
- * duration (until we see the corresponding IMPORT).
- *
- * caller holds s_mutex
- */
-static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
-			      struct ceph_mds_session *session,
-			      int *open_target_sessions)
-{
-	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int mds = session->s_mds;
-	unsigned mseq = le32_to_cpu(ex->migrate_seq);
-	struct ceph_cap *cap = NULL, *t;
-	struct rb_node *p;
-	int remember = 1;
-
-	dout("handle_cap_export inode %p ci %p mds%d mseq %d\n",
-	     inode, ci, mds, mseq);
-
-	spin_lock(&ci->i_ceph_lock);
-
-	/* make sure we haven't seen a higher mseq */
-	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
-		t = rb_entry(p, struct ceph_cap, ci_node);
-		if (ceph_seq_cmp(t->mseq, mseq) > 0) {
-			dout(" higher mseq on cap from mds%d\n",
-			     t->session->s_mds);
-			remember = 0;
-		}
-		if (t->session->s_mds == mds)
-			cap = t;
-	}
-
-	if (cap) {
-		if (remember) {
-			/* make note */
-			ci->i_cap_exporting_mds = mds;
-			ci->i_cap_exporting_mseq = mseq;
-			ci->i_cap_exporting_issued = cap->issued;
-
-			/*
-			 * make sure we have open sessions with all possible
-			 * export targets, so that we get the matching IMPORT
-			 */
-			*open_target_sessions = 1;
-
-			/*
-			 * we can't flush dirty caps that we've seen the
-			 * EXPORT but no IMPORT for
-			 */
-			spin_lock(&mdsc->cap_dirty_lock);
-			if (!list_empty(&ci->i_dirty_item)) {
-				dout(" moving %p to cap_dirty_migrating\n",
-				     inode);
-				list_move(&ci->i_dirty_item,
-					  &mdsc->cap_dirty_migrating);
-			}
-			spin_unlock(&mdsc->cap_dirty_lock);
-		}
-		__ceph_remove_cap(cap);
-	}
-	/* else, we already released it */
-
-	spin_unlock(&ci->i_ceph_lock);
-}
-
-/*
- * Handle cap IMPORT.  If there are temp bits from an older EXPORT,
- * clean them up.
- *
- * caller holds s_mutex.
- */
-static void handle_cap_import(struct ceph_mds_client *mdsc,
-			      struct inode *inode, struct ceph_mds_caps *im,
-			      struct ceph_mds_session *session,
-			      void *snaptrace, int snaptrace_len)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int mds = session->s_mds;
-	unsigned issued = le32_to_cpu(im->caps);
-	unsigned wanted = le32_to_cpu(im->wanted);
-	unsigned seq = le32_to_cpu(im->seq);
-	unsigned mseq = le32_to_cpu(im->migrate_seq);
-	u64 realmino = le64_to_cpu(im->realm);
-	u64 cap_id = le64_to_cpu(im->cap_id);
-
-	if (ci->i_cap_exporting_mds >= 0 &&
-	    ceph_seq_cmp(ci->i_cap_exporting_mseq, mseq) < 0) {
-		dout("handle_cap_import inode %p ci %p mds%d mseq %d"
-		     " - cleared exporting from mds%d\n",
-		     inode, ci, mds, mseq,
-		     ci->i_cap_exporting_mds);
-		ci->i_cap_exporting_issued = 0;
-		ci->i_cap_exporting_mseq = 0;
-		ci->i_cap_exporting_mds = -1;
-
-		spin_lock(&mdsc->cap_dirty_lock);
-		if (!list_empty(&ci->i_dirty_item)) {
-			dout(" moving %p back to cap_dirty\n", inode);
-			list_move(&ci->i_dirty_item, &mdsc->cap_dirty);
-		}
-		spin_unlock(&mdsc->cap_dirty_lock);
-	} else {
-		dout("handle_cap_import inode %p ci %p mds%d mseq %d\n",
-		     inode, ci, mds, mseq);
-	}
-
-	down_write(&mdsc->snap_rwsem);
-	ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len,
-			       false);
-	downgrade_write(&mdsc->snap_rwsem);
-	ceph_add_cap(inode, session, cap_id, -1,
-		     issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH,
-		     NULL /* no caps context */);
-	kick_flushing_inode_caps(mdsc, session, inode);
-	up_read(&mdsc->snap_rwsem);
-
-	/* make sure we re-request max_size, if necessary */
-	spin_lock(&ci->i_ceph_lock);
-	ci->i_requested_max_size = 0;
-	spin_unlock(&ci->i_ceph_lock);
-}
-
-/*
- * Handle a caps message from the MDS.
- *
- * Identify the appropriate session, inode, and call the right handler
- * based on the cap op.
- */
-void ceph_handle_caps(struct ceph_mds_session *session,
-		      struct ceph_msg *msg)
-{
-	struct ceph_mds_client *mdsc = session->s_mdsc;
-	struct super_block *sb = mdsc->fsc->sb;
-	struct inode *inode;
-	struct ceph_inode_info *ci;
-	struct ceph_cap *cap;
-	struct ceph_mds_caps *h;
-	int mds = session->s_mds;
-	int op;
-	u32 seq, mseq;
-	struct ceph_vino vino;
-	u64 cap_id;
-	u64 size, max_size;
-	u64 tid;
-	void *snaptrace;
-	size_t snaptrace_len;
-	void *flock;
-	u32 flock_len;
-	int open_target_sessions = 0;
-
-	dout("handle_caps from mds%d\n", mds);
-
-	/* decode */
-	tid = le64_to_cpu(msg->hdr.tid);
-	if (msg->front.iov_len < sizeof(*h))
-		goto bad;
-	h = msg->front.iov_base;
-	op = le32_to_cpu(h->op);
-	vino.ino = le64_to_cpu(h->ino);
-	vino.snap = CEPH_NOSNAP;
-	cap_id = le64_to_cpu(h->cap_id);
-	seq = le32_to_cpu(h->seq);
-	mseq = le32_to_cpu(h->migrate_seq);
-	size = le64_to_cpu(h->size);
-	max_size = le64_to_cpu(h->max_size);
-
-	snaptrace = h + 1;
-	snaptrace_len = le32_to_cpu(h->snap_trace_len);
-
-	if (le16_to_cpu(msg->hdr.version) >= 2) {
-		void *p, *end;
-
-		p = snaptrace + snaptrace_len;
-		end = msg->front.iov_base + msg->front.iov_len;
-		ceph_decode_32_safe(&p, end, flock_len, bad);
-		flock = p;
-	} else {
-		flock = NULL;
-		flock_len = 0;
-	}
-
-	mutex_lock(&session->s_mutex);
-	session->s_seq++;
-	dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
-	     (unsigned)seq);
-
-	/* lookup ino */
-	inode = ceph_find_inode(sb, vino);
-	ci = ceph_inode(inode);
-	dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
-	     vino.snap, inode);
-	if (!inode) {
-		dout(" i don't have ino %llx\n", vino.ino);
-
-		if (op == CEPH_CAP_OP_IMPORT)
-			__queue_cap_release(session, vino.ino, cap_id,
-					    mseq, seq);
-		goto flush_cap_releases;
-	}
-
-	/* these will work even if we don't have a cap yet */
-	switch (op) {
-	case CEPH_CAP_OP_FLUSHSNAP_ACK:
-		handle_cap_flushsnap_ack(inode, tid, h, session);
-		goto done;
-
-	case CEPH_CAP_OP_EXPORT:
-		handle_cap_export(inode, h, session, &open_target_sessions);
-		goto done;
-
-	case CEPH_CAP_OP_IMPORT:
-		handle_cap_import(mdsc, inode, h, session,
-				  snaptrace, snaptrace_len);
-		ceph_check_caps(ceph_inode(inode), 0, session);
-		goto done_unlocked;
-	}
-
-	/* the rest require a cap */
-	spin_lock(&ci->i_ceph_lock);
-	cap = __get_cap_for_mds(ceph_inode(inode), mds);
-	if (!cap) {
-		dout(" no cap on %p ino %llx.%llx from mds%d\n",
-		     inode, ceph_ino(inode), ceph_snap(inode), mds);
-		spin_unlock(&ci->i_ceph_lock);
-		goto flush_cap_releases;
-	}
-
-	/* note that each of these drops i_ceph_lock for us */
-	switch (op) {
-	case CEPH_CAP_OP_REVOKE:
-	case CEPH_CAP_OP_GRANT:
-		handle_cap_grant(inode, h, session, cap, msg->middle);
-		goto done_unlocked;
-
-	case CEPH_CAP_OP_FLUSH_ACK:
-		handle_cap_flush_ack(inode, tid, h, session, cap);
-		break;
-
-	case CEPH_CAP_OP_TRUNC:
-		handle_cap_trunc(inode, h, session);
-		break;
-
-	default:
-		spin_unlock(&ci->i_ceph_lock);
-		pr_err("ceph_handle_caps: unknown cap op %d %s\n", op,
-		       ceph_cap_op_name(op));
-	}
-
-	goto done;
-
-flush_cap_releases:
-	/*
-	 * send any full release message to try to move things
-	 * along for the mds (who clearly thinks we still have this
-	 * cap).
-	 */
-	ceph_add_cap_releases(mdsc, session);
-	ceph_send_cap_releases(mdsc, session);
-
-done:
-	mutex_unlock(&session->s_mutex);
-done_unlocked:
-	if (inode)
-		iput(inode);
-	if (open_target_sessions)
-		ceph_mdsc_open_export_target_sessions(mdsc, session);
-	return;
-
-bad:
-	pr_err("ceph_handle_caps: corrupt message\n");
-	ceph_msg_dump(msg);
-	return;
-}
-
-/*
- * Delayed work handler to process end of delayed cap release LRU list.
- */
-void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
-{
-	struct ceph_inode_info *ci;
-	int flags = CHECK_CAPS_NODELAY;
-
-	dout("check_delayed_caps\n");
-	while (1) {
-		spin_lock(&mdsc->cap_delay_lock);
-		if (list_empty(&mdsc->cap_delay_list))
-			break;
-		ci = list_first_entry(&mdsc->cap_delay_list,
-				      struct ceph_inode_info,
-				      i_cap_delay_list);
-		if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 &&
-		    time_before(jiffies, ci->i_hold_caps_max))
-			break;
-		list_del_init(&ci->i_cap_delay_list);
-		spin_unlock(&mdsc->cap_delay_lock);
-		dout("check_delayed_caps on %p\n", &ci->vfs_inode);
-		ceph_check_caps(ci, flags, NULL);
-	}
-	spin_unlock(&mdsc->cap_delay_lock);
-}
-
-/*
- * Flush all dirty caps to the mds
- */
-void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
-{
-	struct ceph_inode_info *ci;
-	struct inode *inode;
-
-	dout("flush_dirty_caps\n");
-	spin_lock(&mdsc->cap_dirty_lock);
-	while (!list_empty(&mdsc->cap_dirty)) {
-		ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
-				      i_dirty_item);
-		inode = &ci->vfs_inode;
-		ihold(inode);
-		dout("flush_dirty_caps %p\n", inode);
-		spin_unlock(&mdsc->cap_dirty_lock);
-		ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, NULL);
-		iput(inode);
-		spin_lock(&mdsc->cap_dirty_lock);
-	}
-	spin_unlock(&mdsc->cap_dirty_lock);
-	dout("flush_dirty_caps done\n");
-}
-
-/*
- * Drop open file reference.  If we were the last open file,
- * we may need to release capabilities to the MDS (or schedule
- * their delayed release).
- */
-void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
-{
-	struct inode *inode = &ci->vfs_inode;
-	int last = 0;
-
-	spin_lock(&ci->i_ceph_lock);
-	dout("put_fmode %p fmode %d %d -> %d\n", inode, fmode,
-	     ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1);
-	BUG_ON(ci->i_nr_by_mode[fmode] == 0);
-	if (--ci->i_nr_by_mode[fmode] == 0)
-		last++;
-	spin_unlock(&ci->i_ceph_lock);
-
-	if (last && ci->i_vino.snap == CEPH_NOSNAP)
-		ceph_check_caps(ci, 0, NULL);
-}
-
-/*
- * Helpers for embedding cap and dentry lease releases into mds
- * requests.
- *
- * @force is used by dentry_release (below) to force inclusion of a
- * record for the directory inode, even when there aren't any caps to
- * drop.
- */
-int ceph_encode_inode_release(void **p, struct inode *inode,
-			      int mds, int drop, int unless, int force)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_cap *cap;
-	struct ceph_mds_request_release *rel = *p;
-	int used, dirty;
-	int ret = 0;
-
-	spin_lock(&ci->i_ceph_lock);
-	used = __ceph_caps_used(ci);
-	dirty = __ceph_caps_dirty(ci);
-
-	dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n",
-	     inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop),
-	     ceph_cap_string(unless));
-
-	/* only drop unused, clean caps */
-	drop &= ~(used | dirty);
-
-	cap = __get_cap_for_mds(ci, mds);
-	if (cap && __cap_is_valid(cap)) {
-		if (force ||
-		    ((cap->issued & drop) &&
-		     (cap->issued & unless) == 0)) {
-			if ((cap->issued & drop) &&
-			    (cap->issued & unless) == 0) {
-				dout("encode_inode_release %p cap %p %s -> "
-				     "%s\n", inode, cap,
-				     ceph_cap_string(cap->issued),
-				     ceph_cap_string(cap->issued & ~drop));
-				cap->issued &= ~drop;
-				cap->implemented &= ~drop;
-				if (ci->i_ceph_flags & CEPH_I_NODELAY) {
-					int wanted = __ceph_caps_wanted(ci);
-					dout("  wanted %s -> %s (act %s)\n",
-					     ceph_cap_string(cap->mds_wanted),
-					     ceph_cap_string(cap->mds_wanted &
-							     ~wanted),
-					     ceph_cap_string(wanted));
-					cap->mds_wanted &= wanted;
-				}
-			} else {
-				dout("encode_inode_release %p cap %p %s"
-				     " (force)\n", inode, cap,
-				     ceph_cap_string(cap->issued));
-			}
-
-			rel->ino = cpu_to_le64(ceph_ino(inode));
-			rel->cap_id = cpu_to_le64(cap->cap_id);
-			rel->seq = cpu_to_le32(cap->seq);
-			rel->issue_seq = cpu_to_le32(cap->issue_seq),
-			rel->mseq = cpu_to_le32(cap->mseq);
-			rel->caps = cpu_to_le32(cap->issued);
-			rel->wanted = cpu_to_le32(cap->mds_wanted);
-			rel->dname_len = 0;
-			rel->dname_seq = 0;
-			*p += sizeof(*rel);
-			ret = 1;
-		} else {
-			dout("encode_inode_release %p cap %p %s\n",
-			     inode, cap, ceph_cap_string(cap->issued));
-		}
-	}
-	spin_unlock(&ci->i_ceph_lock);
-	return ret;
-}
-
-int ceph_encode_dentry_release(void **p, struct dentry *dentry,
-			       int mds, int drop, int unless)
-{
-	struct inode *dir = dentry->d_parent->d_inode;
-	struct ceph_mds_request_release *rel = *p;
-	struct ceph_dentry_info *di = ceph_dentry(dentry);
-	int force = 0;
-	int ret;
-
-	/*
-	 * force an record for the directory caps if we have a dentry lease.
-	 * this is racy (can't take i_ceph_lock and d_lock together), but it
-	 * doesn't have to be perfect; the mds will revoke anything we don't
-	 * release.
-	 */
-	spin_lock(&dentry->d_lock);
-	if (di->lease_session && di->lease_session->s_mds == mds)
-		force = 1;
-	spin_unlock(&dentry->d_lock);
-
-	ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
-
-	spin_lock(&dentry->d_lock);
-	if (ret && di->lease_session && di->lease_session->s_mds == mds) {
-		dout("encode_dentry_release %p mds%d seq %d\n",
-		     dentry, mds, (int)di->lease_seq);
-		rel->dname_len = cpu_to_le32(dentry->d_name.len);
-		memcpy(*p, dentry->d_name.name, dentry->d_name.len);
-		*p += dentry->d_name.len;
-		rel->dname_seq = cpu_to_le32(di->lease_seq);
-		__ceph_mdsc_drop_dentry_lease(dentry);
-	}
-	spin_unlock(&dentry->d_lock);
-	return ret;
-}
diff --git a/ANDROID_3.4.5/fs/ceph/ceph_frag.c b/ANDROID_3.4.5/fs/ceph/ceph_frag.c
deleted file mode 100644
index bdce8b1f..00000000
--- a/ANDROID_3.4.5/fs/ceph/ceph_frag.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Ceph 'frag' type
- */
-#include <linux/module.h>
-#include <linux/ceph/types.h>
-
-int ceph_frag_compare(__u32 a, __u32 b)
-{
-	unsigned va = ceph_frag_value(a);
-	unsigned vb = ceph_frag_value(b);
-	if (va < vb)
-		return -1;
-	if (va > vb)
-		return 1;
-	va = ceph_frag_bits(a);
-	vb = ceph_frag_bits(b);
-	if (va < vb)
-		return -1;
-	if (va > vb)
-		return 1;
-	return 0;
-}
diff --git a/ANDROID_3.4.5/fs/ceph/debugfs.c b/ANDROID_3.4.5/fs/ceph/debugfs.c
deleted file mode 100644
index fb962efd..00000000
--- a/ANDROID_3.4.5/fs/ceph/debugfs.c
+++ /dev/null
@@ -1,273 +0,0 @@
-#include <linux/ceph/ceph_debug.h>
-
-#include <linux/device.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/ctype.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-#include <linux/ceph/libceph.h>
-#include <linux/ceph/mon_client.h>
-#include <linux/ceph/auth.h>
-#include <linux/ceph/debugfs.h>
-
-#include "super.h"
-
-#ifdef CONFIG_DEBUG_FS
-
-#include "mds_client.h"
-
-static int mdsmap_show(struct seq_file *s, void *p)
-{
-	int i;
-	struct ceph_fs_client *fsc = s->private;
-
-	if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL)
-		return 0;
-	seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch);
-	seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root);
-	seq_printf(s, "session_timeout %d\n",
-		       fsc->mdsc->mdsmap->m_session_timeout);
-	seq_printf(s, "session_autoclose %d\n",
-		       fsc->mdsc->mdsmap->m_session_autoclose);
-	for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) {
-		struct ceph_entity_addr *addr =
-			&fsc->mdsc->mdsmap->m_info[i].addr;
-		int state = fsc->mdsc->mdsmap->m_info[i].state;
-
-		seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
-			       ceph_pr_addr(&addr->in_addr),
-			       ceph_mds_state_name(state));
-	}
-	return 0;
-}
-
-/*
- * mdsc debugfs
- */
-static int mdsc_show(struct seq_file *s, void *p)
-{
-	struct ceph_fs_client *fsc = s->private;
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct ceph_mds_request *req;
-	struct rb_node *rp;
-	int pathlen;
-	u64 pathbase;
-	char *path;
-
-	mutex_lock(&mdsc->mutex);
-	for (rp = rb_first(&mdsc->request_tree); rp; rp = rb_next(rp)) {
-		req = rb_entry(rp, struct ceph_mds_request, r_node);
-
-		if (req->r_request && req->r_session)
-			seq_printf(s, "%lld\tmds%d\t", req->r_tid,
-				   req->r_session->s_mds);
-		else if (!req->r_request)
-			seq_printf(s, "%lld\t(no request)\t", req->r_tid);
-		else
-			seq_printf(s, "%lld\t(no session)\t", req->r_tid);
-
-		seq_printf(s, "%s", ceph_mds_op_name(req->r_op));
-
-		if (req->r_got_unsafe)
-			seq_printf(s, "\t(unsafe)");
-		else
-			seq_printf(s, "\t");
-
-		if (req->r_inode) {
-			seq_printf(s, " #%llx", ceph_ino(req->r_inode));
-		} else if (req->r_dentry) {
-			path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
-						    &pathbase, 0);
-			if (IS_ERR(path))
-				path = NULL;
-			spin_lock(&req->r_dentry->d_lock);
-			seq_printf(s, " #%llx/%.*s (%s)",
-				   ceph_ino(req->r_dentry->d_parent->d_inode),
-				   req->r_dentry->d_name.len,
-				   req->r_dentry->d_name.name,
-				   path ? path : "");
-			spin_unlock(&req->r_dentry->d_lock);
-			kfree(path);
-		} else if (req->r_path1) {
-			seq_printf(s, " #%llx/%s", req->r_ino1.ino,
-				   req->r_path1);
-		}
-
-		if (req->r_old_dentry) {
-			path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen,
-						    &pathbase, 0);
-			if (IS_ERR(path))
-				path = NULL;
-			spin_lock(&req->r_old_dentry->d_lock);
-			seq_printf(s, " #%llx/%.*s (%s)",
-			   ceph_ino(req->r_old_dentry_dir),
-				   req->r_old_dentry->d_name.len,
-				   req->r_old_dentry->d_name.name,
-				   path ? path : "");
-			spin_unlock(&req->r_old_dentry->d_lock);
-			kfree(path);
-		} else if (req->r_path2) {
-			if (req->r_ino2.ino)
-				seq_printf(s, " #%llx/%s", req->r_ino2.ino,
-					   req->r_path2);
-			else
-				seq_printf(s, " %s", req->r_path2);
-		}
-
-		seq_printf(s, "\n");
-	}
-	mutex_unlock(&mdsc->mutex);
-
-	return 0;
-}
-
-static int caps_show(struct seq_file *s, void *p)
-{
-	struct ceph_fs_client *fsc = s->private;
-	int total, avail, used, reserved, min;
-
-	ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min);
-	seq_printf(s, "total\t\t%d\n"
-		   "avail\t\t%d\n"
-		   "used\t\t%d\n"
-		   "reserved\t%d\n"
-		   "min\t%d\n",
-		   total, avail, used, reserved, min);
-	return 0;
-}
-
-static int dentry_lru_show(struct seq_file *s, void *ptr)
-{
-	struct ceph_fs_client *fsc = s->private;
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct ceph_dentry_info *di;
-
-	spin_lock(&mdsc->dentry_lru_lock);
-	list_for_each_entry(di, &mdsc->dentry_lru, lru) {
-		struct dentry *dentry = di->dentry;
-		seq_printf(s, "%p %p\t%.*s\n",
-			   di, dentry, dentry->d_name.len, dentry->d_name.name);
-	}
-	spin_unlock(&mdsc->dentry_lru_lock);
-
-	return 0;
-}
-
-CEPH_DEFINE_SHOW_FUNC(mdsmap_show)
-CEPH_DEFINE_SHOW_FUNC(mdsc_show)
-CEPH_DEFINE_SHOW_FUNC(caps_show)
-CEPH_DEFINE_SHOW_FUNC(dentry_lru_show)
-
-
-/*
- * debugfs
- */
-static int congestion_kb_set(void *data, u64 val)
-{
-	struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
-
-	fsc->mount_options->congestion_kb = (int)val;
-	return 0;
-}
-
-static int congestion_kb_get(void *data, u64 *val)
-{
-	struct ceph_fs_client *fsc = (struct ceph_fs_client *)data;
-
-	*val = (u64)fsc->mount_options->congestion_kb;
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get,
-			congestion_kb_set, "%llu\n");
-
-
-void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
-{
-	dout("ceph_fs_debugfs_cleanup\n");
-	debugfs_remove(fsc->debugfs_bdi);
-	debugfs_remove(fsc->debugfs_congestion_kb);
-	debugfs_remove(fsc->debugfs_mdsmap);
-	debugfs_remove(fsc->debugfs_caps);
-	debugfs_remove(fsc->debugfs_mdsc);
-	debugfs_remove(fsc->debugfs_dentry_lru);
-}
-
-int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
-{
-	char name[100];
-	int err = -ENOMEM;
-
-	dout("ceph_fs_debugfs_init\n");
-	fsc->debugfs_congestion_kb =
-		debugfs_create_file("writeback_congestion_kb",
-				    0600,
-				    fsc->client->debugfs_dir,
-				    fsc,
-				    &congestion_kb_fops);
-	if (!fsc->debugfs_congestion_kb)
-		goto out;
-
-	snprintf(name, sizeof(name), "../../bdi/%s",
-		 dev_name(fsc->backing_dev_info.dev));
-	fsc->debugfs_bdi =
-		debugfs_create_symlink("bdi",
-				       fsc->client->debugfs_dir,
-				       name);
-	if (!fsc->debugfs_bdi)
-		goto out;
-
-	fsc->debugfs_mdsmap = debugfs_create_file("mdsmap",
-					0600,
-					fsc->client->debugfs_dir,
-					fsc,
-					&mdsmap_show_fops);
-	if (!fsc->debugfs_mdsmap)
-		goto out;
-
-	fsc->debugfs_mdsc = debugfs_create_file("mdsc",
-						0600,
-						fsc->client->debugfs_dir,
-						fsc,
-						&mdsc_show_fops);
-	if (!fsc->debugfs_mdsc)
-		goto out;
-
-	fsc->debugfs_caps = debugfs_create_file("caps",
-						   0400,
-						   fsc->client->debugfs_dir,
-						   fsc,
-						   &caps_show_fops);
-	if (!fsc->debugfs_caps)
-		goto out;
-
-	fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
-					0600,
-					fsc->client->debugfs_dir,
-					fsc,
-					&dentry_lru_show_fops);
-	if (!fsc->debugfs_dentry_lru)
-		goto out;
-
-	return 0;
-
-out:
-	ceph_fs_debugfs_cleanup(fsc);
-	return err;
-}
-
-
-#else  /* CONFIG_DEBUG_FS */
-
-int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
-{
-	return 0;
-}
-
-void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
-{
-}
-
-#endif  /* CONFIG_DEBUG_FS */
diff --git a/ANDROID_3.4.5/fs/ceph/dir.c b/ANDROID_3.4.5/fs/ceph/dir.c
deleted file mode 100644
index 3e8094be..00000000
--- a/ANDROID_3.4.5/fs/ceph/dir.c
+++ /dev/null
@@ -1,1376 +0,0 @@
-#include <linux/ceph/ceph_debug.h>
-
-#include <linux/spinlock.h>
-#include <linux/fs_struct.h>
-#include <linux/namei.h>
-#include <linux/slab.h>
-#include <linux/sched.h>
-
-#include "super.h"
-#include "mds_client.h"
-
-/*
- * Directory operations: readdir, lookup, create, link, unlink,
- * rename, etc.
- */
-
-/*
- * Ceph MDS operations are specified in terms of a base ino and
- * relative path.  Thus, the client can specify an operation on a
- * specific inode (e.g., a getattr due to fstat(2)), or as a path
- * relative to, say, the root directory.
- *
- * Normally, we limit ourselves to strict inode ops (no path component)
- * or dentry operations (a single path component relative to an ino).  The
- * exception to this is open_root_dentry(), which will open the mount
- * point by name.
- */
-
-const struct inode_operations ceph_dir_iops;
-const struct file_operations ceph_dir_fops;
-const struct dentry_operations ceph_dentry_ops;
-
-/*
- * Initialize ceph dentry state.
- */
-int ceph_init_dentry(struct dentry *dentry)
-{
-	struct ceph_dentry_info *di;
-
-	if (dentry->d_fsdata)
-		return 0;
-
-	di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
-	if (!di)
-		return -ENOMEM;          /* oh well */
-
-	spin_lock(&dentry->d_lock);
-	if (dentry->d_fsdata) {
-		/* lost a race */
-		kmem_cache_free(ceph_dentry_cachep, di);
-		goto out_unlock;
-	}
-
-	if (dentry->d_parent == NULL ||   /* nfs fh_to_dentry */
-	    ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
-		d_set_d_op(dentry, &ceph_dentry_ops);
-	else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
-		d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
-	else
-		d_set_d_op(dentry, &ceph_snap_dentry_ops);
-
-	di->dentry = dentry;
-	di->lease_session = NULL;
-	dentry->d_time = jiffies;
-	/* avoid reordering d_fsdata setup so that the check above is safe */
-	smp_mb();
-	dentry->d_fsdata = di;
-	ceph_dentry_lru_add(dentry);
-out_unlock:
-	spin_unlock(&dentry->d_lock);
-	return 0;
-}
-
-struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry)
-{
-	struct inode *inode = NULL;
-
-	if (!dentry)
-		return NULL;
-
-	spin_lock(&dentry->d_lock);
-	if (dentry->d_parent) {
-		inode = dentry->d_parent->d_inode;
-		ihold(inode);
-	}
-	spin_unlock(&dentry->d_lock);
-	return inode;
-}
-
-
-/*
- * for readdir, we encode the directory frag and offset within that
- * frag into f_pos.
- */
-static unsigned fpos_frag(loff_t p)
-{
-	return p >> 32;
-}
-static unsigned fpos_off(loff_t p)
-{
-	return p & 0xffffffff;
-}
-
-/*
- * When possible, we try to satisfy a readdir by peeking at the
- * dcache.  We make this work by carefully ordering dentries on
- * d_u.d_child when we initially get results back from the MDS, and
- * falling back to a "normal" sync readdir if any dentries in the dir
- * are dropped.
- *
- * D_COMPLETE tells indicates we have all dentries in the dir.  It is
- * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
- * the MDS if/when the directory is modified).
- */
-static int __dcache_readdir(struct file *filp,
-			    void *dirent, filldir_t filldir)
-{
-	struct ceph_file_info *fi = filp->private_data;
-	struct dentry *parent = filp->f_dentry;
-	struct inode *dir = parent->d_inode;
-	struct list_head *p;
-	struct dentry *dentry, *last;
-	struct ceph_dentry_info *di;
-	int err = 0;
-
-	/* claim ref on last dentry we returned */
-	last = fi->dentry;
-	fi->dentry = NULL;
-
-	dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos,
-	     last);
-
-	spin_lock(&parent->d_lock);
-
-	/* start at beginning? */
-	if (filp->f_pos == 2 || last == NULL ||
-	    filp->f_pos < ceph_dentry(last)->offset) {
-		if (list_empty(&parent->d_subdirs))
-			goto out_unlock;
-		p = parent->d_subdirs.prev;
-		dout(" initial p %p/%p\n", p->prev, p->next);
-	} else {
-		p = last->d_u.d_child.prev;
-	}
-
-more:
-	dentry = list_entry(p, struct dentry, d_u.d_child);
-	di = ceph_dentry(dentry);
-	while (1) {
-		dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
-		     d_unhashed(dentry) ? "!hashed" : "hashed",
-		     parent->d_subdirs.prev, parent->d_subdirs.next);
-		if (p == &parent->d_subdirs) {
-			fi->flags |= CEPH_F_ATEND;
-			goto out_unlock;
-		}
-		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-		if (!d_unhashed(dentry) && dentry->d_inode &&
-		    ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
-		    ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
-		    filp->f_pos <= di->offset)
-			break;
-		dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry,
-		     dentry->d_name.len, dentry->d_name.name, di->offset,
-		     filp->f_pos, d_unhashed(dentry) ? " unhashed" : "",
-		     !dentry->d_inode ? " null" : "");
-		spin_unlock(&dentry->d_lock);
-		p = p->prev;
-		dentry = list_entry(p, struct dentry, d_u.d_child);
-		di = ceph_dentry(dentry);
-	}
-
-	dget_dlock(dentry);
-	spin_unlock(&dentry->d_lock);
-	spin_unlock(&parent->d_lock);
-
-	dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos,
-	     dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
-	filp->f_pos = di->offset;
-	err = filldir(dirent, dentry->d_name.name,
-		      dentry->d_name.len, di->offset,
-		      ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
-		      dentry->d_inode->i_mode >> 12);
-
-	if (last) {
-		if (err < 0) {
-			/* remember our position */
-			fi->dentry = last;
-			fi->next_offset = di->offset;
-		} else {
-			dput(last);
-		}
-	}
-	last = dentry;
-
-	if (err < 0)
-		goto out;
-
-	filp->f_pos++;
-
-	/* make sure a dentry wasn't dropped while we didn't have parent lock */
-	if (!ceph_dir_test_complete(dir)) {
-		dout(" lost D_COMPLETE on %p; falling back to mds\n", dir);
-		err = -EAGAIN;
-		goto out;
-	}
-
-	spin_lock(&parent->d_lock);
-	p = p->prev;	/* advance to next dentry */
-	goto more;
-
-out_unlock:
-	spin_unlock(&parent->d_lock);
-out:
-	if (last)
-		dput(last);
-	return err;
-}
-
-/*
- * make note of the last dentry we read, so we can
- * continue at the same lexicographical point,
- * regardless of what dir changes take place on the
- * server.
- */
-static int note_last_dentry(struct ceph_file_info *fi, const char *name,
-			    int len)
-{
-	kfree(fi->last_name);
-	fi->last_name = kmalloc(len+1, GFP_NOFS);
-	if (!fi->last_name)
-		return -ENOMEM;
-	memcpy(fi->last_name, name, len);
-	fi->last_name[len] = 0;
-	dout("note_last_dentry '%s'\n", fi->last_name);
-	return 0;
-}
-
-static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
-{
-	struct ceph_file_info *fi = filp->private_data;
-	struct inode *inode = filp->f_dentry->d_inode;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	unsigned frag = fpos_frag(filp->f_pos);
-	int off = fpos_off(filp->f_pos);
-	int err;
-	u32 ftype;
-	struct ceph_mds_reply_info_parsed *rinfo;
-	const int max_entries = fsc->mount_options->max_readdir;
-	const int max_bytes = fsc->mount_options->max_readdir_bytes;
-
-	dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
-	if (fi->flags & CEPH_F_ATEND)
-		return 0;
-
-	/* always start with . and .. */
-	if (filp->f_pos == 0) {
-		/* note dir version at start of readdir so we can tell
-		 * if any dentries get dropped */
-		fi->dir_release_count = ci->i_release_count;
-
-		dout("readdir off 0 -> '.'\n");
-		if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0),
-			    ceph_translate_ino(inode->i_sb, inode->i_ino),
-			    inode->i_mode >> 12) < 0)
-			return 0;
-		filp->f_pos = 1;
-		off = 1;
-	}
-	if (filp->f_pos == 1) {
-		ino_t ino = parent_ino(filp->f_dentry);
-		dout("readdir off 1 -> '..'\n");
-		if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1),
-			    ceph_translate_ino(inode->i_sb, ino),
-			    inode->i_mode >> 12) < 0)
-			return 0;
-		filp->f_pos = 2;
-		off = 2;
-	}
-
-	/* can we use the dcache? */
-	spin_lock(&ci->i_ceph_lock);
-	if ((filp->f_pos == 2 || fi->dentry) &&
-	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
-	    ceph_snap(inode) != CEPH_SNAPDIR &&
-	    ceph_dir_test_complete(inode) &&
-	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
-		spin_unlock(&ci->i_ceph_lock);
-		err = __dcache_readdir(filp, dirent, filldir);
-		if (err != -EAGAIN)
-			return err;
-	} else {
-		spin_unlock(&ci->i_ceph_lock);
-	}
-	if (fi->dentry) {
-		err = note_last_dentry(fi, fi->dentry->d_name.name,
-				       fi->dentry->d_name.len);
-		if (err)
-			return err;
-		dput(fi->dentry);
-		fi->dentry = NULL;
-	}
-
-	/* proceed with a normal readdir */
-
-more:
-	/* do we have the correct frag content buffered? */
-	if (fi->frag != frag || fi->last_readdir == NULL) {
-		struct ceph_mds_request *req;
-		int op = ceph_snap(inode) == CEPH_SNAPDIR ?
-			CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
-
-		/* discard old result, if any */
-		if (fi->last_readdir) {
-			ceph_mdsc_put_request(fi->last_readdir);
-			fi->last_readdir = NULL;
-		}
-
-		/* requery frag tree, as the frag topology may have changed */
-		frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL);
-
-		dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
-		     ceph_vinop(inode), frag, fi->last_name);
-		req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
-		if (IS_ERR(req))
-			return PTR_ERR(req);
-		req->r_inode = inode;
-		ihold(inode);
-		req->r_dentry = dget(filp->f_dentry);
-		/* hints to request -> mds selection code */
-		req->r_direct_mode = USE_AUTH_MDS;
-		req->r_direct_hash = ceph_frag_value(frag);
-		req->r_direct_is_hash = true;
-		req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
-		req->r_readdir_offset = fi->next_offset;
-		req->r_args.readdir.frag = cpu_to_le32(frag);
-		req->r_args.readdir.max_entries = cpu_to_le32(max_entries);
-		req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes);
-		req->r_num_caps = max_entries + 1;
-		err = ceph_mdsc_do_request(mdsc, NULL, req);
-		if (err < 0) {
-			ceph_mdsc_put_request(req);
-			return err;
-		}
-		dout("readdir got and parsed readdir result=%d"
-		     " on frag %x, end=%d, complete=%d\n", err, frag,
-		     (int)req->r_reply_info.dir_end,
-		     (int)req->r_reply_info.dir_complete);
-
-		if (!req->r_did_prepopulate) {
-			dout("readdir !did_prepopulate");
-			fi->dir_release_count--;    /* preclude D_COMPLETE */
-		}
-
-		/* note next offset and last dentry name */
-		fi->offset = fi->next_offset;
-		fi->last_readdir = req;
-
-		if (req->r_reply_info.dir_end) {
-			kfree(fi->last_name);
-			fi->last_name = NULL;
-			if (ceph_frag_is_rightmost(frag))
-				fi->next_offset = 2;
-			else
-				fi->next_offset = 0;
-		} else {
-			rinfo = &req->r_reply_info;
-			err = note_last_dentry(fi,
-				       rinfo->dir_dname[rinfo->dir_nr-1],
-				       rinfo->dir_dname_len[rinfo->dir_nr-1]);
-			if (err)
-				return err;
-			fi->next_offset += rinfo->dir_nr;
-		}
-	}
-
-	rinfo = &fi->last_readdir->r_reply_info;
-	dout("readdir frag %x num %d off %d chunkoff %d\n", frag,
-	     rinfo->dir_nr, off, fi->offset);
-	while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) {
-		u64 pos = ceph_make_fpos(frag, off);
-		struct ceph_mds_reply_inode *in =
-			rinfo->dir_in[off - fi->offset].in;
-		struct ceph_vino vino;
-		ino_t ino;
-
-		dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n",
-		     off, off - fi->offset, rinfo->dir_nr, pos,
-		     rinfo->dir_dname_len[off - fi->offset],
-		     rinfo->dir_dname[off - fi->offset], in);
-		BUG_ON(!in);
-		ftype = le32_to_cpu(in->mode) >> 12;
-		vino.ino = le64_to_cpu(in->ino);
-		vino.snap = le64_to_cpu(in->snapid);
-		ino = ceph_vino_to_ino(vino);
-		if (filldir(dirent,
-			    rinfo->dir_dname[off - fi->offset],
-			    rinfo->dir_dname_len[off - fi->offset],
-			    pos,
-			    ceph_translate_ino(inode->i_sb, ino), ftype) < 0) {
-			dout("filldir stopping us...\n");
-			return 0;
-		}
-		off++;
-		filp->f_pos = pos + 1;
-	}
-
-	if (fi->last_name) {
-		ceph_mdsc_put_request(fi->last_readdir);
-		fi->last_readdir = NULL;
-		goto more;
-	}
-
-	/* more frags? */
-	if (!ceph_frag_is_rightmost(frag)) {
-		frag = ceph_frag_next(frag);
-		off = 0;
-		filp->f_pos = ceph_make_fpos(frag, off);
-		dout("readdir next frag is %x\n", frag);
-		goto more;
-	}
-	fi->flags |= CEPH_F_ATEND;
-
-	/*
-	 * if dir_release_count still matches the dir, no dentries
-	 * were released during the whole readdir, and we should have
-	 * the complete dir contents in our cache.
-	 */
-	spin_lock(&ci->i_ceph_lock);
-	if (ci->i_release_count == fi->dir_release_count) {
-		ceph_dir_set_complete(inode);
-		ci->i_max_offset = filp->f_pos;
-	}
-	spin_unlock(&ci->i_ceph_lock);
-
-	dout("readdir %p filp %p done.\n", inode, filp);
-	return 0;
-}
-
-static void reset_readdir(struct ceph_file_info *fi)
-{
-	if (fi->last_readdir) {
-		ceph_mdsc_put_request(fi->last_readdir);
-		fi->last_readdir = NULL;
-	}
-	kfree(fi->last_name);
-	fi->last_name = NULL;
-	fi->next_offset = 2;  /* compensate for . and .. */
-	if (fi->dentry) {
-		dput(fi->dentry);
-		fi->dentry = NULL;
-	}
-	fi->flags &= ~CEPH_F_ATEND;
-}
-
-static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
-{
-	struct ceph_file_info *fi = file->private_data;
-	struct inode *inode = file->f_mapping->host;
-	loff_t old_offset = offset;
-	loff_t retval;
-
-	mutex_lock(&inode->i_mutex);
-	retval = -EINVAL;
-	switch (origin) {
-	case SEEK_END:
-		offset += inode->i_size + 2;   /* FIXME */
-		break;
-	case SEEK_CUR:
-		offset += file->f_pos;
-	case SEEK_SET:
-		break;
-	default:
-		goto out;
-	}
-
-	if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
-		if (offset != file->f_pos) {
-			file->f_pos = offset;
-			file->f_version = 0;
-			fi->flags &= ~CEPH_F_ATEND;
-		}
-		retval = offset;
-
-		/*
-		 * discard buffered readdir content on seekdir(0), or
-		 * seek to new frag, or seek prior to current chunk.
-		 */
-		if (offset == 0 ||
-		    fpos_frag(offset) != fpos_frag(old_offset) ||
-		    fpos_off(offset) < fi->offset) {
-			dout("dir_llseek dropping %p content\n", file);
-			reset_readdir(fi);
-		}
-
-		/* bump dir_release_count if we did a forward seek */
-		if (offset > old_offset)
-			fi->dir_release_count--;
-	}
-out:
-	mutex_unlock(&inode->i_mutex);
-	return retval;
-}
-
-/*
- * Handle lookups for the hidden .snap directory.
- */
-int ceph_handle_snapdir(struct ceph_mds_request *req,
-			struct dentry *dentry, int err)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
-	struct inode *parent = dentry->d_parent->d_inode; /* we hold i_mutex */
-
-	/* .snap dir? */
-	if (err == -ENOENT &&
-	    ceph_snap(parent) == CEPH_NOSNAP &&
-	    strcmp(dentry->d_name.name,
-		   fsc->mount_options->snapdir_name) == 0) {
-		struct inode *inode = ceph_get_snapdir(parent);
-		dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
-		     dentry, dentry->d_name.len, dentry->d_name.name, inode);
-		BUG_ON(!d_unhashed(dentry));
-		d_add(dentry, inode);
-		err = 0;
-	}
-	return err;
-}
-
-/*
- * Figure out final result of a lookup/open request.
- *
- * Mainly, make sure we return the final req->r_dentry (if it already
- * existed) in place of the original VFS-provided dentry when they
- * differ.
- *
- * Gracefully handle the case where the MDS replies with -ENOENT and
- * no trace (which it may do, at its discretion, e.g., if it doesn't
- * care to issue a lease on the negative dentry).
- */
-struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
-				  struct dentry *dentry, int err)
-{
-	if (err == -ENOENT) {
-		/* no trace? */
-		err = 0;
-		if (!req->r_reply_info.head->is_dentry) {
-			dout("ENOENT and no trace, dentry %p inode %p\n",
-			     dentry, dentry->d_inode);
-			if (dentry->d_inode) {
-				d_drop(dentry);
-				err = -ENOENT;
-			} else {
-				d_add(dentry, NULL);
-			}
-		}
-	}
-	if (err)
-		dentry = ERR_PTR(err);
-	else if (dentry != req->r_dentry)
-		dentry = dget(req->r_dentry);   /* we got spliced */
-	else
-		dentry = NULL;
-	return dentry;
-}
-
-static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
-{
-	return ceph_ino(inode) == CEPH_INO_ROOT &&
-		strncmp(dentry->d_name.name, ".ceph", 5) == 0;
-}
-
-/*
- * Look up a single dir entry.  If there is a lookup intent, inform
- * the MDS so that it gets our 'caps wanted' value in a single op.
- */
-static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
-				  struct nameidata *nd)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct ceph_mds_request *req;
-	int op;
-	int err;
-
-	dout("lookup %p dentry %p '%.*s'\n",
-	     dir, dentry, dentry->d_name.len, dentry->d_name.name);
-
-	if (dentry->d_name.len > NAME_MAX)
-		return ERR_PTR(-ENAMETOOLONG);
-
-	err = ceph_init_dentry(dentry);
-	if (err < 0)
-		return ERR_PTR(err);
-
-	/* open (but not create!) intent? */
-	if (nd &&
-	    (nd->flags & LOOKUP_OPEN) &&
-	    !(nd->intent.open.flags & O_CREAT)) {
-		int mode = nd->intent.open.create_mode & ~current->fs->umask;
-		return ceph_lookup_open(dir, dentry, nd, mode, 1);
-	}
-
-	/* can we conclude ENOENT locally? */
-	if (dentry->d_inode == NULL) {
-		struct ceph_inode_info *ci = ceph_inode(dir);
-		struct ceph_dentry_info *di = ceph_dentry(dentry);
-
-		spin_lock(&ci->i_ceph_lock);
-		dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
-		if (strncmp(dentry->d_name.name,
-			    fsc->mount_options->snapdir_name,
-			    dentry->d_name.len) &&
-		    !is_root_ceph_dentry(dir, dentry) &&
-		    ceph_dir_test_complete(dir) &&
-		    (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
-			spin_unlock(&ci->i_ceph_lock);
-			dout(" dir %p complete, -ENOENT\n", dir);
-			d_add(dentry, NULL);
-			di->lease_shared_gen = ci->i_shared_gen;
-			return NULL;
-		}
-		spin_unlock(&ci->i_ceph_lock);
-	}
-
-	op = ceph_snap(dir) == CEPH_SNAPDIR ?
-		CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
-	req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
-	if (IS_ERR(req))
-		return ERR_CAST(req);
-	req->r_dentry = dget(dentry);
-	req->r_num_caps = 2;
-	/* we only need inode linkage */
-	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
-	req->r_locked_dir = dir;
-	err = ceph_mdsc_do_request(mdsc, NULL, req);
-	err = ceph_handle_snapdir(req, dentry, err);
-	dentry = ceph_finish_lookup(req, dentry, err);
-	ceph_mdsc_put_request(req);  /* will dput(dentry) */
-	dout("lookup result=%p\n", dentry);
-	return dentry;
-}
-
-/*
- * If we do a create but get no trace back from the MDS, follow up with
- * a lookup (the VFS expects us to link up the provided dentry).
- */
-int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
-{
-	struct dentry *result = ceph_lookup(dir, dentry, NULL);
-
-	if (result && !IS_ERR(result)) {
-		/*
-		 * We created the item, then did a lookup, and found
-		 * it was already linked to another inode we already
-		 * had in our cache (and thus got spliced).  Link our
-		 * dentry to that inode, but don't hash it, just in
-		 * case the VFS wants to dereference it.
-		 */
-		BUG_ON(!result->d_inode);
-		d_instantiate(dentry, result->d_inode);
-		return 0;
-	}
-	return PTR_ERR(result);
-}
-
-static int ceph_mknod(struct inode *dir, struct dentry *dentry,
-		      umode_t mode, dev_t rdev)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct ceph_mds_request *req;
-	int err;
-
-	if (ceph_snap(dir) != CEPH_NOSNAP)
-		return -EROFS;
-
-	dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
-	     dir, dentry, mode, rdev);
-	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
-	if (IS_ERR(req)) {
-		d_drop(dentry);
-		return PTR_ERR(req);
-	}
-	req->r_dentry = dget(dentry);
-	req->r_num_caps = 2;
-	req->r_locked_dir = dir;
-	req->r_args.mknod.mode = cpu_to_le32(mode);
-	req->r_args.mknod.rdev = cpu_to_le32(rdev);
-	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
-	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
-	err = ceph_mdsc_do_request(mdsc, dir, req);
-	if (!err && !req->r_reply_info.head->is_dentry)
-		err = ceph_handle_notrace_create(dir, dentry);
-	ceph_mdsc_put_request(req);
-	if (err)
-		d_drop(dentry);
-	return err;
-}
-
-static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-		       struct nameidata *nd)
-{
-	dout("create in dir %p dentry %p name '%.*s'\n",
-	     dir, dentry, dentry->d_name.len, dentry->d_name.name);
-
-	if (ceph_snap(dir) != CEPH_NOSNAP)
-		return -EROFS;
-
-	if (nd) {
-		BUG_ON((nd->flags & LOOKUP_OPEN) == 0);
-		dentry = ceph_lookup_open(dir, dentry, nd, mode, 0);
-		/* hrm, what should i do here if we get aliased? */
-		if (IS_ERR(dentry))
-			return PTR_ERR(dentry);
-		return 0;
-	}
-
-	/* fall back to mknod */
-	return ceph_mknod(dir, dentry, (mode & ~S_IFMT) | S_IFREG, 0);
-}
-
-static int ceph_symlink(struct inode *dir, struct dentry *dentry,
-			    const char *dest)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct ceph_mds_request *req;
-	int err;
-
-	if (ceph_snap(dir) != CEPH_NOSNAP)
-		return -EROFS;
-
-	dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
-	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
-	if (IS_ERR(req)) {
-		d_drop(dentry);
-		return PTR_ERR(req);
-	}
-	req->r_dentry = dget(dentry);
-	req->r_num_caps = 2;
-	req->r_path2 = kstrdup(dest, GFP_NOFS);
-	req->r_locked_dir = dir;
-	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
-	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
-	err = ceph_mdsc_do_request(mdsc, dir, req);
-	if (!err && !req->r_reply_info.head->is_dentry)
-		err = ceph_handle_notrace_create(dir, dentry);
-	ceph_mdsc_put_request(req);
-	if (err)
-		d_drop(dentry);
-	return err;
-}
-
-static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct ceph_mds_request *req;
-	int err = -EROFS;
-	int op;
-
-	if (ceph_snap(dir) == CEPH_SNAPDIR) {
-		/* mkdir .snap/foo is a MKSNAP */
-		op = CEPH_MDS_OP_MKSNAP;
-		dout("mksnap dir %p snap '%.*s' dn %p\n", dir,
-		     dentry->d_name.len, dentry->d_name.name, dentry);
-	} else if (ceph_snap(dir) == CEPH_NOSNAP) {
-		dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode);
-		op = CEPH_MDS_OP_MKDIR;
-	} else {
-		goto out;
-	}
-	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		goto out;
-	}
-
-	req->r_dentry = dget(dentry);
-	req->r_num_caps = 2;
-	req->r_locked_dir = dir;
-	req->r_args.mkdir.mode = cpu_to_le32(mode);
-	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
-	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
-	err = ceph_mdsc_do_request(mdsc, dir, req);
-	if (!err && !req->r_reply_info.head->is_dentry)
-		err = ceph_handle_notrace_create(dir, dentry);
-	ceph_mdsc_put_request(req);
-out:
-	if (err < 0)
-		d_drop(dentry);
-	return err;
-}
-
-static int ceph_link(struct dentry *old_dentry, struct inode *dir,
-		     struct dentry *dentry)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct ceph_mds_request *req;
-	int err;
-
-	if (ceph_snap(dir) != CEPH_NOSNAP)
-		return -EROFS;
-
-	dout("link in dir %p old_dentry %p dentry %p\n", dir,
-	     old_dentry, dentry);
-	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
-	if (IS_ERR(req)) {
-		d_drop(dentry);
-		return PTR_ERR(req);
-	}
-	req->r_dentry = dget(dentry);
-	req->r_num_caps = 2;
-	req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */
-	req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
-	req->r_locked_dir = dir;
-	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
-	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
-	err = ceph_mdsc_do_request(mdsc, dir, req);
-	if (err) {
-		d_drop(dentry);
-	} else if (!req->r_reply_info.head->is_dentry) {
-		ihold(old_dentry->d_inode);
-		d_instantiate(dentry, old_dentry->d_inode);
-	}
-	ceph_mdsc_put_request(req);
-	return err;
-}
-
-/*
- * For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps.  If it
- * looks like the link count will hit 0, drop any other caps (other
- * than PIN) we don't specifically want (due to the file still being
- * open).
- */
-static int drop_caps_for_unlink(struct inode *inode)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
-
-	spin_lock(&ci->i_ceph_lock);
-	if (inode->i_nlink == 1) {
-		drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
-		ci->i_ceph_flags |= CEPH_I_NODELAY;
-	}
-	spin_unlock(&ci->i_ceph_lock);
-	return drop;
-}
-
-/*
- * rmdir and unlink are differ only by the metadata op code
- */
-static int ceph_unlink(struct inode *dir, struct dentry *dentry)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct inode *inode = dentry->d_inode;
-	struct ceph_mds_request *req;
-	int err = -EROFS;
-	int op;
-
-	if (ceph_snap(dir) == CEPH_SNAPDIR) {
-		/* rmdir .snap/foo is RMSNAP */
-		dout("rmsnap dir %p '%.*s' dn %p\n", dir, dentry->d_name.len,
-		     dentry->d_name.name, dentry);
-		op = CEPH_MDS_OP_RMSNAP;
-	} else if (ceph_snap(dir) == CEPH_NOSNAP) {
-		dout("unlink/rmdir dir %p dn %p inode %p\n",
-		     dir, dentry, inode);
-		op = S_ISDIR(dentry->d_inode->i_mode) ?
-			CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
-	} else
-		goto out;
-	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		goto out;
-	}
-	req->r_dentry = dget(dentry);
-	req->r_num_caps = 2;
-	req->r_locked_dir = dir;
-	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
-	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
-	req->r_inode_drop = drop_caps_for_unlink(inode);
-	err = ceph_mdsc_do_request(mdsc, dir, req);
-	if (!err && !req->r_reply_info.head->is_dentry)
-		d_delete(dentry);
-	ceph_mdsc_put_request(req);
-out:
-	return err;
-}
-
-static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
-		       struct inode *new_dir, struct dentry *new_dentry)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct ceph_mds_request *req;
-	int err;
-
-	if (ceph_snap(old_dir) != ceph_snap(new_dir))
-		return -EXDEV;
-	if (ceph_snap(old_dir) != CEPH_NOSNAP ||
-	    ceph_snap(new_dir) != CEPH_NOSNAP)
-		return -EROFS;
-	dout("rename dir %p dentry %p to dir %p dentry %p\n",
-	     old_dir, old_dentry, new_dir, new_dentry);
-	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-	req->r_dentry = dget(new_dentry);
-	req->r_num_caps = 2;
-	req->r_old_dentry = dget(old_dentry);
-	req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
-	req->r_locked_dir = new_dir;
-	req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
-	req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
-	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
-	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
-	/* release LINK_RDCACHE on source inode (mds will lock it) */
-	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
-	if (new_dentry->d_inode)
-		req->r_inode_drop = drop_caps_for_unlink(new_dentry->d_inode);
-	err = ceph_mdsc_do_request(mdsc, old_dir, req);
-	if (!err && !req->r_reply_info.head->is_dentry) {
-		/*
-		 * Normally d_move() is done by fill_trace (called by
-		 * do_request, above).  If there is no trace, we need
-		 * to do it here.
-		 */
-
-		/* d_move screws up d_subdirs order */
-		ceph_dir_clear_complete(new_dir);
-
-		d_move(old_dentry, new_dentry);
-
-		/* ensure target dentry is invalidated, despite
-		   rehashing bug in vfs_rename_dir */
-		ceph_invalidate_dentry_lease(new_dentry);
-	}
-	ceph_mdsc_put_request(req);
-	return err;
-}
-
-/*
- * Ensure a dentry lease will no longer revalidate.
- */
-void ceph_invalidate_dentry_lease(struct dentry *dentry)
-{
-	spin_lock(&dentry->d_lock);
-	dentry->d_time = jiffies;
-	ceph_dentry(dentry)->lease_shared_gen = 0;
-	spin_unlock(&dentry->d_lock);
-}
-
-/*
- * Check if dentry lease is valid.  If not, delete the lease.  Try to
- * renew if the least is more than half up.
- */
-static int dentry_lease_is_valid(struct dentry *dentry)
-{
-	struct ceph_dentry_info *di;
-	struct ceph_mds_session *s;
-	int valid = 0;
-	u32 gen;
-	unsigned long ttl;
-	struct ceph_mds_session *session = NULL;
-	struct inode *dir = NULL;
-	u32 seq = 0;
-
-	spin_lock(&dentry->d_lock);
-	di = ceph_dentry(dentry);
-	if (di->lease_session) {
-		s = di->lease_session;
-		spin_lock(&s->s_gen_ttl_lock);
-		gen = s->s_cap_gen;
-		ttl = s->s_cap_ttl;
-		spin_unlock(&s->s_gen_ttl_lock);
-
-		if (di->lease_gen == gen &&
-		    time_before(jiffies, dentry->d_time) &&
-		    time_before(jiffies, ttl)) {
-			valid = 1;
-			if (di->lease_renew_after &&
-			    time_after(jiffies, di->lease_renew_after)) {
-				/* we should renew */
-				dir = dentry->d_parent->d_inode;
-				session = ceph_get_mds_session(s);
-				seq = di->lease_seq;
-				di->lease_renew_after = 0;
-				di->lease_renew_from = jiffies;
-			}
-		}
-	}
-	spin_unlock(&dentry->d_lock);
-
-	if (session) {
-		ceph_mdsc_lease_send_msg(session, dir, dentry,
-					 CEPH_MDS_LEASE_RENEW, seq);
-		ceph_put_mds_session(session);
-	}
-	dout("dentry_lease_is_valid - dentry %p = %d\n", dentry, valid);
-	return valid;
-}
-
-/*
- * Check if directory-wide content lease/cap is valid.
- */
-static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
-{
-	struct ceph_inode_info *ci = ceph_inode(dir);
-	struct ceph_dentry_info *di = ceph_dentry(dentry);
-	int valid = 0;
-
-	spin_lock(&ci->i_ceph_lock);
-	if (ci->i_shared_gen == di->lease_shared_gen)
-		valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
-	spin_unlock(&ci->i_ceph_lock);
-	dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
-	     dir, (unsigned)ci->i_shared_gen, dentry,
-	     (unsigned)di->lease_shared_gen, valid);
-	return valid;
-}
-
-/*
- * Check if cached dentry can be trusted.
- */
-static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
-{
-	int valid = 0;
-	struct inode *dir;
-
-	if (nd && nd->flags & LOOKUP_RCU)
-		return -ECHILD;
-
-	dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
-	     dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
-	     ceph_dentry(dentry)->offset);
-
-	dir = ceph_get_dentry_parent_inode(dentry);
-
-	/* always trust cached snapped dentries, snapdir dentry */
-	if (ceph_snap(dir) != CEPH_NOSNAP) {
-		dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry,
-		     dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
-		valid = 1;
-	} else if (dentry->d_inode &&
-		   ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) {
-		valid = 1;
-	} else if (dentry_lease_is_valid(dentry) ||
-		   dir_lease_is_valid(dir, dentry)) {
-		valid = 1;
-	}
-
-	dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
-	if (valid)
-		ceph_dentry_lru_touch(dentry);
-	else
-		d_drop(dentry);
-	iput(dir);
-	return valid;
-}
-
-/*
- * Release our ceph_dentry_info.
- */
-static void ceph_d_release(struct dentry *dentry)
-{
-	struct ceph_dentry_info *di = ceph_dentry(dentry);
-
-	dout("d_release %p\n", dentry);
-	ceph_dentry_lru_del(dentry);
-	if (di->lease_session)
-		ceph_put_mds_session(di->lease_session);
-	kmem_cache_free(ceph_dentry_cachep, di);
-	dentry->d_fsdata = NULL;
-}
-
-static int ceph_snapdir_d_revalidate(struct dentry *dentry,
-					  struct nameidata *nd)
-{
-	/*
-	 * Eventually, we'll want to revalidate snapped metadata
-	 * too... probably...
-	 */
-	return 1;
-}
-
-/*
- * Set/clear/test dir complete flag on the dir's dentry.
- */
-void ceph_dir_set_complete(struct inode *inode)
-{
-	struct dentry *dentry = d_find_any_alias(inode);
-	
-	if (dentry && ceph_dentry(dentry) &&
-	    ceph_test_mount_opt(ceph_sb_to_client(dentry->d_sb), DCACHE)) {
-		dout(" marking %p (%p) complete\n", inode, dentry);
-		set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
-	}
-	dput(dentry);
-}
-
-void ceph_dir_clear_complete(struct inode *inode)
-{
-	struct dentry *dentry = d_find_any_alias(inode);
-
-	if (dentry && ceph_dentry(dentry)) {
-		dout(" marking %p (%p) complete\n", inode, dentry);
-		set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
-	}
-	dput(dentry);
-}
-
-bool ceph_dir_test_complete(struct inode *inode)
-{
-	struct dentry *dentry = d_find_any_alias(inode);
-
-	if (dentry && ceph_dentry(dentry)) {
-		dout(" marking %p (%p) NOT complete\n", inode, dentry);
-		clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
-	}
-	dput(dentry);
-	return false;
-}
-
-/*
- * When the VFS prunes a dentry from the cache, we need to clear the
- * complete flag on the parent directory.
- *
- * Called under dentry->d_lock.
- */
-static void ceph_d_prune(struct dentry *dentry)
-{
-	struct ceph_dentry_info *di;
-
-	dout("ceph_d_prune %p\n", dentry);
-
-	/* do we have a valid parent? */
-	if (!dentry->d_parent || IS_ROOT(dentry))
-		return;
-
-	/* if we are not hashed, we don't affect D_COMPLETE */
-	if (d_unhashed(dentry))
-		return;
-
-	/*
-	 * we hold d_lock, so d_parent is stable, and d_fsdata is never
-	 * cleared until d_release
-	 */
-	di = ceph_dentry(dentry->d_parent);
-	clear_bit(CEPH_D_COMPLETE, &di->flags);
-}
-
-/*
- * read() on a dir.  This weird interface hack only works if mounted
- * with '-o dirstat'.
- */
-static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
-			     loff_t *ppos)
-{
-	struct ceph_file_info *cf = file->private_data;
-	struct inode *inode = file->f_dentry->d_inode;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int left;
-	const int bufsize = 1024;
-
-	if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
-		return -EISDIR;
-
-	if (!cf->dir_info) {
-		cf->dir_info = kmalloc(bufsize, GFP_NOFS);
-		if (!cf->dir_info)
-			return -ENOMEM;
-		cf->dir_info_len =
-			snprintf(cf->dir_info, bufsize,
-				"entries:   %20lld\n"
-				" files:    %20lld\n"
-				" subdirs:  %20lld\n"
-				"rentries:  %20lld\n"
-				" rfiles:   %20lld\n"
-				" rsubdirs: %20lld\n"
-				"rbytes:    %20lld\n"
-				"rctime:    %10ld.%09ld\n",
-				ci->i_files + ci->i_subdirs,
-				ci->i_files,
-				ci->i_subdirs,
-				ci->i_rfiles + ci->i_rsubdirs,
-				ci->i_rfiles,
-				ci->i_rsubdirs,
-				ci->i_rbytes,
-				(long)ci->i_rctime.tv_sec,
-				(long)ci->i_rctime.tv_nsec);
-	}
-
-	if (*ppos >= cf->dir_info_len)
-		return 0;
-	size = min_t(unsigned, size, cf->dir_info_len-*ppos);
-	left = copy_to_user(buf, cf->dir_info + *ppos, size);
-	if (left == size)
-		return -EFAULT;
-	*ppos += (size - left);
-	return size - left;
-}
-
-/*
- * an fsync() on a dir will wait for any uncommitted directory
- * operations to commit.
- */
-static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
-			  int datasync)
-{
-	struct inode *inode = file->f_path.dentry->d_inode;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct list_head *head = &ci->i_unsafe_dirops;
-	struct ceph_mds_request *req;
-	u64 last_tid;
-	int ret = 0;
-
-	dout("dir_fsync %p\n", inode);
-	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
-	if (ret)
-		return ret;
-	mutex_lock(&inode->i_mutex);
-
-	spin_lock(&ci->i_unsafe_lock);
-	if (list_empty(head))
-		goto out;
-
-	req = list_entry(head->prev,
-			 struct ceph_mds_request, r_unsafe_dir_item);
-	last_tid = req->r_tid;
-
-	do {
-		ceph_mdsc_get_request(req);
-		spin_unlock(&ci->i_unsafe_lock);
-
-		dout("dir_fsync %p wait on tid %llu (until %llu)\n",
-		     inode, req->r_tid, last_tid);
-		if (req->r_timeout) {
-			ret = wait_for_completion_timeout(
-				&req->r_safe_completion, req->r_timeout);
-			if (ret > 0)
-				ret = 0;
-			else if (ret == 0)
-				ret = -EIO;  /* timed out */
-		} else {
-			wait_for_completion(&req->r_safe_completion);
-		}
-		ceph_mdsc_put_request(req);
-
-		spin_lock(&ci->i_unsafe_lock);
-		if (ret || list_empty(head))
-			break;
-		req = list_entry(head->next,
-				 struct ceph_mds_request, r_unsafe_dir_item);
-	} while (req->r_tid < last_tid);
-out:
-	spin_unlock(&ci->i_unsafe_lock);
-	mutex_unlock(&inode->i_mutex);
-
-	return ret;
-}
-
-/*
- * We maintain a private dentry LRU.
- *
- * FIXME: this needs to be changed to a per-mds lru to be useful.
- */
-void ceph_dentry_lru_add(struct dentry *dn)
-{
-	struct ceph_dentry_info *di = ceph_dentry(dn);
-	struct ceph_mds_client *mdsc;
-
-	dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
-	     dn->d_name.len, dn->d_name.name);
-	mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
-	spin_lock(&mdsc->dentry_lru_lock);
-	list_add_tail(&di->lru, &mdsc->dentry_lru);
-	mdsc->num_dentry++;
-	spin_unlock(&mdsc->dentry_lru_lock);
-}
-
-void ceph_dentry_lru_touch(struct dentry *dn)
-{
-	struct ceph_dentry_info *di = ceph_dentry(dn);
-	struct ceph_mds_client *mdsc;
-
-	dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
-	     dn->d_name.len, dn->d_name.name, di->offset);
-	mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
-	spin_lock(&mdsc->dentry_lru_lock);
-	list_move_tail(&di->lru, &mdsc->dentry_lru);
-	spin_unlock(&mdsc->dentry_lru_lock);
-}
-
-void ceph_dentry_lru_del(struct dentry *dn)
-{
-	struct ceph_dentry_info *di = ceph_dentry(dn);
-	struct ceph_mds_client *mdsc;
-
-	dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
-	     dn->d_name.len, dn->d_name.name);
-	mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
-	spin_lock(&mdsc->dentry_lru_lock);
-	list_del_init(&di->lru);
-	mdsc->num_dentry--;
-	spin_unlock(&mdsc->dentry_lru_lock);
-}
-
-/*
- * Return name hash for a given dentry.  This is dependent on
- * the parent directory's hash function.
- */
-unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
-{
-	struct ceph_inode_info *dci = ceph_inode(dir);
-
-	switch (dci->i_dir_layout.dl_dir_hash) {
-	case 0:	/* for backward compat */
-	case CEPH_STR_HASH_LINUX:
-		return dn->d_name.hash;
-
-	default:
-		return ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
-				     dn->d_name.name, dn->d_name.len);
-	}
-}
-
-const struct file_operations ceph_dir_fops = {
-	.read = ceph_read_dir,
-	.readdir = ceph_readdir,
-	.llseek = ceph_dir_llseek,
-	.open = ceph_open,
-	.release = ceph_release,
-	.unlocked_ioctl = ceph_ioctl,
-	.fsync = ceph_dir_fsync,
-};
-
-const struct inode_operations ceph_dir_iops = {
-	.lookup = ceph_lookup,
-	.permission = ceph_permission,
-	.getattr = ceph_getattr,
-	.setattr = ceph_setattr,
-	.setxattr = ceph_setxattr,
-	.getxattr = ceph_getxattr,
-	.listxattr = ceph_listxattr,
-	.removexattr = ceph_removexattr,
-	.mknod = ceph_mknod,
-	.symlink = ceph_symlink,
-	.mkdir = ceph_mkdir,
-	.link = ceph_link,
-	.unlink = ceph_unlink,
-	.rmdir = ceph_unlink,
-	.rename = ceph_rename,
-	.create = ceph_create,
-};
-
-const struct dentry_operations ceph_dentry_ops = {
-	.d_revalidate = ceph_d_revalidate,
-	.d_release = ceph_d_release,
-	.d_prune = ceph_d_prune,
-};
-
-const struct dentry_operations ceph_snapdir_dentry_ops = {
-	.d_revalidate = ceph_snapdir_d_revalidate,
-	.d_release = ceph_d_release,
-};
-
-const struct dentry_operations ceph_snap_dentry_ops = {
-	.d_release = ceph_d_release,
-	.d_prune = ceph_d_prune,
-};
diff --git a/ANDROID_3.4.5/fs/ceph/export.c b/ANDROID_3.4.5/fs/ceph/export.c
deleted file mode 100644
index fbb2a643..00000000
--- a/ANDROID_3.4.5/fs/ceph/export.c
+++ /dev/null
@@ -1,253 +0,0 @@
-#include <linux/ceph/ceph_debug.h>
-
-#include <linux/exportfs.h>
-#include <linux/slab.h>
-#include <asm/unaligned.h>
-
-#include "super.h"
-#include "mds_client.h"
-
-/*
- * NFS export support
- *
- * NFS re-export of a ceph mount is, at present, only semireliable.
- * The basic issue is that the Ceph architectures doesn't lend itself
- * well to generating filehandles that will remain valid forever.
- *
- * So, we do our best.  If you're lucky, your inode will be in the
- * client's cache.  If it's not, and you have a connectable fh, then
- * the MDS server may be able to find it for you.  Otherwise, you get
- * ESTALE.
- *
- * There are ways to this more reliable, but in the non-connectable fh
- * case, we won't every work perfectly, and in the connectable case,
- * some changes are needed on the MDS side to work better.
- */
-
-/*
- * Basic fh
- */
-struct ceph_nfs_fh {
-	u64 ino;
-} __attribute__ ((packed));
-
-/*
- * Larger 'connectable' fh that includes parent ino and name hash.
- * Use this whenever possible, as it works more reliably.
- */
-struct ceph_nfs_confh {
-	u64 ino, parent_ino;
-	u32 parent_name_hash;
-} __attribute__ ((packed));
-
-static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
-			  int connectable)
-{
-	int type;
-	struct ceph_nfs_fh *fh = (void *)rawfh;
-	struct ceph_nfs_confh *cfh = (void *)rawfh;
-	struct dentry *parent;
-	struct inode *inode = dentry->d_inode;
-	int connected_handle_length = sizeof(*cfh)/4;
-	int handle_length = sizeof(*fh)/4;
-
-	/* don't re-export snaps */
-	if (ceph_snap(inode) != CEPH_NOSNAP)
-		return -EINVAL;
-
-	spin_lock(&dentry->d_lock);
-	parent = dentry->d_parent;
-	if (*max_len >= connected_handle_length) {
-		dout("encode_fh %p connectable\n", dentry);
-		cfh->ino = ceph_ino(dentry->d_inode);
-		cfh->parent_ino = ceph_ino(parent->d_inode);
-		cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode,
-							 dentry);
-		*max_len = connected_handle_length;
-		type = 2;
-	} else if (*max_len >= handle_length) {
-		if (connectable) {
-			*max_len = connected_handle_length;
-			type = 255;
-		} else {
-			dout("encode_fh %p\n", dentry);
-			fh->ino = ceph_ino(dentry->d_inode);
-			*max_len = handle_length;
-			type = 1;
-		}
-	} else {
-		*max_len = handle_length;
-		type = 255;
-	}
-	spin_unlock(&dentry->d_lock);
-	return type;
-}
-
-/*
- * convert regular fh to dentry
- *
- * FIXME: we should try harder by querying the mds for the ino.
- */
-static struct dentry *__fh_to_dentry(struct super_block *sb,
-				     struct ceph_nfs_fh *fh)
-{
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
-	struct inode *inode;
-	struct dentry *dentry;
-	struct ceph_vino vino;
-	int err;
-
-	dout("__fh_to_dentry %llx\n", fh->ino);
-	vino.ino = fh->ino;
-	vino.snap = CEPH_NOSNAP;
-	inode = ceph_find_inode(sb, vino);
-	if (!inode) {
-		struct ceph_mds_request *req;
-
-		req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
-					       USE_ANY_MDS);
-		if (IS_ERR(req))
-			return ERR_CAST(req);
-
-		req->r_ino1 = vino;
-		req->r_num_caps = 1;
-		err = ceph_mdsc_do_request(mdsc, NULL, req);
-		inode = req->r_target_inode;
-		if (inode)
-			ihold(inode);
-		ceph_mdsc_put_request(req);
-		if (!inode)
-			return ERR_PTR(-ESTALE);
-	}
-
-	dentry = d_obtain_alias(inode);
-	if (IS_ERR(dentry)) {
-		pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n",
-		       fh->ino, inode);
-		iput(inode);
-		return dentry;
-	}
-	err = ceph_init_dentry(dentry);
-	if (err < 0) {
-		iput(inode);
-		return ERR_PTR(err);
-	}
-	dout("__fh_to_dentry %llx %p dentry %p\n", fh->ino, inode, dentry);
-	return dentry;
-}
-
-/*
- * convert connectable fh to dentry
- */
-static struct dentry *__cfh_to_dentry(struct super_block *sb,
-				      struct ceph_nfs_confh *cfh)
-{
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
-	struct inode *inode;
-	struct dentry *dentry;
-	struct ceph_vino vino;
-	int err;
-
-	dout("__cfh_to_dentry %llx (%llx/%x)\n",
-	     cfh->ino, cfh->parent_ino, cfh->parent_name_hash);
-
-	vino.ino = cfh->ino;
-	vino.snap = CEPH_NOSNAP;
-	inode = ceph_find_inode(sb, vino);
-	if (!inode) {
-		struct ceph_mds_request *req;
-
-		req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH,
-					       USE_ANY_MDS);
-		if (IS_ERR(req))
-			return ERR_CAST(req);
-
-		req->r_ino1 = vino;
-		req->r_ino2.ino = cfh->parent_ino;
-		req->r_ino2.snap = CEPH_NOSNAP;
-		req->r_path2 = kmalloc(16, GFP_NOFS);
-		snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash);
-		req->r_num_caps = 1;
-		err = ceph_mdsc_do_request(mdsc, NULL, req);
-		inode = req->r_target_inode;
-		if (inode)
-			ihold(inode);
-		ceph_mdsc_put_request(req);
-		if (!inode)
-			return ERR_PTR(err ? err : -ESTALE);
-	}
-
-	dentry = d_obtain_alias(inode);
-	if (IS_ERR(dentry)) {
-		pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n",
-		       cfh->ino, inode);
-		iput(inode);
-		return dentry;
-	}
-	err = ceph_init_dentry(dentry);
-	if (err < 0) {
-		iput(inode);
-		return ERR_PTR(err);
-	}
-	dout("__cfh_to_dentry %llx %p dentry %p\n", cfh->ino, inode, dentry);
-	return dentry;
-}
-
-static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid,
-					int fh_len, int fh_type)
-{
-	if (fh_type == 1)
-		return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw);
-	else
-		return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw);
-}
-
-/*
- * get parent, if possible.
- *
- * FIXME: we could do better by querying the mds to discover the
- * parent.
- */
-static struct dentry *ceph_fh_to_parent(struct super_block *sb,
-					 struct fid *fid,
-					int fh_len, int fh_type)
-{
-	struct ceph_nfs_confh *cfh = (void *)fid->raw;
-	struct ceph_vino vino;
-	struct inode *inode;
-	struct dentry *dentry;
-	int err;
-
-	if (fh_type == 1)
-		return ERR_PTR(-ESTALE);
-
-	pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino,
-		 cfh->parent_name_hash);
-
-	vino.ino = cfh->ino;
-	vino.snap = CEPH_NOSNAP;
-	inode = ceph_find_inode(sb, vino);
-	if (!inode)
-		return ERR_PTR(-ESTALE);
-
-	dentry = d_obtain_alias(inode);
-	if (IS_ERR(dentry)) {
-		pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n",
-		       cfh->ino, inode);
-		iput(inode);
-		return dentry;
-	}
-	err = ceph_init_dentry(dentry);
-	if (err < 0) {
-		iput(inode);
-		return ERR_PTR(err);
-	}
-	dout("fh_to_parent %llx %p dentry %p\n", cfh->ino, inode, dentry);
-	return dentry;
-}
-
-const struct export_operations ceph_export_ops = {
-	.encode_fh = ceph_encode_fh,
-	.fh_to_dentry = ceph_fh_to_dentry,
-	.fh_to_parent = ceph_fh_to_parent,
-};
diff --git a/ANDROID_3.4.5/fs/ceph/file.c b/ANDROID_3.4.5/fs/ceph/file.c
deleted file mode 100644
index ed72428d..00000000
--- a/ANDROID_3.4.5/fs/ceph/file.c
+++ /dev/null
@@ -1,874 +0,0 @@
-#include <linux/ceph/ceph_debug.h>
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/file.h>
-#include <linux/namei.h>
-#include <linux/writeback.h>
-
-#include "super.h"
-#include "mds_client.h"
-
-/*
- * Ceph file operations
- *
- * Implement basic open/close functionality, and implement
- * read/write.
- *
- * We implement three modes of file I/O:
- *  - buffered uses the generic_file_aio_{read,write} helpers
- *
- *  - synchronous is used when there is multi-client read/write
- *    sharing, avoids the page cache, and synchronously waits for an
- *    ack from the OSD.
- *
- *  - direct io takes the variant of the sync path that references
- *    user pages directly.
- *
- * fsync() flushes and waits on dirty pages, but just queues metadata
- * for writeback: since the MDS can recover size and mtime there is no
- * need to wait for MDS acknowledgement.
- */
-
-
-/*
- * Prepare an open request.  Preallocate ceph_cap to avoid an
- * inopportune ENOMEM later.
- */
-static struct ceph_mds_request *
-prepare_open_request(struct super_block *sb, int flags, int create_mode)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct ceph_mds_request *req;
-	int want_auth = USE_ANY_MDS;
-	int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN;
-
-	if (flags & (O_WRONLY|O_RDWR|O_CREAT|O_TRUNC))
-		want_auth = USE_AUTH_MDS;
-
-	req = ceph_mdsc_create_request(mdsc, op, want_auth);
-	if (IS_ERR(req))
-		goto out;
-	req->r_fmode = ceph_flags_to_mode(flags);
-	req->r_args.open.flags = cpu_to_le32(flags);
-	req->r_args.open.mode = cpu_to_le32(create_mode);
-	req->r_args.open.preferred = cpu_to_le32(-1);
-out:
-	return req;
-}
-
-/*
- * initialize private struct file data.
- * if we fail, clean up by dropping fmode reference on the ceph_inode
- */
-static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
-{
-	struct ceph_file_info *cf;
-	int ret = 0;
-
-	switch (inode->i_mode & S_IFMT) {
-	case S_IFREG:
-	case S_IFDIR:
-		dout("init_file %p %p 0%o (regular)\n", inode, file,
-		     inode->i_mode);
-		cf = kmem_cache_alloc(ceph_file_cachep, GFP_NOFS | __GFP_ZERO);
-		if (cf == NULL) {
-			ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
-			return -ENOMEM;
-		}
-		cf->fmode = fmode;
-		cf->next_offset = 2;
-		file->private_data = cf;
-		BUG_ON(inode->i_fop->release != ceph_release);
-		break;
-
-	case S_IFLNK:
-		dout("init_file %p %p 0%o (symlink)\n", inode, file,
-		     inode->i_mode);
-		ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
-		break;
-
-	default:
-		dout("init_file %p %p 0%o (special)\n", inode, file,
-		     inode->i_mode);
-		/*
-		 * we need to drop the open ref now, since we don't
-		 * have .release set to ceph_release.
-		 */
-		ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
-		BUG_ON(inode->i_fop->release == ceph_release);
-
-		/* call the proper open fop */
-		ret = inode->i_fop->open(inode, file);
-	}
-	return ret;
-}
-
-/*
- * If the filp already has private_data, that means the file was
- * already opened by intent during lookup, and we do nothing.
- *
- * If we already have the requisite capabilities, we can satisfy
- * the open request locally (no need to request new caps from the
- * MDS).  We do, however, need to inform the MDS (asynchronously)
- * if our wanted caps set expands.
- */
-int ceph_open(struct inode *inode, struct file *file)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct ceph_mds_request *req;
-	struct ceph_file_info *cf = file->private_data;
-	struct inode *parent_inode = NULL;
-	int err;
-	int flags, fmode, wanted;
-
-	if (cf) {
-		dout("open file %p is already opened\n", file);
-		return 0;
-	}
-
-	/* filter out O_CREAT|O_EXCL; vfs did that already.  yuck. */
-	flags = file->f_flags & ~(O_CREAT|O_EXCL);
-	if (S_ISDIR(inode->i_mode))
-		flags = O_DIRECTORY;  /* mds likes to know */
-
-	dout("open inode %p ino %llx.%llx file %p flags %d (%d)\n", inode,
-	     ceph_vinop(inode), file, flags, file->f_flags);
-	fmode = ceph_flags_to_mode(flags);
-	wanted = ceph_caps_for_mode(fmode);
-
-	/* snapped files are read-only */
-	if (ceph_snap(inode) != CEPH_NOSNAP && (file->f_mode & FMODE_WRITE))
-		return -EROFS;
-
-	/* trivially open snapdir */
-	if (ceph_snap(inode) == CEPH_SNAPDIR) {
-		spin_lock(&ci->i_ceph_lock);
-		__ceph_get_fmode(ci, fmode);
-		spin_unlock(&ci->i_ceph_lock);
-		return ceph_init_file(inode, file, fmode);
-	}
-
-	/*
-	 * No need to block if we have caps on the auth MDS (for
-	 * write) or any MDS (for read).  Update wanted set
-	 * asynchronously.
-	 */
-	spin_lock(&ci->i_ceph_lock);
-	if (__ceph_is_any_real_caps(ci) &&
-	    (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
-		int mds_wanted = __ceph_caps_mds_wanted(ci);
-		int issued = __ceph_caps_issued(ci, NULL);
-
-		dout("open %p fmode %d want %s issued %s using existing\n",
-		     inode, fmode, ceph_cap_string(wanted),
-		     ceph_cap_string(issued));
-		__ceph_get_fmode(ci, fmode);
-		spin_unlock(&ci->i_ceph_lock);
-
-		/* adjust wanted? */
-		if ((issued & wanted) != wanted &&
-		    (mds_wanted & wanted) != wanted &&
-		    ceph_snap(inode) != CEPH_SNAPDIR)
-			ceph_check_caps(ci, 0, NULL);
-
-		return ceph_init_file(inode, file, fmode);
-	} else if (ceph_snap(inode) != CEPH_NOSNAP &&
-		   (ci->i_snap_caps & wanted) == wanted) {
-		__ceph_get_fmode(ci, fmode);
-		spin_unlock(&ci->i_ceph_lock);
-		return ceph_init_file(inode, file, fmode);
-	}
-	spin_unlock(&ci->i_ceph_lock);
-
-	dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
-	req = prepare_open_request(inode->i_sb, flags, 0);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		goto out;
-	}
-	req->r_inode = inode;
-	ihold(inode);
-	req->r_num_caps = 1;
-	if (flags & (O_CREAT|O_TRUNC))
-		parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
-	err = ceph_mdsc_do_request(mdsc, parent_inode, req);
-	iput(parent_inode);
-	if (!err)
-		err = ceph_init_file(inode, file, req->r_fmode);
-	ceph_mdsc_put_request(req);
-	dout("open result=%d on %llx.%llx\n", err, ceph_vinop(inode));
-out:
-	return err;
-}
-
-
-/*
- * Do a lookup + open with a single request.
- *
- * If this succeeds, but some subsequent check in the vfs
- * may_open() fails, the struct *file gets cleaned up (i.e.
- * ceph_release gets called).  So fear not!
- */
-/*
- * flags
- *  path_lookup_open   -> LOOKUP_OPEN
- *  path_lookup_create -> LOOKUP_OPEN|LOOKUP_CREATE
- */
-struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
-				struct nameidata *nd, int mode,
-				int locked_dir)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct file *file;
-	struct ceph_mds_request *req;
-	struct dentry *ret;
-	int err;
-	int flags = nd->intent.open.flags;
-
-	dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n",
-	     dentry, dentry->d_name.len, dentry->d_name.name, flags, mode);
-
-	/* do the open */
-	req = prepare_open_request(dir->i_sb, flags, mode);
-	if (IS_ERR(req))
-		return ERR_CAST(req);
-	req->r_dentry = dget(dentry);
-	req->r_num_caps = 2;
-	if (flags & O_CREAT) {
-		req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
-		req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
-	}
-	req->r_locked_dir = dir;           /* caller holds dir->i_mutex */
-	err = ceph_mdsc_do_request(mdsc,
-				   (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
-				   req);
-	err = ceph_handle_snapdir(req, dentry, err);
-	if (err)
-		goto out;
-	if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
-		err = ceph_handle_notrace_create(dir, dentry);
-	if (err)
-		goto out;
-	file = lookup_instantiate_filp(nd, req->r_dentry, ceph_open);
-	if (IS_ERR(file))
-		err = PTR_ERR(file);
-out:
-	ret = ceph_finish_lookup(req, dentry, err);
-	ceph_mdsc_put_request(req);
-	dout("ceph_lookup_open result=%p\n", ret);
-	return ret;
-}
-
-int ceph_release(struct inode *inode, struct file *file)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_file_info *cf = file->private_data;
-
-	dout("release inode %p file %p\n", inode, file);
-	ceph_put_fmode(ci, cf->fmode);
-	if (cf->last_readdir)
-		ceph_mdsc_put_request(cf->last_readdir);
-	kfree(cf->last_name);
-	kfree(cf->dir_info);
-	dput(cf->dentry);
-	kmem_cache_free(ceph_file_cachep, cf);
-
-	/* wake up anyone waiting for caps on this inode */
-	wake_up_all(&ci->i_cap_wq);
-	return 0;
-}
-
-/*
- * Read a range of bytes striped over one or more objects.  Iterate over
- * objects we stripe over.  (That's not atomic, but good enough for now.)
- *
- * If we get a short result from the OSD, check against i_size; we need to
- * only return a short read to the caller if we hit EOF.
- */
-static int striped_read(struct inode *inode,
-			u64 off, u64 len,
-			struct page **pages, int num_pages,
-			int *checkeof, bool o_direct,
-			unsigned long buf_align)
-{
-	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	u64 pos, this_len;
-	int io_align, page_align;
-	int left, pages_left;
-	int read;
-	struct page **page_pos;
-	int ret;
-	bool hit_stripe, was_short;
-
-	/*
-	 * we may need to do multiple reads.  not atomic, unfortunately.
-	 */
-	pos = off;
-	left = len;
-	page_pos = pages;
-	pages_left = num_pages;
-	read = 0;
-	io_align = off & ~PAGE_MASK;
-
-more:
-	if (o_direct)
-		page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
-	else
-		page_align = pos & ~PAGE_MASK;
-	this_len = left;
-	ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
-				  &ci->i_layout, pos, &this_len,
-				  ci->i_truncate_seq,
-				  ci->i_truncate_size,
-				  page_pos, pages_left, page_align);
-	if (ret == -ENOENT)
-		ret = 0;
-	hit_stripe = this_len < left;
-	was_short = ret >= 0 && ret < this_len;
-	dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read,
-	     ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
-
-	if (ret > 0) {
-		int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
-
-		if (read < pos - off) {
-			dout(" zero gap %llu to %llu\n", off + read, pos);
-			ceph_zero_page_vector_range(page_align + read,
-						    pos - off - read, pages);
-		}
-		pos += ret;
-		read = pos - off;
-		left -= ret;
-		page_pos += didpages;
-		pages_left -= didpages;
-
-		/* hit stripe? */
-		if (left && hit_stripe)
-			goto more;
-	}
-
-	if (was_short) {
-		/* did we bounce off eof? */
-		if (pos + left > inode->i_size)
-			*checkeof = 1;
-
-		/* zero trailing bytes (inside i_size) */
-		if (left > 0 && pos < inode->i_size) {
-			if (pos + left > inode->i_size)
-				left = inode->i_size - pos;
-
-			dout("zero tail %d\n", left);
-			ceph_zero_page_vector_range(page_align + read, left,
-						    pages);
-			read += left;
-		}
-	}
-
-	if (ret >= 0)
-		ret = read;
-	dout("striped_read returns %d\n", ret);
-	return ret;
-}
-
-/*
- * Synchronous read of @len bytes at *@poff into @data: directly into user
- * pages (ceph_get_direct_page_vector) if O_DIRECT, else via a temporary page
- * vector copied out afterwards.  Returns bytes read (advancing *@poff) or a
- * negative errno.  If the read spans object boundary, just do multiple reads.
- */
-static ssize_t ceph_sync_read(struct file *file, char __user *data,
-			      unsigned len, loff_t *poff, int *checkeof)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct page **pages;
-	u64 off = *poff;
-	int num_pages, ret;
-
-	dout("sync_read on file %p %llu~%u %s\n", file, off, len,
-	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
-
-	if (file->f_flags & O_DIRECT) {
-		num_pages = calc_pages_for((unsigned long)data, len);
-		pages = ceph_get_direct_page_vector(data, num_pages, true);
-	} else {
-		num_pages = calc_pages_for(off, len);
-		pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
-	}
-	if (IS_ERR(pages))
-		return PTR_ERR(pages);
-
-	/*
-	 * flush any page cache pages in this range.  this
-	 * will make concurrent normal and sync io slow,
-	 * but it will at least behave sensibly when they are
-	 * in sequence.
-	 */
-	ret = filemap_write_and_wait(inode->i_mapping);
-	if (ret < 0)
-		goto done;
-
-	ret = striped_read(inode, off, len, pages, num_pages, checkeof,
-			   file->f_flags & O_DIRECT,
-			   (unsigned long)data & ~PAGE_MASK);
-	/* non-O_DIRECT: copy the bytes just read out to the user buffer */
-	if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
-		ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
-	if (ret >= 0)
-		*poff = off + ret;
-
-done:
-	if (file->f_flags & O_DIRECT)
-		ceph_put_page_vector(pages, num_pages, true);
-	else
-		ceph_release_page_vector(pages, num_pages);
-	dout("sync_read result %d\n", ret);
-	return ret;
-}
-
-/* Safe callback for writes that asked for both ACK and ONDISK replies. */
-static void sync_write_commit(struct ceph_osd_request *req,
-			      struct ceph_msg *msg)
-{
-	struct ceph_inode_info *cinfo = ceph_inode(req->r_inode);
-	spinlock_t *unsafe_lock = &cinfo->i_unsafe_lock;
-
-	dout("sync_write_commit %p tid %llu\n", req, req->r_tid);
-
-	/* take the request off the inode's unsafe list ... */
-	spin_lock(unsafe_lock);
-	list_del_init(&req->r_unsafe_item);
-	spin_unlock(unsafe_lock);
-	ceph_put_cap_refs(cinfo, CEPH_CAP_FILE_WR);	/* ... and drop its Fw ref */
-}
-
-/*
- * Synchronous write of @left bytes from @data (a __user pointer, or user
- * pages if O_DIRECT) at *@offset, or at i_size for O_APPEND.  Returns the
- * number of bytes written and updates *@offset, or a negative errno.
- * If write spans object boundary, just do multiple writes.  (For a
- * correct atomic write, we should e.g. take write locks on all
- * objects, rollback on failure, etc.)
- */
-static ssize_t ceph_sync_write(struct file *file, const char __user *data,
-			       size_t left, loff_t *offset)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-	struct ceph_osd_request *req;
-	struct page **pages;
-	int num_pages;
-	long long unsigned pos;
-	u64 len;
-	int written = 0;
-	int flags;
-	int do_sync = 0;
-	int check_caps = 0;
-	int page_align, io_align;
-	unsigned long buf_align;
-	int ret;
-	struct timespec mtime = CURRENT_TIME;
-
-	if (ceph_snap(file->f_dentry->d_inode) != CEPH_NOSNAP)
-		return -EROFS;
-
-	dout("sync_write on file %p %lld~%u %s\n", file, *offset,
-	     (unsigned)left, (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
-
-	if (file->f_flags & O_APPEND)
-		pos = i_size_read(inode);
-	else
-		pos = *offset;
-
-	ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
-	if (ret < 0)
-		return ret;
-
-	ret = invalidate_inode_pages2_range(inode->i_mapping,
-					    pos >> PAGE_CACHE_SHIFT,
-					    (pos + left) >> PAGE_CACHE_SHIFT);
-	if (ret < 0)
-		dout("invalidate_inode_pages2_range returned %d\n", ret);
-
-	flags = CEPH_OSD_FLAG_ORDERSNAP |
-		CEPH_OSD_FLAG_ONDISK |
-		CEPH_OSD_FLAG_WRITE;
-	if ((file->f_flags & (O_SYNC|O_DIRECT)) == 0)
-		flags |= CEPH_OSD_FLAG_ACK;
-	else
-		do_sync = 1;
-
-	/*
-	 * we may need to do multiple writes here if we span an object
-	 * boundary.  this isn't atomic, unfortunately.  :(
-	 */
-more:
-	io_align = pos & ~PAGE_MASK;
-	buf_align = (unsigned long)data & ~PAGE_MASK;
-	len = left;
-	if (file->f_flags & O_DIRECT) {
-		/* write from beginning of first page, regardless of
-		   io alignment */
-		page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
-		num_pages = calc_pages_for((unsigned long)data, len);
-	} else {
-		page_align = pos & ~PAGE_MASK;
-		num_pages = calc_pages_for(pos, len);
-	}
-	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
-				    ceph_vino(inode), pos, &len,
-				    CEPH_OSD_OP_WRITE, flags,
-				    ci->i_snap_realm->cached_context,
-				    do_sync,
-				    ci->i_truncate_seq, ci->i_truncate_size,
-				    &mtime, false, 2, page_align);
-	if (!req)
-		return -ENOMEM;
-
-	if (file->f_flags & O_DIRECT) {
-		pages = ceph_get_direct_page_vector(data, num_pages, false);
-		if (IS_ERR(pages)) {
-			ret = PTR_ERR(pages);
-			goto out;
-		}
-
-		/*
-		 * throw out any page cache pages in this range. this
-		 * may block.
-		 */
-		truncate_inode_pages_range(inode->i_mapping, pos,
-					   (pos+len) | (PAGE_CACHE_SIZE-1));
-	} else {
-		pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
-		if (IS_ERR(pages)) {
-			ret = PTR_ERR(pages);
-			goto out;
-		}
-		ret = ceph_copy_user_to_page_vector(pages, data, pos, len);
-		if (ret < 0) {
-			ceph_release_page_vector(pages, num_pages);
-			goto out;
-		}
-
-		if ((file->f_flags & O_SYNC) == 0) {
-			/* get a second commit callback */
-			req->r_safe_callback = sync_write_commit;
-			req->r_own_pages = 1;
-		}
-	}
-	req->r_pages = pages;
-	req->r_num_pages = num_pages;
-	req->r_inode = inode;
-
-	ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
-	if (!ret) {
-		if (req->r_safe_callback) {
-			/*
-			 * Add to inode unsafe list only after we
-			 * start_request so that a tid has been assigned.
-			 */
-			spin_lock(&ci->i_unsafe_lock);
-			list_add_tail(&req->r_unsafe_item,
-				      &ci->i_unsafe_writes);
-			spin_unlock(&ci->i_unsafe_lock);
-			ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
-		}
-
-		ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
-		if (ret < 0 && req->r_safe_callback) {
-			spin_lock(&ci->i_unsafe_lock);
-			list_del_init(&req->r_unsafe_item);
-			spin_unlock(&ci->i_unsafe_lock);
-			ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
-		}
-	}
-
-	if (file->f_flags & O_DIRECT)
-		ceph_put_page_vector(pages, num_pages, false);
-	else if (file->f_flags & O_SYNC)
-		ceph_release_page_vector(pages, num_pages);
-
-out:
-	ceph_osdc_put_request(req);
-	if (ret == 0) {
-		pos += len;
-		written += len;
-		left -= len;
-		data += len;	/* this chunk only; 'written' is cumulative */
-		if (left)
-			goto more;
-
-		ret = written;
-		*offset = pos;
-		if (pos > i_size_read(inode))
-			check_caps = ceph_inode_set_size(inode, pos);
-		if (check_caps)
-			ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY,
-					NULL);
-	}
-	return ret;
-}
-
-/*
- * Wrap generic_file_aio_read with checks for cap bits on the inode.
- * Atomically grab references, so that those bits are not released
- * back to the MDS mid-read.
- * The sync read path below only looks at the first segment, iov[0].
- * Hmm, the sync read case isn't actually async... should it be?
- */
-static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
-			     unsigned long nr_segs, loff_t pos)
-{
-	struct file *filp = iocb->ki_filp;
-	struct ceph_file_info *fi = filp->private_data;
-	loff_t *ppos = &iocb->ki_pos;
-	size_t len = iov->iov_len;
-	struct inode *inode = filp->f_dentry->d_inode;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	void __user *base = iov->iov_base;
-	ssize_t ret;
-	int want, got = 0;
-	int checkeof = 0, read = 0;	/* read: bytes from earlier passes */
-
-	dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
-	     inode, ceph_vinop(inode), pos, (unsigned)len, inode);
-again:
-	__ceph_do_pending_vmtruncate(inode);
-	if (fi->fmode & CEPH_FILE_MODE_LAZY)
-		want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
-	else
-		want = CEPH_CAP_FILE_CACHE;
-	ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
-	if (ret < 0)
-		goto out;
-	dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
-	     inode, ceph_vinop(inode), pos, (unsigned)len,
-	     ceph_cap_string(got));
-
-	if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
-	    (iocb->ki_filp->f_flags & O_DIRECT) ||
-	    (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
-	    (fi->flags & CEPH_F_SYNC))
-		/* hmm, this isn't really async... */
-		ret = ceph_sync_read(filp, base, len, ppos, &checkeof);
-	else
-		ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
-
-out:
-	dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
-	     inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
-	ceph_put_cap_refs(ci, got);
-
-	if (checkeof && ret >= 0) {
-		int statret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
-
-		/* hit EOF or hole? */
-		if (statret == 0 && *ppos < inode->i_size) {
-			dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size);
-			read += ret;	/* bank this pass, then retry the rest */
-			base += ret;
-			len -= ret;
-			checkeof = 0;
-			goto again;
-		}
-	}
-	if (ret >= 0)
-		ret += read;
-
-	return ret;
-}
-
-/*
- * Take cap references to avoid releasing caps to MDS mid-write.
- *
- * If we are synchronous, and write with an old snap context, the OSD
- * may return EOLDSNAPC.  In that case, retry the write.. _after_
- * dropping our cap refs and allowing the pending snap to logically
- * complete _before_ this write occurs.
- *
- * On an osdmap NEARFULL flag, fsync after a buffered write; on FULL, -ENOSPC.
- */
-static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
-		       unsigned long nr_segs, loff_t pos)
-{
-	struct file *file = iocb->ki_filp;
-	struct ceph_file_info *fi = file->private_data;
-	struct inode *inode = file->f_dentry->d_inode;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_osd_client *osdc =
-		&ceph_sb_to_client(inode->i_sb)->client->osdc;
-	loff_t endoff = pos + iov->iov_len;
-	int want, got = 0;
-	int ret, err;
-
-	if (ceph_snap(inode) != CEPH_NOSNAP)
-		return -EROFS;
-
-retry_snap:
-	if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL))
-		return -ENOSPC;
-	__ceph_do_pending_vmtruncate(inode);
-	dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
-	     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
-	     inode->i_size);
-	if (fi->fmode & CEPH_FILE_MODE_LAZY)
-		want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
-	else
-		want = CEPH_CAP_FILE_BUFFER;
-	ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
-	if (ret < 0)
-		goto out_put;
-
-	dout("aio_write %p %llx.%llx %llu~%u  got cap refs on %s\n",
-	     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
-	     ceph_cap_string(got));
-
-	if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
-	    (iocb->ki_filp->f_flags & O_DIRECT) ||
-	    (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
-	    (fi->flags & CEPH_F_SYNC)) {
-		ret = ceph_sync_write(file, iov->iov_base, iov->iov_len,
-			&iocb->ki_pos);
-	} else {
-		/*
-		 * buffered write; drop Fw early to avoid slow
-		 * revocation if we get stuck on balance_dirty_pages
-		 */
-		int dirty;
-
-		spin_lock(&ci->i_ceph_lock);
-		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
-		spin_unlock(&ci->i_ceph_lock);
-		ceph_put_cap_refs(ci, got);
-
-		ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
-		if ((ret >= 0 || ret == -EIOCBQUEUED) &&
-		    ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host)
-		     || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
-			err = vfs_fsync_range(file, pos, pos + ret - 1, 1);
-			if (err < 0)
-				ret = err;
-		}
-
-		if (dirty)
-			__mark_inode_dirty(inode, dirty);
-		goto out;	/* cap refs were already dropped above */
-	}
-
-	if (ret >= 0) {
-		int dirty;
-		spin_lock(&ci->i_ceph_lock);
-		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
-		spin_unlock(&ci->i_ceph_lock);
-		if (dirty)
-			__mark_inode_dirty(inode, dirty);
-	}
-
-out_put:
-	dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
-	     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
-	     ceph_cap_string(got));
-	ceph_put_cap_refs(ci, got);
-
-out:
-	if (ret == -EOLDSNAPC) {
-		dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n",
-		     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len);
-		goto retry_snap;
-	}
-
-	return ret;
-}
-
-/*
- * llseek.  Refreshes i_size first for SEEK_END/DATA/HOLE; returns pos or -errno.
- */
-static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
-{
-	struct inode *inode = file->f_mapping->host;
-	int ret;
-
-	mutex_lock(&inode->i_mutex);
-	__ceph_do_pending_vmtruncate(inode);
-
-	if (origin == SEEK_END || origin == SEEK_DATA || origin == SEEK_HOLE) {
-		ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
-		if (ret < 0) {
-			offset = ret;
-			goto out;
-		}
-	}
-
-	switch (origin) {
-	case SEEK_END:
-		offset += inode->i_size;
-		break;
-	case SEEK_CUR:
-		/*
-		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
-		 * position-querying operation.  Avoid rewriting the "same"
-		 * f_pos value back to the file because a concurrent read(),
-		 * write() or lseek() might have altered it
-		 */
-		if (offset == 0) {
-			offset = file->f_pos;
-			goto out;
-		}
-		offset += file->f_pos;
-		break;
-	case SEEK_DATA:
-		if (offset >= inode->i_size) {
-			offset = -ENXIO;	/* 'out' returns offset */
-			goto out;
-		}
-		break;
-	case SEEK_HOLE:
-		if (offset >= inode->i_size) {
-			offset = -ENXIO;	/* 'out' returns offset */
-			goto out;
-		}
-		offset = inode->i_size;
-		break;
-	}
-
-	if (offset < 0 || offset > inode->i_sb->s_maxbytes) {
-		offset = -EINVAL;
-		goto out;
-	}
-
-	/* Special lock needed here? */
-	if (offset != file->f_pos) {
-		file->f_pos = offset;
-		file->f_version = 0;
-	}
-
-out:
-	mutex_unlock(&inode->i_mutex);
-	return offset;
-}
-
-const struct file_operations ceph_file_fops = {	/* set as i_fop for S_IFREG */
-	.open		= ceph_open,
-	.release	= ceph_release,
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= ceph_aio_read,
-	.aio_write	= ceph_aio_write,
-	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
-	.llseek		= ceph_llseek,
-	.mmap		= ceph_mmap,
-	.fsync		= ceph_fsync,
-	.lock		= ceph_lock,
-	.flock		= ceph_flock,
-	.unlocked_ioctl	= ceph_ioctl,
-	.compat_ioctl	= ceph_ioctl,
-};
-
diff --git a/ANDROID_3.4.5/fs/ceph/inode.c b/ANDROID_3.4.5/fs/ceph/inode.c
deleted file mode 100644
index 9fff9f3b..00000000
--- a/ANDROID_3.4.5/fs/ceph/inode.c
+++ /dev/null
@@ -1,1811 +0,0 @@
-#include <linux/ceph/ceph_debug.h>
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/uaccess.h>
-#include <linux/kernel.h>
-#include <linux/namei.h>
-#include <linux/writeback.h>
-#include <linux/vmalloc.h>
-
-#include "super.h"
-#include "mds_client.h"
-#include <linux/ceph/decode.h>
-
-/*
- * Ceph inode operations
- *
- * Implement basic inode helpers (get, alloc) and inode ops (getattr,
- * setattr, etc.), xattr helpers, and helpers for assimilating
- * metadata returned by the MDS into our cache.
- *
- * Also define helpers for doing asynchronous writeback, invalidation,
- * and truncation for the benefit of those who can't afford to block
- * (typically because they are in the message handler path).
- */
-
-static const struct inode_operations ceph_symlink_iops;
-
-static void ceph_invalidate_work(struct work_struct *work);
-static void ceph_writeback_work(struct work_struct *work);
-static void ceph_vmtruncate_work(struct work_struct *work);
-
-/* iget5_locked() "set" callback: stamp @inode with the ceph vino in @data */
-static int ceph_set_ino_cb(struct inode *inode, void *data)
-{
-	struct ceph_vino *vino = data;
-
-	ceph_inode(inode)->i_vino = *vino;
-	inode->i_ino = ceph_vino_to_ino(*vino);
-	return 0;
-}
-
-struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
-{
-	ino_t hashval = ceph_vino_to_ino(vino);
-	struct inode *inode = iget5_locked(sb, hashval, ceph_ino_compare,
-					   ceph_set_ino_cb, &vino);
-
-	if (!inode)
-		return ERR_PTR(-ENOMEM);
-	if (inode->i_state & I_NEW) {	/* newly created by iget5_locked() */
-		dout("get_inode created new inode %p %llx.%llx ino %llx\n",
-		     inode, ceph_vinop(inode), (u64)inode->i_ino);
-		unlock_new_inode(inode);
-	}
-
-	dout("get_inode on %lu=%llx.%llx got %p\n", inode->i_ino, vino.ino,
-	     vino.snap, inode);
-	return inode;
-}
-
-/*
- * get/construct snapdir inode for a given directory (ERR_PTR on failure)
- */
-struct inode *ceph_get_snapdir(struct inode *parent)
-{
-	struct ceph_vino vino = {
-		.ino = ceph_ino(parent),
-		.snap = CEPH_SNAPDIR,
-	};
-	struct inode *inode = ceph_get_inode(parent->i_sb, vino);
-	struct ceph_inode_info *ci = ceph_inode(inode);	/* used only after IS_ERR check */
-
-	BUG_ON(!S_ISDIR(parent->i_mode));
-	if (IS_ERR(inode))
-		return inode;
-	inode->i_mode = parent->i_mode;	/* mirror the parent's mode and owner */
-	inode->i_uid = parent->i_uid;
-	inode->i_gid = parent->i_gid;
-	inode->i_op = &ceph_dir_iops;
-	inode->i_fop = &ceph_dir_fops;
-	ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */
-	ci->i_rbytes = 0;
-	return inode;
-}
-
-const struct inode_operations ceph_file_iops = {	/* file inode ops */
-	.permission	= ceph_permission,
-	.getattr	= ceph_getattr,
-	.setattr	= ceph_setattr,
-	.getxattr	= ceph_getxattr,
-	.setxattr	= ceph_setxattr,
-	.listxattr	= ceph_listxattr,
-	.removexattr	= ceph_removexattr,
-};
-
-
-/*
- * We use a 'frag tree' to keep track of the MDS's directory fragments
- * for a given inode (usually there is just a single fragment).  We
- * need to know when a child frag is delegated to a new MDS, or when
- * it is flagged as replicated, so we can direct our requests
- * accordingly.
- */
-
-/*
- * find frag @f in ci->i_fragtree, or insert an empty one; ERR_PTR(-ENOMEM)
- */
-static struct ceph_inode_frag *__get_or_create_frag(struct ceph_inode_info *ci,
-						    u32 f)
-{
-	struct rb_node **p;
-	struct rb_node *parent = NULL;
-	struct ceph_inode_frag *frag;
-	int c;
-
-	p = &ci->i_fragtree.rb_node;
-	while (*p) {
-		parent = *p;
-		frag = rb_entry(parent, struct ceph_inode_frag, node);
-		c = ceph_frag_compare(f, frag->frag);
-		if (c < 0)
-			p = &(*p)->rb_left;
-		else if (c > 0)
-			p = &(*p)->rb_right;
-		else
-			return frag;
-	}
-	/* not found: allocate a node and link it where the walk ended */
-	frag = kmalloc(sizeof(*frag), GFP_NOFS);
-	if (!frag) {
-		pr_err("__get_or_create_frag ENOMEM on %p %llx.%llx "
-		       "frag %x\n", &ci->vfs_inode,
-		       ceph_vinop(&ci->vfs_inode), f);
-		return ERR_PTR(-ENOMEM);
-	}
-	frag->frag = f;
-	frag->split_by = 0;
-	frag->mds = -1;
-	frag->ndist = 0;
-
-	rb_link_node(&frag->node, parent, p);
-	rb_insert_color(&frag->node, &ci->i_fragtree);
-
-	dout("get_or_create_frag added %llx.%llx frag %x\n",
-	     ceph_vinop(&ci->vfs_inode), f);
-	return frag;
-}
-
-/*
- * look up the fragtree node whose frag equals @f; NULL if there is none
- */
-struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f)
-{
-	struct rb_node *cursor = ci->i_fragtree.rb_node;
-
-	while (cursor) {
-		struct ceph_inode_frag *entry;
-		int cmp;
-
-		entry = rb_entry(cursor, struct ceph_inode_frag, node);
-		cmp = ceph_frag_compare(f, entry->frag);
-		if (cmp == 0)
-			return entry;
-		/* descend left when @f orders before this node, else right */
-		cursor = (cmp < 0) ? cursor->rb_left : cursor->rb_right;
-	}
-	return NULL;
-}
-
-/*
- * Choose frag containing the given value @v and return it.  If @pfrag is
- * specified, copy the frag delegation info to the caller if it is present;
- * @found, if given, is set to 1 when such info was found and 0 otherwise.
- */
-u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
-		     struct ceph_inode_frag *pfrag,
-		     int *found)
-{
-	u32 t = ceph_frag_make(0, 0);
-	struct ceph_inode_frag *frag;
-	unsigned nway, i;
-	u32 n;
-
-	if (found)
-		*found = 0;
-
-	mutex_lock(&ci->i_fragtree_mutex);
-	while (1) {
-		WARN_ON(!ceph_frag_contains_value(t, v));
-		frag = __ceph_find_frag(ci, t);
-		if (!frag)
-			break; /* t is a leaf */
-		if (frag->split_by == 0) {
-			if (pfrag)
-				memcpy(pfrag, frag, sizeof(*pfrag));
-			if (found)
-				*found = 1;
-			break;
-		}
-
-		/* choose child */
-		nway = 1 << frag->split_by;
-		dout("choose_frag(%x) %x splits by %d (%d ways)\n", v, t,
-		     frag->split_by, nway);
-		for (i = 0; i < nway; i++) {
-			n = ceph_frag_make_child(t, frag->split_by, i);
-			if (ceph_frag_contains_value(n, v)) {
-				t = n;
-				break;
-			}
-		}
-		BUG_ON(i == nway);	/* no child contained @v */
-	}
-	dout("choose_frag(%x) = %x\n", v, t);
-
-	mutex_unlock(&ci->i_fragtree_mutex);
-	return t;
-}
-
-/*
- * Process dirfrag (delegation) info from the mds.  Include leaf
- * fragment in tree ONLY if ndist > 0 (otherwise, only branches/splits
- * are included in i_fragtree).  Returns 0, or -ENOMEM.
- */
-static int ceph_fill_dirfrag(struct inode *inode,
-			     struct ceph_mds_reply_dirfrag *dirinfo)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_inode_frag *frag;
-	u32 id = le32_to_cpu(dirinfo->frag);
-	int mds = le32_to_cpu(dirinfo->auth);
-	int ndist = le32_to_cpu(dirinfo->ndist);
-	int i;
-	int err = 0;
-
-	mutex_lock(&ci->i_fragtree_mutex);
-	if (ndist == 0) {
-		/* no delegation info needed. */
-		frag = __ceph_find_frag(ci, id);
-		if (!frag)
-			goto out;
-		if (frag->split_by == 0) {
-			/* tree leaf, remove */
-			dout("fill_dirfrag removed %llx.%llx frag %x"
-			     " (no ref)\n", ceph_vinop(inode), id);
-			rb_erase(&frag->node, &ci->i_fragtree);
-			kfree(frag);
-		} else {
-			/* tree branch, keep and clear */
-			dout("fill_dirfrag cleared %llx.%llx frag %x"
-			     " referral\n", ceph_vinop(inode), id);
-			frag->mds = -1;
-			frag->ndist = 0;
-		}
-		goto out;
-	}
-
-
-	/* find/add this frag to store mds delegation info */
-	frag = __get_or_create_frag(ci, id);
-	if (IS_ERR(frag)) {
-		/* this is not the end of the world; we can continue
-		   with bad/inaccurate delegation info */
-		pr_err("fill_dirfrag ENOMEM on mds ref %llx.%llx fg %x\n",
-		       ceph_vinop(inode), le32_to_cpu(dirinfo->frag));
-		err = -ENOMEM;
-		goto out;
-	}
-
-	frag->mds = mds;
-	frag->ndist = min_t(u32, ndist, CEPH_MAX_DIRFRAG_REP);	/* clamp to dist[] use */
-	for (i = 0; i < frag->ndist; i++)
-		frag->dist[i] = le32_to_cpu(dirinfo->dist[i]);
-	dout("fill_dirfrag %llx.%llx frag %x ndist=%d\n",
-	     ceph_vinop(inode), frag->frag, frag->ndist);
-
-out:
-	mutex_unlock(&ci->i_fragtree_mutex);
-	return err;
-}
-
-
-/*
- * allocate from ceph_inode_cachep and initialize the ceph fields; NULL on OOM
- */
-struct inode *ceph_alloc_inode(struct super_block *sb)
-{
-	struct ceph_inode_info *ci;
-	int i;
-
-	ci = kmem_cache_alloc(ceph_inode_cachep, GFP_NOFS);
-	if (!ci)
-		return NULL;
-
-	dout("alloc_inode %p\n", &ci->vfs_inode);
-
-	spin_lock_init(&ci->i_ceph_lock);
-
-	ci->i_version = 0;
-	ci->i_time_warp_seq = 0;
-	ci->i_ceph_flags = 0;
-	ci->i_release_count = 0;
-	ci->i_symlink = NULL;
-
-	memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
-
-	ci->i_fragtree = RB_ROOT;
-	mutex_init(&ci->i_fragtree_mutex);
-	/* xattr state: no blobs, empty index */
-	ci->i_xattrs.blob = NULL;
-	ci->i_xattrs.prealloc_blob = NULL;
-	ci->i_xattrs.dirty = false;
-	ci->i_xattrs.index = RB_ROOT;
-	ci->i_xattrs.count = 0;
-	ci->i_xattrs.names_size = 0;
-	ci->i_xattrs.vals_size = 0;
-	ci->i_xattrs.version = 0;
-	ci->i_xattrs.index_version = 0;
-	/* capability state: no caps, nothing dirty or flushing */
-	ci->i_caps = RB_ROOT;
-	ci->i_auth_cap = NULL;
-	ci->i_dirty_caps = 0;
-	ci->i_flushing_caps = 0;
-	INIT_LIST_HEAD(&ci->i_dirty_item);
-	INIT_LIST_HEAD(&ci->i_flushing_item);
-	ci->i_cap_flush_seq = 0;
-	ci->i_cap_flush_last_tid = 0;
-	memset(&ci->i_cap_flush_tid, 0, sizeof(ci->i_cap_flush_tid));
-	init_waitqueue_head(&ci->i_cap_wq);
-	ci->i_hold_caps_min = 0;
-	ci->i_hold_caps_max = 0;
-	INIT_LIST_HEAD(&ci->i_cap_delay_list);
-	ci->i_cap_exporting_mds = 0;
-	ci->i_cap_exporting_mseq = 0;
-	ci->i_cap_exporting_issued = 0;
-	INIT_LIST_HEAD(&ci->i_cap_snaps);
-	ci->i_head_snapc = NULL;
-	ci->i_snap_caps = 0;
-
-	for (i = 0; i < CEPH_FILE_MODE_NUM; i++)
-		ci->i_nr_by_mode[i] = 0;
-
-	ci->i_truncate_seq = 0;
-	ci->i_truncate_size = 0;
-	ci->i_truncate_pending = 0;
-
-	ci->i_max_size = 0;
-	ci->i_reported_size = 0;
-	ci->i_wanted_max_size = 0;
-	ci->i_requested_max_size = 0;
-	/* cap reference counters all start at zero */
-	ci->i_pin_ref = 0;
-	ci->i_rd_ref = 0;
-	ci->i_rdcache_ref = 0;
-	ci->i_wr_ref = 0;
-	ci->i_wb_ref = 0;
-	ci->i_wrbuffer_ref = 0;
-	ci->i_wrbuffer_ref_head = 0;
-	ci->i_shared_gen = 0;
-	ci->i_rdcache_gen = 0;
-	ci->i_rdcache_revoking = 0;
-
-	INIT_LIST_HEAD(&ci->i_unsafe_writes);
-	INIT_LIST_HEAD(&ci->i_unsafe_dirops);
-	spin_lock_init(&ci->i_unsafe_lock);
-
-	ci->i_snap_realm = NULL;
-	INIT_LIST_HEAD(&ci->i_snap_realm_item);
-	INIT_LIST_HEAD(&ci->i_snap_flush_item);
-	/* work items for writeback, page invalidation and vmtruncate */
-	INIT_WORK(&ci->i_wb_work, ceph_writeback_work);
-	INIT_WORK(&ci->i_pg_inv_work, ceph_invalidate_work);
-
-	INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
-
-	return &ci->vfs_inode;
-}
-
-static void ceph_i_callback(struct rcu_head *head)
-{
-	/* queued through call_rcu() by ceph_destroy_inode(); frees the slab object */
-	struct inode *inode = container_of(head, struct inode, i_rcu);
-
-	kmem_cache_free(ceph_inode_cachep, ceph_inode(inode));
-}
-
-void ceph_destroy_inode(struct inode *inode)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_inode_frag *frag;
-	struct rb_node *n;
-
-	dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));
-
-	ceph_queue_caps_release(inode);
-
-	/*
-	 * we may still have a snap_realm reference if there are stray
-	 * caps in i_cap_exporting_issued or i_snap_caps.
-	 */
-	if (ci->i_snap_realm) {
-		struct ceph_mds_client *mdsc =
-			ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
-		struct ceph_snap_realm *realm = ci->i_snap_realm;
-
-		dout(" dropping residual ref to snap realm %p\n", realm);
-		spin_lock(&realm->inodes_with_caps_lock);
-		list_del_init(&ci->i_snap_realm_item);
-		spin_unlock(&realm->inodes_with_caps_lock);
-		ceph_put_snap_realm(mdsc, realm);
-	}
-	/* free the symlink string and every node left in the fragtree */
-	kfree(ci->i_symlink);
-	while ((n = rb_first(&ci->i_fragtree)) != NULL) {
-		frag = rb_entry(n, struct ceph_inode_frag, node);
-		rb_erase(n, &ci->i_fragtree);
-		kfree(frag);
-	}
-	/* drop the xattr index and both xattr blobs, if any */
-	__ceph_destroy_xattrs(ci);
-	if (ci->i_xattrs.blob)
-		ceph_buffer_put(ci->i_xattrs.blob);
-	if (ci->i_xattrs.prealloc_blob)
-		ceph_buffer_put(ci->i_xattrs.prealloc_blob);
-
-	call_rcu(&inode->i_rcu, ceph_i_callback);	/* the callback frees ci */
-}
-
-
-/*
- * Helpers to fill in size, ctime, mtime, and atime.  We have to be careful
- * because either the client or MDS may have more up to date info, depending
- * on which capabilities are held, and whether time_warp_seq or truncate_seq
- * have increased.  (Ordinarily, mtime and size are monotonically increasing,
- * except when utimes() or truncate() increments the corresponding _seq
- * values.)  ceph_fill_file_size() returns 1 if i_truncate_pending was bumped.
- */
-int ceph_fill_file_size(struct inode *inode, int issued,
-			u32 truncate_seq, u64 truncate_size, u64 size)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int queue_trunc = 0;
-
-	if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 ||
-	    (truncate_seq == ci->i_truncate_seq && size > inode->i_size)) {
-		dout("size %lld -> %llu\n", inode->i_size, size);
-		inode->i_size = size;
-		inode->i_blocks = (size + (1<<9) - 1) >> 9;	/* 512-byte units, rounded up */
-		ci->i_reported_size = size;
-		if (truncate_seq != ci->i_truncate_seq) {
-			dout("truncate_seq %u -> %u\n",
-			     ci->i_truncate_seq, truncate_seq);
-			ci->i_truncate_seq = truncate_seq;
-			/*
-			 * If we hold relevant caps, or in the case where we're
-			 * not the only client referencing this file and we
-			 * don't hold those caps, then we need to check whether
-			 * the file is either opened or mmaped
-			 */
-			if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_RD|
-				       CEPH_CAP_FILE_WR|CEPH_CAP_FILE_BUFFER|
-				       CEPH_CAP_FILE_EXCL|
-				       CEPH_CAP_FILE_LAZYIO)) ||
-			    mapping_mapped(inode->i_mapping) ||
-			    __ceph_caps_file_wanted(ci)) {
-				ci->i_truncate_pending++;
-				queue_trunc = 1;
-			}
-		}
-	}
-	if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) >= 0 &&
-	    ci->i_truncate_size != truncate_size) {
-		dout("truncate_size %lld -> %llu\n", ci->i_truncate_size,
-		     truncate_size);
-		ci->i_truncate_size = truncate_size;
-	}
-	return queue_trunc;
-}
-
-void ceph_fill_file_time(struct inode *inode, int issued,
-			 u64 time_warp_seq, struct timespec *ctime,
-			 struct timespec *mtime, struct timespec *atime)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int warn = 0;	/* set when the MDS time_warp_seq is behind ours */
-
-	if (issued & (CEPH_CAP_FILE_EXCL|
-		      CEPH_CAP_FILE_WR|
-		      CEPH_CAP_FILE_BUFFER|
-		      CEPH_CAP_AUTH_EXCL|
-		      CEPH_CAP_XATTR_EXCL)) {
-		if (timespec_compare(ctime, &inode->i_ctime) > 0) {
-			dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
-			     inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
-			     ctime->tv_sec, ctime->tv_nsec);
-			inode->i_ctime = *ctime;
-		}
-		if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
-			/* the MDS did a utimes() */
-			dout("mtime %ld.%09ld -> %ld.%09ld "
-			     "tw %d -> %d\n",
-			     inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
-			     mtime->tv_sec, mtime->tv_nsec,
-			     ci->i_time_warp_seq, (int)time_warp_seq);
-
-			inode->i_mtime = *mtime;
-			inode->i_atime = *atime;
-			ci->i_time_warp_seq = time_warp_seq;
-		} else if (time_warp_seq == ci->i_time_warp_seq) {
-			/* nobody did utimes(); take the max */
-			if (timespec_compare(mtime, &inode->i_mtime) > 0) {
-				dout("mtime %ld.%09ld -> %ld.%09ld inc\n",
-				     inode->i_mtime.tv_sec,
-				     inode->i_mtime.tv_nsec,
-				     mtime->tv_sec, mtime->tv_nsec);
-				inode->i_mtime = *mtime;
-			}
-			if (timespec_compare(atime, &inode->i_atime) > 0) {
-				dout("atime %ld.%09ld -> %ld.%09ld inc\n",
-				     inode->i_atime.tv_sec,
-				     inode->i_atime.tv_nsec,
-				     atime->tv_sec, atime->tv_nsec);
-				inode->i_atime = *atime;
-			}
-		} else if (issued & CEPH_CAP_FILE_EXCL) {
-			/* we did a utimes(); ignore mds values */
-		} else {
-			warn = 1;
-		}
-	} else {
-		/* we have no write|excl caps; whatever the MDS says is true */
-		if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
-			inode->i_ctime = *ctime;
-			inode->i_mtime = *mtime;
-			inode->i_atime = *atime;
-			ci->i_time_warp_seq = time_warp_seq;
-		} else {
-			warn = 1;
-		}
-	}
-	if (warn) /* time_warp_seq shouldn't go backwards */
-		dout("%p mds time_warp_seq %llu < %u\n",
-		     inode, time_warp_seq, ci->i_time_warp_seq);
-}
-
-/*
- * Populate an inode based on info from mds.  May be called on new or
- * existing inodes.
- */
-static int fill_inode(struct inode *inode,
-		      struct ceph_mds_reply_info_in *iinfo,
-		      struct ceph_mds_reply_dirfrag *dirinfo,
-		      struct ceph_mds_session *session,
-		      unsigned long ttl_from, int cap_fmode,
-		      struct ceph_cap_reservation *caps_reservation)
-{
-	struct ceph_mds_reply_inode *info = iinfo->in;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int i;
-	int issued = 0, implemented;
-	int updating_inode = 0;
-	struct timespec mtime, atime, ctime;
-	u32 nsplits;
-	struct ceph_buffer *xattr_blob = NULL;
-	int err = 0;
-	int queue_trunc = 0;
-
-	dout("fill_inode %p ino %llx.%llx v %llu had %llu\n",
-	     inode, ceph_vinop(inode), le64_to_cpu(info->version),
-	     ci->i_version);
-
-	/*
-	 * prealloc xattr data, if it looks like we'll need it.  only
-	 * if len > 4 (meaning there are actually xattrs; the first 4
-	 * bytes are the xattr count).
-	 */
-	if (iinfo->xattr_len > 4) {
-		xattr_blob = ceph_buffer_new(iinfo->xattr_len, GFP_NOFS);
-		if (!xattr_blob)
-			pr_err("fill_inode ENOMEM xattr blob %d bytes\n",
-			       iinfo->xattr_len);
-	}
-
-	spin_lock(&ci->i_ceph_lock);
-
-	/*
-	 * provided version will be odd if inode value is projected,
-	 * even if stable.  skip the update if we have newer stable
-	 * info (ours>=theirs, e.g. due to racing mds replies), unless
-	 * we are getting projected (unstable) info (in which case the
-	 * version is odd, and we want ours>theirs).
-	 *   us   them
-	 *   2    2     skip
-	 *   3    2     skip
-	 *   3    3     update
-	 */
-	if (le64_to_cpu(info->version) > 0 &&
-	    (ci->i_version & ~1) >= le64_to_cpu(info->version))
-		goto no_change;
-	
-	updating_inode = 1;
-	issued = __ceph_caps_issued(ci, &implemented);
-	issued |= implemented | __ceph_caps_dirty(ci);
-
-	/* update inode */
-	ci->i_version = le64_to_cpu(info->version);
-	inode->i_version++;
-	inode->i_rdev = le32_to_cpu(info->rdev);
-
-	if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
-		inode->i_mode = le32_to_cpu(info->mode);
-		inode->i_uid = le32_to_cpu(info->uid);
-		inode->i_gid = le32_to_cpu(info->gid);
-		dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
-		     inode->i_uid, inode->i_gid);
-	}
-
-	if ((issued & CEPH_CAP_LINK_EXCL) == 0)
-		set_nlink(inode, le32_to_cpu(info->nlink));
-
-	/* be careful with mtime, atime, size */
-	ceph_decode_timespec(&atime, &info->atime);
-	ceph_decode_timespec(&mtime, &info->mtime);
-	ceph_decode_timespec(&ctime, &info->ctime);
-	queue_trunc = ceph_fill_file_size(inode, issued,
-					  le32_to_cpu(info->truncate_seq),
-					  le64_to_cpu(info->truncate_size),
-					  le64_to_cpu(info->size));
-	ceph_fill_file_time(inode, issued,
-			    le32_to_cpu(info->time_warp_seq),
-			    &ctime, &mtime, &atime);
-
-	/* only update max_size on auth cap */
-	if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
-	    ci->i_max_size != le64_to_cpu(info->max_size)) {
-		dout("max_size %lld -> %llu\n", ci->i_max_size,
-		     le64_to_cpu(info->max_size));
-		ci->i_max_size = le64_to_cpu(info->max_size);
-	}
-
-	ci->i_layout = info->layout;
-	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
-
-	/* xattrs */
-	/* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */
-	if ((issued & CEPH_CAP_XATTR_EXCL) == 0 &&
-	    le64_to_cpu(info->xattr_version) > ci->i_xattrs.version) {
-		if (ci->i_xattrs.blob)
-			ceph_buffer_put(ci->i_xattrs.blob);
-		ci->i_xattrs.blob = xattr_blob;
-		if (xattr_blob)
-			memcpy(ci->i_xattrs.blob->vec.iov_base,
-			       iinfo->xattr_data, iinfo->xattr_len);
-		ci->i_xattrs.version = le64_to_cpu(info->xattr_version);
-		xattr_blob = NULL;
-	}
-
-	inode->i_mapping->a_ops = &ceph_aops;
-	inode->i_mapping->backing_dev_info =
-		&ceph_sb_to_client(inode->i_sb)->backing_dev_info;
-
-	switch (inode->i_mode & S_IFMT) {
-	case S_IFIFO:
-	case S_IFBLK:
-	case S_IFCHR:
-	case S_IFSOCK:
-		init_special_inode(inode, inode->i_mode, inode->i_rdev);
-		inode->i_op = &ceph_file_iops;
-		break;
-	case S_IFREG:
-		inode->i_op = &ceph_file_iops;
-		inode->i_fop = &ceph_file_fops;
-		break;
-	case S_IFLNK:
-		inode->i_op = &ceph_symlink_iops;
-		if (!ci->i_symlink) {
-			u32 symlen = iinfo->symlink_len;
-			char *sym;
-
-			spin_unlock(&ci->i_ceph_lock);
-
-			err = -EINVAL;
-			if (WARN_ON(symlen != inode->i_size))
-				goto out;
-
-			err = -ENOMEM;
-			sym = kstrndup(iinfo->symlink, symlen, GFP_NOFS);
-			if (!sym)
-				goto out;
-
-			spin_lock(&ci->i_ceph_lock);
-			if (!ci->i_symlink)
-				ci->i_symlink = sym;
-			else
-				kfree(sym); /* lost a race */
-		}
-		break;
-	case S_IFDIR:
-		inode->i_op = &ceph_dir_iops;
-		inode->i_fop = &ceph_dir_fops;
-
-		ci->i_dir_layout = iinfo->dir_layout;
-
-		ci->i_files = le64_to_cpu(info->files);
-		ci->i_subdirs = le64_to_cpu(info->subdirs);
-		ci->i_rbytes = le64_to_cpu(info->rbytes);
-		ci->i_rfiles = le64_to_cpu(info->rfiles);
-		ci->i_rsubdirs = le64_to_cpu(info->rsubdirs);
-		ceph_decode_timespec(&ci->i_rctime, &info->rctime);
-		break;
-	default:
-		pr_err("fill_inode %llx.%llx BAD mode 0%o\n",
-		       ceph_vinop(inode), inode->i_mode);
-	}
-
-no_change:
-	spin_unlock(&ci->i_ceph_lock);
-
-	/* queue truncate if we saw i_size decrease */
-	if (queue_trunc)
-		ceph_queue_vmtruncate(inode);
-
-	/* populate frag tree */
-	/* FIXME: move me up, if/when version reflects fragtree changes */
-	nsplits = le32_to_cpu(info->fragtree.nsplits);
-	mutex_lock(&ci->i_fragtree_mutex);
-	for (i = 0; i < nsplits; i++) {
-		u32 id = le32_to_cpu(info->fragtree.splits[i].frag);
-		struct ceph_inode_frag *frag = __get_or_create_frag(ci, id);
-
-		if (IS_ERR(frag))
-			continue;
-		frag->split_by = le32_to_cpu(info->fragtree.splits[i].by);
-		dout(" frag %x split by %d\n", frag->frag, frag->split_by);
-	}
-	mutex_unlock(&ci->i_fragtree_mutex);
-
-	/* were we issued a capability? */
-	if (info->cap.caps) {
-		if (ceph_snap(inode) == CEPH_NOSNAP) {
-			ceph_add_cap(inode, session,
-				     le64_to_cpu(info->cap.cap_id),
-				     cap_fmode,
-				     le32_to_cpu(info->cap.caps),
-				     le32_to_cpu(info->cap.wanted),
-				     le32_to_cpu(info->cap.seq),
-				     le32_to_cpu(info->cap.mseq),
-				     le64_to_cpu(info->cap.realm),
-				     info->cap.flags,
-				     caps_reservation);
-		} else {
-			spin_lock(&ci->i_ceph_lock);
-			dout(" %p got snap_caps %s\n", inode,
-			     ceph_cap_string(le32_to_cpu(info->cap.caps)));
-			ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
-			if (cap_fmode >= 0)
-				__ceph_get_fmode(ci, cap_fmode);
-			spin_unlock(&ci->i_ceph_lock);
-		}
-	} else if (cap_fmode >= 0) {
-		pr_warning("mds issued no caps on %llx.%llx\n",
-			   ceph_vinop(inode));
-		__ceph_get_fmode(ci, cap_fmode);
-	}
-
-	/* set dir completion flag? */
-	if (S_ISDIR(inode->i_mode) &&
-	    updating_inode &&                 /* didn't jump to no_change */
-	    ci->i_files == 0 && ci->i_subdirs == 0 &&
-	    ceph_snap(inode) == CEPH_NOSNAP &&
-	    (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
-	    (issued & CEPH_CAP_FILE_EXCL) == 0 &&
-	    !ceph_dir_test_complete(inode)) {
-		dout(" marking %p complete (empty)\n", inode);
-		ceph_dir_set_complete(inode);
-		ci->i_max_offset = 2;
-	}
-
-	/* update delegation info? */
-	if (dirinfo)
-		ceph_fill_dirfrag(inode, dirinfo);
-
-	err = 0;
-
-out:
-	if (xattr_blob)
-		ceph_buffer_put(xattr_blob);
-	return err;
-}
-
-/*
- * Record or refresh the lease described by @lease on @dentry.
- *
- * The lease runs for lease->duration_ms milliseconds counted from
- * @from_time.  The dentry's lease_shared_gen is always resynced with
- * the parent directory; the lease itself is only recorded when the
- * duration is non-zero, it would not replace a later-expiring lease
- * from the same session generation, and the dentry is not already
- * tracking a lease from a different session.
- *
- * caller should hold session s_mutex.
- */
-static void update_dentry_lease(struct dentry *dentry,
-				struct ceph_mds_reply_lease *lease,
-				struct ceph_mds_session *session,
-				unsigned long from_time)
-{
-	struct ceph_dentry_info *di = ceph_dentry(dentry);
-	unsigned long duration_ms = le32_to_cpu(lease->duration_ms);
-	unsigned long expires = from_time + (duration_ms * HZ) / 1000;
-	unsigned long renew_at = from_time + (duration_ms * HZ / 2) / 1000;
-	struct inode *parent_inode;
-
-	/* leases are only tracked on dentries using ceph_dentry_ops */
-	if (dentry->d_op != &ceph_dentry_ops)
-		return;
-
-	spin_lock(&dentry->d_lock);
-	dout("update_dentry_lease %p duration %lu ms ttl %lu\n",
-	     dentry, duration_ms, expires);
-
-	/* keep lease_shared_gen in step with the parent directory */
-	parent_inode = dentry->d_parent->d_inode;
-	di->lease_shared_gen = ceph_inode(parent_inode)->i_shared_gen;
-
-	/*
-	 * Skip the update when there is no lease (zero duration), when
-	 * we already hold a newer lease for this session generation, or
-	 * when the tracked lease belongs to some other session.
-	 */
-	if (duration_ms != 0 &&
-	    !(di->lease_gen == session->s_cap_gen &&
-	      time_before(expires, dentry->d_time)) &&
-	    !(di->lease_session && di->lease_session != session)) {
-		ceph_dentry_lru_touch(dentry);
-
-		if (!di->lease_session)
-			di->lease_session = ceph_get_mds_session(session);
-		di->lease_gen = session->s_cap_gen;
-		di->lease_seq = le32_to_cpu(lease->seq);
-		di->lease_renew_after = renew_at;
-		di->lease_renew_from = 0;
-		dentry->d_time = expires;
-	}
-
-	spin_unlock(&dentry->d_lock);
-}
-
-/*
- * Assign @dn the next directory position (the parent's i_max_offset,
- * which is then bumped) and move it to the front of the parent's
- * d_subdirs list, so that dcache_readdir behaves.  Nothing happens
- * unless the parent directory is currently flagged complete.
- *
- * Always called under directory's i_mutex.
- */
-static void ceph_set_dentry_offset(struct dentry *dn)
-{
-	struct dentry *parent = dn->d_parent;
-	struct inode *dir_inode = parent->d_inode;
-	struct ceph_inode_info *dir_ci;
-	struct ceph_dentry_info *di;
-	bool complete;
-
-	BUG_ON(!dir_inode);
-
-	dir_ci = ceph_inode(dir_inode);
-	di = ceph_dentry(dn);
-
-	/* test the flag and claim the offset under one i_ceph_lock hold */
-	spin_lock(&dir_ci->i_ceph_lock);
-	complete = ceph_dir_test_complete(dir_inode);
-	if (complete)
-		di->offset = dir_ci->i_max_offset++;
-	spin_unlock(&dir_ci->i_ceph_lock);
-
-	if (!complete)
-		return;
-
-	/* parent d_lock first, then the child's as a nested lock */
-	spin_lock(&parent->d_lock);
-	spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
-	list_move(&dn->d_u.d_child, &parent->d_subdirs);
-	dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
-	     dn->d_u.d_child.prev, dn->d_u.d_child.next);
-	spin_unlock(&dn->d_lock);
-	spin_unlock(&parent->d_lock);
-}
-
-/*
- * Attach inode @in to the (negative) dentry @dn.
- * caller must hold directory i_mutex for this to be safe.
- *
- * @dn is dropped from the hash first if needed and then handed to
- * d_materialise_unique().  If that returns a different dentry, our
- * reference on @dn is put and the returned dentry is used instead.
- *
- * The resulting dentry is rehashed only when @prehash is NULL or
- * *@prehash is true; on failure *@prehash (if given) is cleared for
- * the benefit of the caller.  When @set_offset is true the dentry
- * also gets a directory offset via ceph_set_dentry_offset().
- *
- * Returns the dentry to use, or the ERR_PTR from
- * d_materialise_unique().
- */
-static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
-				    bool *prehash, bool set_offset)
-{
-	struct dentry *alias;
-
-	BUG_ON(dn->d_inode);
-
-	/* d_materialise_unique() is given an unhashed dentry */
-	if (!d_unhashed(dn))
-		d_drop(dn);
-
-	alias = d_materialise_unique(dn, in);
-	if (IS_ERR(alias)) {
-		pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
-		       PTR_ERR(alias), dn, in, ceph_vinop(in));
-		if (prehash)
-			*prehash = false; /* caller must not rehash */
-		return alias; /* the error pointer itself */
-	}
-
-	if (!alias) {
-		/* @dn itself was attached to the inode */
-		BUG_ON(!ceph_dentry(dn));
-		dout("dn %p attached to %p ino %llx.%llx\n",
-		     dn, dn->d_inode, ceph_vinop(dn->d_inode));
-	} else {
-		/* a different dentry was returned; switch over to it */
-		dout("dn %p (%d) spliced with %p (%d) "
-		     "inode %p ino %llx.%llx\n",
-		     dn, dn->d_count,
-		     alias, alias->d_count,
-		     alias->d_inode, ceph_vinop(alias->d_inode));
-		dput(dn);
-		dn = alias;
-	}
-
-	if ((!prehash || *prehash) && d_unhashed(dn))
-		d_rehash(dn);
-	if (set_offset)
-		ceph_set_dentry_offset(dn);
-	return dn;
-}
-
-/*
- * Incorporate results into the local cache.  This is either just
- * one inode, or a directory, dentry, and possibly linked-to inode (e.g.,
- * after a lookup).
- *
- * A reply may contain
- *         a directory inode along with a dentry.
- *  and/or a target inode
- *
- * @sb:      superblock passed to ceph_get_inode() for inode lookups
- * @req:     request whose parsed reply (req->r_reply_info) is applied
- * @session: MDS session handed on to fill_inode() and
- *           update_dentry_lease()
- *
- * An empty reply (neither dentry nor target) returns 0 right away,
- * after invalidating the directory if the op succeeded on a locked
- * dir.  Otherwise the directory inode is filled in, the dentry is
- * moved/deleted/instantiated/spliced as the reply dictates, and the
- * target inode (stored in req->r_target_inode) is filled in.
- *
- * Returns 0 on success or a negative errno taken from fill_inode(),
- * ceph_get_inode() or splice_dentry().
- *
- * Called with snap_rwsem (read).
- */
-int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
-		    struct ceph_mds_session *session)
-{
-	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
-	struct inode *in = NULL;
-	struct ceph_mds_reply_inode *ininfo;
-	struct ceph_vino vino;
-	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
-	int i = 0;
-	int err = 0;
-
-	dout("fill_trace %p is_dentry %d is_target %d\n", req,
-	     rinfo->head->is_dentry, rinfo->head->is_target);
-
-#if 0
-	/*
-	 * Debugging hook:
-	 *
-	 * If we resend completed ops to a recovering mds, we get no
-	 * trace.  Since that is very rare, pretend this is the case
-	 * to ensure the 'no trace' handlers in the callers behave.
-	 *
-	 * Fill in inodes unconditionally to avoid breaking cap
-	 * invariants.
-	 */
-	if (rinfo->head->op & CEPH_MDS_OP_WRITE) {
-		pr_info("fill_trace faking empty trace on %lld %s\n",
-			req->r_tid, ceph_mds_op_name(rinfo->head->op));
-		if (rinfo->head->is_dentry) {
-			rinfo->head->is_dentry = 0;
-			err = fill_inode(req->r_locked_dir,
-					 &rinfo->diri, rinfo->dirfrag,
-					 session, req->r_request_started, -1);
-		}
-		if (rinfo->head->is_target) {
-			rinfo->head->is_target = 0;
-			ininfo = rinfo->targeti.in;
-			vino.ino = le64_to_cpu(ininfo->ino);
-			vino.snap = le64_to_cpu(ininfo->snapid);
-			in = ceph_get_inode(sb, vino);
-			err = fill_inode(in, &rinfo->targeti, NULL,
-					 session, req->r_request_started,
-					 req->r_fmode);
-			iput(in);
-		}
-	}
-#endif
-
-	if (!rinfo->head->is_target && !rinfo->head->is_dentry) {
-		dout("fill_trace reply is empty!\n");
-		if (rinfo->head->result == 0 && req->r_locked_dir)
-			ceph_invalidate_dir_request(req);
-		return 0;
-	}
-
-	if (rinfo->head->is_dentry) {
-		struct inode *dir = req->r_locked_dir;
-
-		err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag,
-				 session, req->r_request_started, -1,
-				 &req->r_caps_reservation);
-		if (err < 0)
-			return err;
-	}
-
-	/*
-	 * ignore null lease/binding on snapdir ENOENT, or else we
-	 * will have trouble splicing in the virtual snapdir later
-	 */
-	if (rinfo->head->is_dentry && !req->r_aborted &&
-	    (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
-					       fsc->mount_options->snapdir_name,
-					       req->r_dentry->d_name.len))) {
-		/*
-		 * lookup link rename   : null -> possibly existing inode
-		 * mknod symlink mkdir  : null -> new inode
-		 * unlink               : linked -> null
-		 */
-		struct inode *dir = req->r_locked_dir;
-		struct dentry *dn = req->r_dentry;
-		bool have_dir_cap, have_lease;
-
-		BUG_ON(!dn);
-		BUG_ON(!dir);
-		BUG_ON(dn->d_parent->d_inode != dir);
-		BUG_ON(ceph_ino(dir) !=
-		       le64_to_cpu(rinfo->diri.in->ino));
-		BUG_ON(ceph_snap(dir) !=
-		       le64_to_cpu(rinfo->diri.in->snapid));
-
-		/* do we have a lease on the whole dir? */
-		have_dir_cap =
-			(le32_to_cpu(rinfo->diri.in->cap.caps) &
-			 CEPH_CAP_FILE_SHARED);
-
-		/* do we have a dn lease? */
-		have_lease = have_dir_cap ||
-			le32_to_cpu(rinfo->dlease->duration_ms);
-		if (!have_lease)
-			dout("fill_trace  no dentry lease or dir cap\n");
-
-		/* rename? */
-		if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) {
-			dout(" src %p '%.*s' dst %p '%.*s'\n",
-			     req->r_old_dentry,
-			     req->r_old_dentry->d_name.len,
-			     req->r_old_dentry->d_name.name,
-			     dn, dn->d_name.len, dn->d_name.name);
-			dout("fill_trace doing d_move %p -> %p\n",
-			     req->r_old_dentry, dn);
-
-			d_move(req->r_old_dentry, dn);
-			dout(" src %p '%.*s' dst %p '%.*s'\n",
-			     req->r_old_dentry,
-			     req->r_old_dentry->d_name.len,
-			     req->r_old_dentry->d_name.name,
-			     dn, dn->d_name.len, dn->d_name.name);
-
-			/* ensure target dentry is invalidated, despite
-			   rehashing bug in vfs_rename_dir */
-			ceph_invalidate_dentry_lease(dn);
-
-			/*
-			 * d_move() puts the renamed dentry at the end of
-			 * d_subdirs.  We need to assign it an appropriate
-			 * directory offset so we can behave when holding
-			 * D_COMPLETE.
-			 */
-			ceph_set_dentry_offset(req->r_old_dentry);
-			dout("dn %p gets new offset %lld\n", req->r_old_dentry, 
-			     ceph_dentry(req->r_old_dentry)->offset);
-
-			dn = req->r_old_dentry;  /* use old_dentry */
-			in = dn->d_inode;
-		}
-
-		/* null dentry?  (reply carries a dentry but no target inode) */
-		if (!rinfo->head->is_target) {
-			dout("fill_trace null dentry\n");
-			if (dn->d_inode) {
-				dout("d_delete %p\n", dn);
-				d_delete(dn);
-			} else {
-				dout("d_instantiate %p NULL\n", dn);
-				d_instantiate(dn, NULL);
-				if (have_lease && d_unhashed(dn))
-					d_rehash(dn);
-				update_dentry_lease(dn, rinfo->dlease,
-						    session,
-						    req->r_request_started);
-			}
-			goto done;
-		}
-
-		/* attach proper inode */
-		ininfo = rinfo->targeti.in;
-		vino.ino = le64_to_cpu(ininfo->ino);
-		vino.snap = le64_to_cpu(ininfo->snapid);
-		in = dn->d_inode;
-		if (!in) {
-			in = ceph_get_inode(sb, vino);
-			if (IS_ERR(in)) {
-				pr_err("fill_trace bad get_inode "
-				       "%llx.%llx\n", vino.ino, vino.snap);
-				err = PTR_ERR(in);
-				d_delete(dn);
-				goto done;
-			}
-			dn = splice_dentry(dn, in, &have_lease, true);
-			if (IS_ERR(dn)) {
-				err = PTR_ERR(dn);
-				goto done;
-			}
-			req->r_dentry = dn;  /* may have spliced */
-			ihold(in);
-		} else if (ceph_ino(in) == vino.ino &&
-			   ceph_snap(in) == vino.snap) {
-			ihold(in);
-		} else {
-			dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
-			     dn, in, ceph_ino(in), ceph_snap(in),
-			     vino.ino, vino.snap);
-			have_lease = false;
-			in = NULL;
-		}
-
-		if (have_lease)
-			update_dentry_lease(dn, rinfo->dlease, session,
-					    req->r_request_started);
-		dout(" final dn %p\n", dn);
-		i++;
-	} else if (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
-		   req->r_op == CEPH_MDS_OP_MKSNAP) {
-		struct dentry *dn = req->r_dentry;
-
-		/* fill out a snapdir LOOKUPSNAP dentry */
-		BUG_ON(!dn);
-		BUG_ON(!req->r_locked_dir);
-		BUG_ON(ceph_snap(req->r_locked_dir) != CEPH_SNAPDIR);
-		ininfo = rinfo->targeti.in;
-		vino.ino = le64_to_cpu(ininfo->ino);
-		vino.snap = le64_to_cpu(ininfo->snapid);
-		in = ceph_get_inode(sb, vino);
-		if (IS_ERR(in)) {
-			pr_err("fill_inode get_inode badness %llx.%llx\n",
-			       vino.ino, vino.snap);
-			err = PTR_ERR(in);
-			d_delete(dn);
-			goto done;
-		}
-		dout(" linking snapped dir %p to dn %p\n", in, dn);
-		dn = splice_dentry(dn, in, NULL, true);
-		if (IS_ERR(dn)) {
-			err = PTR_ERR(dn);
-			goto done;
-		}
-		req->r_dentry = dn;  /* may have spliced */
-		ihold(in);
-		rinfo->head->is_dentry = 1;  /* fool notrace handlers */
-	}
-
-	if (rinfo->head->is_target) {
-		vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
-		vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
-
-		if (in == NULL || ceph_ino(in) != vino.ino ||
-		    ceph_snap(in) != vino.snap) {
-			in = ceph_get_inode(sb, vino);
-			if (IS_ERR(in)) {
-				err = PTR_ERR(in);
-				goto done;
-			}
-		}
-		req->r_target_inode = in;
-
-		err = fill_inode(in,
-				 &rinfo->targeti, NULL,
-				 session, req->r_request_started,
-				 (le32_to_cpu(rinfo->head->result) == 0) ?
-				 req->r_fmode : -1,
-				 &req->r_caps_reservation);
-		if (err < 0) {
-			pr_err("fill_inode badness %p %llx.%llx\n",
-			       in, ceph_vinop(in));
-			goto done;
-		}
-	}
-
-done:
-	dout("fill_trace done err=%d\n", err);
-	return err;
-}
-
-/*
- * Prepopulate our cache with readdir results, leases, etc.
- *
- * For each of the rinfo->dir_nr entries in the reply: find or
- * allocate the dentry under the parent, record its readdir position
- * in the dentry info, attach the inode (splicing when the dentry is
- * still negative), fill the inode from the reply and update the
- * dentry lease.  For an LSSNAP reply the entries go under an alias of
- * the snapdir inode rather than under req->r_dentry.
- *
- * @req:     readdir request whose parsed reply is consumed
- * @session: MDS session handed on to fill_inode()
- *
- * Returns 0, or a negative errno when a dentry or inode could not be
- * set up (processing stops at that entry).  A fill_inode() failure is
- * only logged and the entry skipped.  req->r_did_prepopulate is set
- * once the loop has run to completion.
- *
- * NOTE(review): the d_find_alias() result is used as parent without a
- * NULL check, and after splice_dentry() fails 'in' is still passed to
- * fill_inode() -- confirm both are safe against the helpers' contracts.
- */
-int ceph_readdir_prepopulate(struct ceph_mds_request *req,
-			     struct ceph_mds_session *session)
-{
-	struct dentry *parent = req->r_dentry;
-	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
-	struct qstr dname;
-	struct dentry *dn;
-	struct inode *in;
-	int err = 0, i;
-	struct inode *snapdir = NULL;
-	struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
-	u64 frag = le32_to_cpu(rhead->args.readdir.frag);
-	struct ceph_dentry_info *di;
-
-	if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) {
-		snapdir = ceph_get_snapdir(parent->d_inode);
-		parent = d_find_alias(snapdir);
-		dout("readdir_prepopulate %d items under SNAPDIR dn %p\n",
-		     rinfo->dir_nr, parent);
-	} else {
-		dout("readdir_prepopulate %d items under dn %p\n",
-		     rinfo->dir_nr, parent);
-		if (rinfo->dir_dir)
-			ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir);
-	}
-
-	for (i = 0; i < rinfo->dir_nr; i++) {
-		struct ceph_vino vino;
-
-		dname.name = rinfo->dir_dname[i];
-		dname.len = rinfo->dir_dname_len[i];
-		dname.hash = full_name_hash(dname.name, dname.len);
-
-		vino.ino = le64_to_cpu(rinfo->dir_in[i].in->ino);
-		vino.snap = le64_to_cpu(rinfo->dir_in[i].in->snapid);
-
-retry_lookup:
-		dn = d_lookup(parent, &dname);
-		dout("d_lookup on parent=%p name=%.*s got %p\n",
-		     parent, dname.len, dname.name, dn);
-
-		if (!dn) {
-			dn = d_alloc(parent, &dname);
-			dout("d_alloc %p '%.*s' = %p\n", parent,
-			     dname.len, dname.name, dn);
-			if (dn == NULL) {
-				dout("d_alloc badness\n");
-				err = -ENOMEM;
-				goto out;
-			}
-			err = ceph_init_dentry(dn);
-			if (err < 0) {
-				dput(dn);
-				goto out;
-			}
-		} else if (dn->d_inode &&
-			   (ceph_ino(dn->d_inode) != vino.ino ||
-			    ceph_snap(dn->d_inode) != vino.snap)) {
-			dout(" dn %p points to wrong inode %p\n",
-			     dn, dn->d_inode);
-			d_delete(dn);
-			dput(dn);
-			goto retry_lookup;
-		} else {
-			/* reorder parent's d_subdirs */
-			spin_lock(&parent->d_lock);
-			spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
-			list_move(&dn->d_u.d_child, &parent->d_subdirs);
-			spin_unlock(&dn->d_lock);
-			spin_unlock(&parent->d_lock);
-		}
-
-		di = dn->d_fsdata;
-		di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset);
-
-		/* inode */
-		if (dn->d_inode) {
-			in = dn->d_inode;
-		} else {
-			in = ceph_get_inode(parent->d_sb, vino);
-			if (IS_ERR(in)) {
-				dout("new_inode badness\n");
-				d_delete(dn);
-				dput(dn);
-				err = PTR_ERR(in);
-				goto out;
-			}
-			dn = splice_dentry(dn, in, NULL, false);
-			if (IS_ERR(dn))
-				dn = NULL;
-		}
-
-		if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
-			       req->r_request_started, -1,
-			       &req->r_caps_reservation) < 0) {
-			pr_err("fill_inode badness on %p\n", in);
-			goto next_item;
-		}
-		if (dn)
-			update_dentry_lease(dn, rinfo->dir_dlease[i],
-					    req->r_session,
-					    req->r_request_started);
-next_item:
-		if (dn)
-			dput(dn);
-	}
-	req->r_did_prepopulate = true;
-
-out:
-	if (snapdir) {
-		iput(snapdir);
-		dput(parent);
-	}
-	dout("readdir_prepopulate done\n");
-	return err;
-}
-
-/*
- * Set the in-core size of @inode to @size and recompute i_blocks in
- * 512-byte units, all under i_ceph_lock.
- *
- * Returns 1 when twice the new size reaches i_max_size while twice the
- * last reported size did not (i.e. the MDS should be told we are
- * approaching max_size), 0 otherwise.
- */
-int ceph_inode_set_size(struct inode *inode, loff_t size)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int near_max;
-
-	spin_lock(&ci->i_ceph_lock);
-
-	dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
-	inode->i_size = size;
-	inode->i_blocks = (size + (1 << 9) - 1) >> 9;
-
-	/* crossing the half-way mark towards max_size? */
-	near_max = (size << 1) >= ci->i_max_size &&
-		   (ci->i_reported_size << 1) < ci->i_max_size;
-
-	spin_unlock(&ci->i_ceph_lock);
-
-	return near_max;
-}
-
-/*
- * Hand writeback of @inode's data to the wb_wq worker.  (This can't
- * be done in the message handler context.)
- *
- * An inode reference is taken for the work item; it is dropped here
- * again if queue_work() reports that nothing was queued.
- */
-void ceph_queue_writeback(struct inode *inode)
-{
-	ihold(inode);
-
-	if (!queue_work(ceph_inode_to_client(inode)->wb_wq,
-			&ceph_inode(inode)->i_wb_work)) {
-		dout("ceph_queue_writeback %p failed\n", inode);
-		iput(inode);
-		return;
-	}
-
-	dout("ceph_queue_writeback %p\n", inode);
-}
-
-/*
- * Work function for i_wb_work: start writeback on the inode's page
- * cache, then put the inode reference.
- */
-static void ceph_writeback_work(struct work_struct *work)
-{
-	struct ceph_inode_info *ci;
-	struct inode *inode;
-
-	ci = container_of(work, struct ceph_inode_info, i_wb_work);
-	inode = &ci->vfs_inode;
-
-	dout("writeback %p\n", inode);
-	filemap_fdatawrite(&inode->i_data);
-
-	iput(inode);
-}
-
-/*
- * Queue an asynchronous page-cache invalidation for @inode on the
- * pg_inv_wq worker.
- *
- * An inode reference is taken for the work item; it is dropped here
- * again if queue_work() reports that nothing was queued.
- */
-void ceph_queue_invalidate(struct inode *inode)
-{
-	ihold(inode);
-
-	if (!queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
-			&ceph_inode(inode)->i_pg_inv_work)) {
-		dout("ceph_queue_invalidate %p failed\n", inode);
-		iput(inode);
-		return;
-	}
-
-	dout("ceph_queue_invalidate %p\n", inode);
-}
-
-/*
- * Work function for i_pg_inv_work: drop the inode's cached pages.
- * (This can't be done in the message handler context.)
- *
- * The truncate only runs if i_rdcache_revoking still equals
- * i_rdcache_gen.  Afterwards, if neither value moved in the meantime,
- * i_rdcache_revoking is decremented and caps are rechecked; otherwise
- * the race is only logged.  The inode reference is put in every case.
- */
-static void ceph_invalidate_work(struct work_struct *work)
-{
-	struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
-						  i_pg_inv_work);
-	struct inode *inode = &ci->vfs_inode;
-	u32 gen_at_start;
-	bool still_wanted;
-	bool revoked = false;
-
-	spin_lock(&ci->i_ceph_lock);
-	dout("invalidate_pages %p gen %d revoking %d\n", inode,
-	     ci->i_rdcache_gen, ci->i_rdcache_revoking);
-	still_wanted = (ci->i_rdcache_revoking == ci->i_rdcache_gen);
-	gen_at_start = ci->i_rdcache_gen;
-	spin_unlock(&ci->i_ceph_lock);
-
-	if (still_wanted) {
-		truncate_inode_pages(&inode->i_data, 0);
-
-		spin_lock(&ci->i_ceph_lock);
-		if (gen_at_start != ci->i_rdcache_gen ||
-		    gen_at_start != ci->i_rdcache_revoking) {
-			dout("invalidate_pages %p gen %d raced, now %d revoking %d\n",
-			     inode, gen_at_start, ci->i_rdcache_gen,
-			     ci->i_rdcache_revoking);
-		} else {
-			dout("invalidate_pages %p gen %d successful\n", inode,
-			     ci->i_rdcache_gen);
-			ci->i_rdcache_revoking--;
-			revoked = true;
-		}
-		spin_unlock(&ci->i_ceph_lock);
-
-		if (revoked)
-			ceph_check_caps(ci, 0, NULL);
-	}
-	/* else: nevermind, generation no longer matches */
-
-	iput(inode);
-}
-
-
-/*
- * Work function for i_vmtruncate_work (called by trunc_wq).
- *
- * Takes i_mutex itself, applies any pending truncation, and puts the
- * inode reference afterwards.
- */
-static void ceph_vmtruncate_work(struct work_struct *work)
-{
-	struct ceph_inode_info *ci;
-	struct inode *inode;
-
-	ci = container_of(work, struct ceph_inode_info, i_vmtruncate_work);
-	inode = &ci->vfs_inode;
-
-	dout("vmtruncate_work %p\n", inode);
-
-	mutex_lock(&inode->i_mutex);
-	__ceph_do_pending_vmtruncate(inode);
-	mutex_unlock(&inode->i_mutex);
-
-	iput(inode);
-}
-
-/*
- * Queue an asynchronous vmtruncate of @inode on trunc_wq.
- *
- * An inode reference is taken for the work item.  If queue_work()
- * reports that nothing was queued the reference is dropped again; the
- * truncation is then handled the next time
- * __ceph_do_pending_vmtruncate is called.
- */
-void ceph_queue_vmtruncate(struct inode *inode)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-
-	ihold(inode);
-
-	if (!queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
-			&ci->i_vmtruncate_work)) {
-		dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
-		     inode, ci->i_truncate_pending);
-		iput(inode);
-		return;
-	}
-
-	dout("ceph_queue_vmtruncate %p\n", inode);
-}
-
-/*
- * Apply one pending truncation to @inode's page cache, if any.
- *
- * called with i_mutex held.
- *
- * Use this before doing anything that may depend on a truncation
- * having been applied.  Returns immediately when i_truncate_pending
- * is zero.  While i_wrbuffer_ref_head is below i_wrbuffer_ref the
- * whole file range is written back and waited on first, then the
- * state is rechecked.  After truncating to i_truncate_size the
- * pending count is decremented; waiters on i_cap_wq are woken once it
- * reaches zero.
- */
-void __ceph_do_pending_vmtruncate(struct inode *inode)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	u64 new_size;
-	int refs_at_trunc;
-	int last_pending;
-
-	/* leave this loop by returning, or by break with the lock held */
-	for (;;) {
-		spin_lock(&ci->i_ceph_lock);
-		if (ci->i_truncate_pending == 0) {
-			dout("__do_pending_vmtruncate %p none pending\n", inode);
-			spin_unlock(&ci->i_ceph_lock);
-			return;
-		}
-
-		if (ci->i_wrbuffer_ref_head >= ci->i_wrbuffer_ref)
-			break;
-
-		/*
-		 * make sure any dirty snapped pages are flushed before we
-		 * possibly truncate them.. so write AND block!
-		 */
-		dout("__do_pending_vmtruncate %p flushing snaps first\n",
-		     inode);
-		spin_unlock(&ci->i_ceph_lock);
-		filemap_write_and_wait_range(&inode->i_data, 0,
-					     inode->i_sb->s_maxbytes);
-	}
-
-	new_size = ci->i_truncate_size;
-	refs_at_trunc = ci->i_wrbuffer_ref;
-	dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode,
-	     ci->i_truncate_pending, new_size);
-	spin_unlock(&ci->i_ceph_lock);
-
-	truncate_inode_pages(inode->i_mapping, new_size);
-
-	spin_lock(&ci->i_ceph_lock);
-	last_pending = (--ci->i_truncate_pending == 0);
-	spin_unlock(&ci->i_ceph_lock);
-
-	if (refs_at_trunc == 0)
-		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
-	if (last_pending)
-		wake_up_all(&ci->i_cap_wq);
-}
-
-
-/*
- * symlinks
- *
- * follow_link hands the string stored in the inode's i_symlink to the
- * VFS through nd_set_link() and returns NULL.
- */
-static void *ceph_sym_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-	struct inode *inode = dentry->d_inode;
-
-	nd_set_link(nd, ceph_inode(inode)->i_symlink);
-	return NULL;
-}
-
-static const struct inode_operations ceph_symlink_iops = {
-	.follow_link = ceph_sym_follow_link,
-	.readlink = generic_readlink,
-};
-
-/*
- * setattr
- *
- * Apply the attribute changes in @attr to @dentry's inode.
- *
- * For each requested attribute the change is either made locally and
- * the matching cap recorded as dirtied (when the issued caps allow
- * it), or the new value is placed in a CEPH_MDS_OP_SETATTR request
- * and the attribute's bit is set in the request mask.  The request is
- * only sent if the mask ends up non-zero.  Pending vmtruncates are
- * applied before and after.
- *
- * Returns 0 or a negative errno: -EROFS when the inode's snap is not
- * CEPH_NOSNAP, the inode_change_ok() result, the error from creating
- * the request, -EINVAL for a size beyond s_maxbytes, or the result of
- * the MDS request.
- */
-int ceph_setattr(struct dentry *dentry, struct iattr *attr)
-{
-	struct inode *inode = dentry->d_inode;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct inode *parent_inode;
-	const unsigned int ia_valid = attr->ia_valid;
-	struct ceph_mds_request *req;
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
-	int issued;
-	int release = 0, dirtied = 0;
-	int mask = 0;
-	int err = 0;
-	int inode_dirty_flags = 0;
-
-	if (ceph_snap(inode) != CEPH_NOSNAP)
-		return -EROFS;
-
-	__ceph_do_pending_vmtruncate(inode);
-
-	err = inode_change_ok(inode, attr);
-	if (err != 0)
-		return err;
-
-	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETATTR,
-				       USE_AUTH_MDS);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
-	spin_lock(&ci->i_ceph_lock);
-	issued = __ceph_caps_issued(ci, NULL);
-	dout("setattr %p issued %s\n", inode, ceph_cap_string(issued));
-
-	if (ia_valid & ATTR_UID) {
-		dout("setattr %p uid %d -> %d\n", inode,
-		     inode->i_uid, attr->ia_uid);
-		if (issued & CEPH_CAP_AUTH_EXCL) {
-			inode->i_uid = attr->ia_uid;
-			dirtied |= CEPH_CAP_AUTH_EXCL;
-		} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
-			   attr->ia_uid != inode->i_uid) {
-			req->r_args.setattr.uid = cpu_to_le32(attr->ia_uid);
-			mask |= CEPH_SETATTR_UID;
-			release |= CEPH_CAP_AUTH_SHARED;
-		}
-	}
-	if (ia_valid & ATTR_GID) {
-		dout("setattr %p gid %d -> %d\n", inode,
-		     inode->i_gid, attr->ia_gid);
-		if (issued & CEPH_CAP_AUTH_EXCL) {
-			inode->i_gid = attr->ia_gid;
-			dirtied |= CEPH_CAP_AUTH_EXCL;
-		} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
-			   attr->ia_gid != inode->i_gid) {
-			req->r_args.setattr.gid = cpu_to_le32(attr->ia_gid);
-			mask |= CEPH_SETATTR_GID;
-			release |= CEPH_CAP_AUTH_SHARED;
-		}
-	}
-	if (ia_valid & ATTR_MODE) {
-		dout("setattr %p mode 0%o -> 0%o\n", inode, inode->i_mode,
-		     attr->ia_mode);
-		if (issued & CEPH_CAP_AUTH_EXCL) {
-			inode->i_mode = attr->ia_mode;
-			dirtied |= CEPH_CAP_AUTH_EXCL;
-		} else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
-			   attr->ia_mode != inode->i_mode) {
-			req->r_args.setattr.mode = cpu_to_le32(attr->ia_mode);
-			mask |= CEPH_SETATTR_MODE;
-			release |= CEPH_CAP_AUTH_SHARED;
-		}
-	}
-
-	if (ia_valid & ATTR_ATIME) {
-		dout("setattr %p atime %ld.%ld -> %ld.%ld\n", inode,
-		     inode->i_atime.tv_sec, inode->i_atime.tv_nsec,
-		     attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec);
-		if (issued & CEPH_CAP_FILE_EXCL) {
-			ci->i_time_warp_seq++;
-			inode->i_atime = attr->ia_atime;
-			dirtied |= CEPH_CAP_FILE_EXCL;
-		} else if ((issued & CEPH_CAP_FILE_WR) &&
-			   timespec_compare(&inode->i_atime,
-					    &attr->ia_atime) < 0) {
-			inode->i_atime = attr->ia_atime;
-			dirtied |= CEPH_CAP_FILE_WR;
-		} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
-			   !timespec_equal(&inode->i_atime, &attr->ia_atime)) {
-			ceph_encode_timespec(&req->r_args.setattr.atime,
-					     &attr->ia_atime);
-			mask |= CEPH_SETATTR_ATIME;
-			release |= CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD |
-				CEPH_CAP_FILE_WR;
-		}
-	}
-	if (ia_valid & ATTR_MTIME) {
-		dout("setattr %p mtime %ld.%ld -> %ld.%ld\n", inode,
-		     inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec,
-		     attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec);
-		if (issued & CEPH_CAP_FILE_EXCL) {
-			ci->i_time_warp_seq++;
-			inode->i_mtime = attr->ia_mtime;
-			dirtied |= CEPH_CAP_FILE_EXCL;
-		} else if ((issued & CEPH_CAP_FILE_WR) &&
-			   timespec_compare(&inode->i_mtime,
-					    &attr->ia_mtime) < 0) {
-			inode->i_mtime = attr->ia_mtime;
-			dirtied |= CEPH_CAP_FILE_WR;
-		} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
-			   !timespec_equal(&inode->i_mtime, &attr->ia_mtime)) {
-			ceph_encode_timespec(&req->r_args.setattr.mtime,
-					     &attr->ia_mtime);
-			mask |= CEPH_SETATTR_MTIME;
-			release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD |
-				CEPH_CAP_FILE_WR;
-		}
-	}
-	if (ia_valid & ATTR_SIZE) {
-		dout("setattr %p size %lld -> %lld\n", inode,
-		     inode->i_size, attr->ia_size);
-		if (attr->ia_size > inode->i_sb->s_maxbytes) {
-			err = -EINVAL;
-			goto out;
-		}
-		if ((issued & CEPH_CAP_FILE_EXCL) &&
-		    attr->ia_size > inode->i_size) {
-			inode->i_size = attr->ia_size;
-			inode->i_blocks =
-				(attr->ia_size + (1 << 9) - 1) >> 9;
-			inode->i_ctime = attr->ia_ctime;
-			ci->i_reported_size = attr->ia_size;
-			dirtied |= CEPH_CAP_FILE_EXCL;
-		} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
-			   attr->ia_size != inode->i_size) {
-			req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
-			req->r_args.setattr.old_size =
-				cpu_to_le64(inode->i_size);
-			mask |= CEPH_SETATTR_SIZE;
-			release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_RD |
-				CEPH_CAP_FILE_WR;
-		}
-	}
-
-	/* these do nothing */
-	if (ia_valid & ATTR_CTIME) {
-		bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME|
-					 ATTR_MODE|ATTR_UID|ATTR_GID)) == 0;
-		dout("setattr %p ctime %ld.%ld -> %ld.%ld (%s)\n", inode,
-		     inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
-		     attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
-		     only ? "ctime only" : "ignored");
-		inode->i_ctime = attr->ia_ctime;
-		if (only) {
-			/*
-			 * if kernel wants to dirty ctime but nothing else,
-			 * we need to choose a cap to dirty under, or do
-			 * an almost-no-op setattr
-			 */
-			if (issued & CEPH_CAP_AUTH_EXCL)
-				dirtied |= CEPH_CAP_AUTH_EXCL;
-			else if (issued & CEPH_CAP_FILE_EXCL)
-				dirtied |= CEPH_CAP_FILE_EXCL;
-			else if (issued & CEPH_CAP_XATTR_EXCL)
-				dirtied |= CEPH_CAP_XATTR_EXCL;
-			else
-				mask |= CEPH_SETATTR_CTIME;
-		}
-	}
-	if (ia_valid & ATTR_FILE)
-		dout("setattr %p ATTR_FILE ... hrm!\n", inode);
-
-	if (dirtied) {
-		inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied);
-		inode->i_ctime = CURRENT_TIME;
-	}
-
-	release &= issued;
-	spin_unlock(&ci->i_ceph_lock);
-
-	if (inode_dirty_flags)
-		__mark_inode_dirty(inode, inode_dirty_flags);
-
-	if (mask) {
-		req->r_inode = inode;
-		ihold(inode);
-		req->r_inode_drop = release;
-		req->r_args.setattr.mask = cpu_to_le32(mask);
-		req->r_num_caps = 1;
-		parent_inode = ceph_get_dentry_parent_inode(dentry);
-		err = ceph_mdsc_do_request(mdsc, parent_inode, req);
-		iput(parent_inode);
-	}
-	dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
-	     ceph_cap_string(dirtied), mask);
-
-	ceph_mdsc_put_request(req);
-	__ceph_do_pending_vmtruncate(inode);
-	return err;
-out:
-	spin_unlock(&ci->i_ceph_lock);
-	ceph_mdsc_put_request(req);
-	return err;
-}
-
-/*
- * Make sure the caps in @mask are held for @inode, asking an MDS via
- * a GETATTR request when they are not already issued.
- *
- * Returns 0 straight away for a CEPH_SNAPDIR inode or when
- * ceph_caps_issued_mask() says the mask is covered; otherwise the
- * error from creating the request or the result of the request.
- */
-int ceph_do_getattr(struct inode *inode, int mask)
-{
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
-	struct ceph_mds_request *req;
-	int ret;
-
-	if (ceph_snap(inode) == CEPH_SNAPDIR) {
-		dout("do_getattr inode %p SNAPDIR\n", inode);
-		return 0;
-	}
-
-	dout("do_getattr inode %p mask %s mode 0%o\n", inode,
-	     ceph_cap_string(mask), inode->i_mode);
-	if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
-		return 0;
-
-	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
-	/* the request holds its own reference on the inode */
-	ihold(inode);
-	req->r_inode = inode;
-	req->r_num_caps = 1;
-	req->r_args.getattr.mask = cpu_to_le32(mask);
-
-	ret = ceph_mdsc_do_request(mdsc, NULL, req);
-	ceph_mdsc_put_request(req);
-
-	dout("do_getattr result=%d\n", ret);
-	return ret;
-}
-
-
-/*
- * Permission check for @inode.
- *
- * Refuses with -ECHILD when MAY_NOT_BLOCK is set in @mask.  Otherwise
- * ceph_do_getattr() is called for CEPH_CAP_AUTH_SHARED and, if that
- * succeeds, the decision is left to generic_permission().
- */
-int ceph_permission(struct inode *inode, int mask)
-{
-	int err;
-
-	if (mask & MAY_NOT_BLOCK)
-		return -ECHILD;
-
-	err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
-	if (err)
-		return err;
-
-	return generic_permission(inode, mask);
-}
-
-/*
- * Get all attributes.  Hopefully someday we'll have a statlite()
- * and can limit the fields we require to be accurate.
- *
- * After a successful ceph_do_getattr() the generic attributes are
- * filled in and then adjusted: ino goes through ceph_translate_ino(),
- * dev is the snap id for anything other than CEPH_NOSNAP (else 0),
- * and for directories size is either i_rbytes (RBYTES mount option)
- * or the number of files plus subdirs, with blocks 0 and a 64k
- * blksize.
- *
- * Returns 0 or the error from ceph_do_getattr().
- */
-int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
-		 struct kstat *stat)
-{
-	struct inode *inode = dentry->d_inode;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int err;
-
-	err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL);
-	if (err)
-		return err;
-
-	generic_fillattr(inode, stat);
-	stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
-
-	if (ceph_snap(inode) == CEPH_NOSNAP)
-		stat->dev = 0;
-	else
-		stat->dev = ceph_snap(inode);
-
-	if (!S_ISDIR(inode->i_mode))
-		return 0;
-
-	if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES))
-		stat->size = ci->i_rbytes;
-	else
-		stat->size = ci->i_files + ci->i_subdirs;
-	stat->blocks = 0;
-	stat->blksize = 65536;
-
-	return 0;
-}
diff --git a/ANDROID_3.4.5/fs/ceph/ioctl.c b/ANDROID_3.4.5/fs/ceph/ioctl.c
deleted file mode 100644
index 790914a5..00000000
--- a/ANDROID_3.4.5/fs/ceph/ioctl.c
+++ /dev/null
@@ -1,290 +0,0 @@
-#include <linux/in.h>
-
-#include "super.h"
-#include "mds_client.h"
-#include <linux/ceph/ceph_debug.h>
-
-#include "ioctl.h"
-
-
-/*
- * ioctls
- */
-
-/*
- * get and set the file layout
- */
-static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
-{
-	struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode);
-	struct ceph_ioctl_layout l;
-	int err;
-
-	err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT);
-	if (!err) {
-		l.stripe_unit = ceph_file_layout_su(ci->i_layout);
-		l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
-		l.object_size = ceph_file_layout_object_size(ci->i_layout);
-		l.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool);
-		l.preferred_osd =
-			(s32)le32_to_cpu(ci->i_layout.fl_pg_preferred);
-		if (copy_to_user(arg, &l, sizeof(l)))
-			return -EFAULT;
-	}
-
-	return err;
-}
-
-static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct inode *parent_inode;
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
-	struct ceph_mds_request *req;
-	struct ceph_ioctl_layout l;
-	struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode);
-	struct ceph_ioctl_layout nl;
-	int err, i;
-
-	if (copy_from_user(&l, arg, sizeof(l)))
-		return -EFAULT;
-
-	/* validate changed params against current layout */
-	err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT);
-	if (!err) {
-		nl.stripe_unit = ceph_file_layout_su(ci->i_layout);
-		nl.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
-		nl.object_size = ceph_file_layout_object_size(ci->i_layout);
-		nl.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool);
-		nl.preferred_osd =
-				(s32)le32_to_cpu(ci->i_layout.fl_pg_preferred);
-	} else
-		return err;
-
-	if (l.stripe_count)
-		nl.stripe_count = l.stripe_count;
-	if (l.stripe_unit)
-		nl.stripe_unit = l.stripe_unit;
-	if (l.object_size)
-		nl.object_size = l.object_size;
-	if (l.data_pool)
-		nl.data_pool = l.data_pool;
-	if (l.preferred_osd)
-		nl.preferred_osd = l.preferred_osd;
-
-	if ((nl.object_size & ~PAGE_MASK) ||
-	    (nl.stripe_unit & ~PAGE_MASK) ||
-	    ((unsigned)nl.object_size % (unsigned)nl.stripe_unit))
-		return -EINVAL;
-
-	/* make sure it's a valid data pool */
-	if (l.data_pool > 0) {
-		mutex_lock(&mdsc->mutex);
-		err = -EINVAL;
-		for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++)
-			if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) {
-				err = 0;
-				break;
-			}
-		mutex_unlock(&mdsc->mutex);
-		if (err)
-			return err;
-	}
-
-	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETLAYOUT,
-				       USE_AUTH_MDS);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-	req->r_inode = inode;
-	ihold(inode);
-	req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL;
-
-	req->r_args.setlayout.layout.fl_stripe_unit =
-		cpu_to_le32(l.stripe_unit);
-	req->r_args.setlayout.layout.fl_stripe_count =
-		cpu_to_le32(l.stripe_count);
-	req->r_args.setlayout.layout.fl_object_size =
-		cpu_to_le32(l.object_size);
-	req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool);
-	req->r_args.setlayout.layout.fl_pg_preferred =
-		cpu_to_le32(l.preferred_osd);
-
-	parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
-	err = ceph_mdsc_do_request(mdsc, parent_inode, req);
-	iput(parent_inode);
-	ceph_mdsc_put_request(req);
-	return err;
-}
-
-/*
- * Set a layout policy on a directory inode. All items in the tree
- * rooted at this inode will inherit this layout on creation,
- * (It doesn't apply retroactively )
- * unless a subdirectory has its own layout policy.
- */
-static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct ceph_mds_request *req;
-	struct ceph_ioctl_layout l;
-	int err, i;
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
-
-	/* copy and validate */
-	if (copy_from_user(&l, arg, sizeof(l)))
-		return -EFAULT;
-
-	if ((l.object_size & ~PAGE_MASK) ||
-	    (l.stripe_unit & ~PAGE_MASK) ||
-	    !l.stripe_unit ||
-	    (l.object_size &&
-	        (unsigned)l.object_size % (unsigned)l.stripe_unit))
-		return -EINVAL;
-
-	/* make sure it's a valid data pool */
-	if (l.data_pool > 0) {
-		mutex_lock(&mdsc->mutex);
-		err = -EINVAL;
-		for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++)
-			if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) {
-				err = 0;
-				break;
-			}
-		mutex_unlock(&mdsc->mutex);
-		if (err)
-			return err;
-	}
-
-	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETDIRLAYOUT,
-				       USE_AUTH_MDS);
-
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-	req->r_inode = inode;
-	ihold(inode);
-
-	req->r_args.setlayout.layout.fl_stripe_unit =
-			cpu_to_le32(l.stripe_unit);
-	req->r_args.setlayout.layout.fl_stripe_count =
-			cpu_to_le32(l.stripe_count);
-	req->r_args.setlayout.layout.fl_object_size =
-			cpu_to_le32(l.object_size);
-	req->r_args.setlayout.layout.fl_pg_pool =
-			cpu_to_le32(l.data_pool);
-	req->r_args.setlayout.layout.fl_pg_preferred =
-			cpu_to_le32(l.preferred_osd);
-
-	err = ceph_mdsc_do_request(mdsc, inode, req);
-	ceph_mdsc_put_request(req);
-	return err;
-}
-
-/*
- * Return object name, size/offset information, and location (OSD
- * number, network address) for a given file offset.
- */
-static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
-{
-	struct ceph_ioctl_dataloc dl;
-	struct inode *inode = file->f_dentry->d_inode;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_osd_client *osdc =
-		&ceph_sb_to_client(inode->i_sb)->client->osdc;
-	u64 len = 1, olen;
-	u64 tmp;
-	struct ceph_object_layout ol;
-	struct ceph_pg pgid;
-
-	/* copy and validate */
-	if (copy_from_user(&dl, arg, sizeof(dl)))
-		return -EFAULT;
-
-	down_read(&osdc->map_sem);
-	ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len,
-				      &dl.object_no, &dl.object_offset, &olen);
-	dl.file_offset -= dl.object_offset;
-	dl.object_size = ceph_file_layout_object_size(ci->i_layout);
-	dl.block_size = ceph_file_layout_su(ci->i_layout);
-
-	/* block_offset = object_offset % block_size */
-	tmp = dl.object_offset;
-	dl.block_offset = do_div(tmp, dl.block_size);
-
-	snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
-		 ceph_ino(inode), dl.object_no);
-	ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout,
-				osdc->osdmap);
-
-	pgid = ol.ol_pgid;
-	dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
-	if (dl.osd >= 0) {
-		struct ceph_entity_addr *a =
-			ceph_osd_addr(osdc->osdmap, dl.osd);
-		if (a)
-			memcpy(&dl.osd_addr, &a->in_addr, sizeof(dl.osd_addr));
-	} else {
-		memset(&dl.osd_addr, 0, sizeof(dl.osd_addr));
-	}
-	up_read(&osdc->map_sem);
-
-	/* send result back to user */
-	if (copy_to_user(arg, &dl, sizeof(dl)))
-		return -EFAULT;
-
-	return 0;
-}
-
-static long ceph_ioctl_lazyio(struct file *file)
-{
-	struct ceph_file_info *fi = file->private_data;
-	struct inode *inode = file->f_dentry->d_inode;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-
-	if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
-		spin_lock(&ci->i_ceph_lock);
-		ci->i_nr_by_mode[fi->fmode]--;
-		fi->fmode |= CEPH_FILE_MODE_LAZY;
-		ci->i_nr_by_mode[fi->fmode]++;
-		spin_unlock(&ci->i_ceph_lock);
-		dout("ioctl_layzio: file %p marked lazy\n", file);
-
-		ceph_check_caps(ci, 0, NULL);
-	} else {
-		dout("ioctl_layzio: file %p already lazy\n", file);
-	}
-	return 0;
-}
-
-static long ceph_ioctl_syncio(struct file *file)
-{
-	struct ceph_file_info *fi = file->private_data;
-
-	fi->flags |= CEPH_F_SYNC;
-	return 0;
-}
-
-long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg);
-	switch (cmd) {
-	case CEPH_IOC_GET_LAYOUT:
-		return ceph_ioctl_get_layout(file, (void __user *)arg);
-
-	case CEPH_IOC_SET_LAYOUT:
-		return ceph_ioctl_set_layout(file, (void __user *)arg);
-
-	case CEPH_IOC_SET_LAYOUT_POLICY:
-		return ceph_ioctl_set_layout_policy(file, (void __user *)arg);
-
-	case CEPH_IOC_GET_DATALOC:
-		return ceph_ioctl_get_dataloc(file, (void __user *)arg);
-
-	case CEPH_IOC_LAZYIO:
-		return ceph_ioctl_lazyio(file);
-
-	case CEPH_IOC_SYNCIO:
-		return ceph_ioctl_syncio(file);
-	}
-
-	return -ENOTTY;
-}
diff --git a/ANDROID_3.4.5/fs/ceph/ioctl.h b/ANDROID_3.4.5/fs/ceph/ioctl.h
deleted file mode 100644
index be4a6048..00000000
--- a/ANDROID_3.4.5/fs/ceph/ioctl.h
+++ /dev/null
@@ -1,98 +0,0 @@
-#ifndef FS_CEPH_IOCTL_H
-#define FS_CEPH_IOCTL_H
-
-#include <linux/ioctl.h>
-#include <linux/types.h>
-
-#define CEPH_IOCTL_MAGIC 0x97
-
-/*
- * CEPH_IOC_GET_LAYOUT - get file layout or dir layout policy
- * CEPH_IOC_SET_LAYOUT - set file layout
- * CEPH_IOC_SET_LAYOUT_POLICY - set dir layout policy
- *
- * The file layout specifies how file data is striped over objects in
- * the distributed object store, which object pool they belong to (if
- * it differs from the default), and an optional 'preferred osd' to
- * store them on.
- *
- * Files get a new layout based on the policy set on the containing
- * directory or one of its ancestors.  The GET_LAYOUT ioctl will let
- * you examine the layout for a file or the policy on a directory.
- *
- * SET_LAYOUT will let you set a layout on a newly created file.  This
- * only works immediately after the file is created and before any
- * data is written to it.
- *
- * SET_LAYOUT_POLICY will let you set a layout policy (default layout)
- * on a directory that will apply to any new files created in that
- * directory (or any child directory that doesn't specify a layout of
- * its own).
- */
-
-/* use u64 to align sanely on all archs */
-struct ceph_ioctl_layout {
-	__u64 stripe_unit, stripe_count, object_size;
-	__u64 data_pool;
-	__s64 preferred_osd;
-};
-
-#define CEPH_IOC_GET_LAYOUT _IOR(CEPH_IOCTL_MAGIC, 1,		\
-				   struct ceph_ioctl_layout)
-#define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2,		\
-				   struct ceph_ioctl_layout)
-#define CEPH_IOC_SET_LAYOUT_POLICY _IOW(CEPH_IOCTL_MAGIC, 5,	\
-				   struct ceph_ioctl_layout)
-
-/*
- * CEPH_IOC_GET_DATALOC - get location of file data in the cluster
- *
- * Extract identity, address of the OSD and object storing a given
- * file offset.
- */
-struct ceph_ioctl_dataloc {
-	__u64 file_offset;           /* in+out: file offset */
-	__u64 object_offset;         /* out: offset in object */
-	__u64 object_no;             /* out: object # */
-	__u64 object_size;           /* out: object size */
-	char object_name[64];        /* out: object name */
-	__u64 block_offset;          /* out: offset in block */
-	__u64 block_size;            /* out: block length */
-	__s64 osd;                   /* out: osd # */
-	struct sockaddr_storage osd_addr; /* out: osd address */
-};
-
-#define CEPH_IOC_GET_DATALOC _IOWR(CEPH_IOCTL_MAGIC, 3,	\
-				   struct ceph_ioctl_dataloc)
-
-/*
- * CEPH_IOC_LAZYIO - relax consistency
- *
- * Normally Ceph switches to synchronous IO when multiple clients have
- * the file open (and or more for write).  Reads and writes bypass the
- * page cache and go directly to the OSD.  Setting this flag on a file
- * descriptor will allow buffered IO for this file in cases where the
- * application knows it won't interfere with other nodes (or doesn't
- * care).
- */
-#define CEPH_IOC_LAZYIO _IO(CEPH_IOCTL_MAGIC, 4)
-
-/*
- * CEPH_IOC_SYNCIO - force synchronous IO
- *
- * This ioctl sets a file flag that forces the synchronous IO that
- * bypasses the page cache, even if it is not necessary.  This is
- * essentially the opposite behavior of IOC_LAZYIO.  This forces the
- * same read/write path as a file opened by multiple clients when one
- * or more of those clients is opened for write.
- *
- * Note that this type of sync IO takes a different path than a file
- * opened with O_SYNC/D_SYNC (writes hit the page cache and are
- * immediately flushed on page boundaries).  It is very similar to
- * O_DIRECT (writes bypass the page cache) excep that O_DIRECT writes
- * are not copied (user page must remain stable) and O_DIRECT writes
- * have alignment restrictions (on the buffer and file offset).
- */
-#define CEPH_IOC_SYNCIO _IO(CEPH_IOCTL_MAGIC, 5)
-
-#endif
diff --git a/ANDROID_3.4.5/fs/ceph/locks.c b/ANDROID_3.4.5/fs/ceph/locks.c
deleted file mode 100644
index 80576d05..00000000
--- a/ANDROID_3.4.5/fs/ceph/locks.c
+++ /dev/null
@@ -1,286 +0,0 @@
-#include <linux/ceph/ceph_debug.h>
-
-#include <linux/file.h>
-#include <linux/namei.h>
-
-#include "super.h"
-#include "mds_client.h"
-#include <linux/ceph/pagelist.h>
-
-/**
- * Implement fcntl and flock locking functions.
- */
-static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
-			     int cmd, u8 wait, struct file_lock *fl)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct ceph_mds_client *mdsc =
-		ceph_sb_to_client(inode->i_sb)->mdsc;
-	struct ceph_mds_request *req;
-	int err;
-	u64 length = 0;
-
-	req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-	req->r_inode = inode;
-	ihold(inode);
-
-	/* mds requires start and length rather than start and end */
-	if (LLONG_MAX == fl->fl_end)
-		length = 0;
-	else
-		length = fl->fl_end - fl->fl_start + 1;
-
-	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
-	     "length: %llu, wait: %d, type: %d", (int)lock_type,
-	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
-	     length, wait, fl->fl_type);
-
-	req->r_args.filelock_change.rule = lock_type;
-	req->r_args.filelock_change.type = cmd;
-	req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
-	/* This should be adjusted, but I'm not sure if
-	   namespaces actually get id numbers*/
-	req->r_args.filelock_change.pid_namespace =
-		cpu_to_le64((u64)(unsigned long)fl->fl_nspid);
-	req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
-	req->r_args.filelock_change.length = cpu_to_le64(length);
-	req->r_args.filelock_change.wait = wait;
-
-	err = ceph_mdsc_do_request(mdsc, inode, req);
-
-	if ( operation == CEPH_MDS_OP_GETFILELOCK){
-		fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
-		if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
-			fl->fl_type = F_RDLCK;
-		else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
-			fl->fl_type = F_WRLCK;
-		else
-			fl->fl_type = F_UNLCK;
-
-		fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
-		length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
-						 le64_to_cpu(req->r_reply_info.filelock_reply->length);
-		if (length >= 1)
-			fl->fl_end = length -1;
-		else
-			fl->fl_end = 0;
-
-	}
-	ceph_mdsc_put_request(req);
-	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
-	     "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type,
-	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
-	     length, wait, fl->fl_type, err);
-	return err;
-}
-
-/**
- * Attempt to set an fcntl lock.
- * For now, this just goes away to the server. Later it may be more awesome.
- */
-int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
-{
-	u8 lock_cmd;
-	int err;
-	u8 wait = 0;
-	u16 op = CEPH_MDS_OP_SETFILELOCK;
-
-	fl->fl_nspid = get_pid(task_tgid(current));
-	dout("ceph_lock, fl_pid:%d", fl->fl_pid);
-
-	/* set wait bit as appropriate, then make command as Ceph expects it*/
-	if (F_SETLKW == cmd)
-		wait = 1;
-	if (F_GETLK == cmd)
-		op = CEPH_MDS_OP_GETFILELOCK;
-
-	if (F_RDLCK == fl->fl_type)
-		lock_cmd = CEPH_LOCK_SHARED;
-	else if (F_WRLCK == fl->fl_type)
-		lock_cmd = CEPH_LOCK_EXCL;
-	else
-		lock_cmd = CEPH_LOCK_UNLOCK;
-
-	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
-	if (!err) {
-		if ( op != CEPH_MDS_OP_GETFILELOCK ){
-			dout("mds locked, locking locally");
-			err = posix_lock_file(file, fl, NULL);
-			if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
-				/* undo! This should only happen if
-				 * the kernel detects local
-				 * deadlock. */
-				ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
-						  CEPH_LOCK_UNLOCK, 0, fl);
-				dout("got %d on posix_lock_file, undid lock",
-				     err);
-			}
-		}
-
-	} else if (err == -ERESTARTSYS) {
-		dout("undoing lock\n");
-		ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
-				  CEPH_LOCK_UNLOCK, 0, fl);
-	}
-	return err;
-}
-
-int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
-{
-	u8 lock_cmd;
-	int err;
-	u8 wait = 1;
-
-	fl->fl_nspid = get_pid(task_tgid(current));
-	dout("ceph_flock, fl_pid:%d", fl->fl_pid);
-
-	/* set wait bit, then clear it out of cmd*/
-	if (cmd & LOCK_NB)
-		wait = 0;
-	cmd = cmd & (LOCK_SH | LOCK_EX | LOCK_UN);
-	/* set command sequence that Ceph wants to see:
-	   shared lock, exclusive lock, or unlock */
-	if (LOCK_SH == cmd)
-		lock_cmd = CEPH_LOCK_SHARED;
-	else if (LOCK_EX == cmd)
-		lock_cmd = CEPH_LOCK_EXCL;
-	else
-		lock_cmd = CEPH_LOCK_UNLOCK;
-
-	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
-				file, lock_cmd, wait, fl);
-	if (!err) {
-		err = flock_lock_file_wait(file, fl);
-		if (err) {
-			ceph_lock_message(CEPH_LOCK_FLOCK,
-					  CEPH_MDS_OP_SETFILELOCK,
-					  file, CEPH_LOCK_UNLOCK, 0, fl);
-			dout("got %d on flock_lock_file_wait, undid lock", err);
-		}
-	} else if (err == -ERESTARTSYS) {
-		dout("undoing lock\n");
-		ceph_lock_message(CEPH_LOCK_FLOCK,
-				  CEPH_MDS_OP_SETFILELOCK,
-				  file, CEPH_LOCK_UNLOCK, 0, fl);
-	}
-	return err;
-}
-
-/**
- * Must be called with BKL already held. Fills in the passed
- * counter variables, so you can prepare pagelist metadata before calling
- * ceph_encode_locks.
- */
-void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
-{
-	struct file_lock *lock;
-
-	*fcntl_count = 0;
-	*flock_count = 0;
-
-	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
-		if (lock->fl_flags & FL_POSIX)
-			++(*fcntl_count);
-		else if (lock->fl_flags & FL_FLOCK)
-			++(*flock_count);
-	}
-	dout("counted %d flock locks and %d fcntl locks",
-	     *flock_count, *fcntl_count);
-}
-
-/**
- * Encode the flock and fcntl locks for the given inode into the pagelist.
- * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
- * sequential flock locks.
- * Must be called with lock_flocks() already held.
- * If we encounter more of a specific lock type than expected,
- * we return the value 1.
- */
-int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
-		      int num_fcntl_locks, int num_flock_locks)
-{
-	struct file_lock *lock;
-	struct ceph_filelock cephlock;
-	int err = 0;
-	int seen_fcntl = 0;
-	int seen_flock = 0;
-
-	dout("encoding %d flock and %d fcntl locks", num_flock_locks,
-	     num_fcntl_locks);
-	err = ceph_pagelist_append(pagelist, &num_fcntl_locks, sizeof(u32));
-	if (err)
-		goto fail;
-	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
-		if (lock->fl_flags & FL_POSIX) {
-			++seen_fcntl;
-			if (seen_fcntl > num_fcntl_locks) {
-				err = -ENOSPC;
-				goto fail;
-			}
-			err = lock_to_ceph_filelock(lock, &cephlock);
-			if (err)
-				goto fail;
-			err = ceph_pagelist_append(pagelist, &cephlock,
-					   sizeof(struct ceph_filelock));
-		}
-		if (err)
-			goto fail;
-	}
-
-	err = ceph_pagelist_append(pagelist, &num_flock_locks, sizeof(u32));
-	if (err)
-		goto fail;
-	for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
-		if (lock->fl_flags & FL_FLOCK) {
-			++seen_flock;
-			if (seen_flock > num_flock_locks) {
-				err = -ENOSPC;
-				goto fail;
-			}
-			err = lock_to_ceph_filelock(lock, &cephlock);
-			if (err)
-				goto fail;
-			err = ceph_pagelist_append(pagelist, &cephlock,
-					   sizeof(struct ceph_filelock));
-		}
-		if (err)
-			goto fail;
-	}
-fail:
-	return err;
-}
-
-/*
- * Given a pointer to a lock, convert it to a ceph filelock
- */
-int lock_to_ceph_filelock(struct file_lock *lock,
-			  struct ceph_filelock *cephlock)
-{
-	int err = 0;
-
-	cephlock->start = cpu_to_le64(lock->fl_start);
-	cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
-	cephlock->client = cpu_to_le64(0);
-	cephlock->pid = cpu_to_le64(lock->fl_pid);
-	cephlock->pid_namespace =
-	        cpu_to_le64((u64)(unsigned long)lock->fl_nspid);
-
-	switch (lock->fl_type) {
-	case F_RDLCK:
-		cephlock->type = CEPH_LOCK_SHARED;
-		break;
-	case F_WRLCK:
-		cephlock->type = CEPH_LOCK_EXCL;
-		break;
-	case F_UNLCK:
-		cephlock->type = CEPH_LOCK_UNLOCK;
-		break;
-	default:
-		dout("Have unknown lock type %d", lock->fl_type);
-		err = -EINVAL;
-	}
-
-	return err;
-}
diff --git a/ANDROID_3.4.5/fs/ceph/mds_client.c b/ANDROID_3.4.5/fs/ceph/mds_client.c
deleted file mode 100644
index 89971e13..00000000
--- a/ANDROID_3.4.5/fs/ceph/mds_client.c
+++ /dev/null
@@ -1,3465 +0,0 @@
-#include <linux/ceph/ceph_debug.h>
-
-#include <linux/fs.h>
-#include <linux/wait.h>
-#include <linux/slab.h>
-#include <linux/sched.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-#include "super.h"
-#include "mds_client.h"
-
-#include <linux/ceph/messenger.h>
-#include <linux/ceph/decode.h>
-#include <linux/ceph/pagelist.h>
-#include <linux/ceph/auth.h>
-#include <linux/ceph/debugfs.h>
-
-/*
- * A cluster of MDS (metadata server) daemons is responsible for
- * managing the file system namespace (the directory hierarchy and
- * inodes) and for coordinating shared access to storage.  Metadata is
- * partitioning hierarchically across a number of servers, and that
- * partition varies over time as the cluster adjusts the distribution
- * in order to balance load.
- *
- * The MDS client is primarily responsible to managing synchronous
- * metadata requests for operations like open, unlink, and so forth.
- * If there is a MDS failure, we find out about it when we (possibly
- * request and) receive a new MDS map, and can resubmit affected
- * requests.
- *
- * For the most part, though, we take advantage of a lossless
- * communications channel to the MDS, and do not need to worry about
- * timing out or resubmitting requests.
- *
- * We maintain a stateful "session" with each MDS we interact with.
- * Within each session, we sent periodic heartbeat messages to ensure
- * any capabilities or leases we have been issues remain valid.  If
- * the session times out and goes stale, our leases and capabilities
- * are no longer valid.
- */
-
-struct ceph_reconnect_state {
-	struct ceph_pagelist *pagelist;
-	bool flock;
-};
-
-static void __wake_requests(struct ceph_mds_client *mdsc,
-			    struct list_head *head);
-
-static const struct ceph_connection_operations mds_con_ops;
-
-
-/*
- * mds reply parsing
- */
-
-/*
- * parse individual inode info
- */
-static int parse_reply_info_in(void **p, void *end,
-			       struct ceph_mds_reply_info_in *info,
-			       int features)
-{
-	int err = -EIO;
-
-	info->in = *p;
-	*p += sizeof(struct ceph_mds_reply_inode) +
-		sizeof(*info->in->fragtree.splits) *
-		le32_to_cpu(info->in->fragtree.nsplits);
-
-	ceph_decode_32_safe(p, end, info->symlink_len, bad);
-	ceph_decode_need(p, end, info->symlink_len, bad);
-	info->symlink = *p;
-	*p += info->symlink_len;
-
-	if (features & CEPH_FEATURE_DIRLAYOUTHASH)
-		ceph_decode_copy_safe(p, end, &info->dir_layout,
-				      sizeof(info->dir_layout), bad);
-	else
-		memset(&info->dir_layout, 0, sizeof(info->dir_layout));
-
-	ceph_decode_32_safe(p, end, info->xattr_len, bad);
-	ceph_decode_need(p, end, info->xattr_len, bad);
-	info->xattr_data = *p;
-	*p += info->xattr_len;
-	return 0;
-bad:
-	return err;
-}
-
-/*
- * parse a normal reply, which may contain a (dir+)dentry and/or a
- * target inode.
- */
-static int parse_reply_info_trace(void **p, void *end,
-				  struct ceph_mds_reply_info_parsed *info,
-				  int features)
-{
-	int err;
-
-	if (info->head->is_dentry) {
-		err = parse_reply_info_in(p, end, &info->diri, features);
-		if (err < 0)
-			goto out_bad;
-
-		if (unlikely(*p + sizeof(*info->dirfrag) > end))
-			goto bad;
-		info->dirfrag = *p;
-		*p += sizeof(*info->dirfrag) +
-			sizeof(u32)*le32_to_cpu(info->dirfrag->ndist);
-		if (unlikely(*p > end))
-			goto bad;
-
-		ceph_decode_32_safe(p, end, info->dname_len, bad);
-		ceph_decode_need(p, end, info->dname_len, bad);
-		info->dname = *p;
-		*p += info->dname_len;
-		info->dlease = *p;
-		*p += sizeof(*info->dlease);
-	}
-
-	if (info->head->is_target) {
-		err = parse_reply_info_in(p, end, &info->targeti, features);
-		if (err < 0)
-			goto out_bad;
-	}
-
-	if (unlikely(*p != end))
-		goto bad;
-	return 0;
-
-bad:
-	err = -EIO;
-out_bad:
-	pr_err("problem parsing mds trace %d\n", err);
-	return err;
-}
-
-/*
- * parse readdir results
- */
-static int parse_reply_info_dir(void **p, void *end,
-				struct ceph_mds_reply_info_parsed *info,
-				int features)
-{
-	u32 num, i = 0;
-	int err;
-
-	info->dir_dir = *p;
-	if (*p + sizeof(*info->dir_dir) > end)
-		goto bad;
-	*p += sizeof(*info->dir_dir) +
-		sizeof(u32)*le32_to_cpu(info->dir_dir->ndist);
-	if (*p > end)
-		goto bad;
-
-	ceph_decode_need(p, end, sizeof(num) + 2, bad);
-	num = ceph_decode_32(p);
-	info->dir_end = ceph_decode_8(p);
-	info->dir_complete = ceph_decode_8(p);
-	if (num == 0)
-		goto done;
-
-	/* alloc large array */
-	info->dir_nr = num;
-	info->dir_in = kcalloc(num, sizeof(*info->dir_in) +
-			       sizeof(*info->dir_dname) +
-			       sizeof(*info->dir_dname_len) +
-			       sizeof(*info->dir_dlease),
-			       GFP_NOFS);
-	if (info->dir_in == NULL) {
-		err = -ENOMEM;
-		goto out_bad;
-	}
-	info->dir_dname = (void *)(info->dir_in + num);
-	info->dir_dname_len = (void *)(info->dir_dname + num);
-	info->dir_dlease = (void *)(info->dir_dname_len + num);
-
-	while (num) {
-		/* dentry */
-		ceph_decode_need(p, end, sizeof(u32)*2, bad);
-		info->dir_dname_len[i] = ceph_decode_32(p);
-		ceph_decode_need(p, end, info->dir_dname_len[i], bad);
-		info->dir_dname[i] = *p;
-		*p += info->dir_dname_len[i];
-		dout("parsed dir dname '%.*s'\n", info->dir_dname_len[i],
-		     info->dir_dname[i]);
-		info->dir_dlease[i] = *p;
-		*p += sizeof(struct ceph_mds_reply_lease);
-
-		/* inode */
-		err = parse_reply_info_in(p, end, &info->dir_in[i], features);
-		if (err < 0)
-			goto out_bad;
-		i++;
-		num--;
-	}
-
-done:
-	if (*p != end)
-		goto bad;
-	return 0;
-
-bad:
-	err = -EIO;
-out_bad:
-	pr_err("problem parsing dir contents %d\n", err);
-	return err;
-}
-
-/*
- * parse fcntl F_GETLK results
- */
-static int parse_reply_info_filelock(void **p, void *end,
-				     struct ceph_mds_reply_info_parsed *info,
-				     int features)
-{
-	if (*p + sizeof(*info->filelock_reply) > end)
-		goto bad;
-
-	info->filelock_reply = *p;
-	*p += sizeof(*info->filelock_reply);
-
-	if (unlikely(*p != end))
-		goto bad;
-	return 0;
-
-bad:
-	return -EIO;
-}
-
-/*
- * parse extra results
- */
-static int parse_reply_info_extra(void **p, void *end,
-				  struct ceph_mds_reply_info_parsed *info,
-				  int features)
-{
-	if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
-		return parse_reply_info_filelock(p, end, info, features);
-	else
-		return parse_reply_info_dir(p, end, info, features);
-}
-
-/*
- * parse entire mds reply
- */
-static int parse_reply_info(struct ceph_msg *msg,
-			    struct ceph_mds_reply_info_parsed *info,
-			    int features)
-{
-	void *p, *end;
-	u32 len;
-	int err;
-
-	info->head = msg->front.iov_base;
-	p = msg->front.iov_base + sizeof(struct ceph_mds_reply_head);
-	end = p + msg->front.iov_len - sizeof(struct ceph_mds_reply_head);
-
-	/* trace */
-	ceph_decode_32_safe(&p, end, len, bad);
-	if (len > 0) {
-		ceph_decode_need(&p, end, len, bad);
-		err = parse_reply_info_trace(&p, p+len, info, features);
-		if (err < 0)
-			goto out_bad;
-	}
-
-	/* extra */
-	ceph_decode_32_safe(&p, end, len, bad);
-	if (len > 0) {
-		ceph_decode_need(&p, end, len, bad);
-		err = parse_reply_info_extra(&p, p+len, info, features);
-		if (err < 0)
-			goto out_bad;
-	}
-
-	/* snap blob */
-	ceph_decode_32_safe(&p, end, len, bad);
-	info->snapblob_len = len;
-	info->snapblob = p;
-	p += len;
-
-	if (p != end)
-		goto bad;
-	return 0;
-
-bad:
-	err = -EIO;
-out_bad:
-	pr_err("mds parse_reply err %d\n", err);
-	return err;
-}
-
-static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
-{
-	kfree(info->dir_in);
-}
-
-
-/*
- * sessions
- */
-static const char *session_state_name(int s)
-{
-	switch (s) {
-	case CEPH_MDS_SESSION_NEW: return "new";
-	case CEPH_MDS_SESSION_OPENING: return "opening";
-	case CEPH_MDS_SESSION_OPEN: return "open";
-	case CEPH_MDS_SESSION_HUNG: return "hung";
-	case CEPH_MDS_SESSION_CLOSING: return "closing";
-	case CEPH_MDS_SESSION_RESTARTING: return "restarting";
-	case CEPH_MDS_SESSION_RECONNECTING: return "reconnecting";
-	default: return "???";
-	}
-}
-
-static struct ceph_mds_session *get_session(struct ceph_mds_session *s)
-{
-	if (atomic_inc_not_zero(&s->s_ref)) {
-		dout("mdsc get_session %p %d -> %d\n", s,
-		     atomic_read(&s->s_ref)-1, atomic_read(&s->s_ref));
-		return s;
-	} else {
-		dout("mdsc get_session %p 0 -- FAIL", s);
-		return NULL;
-	}
-}
-
-void ceph_put_mds_session(struct ceph_mds_session *s)
-{
-	dout("mdsc put_session %p %d -> %d\n", s,
-	     atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
-	if (atomic_dec_and_test(&s->s_ref)) {
-		if (s->s_authorizer)
-		     s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer(
-			     s->s_mdsc->fsc->client->monc.auth,
-			     s->s_authorizer);
-		kfree(s);
-	}
-}
-
-/*
- * called under mdsc->mutex
- */
-struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc,
-						   int mds)
-{
-	struct ceph_mds_session *session;
-
-	if (mds >= mdsc->max_sessions || mdsc->sessions[mds] == NULL)
-		return NULL;
-	session = mdsc->sessions[mds];
-	dout("lookup_mds_session %p %d\n", session,
-	     atomic_read(&session->s_ref));
-	get_session(session);
-	return session;
-}
-
-static bool __have_session(struct ceph_mds_client *mdsc, int mds)
-{
-	if (mds >= mdsc->max_sessions)
-		return false;
-	return mdsc->sessions[mds];
-}
-
-static int __verify_registered_session(struct ceph_mds_client *mdsc,
-				       struct ceph_mds_session *s)
-{
-	if (s->s_mds >= mdsc->max_sessions ||
-	    mdsc->sessions[s->s_mds] != s)
-		return -ENOENT;
-	return 0;
-}
-
-/*
- * create+register a new session for given mds.
- * called under mdsc->mutex.
- *
- * Allocates and initializes a session for rank @mds, grows the
- * mdsc->sessions[] array if @mds does not fit in it, stores the session
- * in sessions[mds] and opens its connection to the address the mdsmap
- * reports for @mds.
- *
- * Returns the new session with two references held (one owned by
- * sessions[], one for the caller), or ERR_PTR(-ENOMEM) if either the
- * session or the enlarged array cannot be allocated.
- */
-static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
-						 int mds)
-{
-	struct ceph_mds_session *s;
-
-	s = kzalloc(sizeof(*s), GFP_NOFS);
-	if (!s)
-		return ERR_PTR(-ENOMEM);
-	s->s_mdsc = mdsc;
-	s->s_mds = mds;
-	s->s_state = CEPH_MDS_SESSION_NEW;
-	s->s_ttl = 0;
-	s->s_seq = 0;
-	mutex_init(&s->s_mutex);
-
-	ceph_con_init(mdsc->fsc->client->msgr, &s->s_con);
-	s->s_con.private = s;
-	s->s_con.ops = &mds_con_ops;
-	s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
-	s->s_con.peer_name.num = cpu_to_le64(mds);
-
-	spin_lock_init(&s->s_gen_ttl_lock);
-	s->s_cap_gen = 0;
-	s->s_cap_ttl = jiffies - 1;	/* one tick in the past */
-
-	spin_lock_init(&s->s_cap_lock);
-	s->s_renew_requested = 0;
-	s->s_renew_seq = 0;
-	INIT_LIST_HEAD(&s->s_caps);
-	s->s_nr_caps = 0;
-	s->s_trim_caps = 0;
-	atomic_set(&s->s_ref, 1);
-	INIT_LIST_HEAD(&s->s_waiting);
-	INIT_LIST_HEAD(&s->s_unsafe);
-	s->s_num_cap_releases = 0;
-	s->s_cap_iterator = NULL;
-	INIT_LIST_HEAD(&s->s_cap_releases);
-	INIT_LIST_HEAD(&s->s_cap_releases_done);
-	INIT_LIST_HEAD(&s->s_cap_flushing);
-	INIT_LIST_HEAD(&s->s_cap_snaps_flushing);
-
-	dout("register_session mds%d\n", mds);
-	if (mds >= mdsc->max_sessions) {
-		int newmax = 1 << get_count_order(mds+1);	/* presumably mds+1
-								 * rounded up to a
-								 * power of two --
-								 * TODO confirm */
-		struct ceph_mds_session **sa;
-
-		dout("register_session realloc to %d\n", newmax);
-		sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
-		if (sa == NULL)
-			goto fail_realloc;
-		if (mdsc->sessions) {
-			memcpy(sa, mdsc->sessions,
-			       mdsc->max_sessions * sizeof(void *));	/* keep the
-									 * existing
-									 * slots */
-			kfree(mdsc->sessions);
-		}
-		mdsc->sessions = sa;
-		mdsc->max_sessions = newmax;
-	}
-	mdsc->sessions[mds] = s;
-	atomic_inc(&s->s_ref);  /* one ref to sessions[], one to caller */
-
-	ceph_con_open(&s->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
-
-	return s;
-
-fail_realloc:
-	kfree(s);	/* not yet published in sessions[] */
-	return ERR_PTR(-ENOMEM);
-}
-
-/*
- * Take @s out of mdsc->sessions[], close its connection and put one
- * session reference.  It is a BUG if @s is not the session currently
- * stored in its slot.
- *
- * called under mdsc->mutex
- */
-static void __unregister_session(struct ceph_mds_client *mdsc,
-			       struct ceph_mds_session *s)
-{
-	struct ceph_mds_session **slot;
-
-	dout("__unregister_session mds%d %p\n", s->s_mds, s);
-	slot = &mdsc->sessions[s->s_mds];
-	BUG_ON(*slot != s);
-	*slot = NULL;
-	ceph_con_close(&s->s_con);
-	ceph_put_mds_session(s);
-}
-
-/*
- * Release the session reference held by a request (if it has one) and
- * clear req->r_session.
- *
- * should be last request ref, or hold mdsc->mutex
- */
-static void put_request_session(struct ceph_mds_request *req)
-{
-	struct ceph_mds_session *session = req->r_session;
-
-	if (!session)
-		return;
-
-	ceph_put_mds_session(session);
-	req->r_session = NULL;
-}
-
-/*
- * kref release function for a ceph_mds_request: put the request and
- * reply messages, the inodes, dentries and CEPH_CAP_PIN refs the
- * request holds, its session and its cap reservation, then free the
- * paths and the request itself.
- */
-void ceph_mdsc_release_request(struct kref *kref)
-{
-	struct ceph_mds_request *req;
-
-	req = container_of(kref, struct ceph_mds_request, r_kref);
-
-	/* messages */
-	if (req->r_request)
-		ceph_msg_put(req->r_request);
-	if (req->r_reply) {
-		ceph_msg_put(req->r_reply);
-		destroy_reply_info(&req->r_reply_info);
-	}
-
-	/* inodes and dentries */
-	if (req->r_inode) {
-		struct inode *inode = req->r_inode;
-
-		ceph_put_cap_refs(ceph_inode(inode), CEPH_CAP_PIN);
-		iput(inode);
-	}
-	if (req->r_locked_dir) {
-		struct inode *dir = req->r_locked_dir;
-
-		ceph_put_cap_refs(ceph_inode(dir), CEPH_CAP_PIN);
-	}
-	if (req->r_target_inode)
-		iput(req->r_target_inode);
-	if (req->r_dentry)
-		dput(req->r_dentry);
-	if (req->r_old_dentry) {
-		struct inode *old_dir = req->r_old_dentry_dir;
-
-		/*
-		 * r_old_dentry_dir is tracked (and unpinned) on its own
-		 * rather than via r_old_dentry->d_parent: the parent may
-		 * have changed between the dir mutex being dropped and
-		 * this request being freed.
-		 */
-		ceph_put_cap_refs(ceph_inode(old_dir), CEPH_CAP_PIN);
-		dput(req->r_old_dentry);
-		iput(old_dir);
-	}
-
-	/* everything else */
-	kfree(req->r_path1);
-	kfree(req->r_path2);
-	put_request_session(req);
-	ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation);
-	kfree(req);
-}
-
-/*
- * lookup request by tid, bump ref if found.
- *
- * Returns NULL if mdsc->request_tree holds no request with @tid.
- *
- * called under mdsc->mutex.
- */
-static struct ceph_mds_request *__lookup_request(struct ceph_mds_client *mdsc,
-					     u64 tid)
-{
-	struct rb_node *node = mdsc->request_tree.rb_node;
-
-	while (node) {
-		struct ceph_mds_request *cur =
-			rb_entry(node, struct ceph_mds_request, r_node);
-
-		if (tid == cur->r_tid) {
-			ceph_mdsc_get_request(cur);
-			return cur;
-		}
-		node = (tid < cur->r_tid) ? node->rb_left : node->rb_right;
-	}
-	return NULL;
-}
-
-/*
- * Link @new into mdsc->request_tree, ordered by r_tid.  Finding a
- * request with the same tid already in the tree is a BUG().
- */
-static void __insert_request(struct ceph_mds_client *mdsc,
-			     struct ceph_mds_request *new)
-{
-	struct rb_node **link = &mdsc->request_tree.rb_node;
-	struct rb_node *parent = NULL;
-
-	while (*link) {
-		struct ceph_mds_request *cur;
-
-		parent = *link;
-		cur = rb_entry(parent, struct ceph_mds_request, r_node);
-		if (new->r_tid == cur->r_tid)
-			BUG();
-		if (new->r_tid < cur->r_tid)
-			link = &parent->rb_left;
-		else
-			link = &parent->rb_right;
-	}
-
-	rb_link_node(&new->r_node, parent, link);
-	rb_insert_color(&new->r_node, &mdsc->request_tree);
-}
-
-/*
- * Register an in-flight request and assign it a tid.  If @dir is
- * given, also put the request on that directory's i_unsafe_dirops list
- * and take an inode reference on the directory.
- *
- * Called under mdsc->mutex.
- */
-static void __register_request(struct ceph_mds_client *mdsc,
-			       struct ceph_mds_request *req,
-			       struct inode *dir)
-{
-	struct ceph_inode_info *dir_ci;
-
-	req->r_tid = ++mdsc->last_tid;
-	if (req->r_num_caps)
-		ceph_reserve_caps(mdsc, &req->r_caps_reservation,
-				  req->r_num_caps);
-	dout("__register_request %p tid %lld\n", req, req->r_tid);
-
-	/* the request tree holds its own reference */
-	ceph_mdsc_get_request(req);
-	__insert_request(mdsc, req);
-
-	req->r_uid = current_fsuid();
-	req->r_gid = current_fsgid();
-
-	if (!dir)
-		return;
-
-	dir_ci = ceph_inode(dir);
-	ihold(dir);
-	spin_lock(&dir_ci->i_unsafe_lock);
-	req->r_unsafe_dir = dir;
-	list_add_tail(&req->r_unsafe_dir_item, &dir_ci->i_unsafe_dirops);
-	spin_unlock(&dir_ci->i_unsafe_lock);
-}
-
-/*
- * Remove a request from mdsc->request_tree.  If it is linked to an
- * unsafe directory, unlink it and put that inode.  Finally put one
- * request reference.
- */
-static void __unregister_request(struct ceph_mds_client *mdsc,
-				 struct ceph_mds_request *req)
-{
-	struct inode *dir;
-
-	dout("__unregister_request %p tid %lld\n", req, req->r_tid);
-	rb_erase(&req->r_node, &mdsc->request_tree);
-	RB_CLEAR_NODE(&req->r_node);
-
-	dir = req->r_unsafe_dir;
-	if (dir) {
-		struct ceph_inode_info *dir_ci = ceph_inode(dir);
-
-		spin_lock(&dir_ci->i_unsafe_lock);
-		list_del_init(&req->r_unsafe_dir_item);
-		spin_unlock(&dir_ci->i_unsafe_lock);
-
-		iput(dir);
-		req->r_unsafe_dir = NULL;
-	}
-
-	ceph_mdsc_put_request(req);
-}
-
-/*
- * __choose_mds(): choose mds to send request to next.  If there is a
- * hint set in the request (e.g., due to a prior forward hint from the
- * mds), use that.  Otherwise, consult frag tree and/or caps to identify
- * the appropriate mds.  If all else fails, choose randomly.
- *
- * Called under mdsc->mutex.  (get_nonsnap_parent() is its helper.)
- */
-static struct dentry *get_nonsnap_parent(struct dentry *dentry)
-{
-	struct dentry *dn = dentry;
-
-	/*
-	 * Walk up from @dentry until we reach the root or a dentry whose
-	 * inode has snap id CEPH_NOSNAP, and return that dentry.
-	 *
-	 * The d_parent accesses need no protection: nothing is ever
-	 * renamed inside the snapped namespace except to resplice it to
-	 * another snapdir, and either the old or the new parent is a
-	 * valid result.
-	 */
-	for (;;) {
-		if (IS_ROOT(dn))
-			break;
-		if (ceph_snap(dn->d_inode) == CEPH_NOSNAP)
-			break;
-		dn = dn->d_parent;
-	}
-	return dn;
-}
-
-static int __choose_mds(struct ceph_mds_client *mdsc,
-			struct ceph_mds_request *req)
-{
-	struct inode *inode;
-	struct ceph_inode_info *ci;
-	struct ceph_cap *cap;
-	int mode = req->r_direct_mode;	/* may be switched to USE_AUTH_MDS below */
-	int mds = -1;
-	u32 hash = req->r_direct_hash;	/* replaced by the dentry hash in the
-					 * "dir + name" case below */
-	bool is_hash = req->r_direct_is_hash;
-
-	/*
-	 * is there a specific mds we should try?  ignore hint if we have
-	 * no session and the mds is not up (active or recovering).
-	 */
-	if (req->r_resend_mds >= 0 &&
-	    (__have_session(mdsc, req->r_resend_mds) ||
-	     ceph_mdsmap_get_state(mdsc->mdsmap, req->r_resend_mds) > 0)) {
-		dout("choose_mds using resend_mds mds%d\n",
-		     req->r_resend_mds);
-		return req->r_resend_mds;
-	}
-
-	if (mode == USE_RANDOM_MDS)
-		goto random;
-
-	inode = NULL;	/* stays NULL if the request has neither inode nor dentry */
-	if (req->r_inode) {
-		inode = req->r_inode;
-	} else if (req->r_dentry) {
-		/* ignore race with rename; old or new d_parent is okay */
-		struct dentry *parent = req->r_dentry->d_parent;
-		struct inode *dir = parent->d_inode;
-
-		if (dir->i_sb != mdsc->fsc->sb) {
-			/* not this fs! */
-			inode = req->r_dentry->d_inode;
-		} else if (ceph_snap(dir) != CEPH_NOSNAP) {
-			/* direct snapped/virtual snapdir requests
-			 * based on parent dir inode */
-			struct dentry *dn = get_nonsnap_parent(parent);
-			inode = dn->d_inode;
-			dout("__choose_mds using nonsnap parent %p\n", inode);
-		} else if (req->r_dentry->d_inode) {
-			/* dentry target */
-			inode = req->r_dentry->d_inode;
-		} else {
-			/* dir + name: negative dentry, so route by the
-			 * parent dir and the hash of the name */
-			inode = dir;
-			hash = ceph_dentry_hash(dir, req->r_dentry);
-			is_hash = true;
-		}
-	}
-
-	dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
-	     (int)hash, mode);
-	if (!inode)
-		goto random;
-	ci = ceph_inode(inode);
-
-	if (is_hash && S_ISDIR(inode->i_mode)) {
-		struct ceph_inode_frag frag;
-		int found;
-
-		ceph_choose_frag(ci, hash, &frag, &found);
-		if (found) {
-			if (mode == USE_ANY_MDS && frag.ndist > 0) {
-				u8 r;
-
-				/* choose a random replica; it is only used
-				 * if the mdsmap reports it as at least
-				 * active, otherwise we fall through to the
-				 * auth mds */
-				get_random_bytes(&r, 1);
-				r %= frag.ndist;
-				mds = frag.dist[r];
-				dout("choose_mds %p %llx.%llx "
-				     "frag %u mds%d (%d/%d)\n",
-				     inode, ceph_vinop(inode),
-				     frag.frag, mds,
-				     (int)r, frag.ndist);
-				if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
-				    CEPH_MDS_STATE_ACTIVE)
-					return mds;
-			}
-
-			/* since this file/dir wasn't known to be
-			 * replicated, then we want to look for the
-			 * authoritative mds. */
-			mode = USE_AUTH_MDS;
-			if (frag.mds >= 0) {
-				/* choose auth mds */
-				mds = frag.mds;
-				dout("choose_mds %p %llx.%llx "
-				     "frag %u mds%d (auth)\n",
-				     inode, ceph_vinop(inode), frag.frag, mds);
-				if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
-				    CEPH_MDS_STATE_ACTIVE)
-					return mds;
-			}
-		}
-	}
-
-	spin_lock(&ci->i_ceph_lock);	/* held while i_auth_cap and i_caps are inspected */
-	cap = NULL;
-	if (mode == USE_AUTH_MDS)
-		cap = ci->i_auth_cap;
-	if (!cap && !RB_EMPTY_ROOT(&ci->i_caps))
-		cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);	/* else
-											 * first cap
-											 * in the tree */
-	if (!cap) {
-		spin_unlock(&ci->i_ceph_lock);
-		goto random;
-	}
-	mds = cap->session->s_mds;	/* use the mds that issued the chosen cap */
-	dout("choose_mds %p %llx.%llx mds%d (%scap %p)\n",
-	     inode, ceph_vinop(inode), mds,
-	     cap == ci->i_auth_cap ? "auth " : "", cap);
-	spin_unlock(&ci->i_ceph_lock);
-	return mds;
-
-random:
-	mds = ceph_mdsmap_get_random_mds(mdsc->mdsmap);
-	dout("choose_mds chose random mds%d\n", mds);
-	return mds;
-}
-
-
-/*
- * session messages
- */
-/*
- * Allocate a CEPH_MSG_CLIENT_SESSION message whose front is a
- * ceph_mds_session_head carrying @op and @seq in little-endian form.
- * Returns NULL (after logging an error) if allocation fails.
- */
-static struct ceph_msg *create_session_msg(u32 op, u64 seq)
-{
-	struct ceph_mds_session_head *head;
-	struct ceph_msg *m;
-
-	m = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*head), GFP_NOFS,
-			 false);
-	if (m) {
-		head = m->front.iov_base;
-		head->op = cpu_to_le32(op);
-		head->seq = cpu_to_le64(seq);
-	} else {
-		pr_err("create_session_msg ENOMEM creating msg\n");
-	}
-	return m;
-}
-
-/*
- * send session open request.
- *
- * The session is marked OPENING and s_renew_requested is stamped
- * before the message is allocated, so both are updated even when
- * -ENOMEM is returned.  Returns 0 once the message has been queued.
- *
- * called under mdsc->mutex
- */
-static int __open_session(struct ceph_mds_client *mdsc,
-			  struct ceph_mds_session *session)
-{
-	int mds = session->s_mds;
-	int mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds);
-	struct ceph_msg *open_msg;
-
-	/* wait for mds to go active?  (for now its state is only logged) */
-	dout("open_session to mds%d (%s)\n", mds,
-	     ceph_mds_state_name(mstate));
-	session->s_state = CEPH_MDS_SESSION_OPENING;
-	session->s_renew_requested = jiffies;
-
-	/* send connect message */
-	open_msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN,
-				      session->s_seq);
-	if (open_msg == NULL)
-		return -ENOMEM;
-
-	ceph_con_send(&session->s_con, open_msg);
-	return 0;
-}
-
-/*
- * open sessions for any export targets for the given mds
- *
- * For every export target the mdsmap lists for session->s_mds, look up
- * (or register) the target's session and, if that session is NEW or
- * CLOSING, send it an open request.  Gives up silently at the first
- * target whose session cannot be registered, or if the mds rank is
- * outside the mdsmap.
- *
- * called under mdsc->mutex
- */
-static void __open_export_target_sessions(struct ceph_mds_client *mdsc,
-					  struct ceph_mds_session *session)
-{
-	struct ceph_mds_info *mi;
-	struct ceph_mds_session *ts;
-	int i, mds = session->s_mds;
-	int target;
-
-	if (mds >= mdsc->mdsmap->m_max_mds)
-		return;
-	mi = &mdsc->mdsmap->m_info[mds];
-	dout("open_export_target_sessions for mds%d (%d targets)\n",
-	     session->s_mds, mi->num_export_targets);
-
-	for (i = 0; i < mi->num_export_targets; i++) {
-		target = mi->export_targets[i];
-		ts = __ceph_lookup_mds_session(mdsc, target);
-		if (!ts) {
-			ts = register_session(mdsc, target);
-			if (IS_ERR(ts))
-				return;
-		}
-		/*
-		 * Test and open the *target's* session (ts).  Acting on
-		 * @session here would re-open the exporting mds and never
-		 * open any target.
-		 */
-		if (ts->s_state == CEPH_MDS_SESSION_NEW ||
-		    ts->s_state == CEPH_MDS_SESSION_CLOSING)
-			__open_session(mdsc, ts);
-		else
-			dout(" mds%d target mds%d %p is %s\n", session->s_mds,
-			     target, ts, session_state_name(ts->s_state));
-		ceph_put_mds_session(ts);	/* drop the lookup/register ref */
-	}
-}
-
-void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
-					   struct ceph_mds_session *session)
-{
-	mutex_lock(&mdsc->mutex);	/* the __ variant must run under mdsc->mutex */
-	__open_export_target_sessions(mdsc, session);
-	mutex_unlock(&mdsc->mutex);
-}
-
-/*
- * session caps
- */
-
-/*
- * Free preallocated cap messages assigned to this session: every
- * message on s_cap_releases and on s_cap_releases_done is unlinked and
- * put, all under s_cap_lock.
- */
-static void cleanup_cap_releases(struct ceph_mds_session *session)
-{
-	struct list_head *queues[2];
-	struct ceph_msg *m;
-	int q;
-
-	queues[0] = &session->s_cap_releases;
-	queues[1] = &session->s_cap_releases_done;
-
-	spin_lock(&session->s_cap_lock);
-	for (q = 0; q < 2; q++) {
-		while (!list_empty(queues[q])) {
-			m = list_first_entry(queues[q], struct ceph_msg,
-					     list_head);
-			list_del_init(&m->list_head);
-			ceph_msg_put(m);
-		}
-	}
-	spin_unlock(&session->s_cap_lock);
-}
-
-/*
- * Helper to safely iterate over all caps associated with a session, with
- * special care taken to handle a racing __ceph_remove_cap().
- *
- * @cb is called for each cap whose inode could be grabbed, with
- * s_cap_lock dropped and an inode reference held.  While the lock is
- * dropped the current cap is published in session->s_cap_iterator; if
- * cap->ci turns out to be NULL once the lock is retaken, the cap's
- * removal is finished here (it is unlinked from s_caps and put after
- * the lock has been released).  NOTE(review): presumably
- * __ceph_remove_cap() consults s_cap_iterator and leaves such a cap on
- * the list for us -- confirm against caps.c.
- *
- * Iteration stops at the first negative return value from @cb, which
- * is passed back to the caller; otherwise 0 is returned.
- *
- * Caller must hold session s_mutex.
- */
-static int iterate_session_caps(struct ceph_mds_session *session,
-				 int (*cb)(struct inode *, struct ceph_cap *,
-					    void *), void *arg)
-{
-	struct list_head *p;
-	struct ceph_cap *cap;
-	struct inode *inode, *last_inode = NULL;	/* last_inode: iput deferred
-							 * until the lock is dropped */
-	struct ceph_cap *old_cap = NULL;	/* removed cap awaiting ceph_put_cap() */
-	int ret;
-
-	dout("iterate_session_caps %p mds%d\n", session, session->s_mds);
-	spin_lock(&session->s_cap_lock);
-	p = session->s_caps.next;
-	while (p != &session->s_caps) {
-		cap = list_entry(p, struct ceph_cap, session_caps);
-		inode = igrab(&cap->ci->vfs_inode);
-		if (!inode) {	/* could not get a reference; skip this cap */
-			p = p->next;
-			continue;
-		}
-		session->s_cap_iterator = cap;
-		spin_unlock(&session->s_cap_lock);
-
-		if (last_inode) {
-			iput(last_inode);
-			last_inode = NULL;
-		}
-		if (old_cap) {
-			ceph_put_cap(session->s_mdsc, old_cap);
-			old_cap = NULL;
-		}
-
-		ret = cb(inode, cap, arg);
-		last_inode = inode;
-
-		spin_lock(&session->s_cap_lock);
-		p = p->next;	/* advance before the current cap may be unlinked */
-		if (cap->ci == NULL) {
-			dout("iterate_session_caps  finishing cap %p removal\n",
-			     cap);
-			BUG_ON(cap->session != session);
-			list_del_init(&cap->session_caps);
-			session->s_nr_caps--;
-			cap->session = NULL;
-			old_cap = cap;  /* put_cap it w/o locks held */
-		}
-		if (ret < 0)
-			goto out;
-	}
-	ret = 0;
-out:
-	session->s_cap_iterator = NULL;
-	spin_unlock(&session->s_cap_lock);
-
-	if (last_inode)
-		iput(last_inode);
-	if (old_cap)
-		ceph_put_cap(session->s_mdsc, old_cap);
-
-	return ret;
-}
-
-/*
- * iterate_session_caps() callback: remove @cap from its inode.  If the
- * inode is left without any real caps, its dirty and flushing cap
- * state is discarded and, if any was discarded, so are its dirty-data
- * (wrbuffer) ref counts.  The inode is iput() once if dirty/flushing
- * state was dropped and once more if dirty data was dropped.
- *
- * Always returns 0.
- */
-static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
-				  void *arg)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_mds_client *mdsc;
-	int nr_iput = 0;
-
-	dout("removing cap %p, ci is %p, inode is %p\n",
-	     cap, ci, &ci->vfs_inode);
-	spin_lock(&ci->i_ceph_lock);
-	__ceph_remove_cap(cap);
-	if (__ceph_is_any_real_caps(ci))
-		goto unlock;
-
-	mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
-
-	spin_lock(&mdsc->cap_dirty_lock);
-	if (!list_empty(&ci->i_dirty_item)) {
-		pr_info(" dropping dirty %s state for %p %lld\n",
-			ceph_cap_string(ci->i_dirty_caps),
-			inode, ceph_ino(inode));
-		ci->i_dirty_caps = 0;
-		list_del_init(&ci->i_dirty_item);
-		nr_iput = 1;
-	}
-	if (!list_empty(&ci->i_flushing_item)) {
-		pr_info(" dropping dirty+flushing %s state for %p %lld\n",
-			ceph_cap_string(ci->i_flushing_caps),
-			inode, ceph_ino(inode));
-		ci->i_flushing_caps = 0;
-		list_del_init(&ci->i_flushing_item);
-		mdsc->num_cap_flushing--;
-		nr_iput = 1;
-	}
-	if (nr_iput && ci->i_wrbuffer_ref) {
-		pr_info(" dropping dirty data for %p %lld\n",
-			inode, ceph_ino(inode));
-		ci->i_wrbuffer_ref = 0;
-		ci->i_wrbuffer_ref_head = 0;
-		nr_iput++;
-	}
-	spin_unlock(&mdsc->cap_dirty_lock);
-
-unlock:
-	spin_unlock(&ci->i_ceph_lock);
-	for (; nr_iput > 0; nr_iput--)
-		iput(inode);
-	return 0;
-}
-
-/*
- * Remove every cap held by @session (via remove_session_caps_cb()),
- * then free the session's preallocated cap release messages.
- *
- * caller must hold session s_mutex
- */
-static void remove_session_caps(struct ceph_mds_session *session)
-{
-	dout("remove_session_caps on %p\n", session);
-	iterate_session_caps(session, remove_session_caps_cb, NULL);
-	BUG_ON(session->s_nr_caps > 0);	/* no cap may be left behind */
-	BUG_ON(!list_empty(&session->s_cap_flushing));
-	cleanup_cap_releases(session);
-}
-
-/*
- * iterate_session_caps() callback: wake up any threads waiting on the
- * inode's i_cap_wq.  When @arg is non-NULL, additionally zero the
- * inode's i_wanted_max_size and i_requested_max_size under
- * i_ceph_lock.  Always returns 0.
- *
- * caller must hold s_mutex.
- */
-static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
-			      void *arg)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-
-	wake_up_all(&ci->i_cap_wq);
-	if (!arg)
-		return 0;
-
-	spin_lock(&ci->i_ceph_lock);
-	ci->i_wanted_max_size = 0;
-	ci->i_requested_max_size = 0;
-	spin_unlock(&ci->i_ceph_lock);
-	return 0;
-}
-
-/*
- * Run wake_up_session_cb() over every cap of @session.  @reconnect is
- * passed to the callback as its opaque argument.
- */
-static void wake_up_session_caps(struct ceph_mds_session *session,
-				 int reconnect)
-{
-	void *cb_arg = (void *)(unsigned long)reconnect;
-
-	dout("wake_up_session_caps %p mds%d\n", session, session->s_mds);
-	iterate_session_caps(session, wake_up_session_cb, cb_arg);
-}
-
-/*
- * Send periodic message to MDS renewing all currently held caps.  The
- * ack will reset the expiration for all caps from this session.
- *
- * s_renew_requested is always stamped with the current jiffies.  The
- * message itself is only sent if the mds is at least in the RECONNECT
- * state.  Returns 0, or -ENOMEM if the message cannot be allocated.
- *
- * caller holds s_mutex
- */
-static int send_renew_caps(struct ceph_mds_client *mdsc,
-			   struct ceph_mds_session *session)
-{
-	struct ceph_msg *renew;
-	int mds_state;
-
-	if (time_after_eq(jiffies, session->s_cap_ttl)) {
-		if (time_after_eq(session->s_cap_ttl,
-				  session->s_renew_requested))
-			pr_info("mds%d caps stale\n", session->s_mds);
-	}
-	session->s_renew_requested = jiffies;
-
-	mds_state = ceph_mdsmap_get_state(mdsc->mdsmap, session->s_mds);
-	if (mds_state >= CEPH_MDS_STATE_RECONNECT) {
-		dout("send_renew_caps to mds%d (%s)\n", session->s_mds,
-		     ceph_mds_state_name(mds_state));
-		renew = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
-					   ++session->s_renew_seq);
-		if (!renew)
-			return -ENOMEM;
-		ceph_con_send(&session->s_con, renew);
-		return 0;
-	}
-
-	/* do not try to renew caps until a recovering mds has reconnected
-	 * with its clients. */
-	dout("send_renew_caps ignoring mds%d (%s)\n",
-	     session->s_mds, ceph_mds_state_name(mds_state));
-	return 0;
-}
-
-/*
- * Note new cap ttl, and any transition from stale -> not stale (fresh?).
- *
- * The new ttl is s_renew_requested plus the mdsmap session timeout.
- * If @is_renew is set, the caps were stale on entry, and the new ttl
- * is still in the future, threads waiting on the session's caps are
- * woken once s_cap_lock has been released.
- *
- * Called under session->s_mutex
- */
-static void renewed_caps(struct ceph_mds_client *mdsc,
-			 struct ceph_mds_session *session, int is_renew)
-{
-	int was_stale;
-	int wake = 0;
-
-	spin_lock(&session->s_cap_lock);
-	/* stale == the ttl has already been reached */
-	was_stale = is_renew && time_after_eq(jiffies, session->s_cap_ttl);
-
-	session->s_cap_ttl = session->s_renew_requested +
-		mdsc->mdsmap->m_session_timeout*HZ;
-
-	if (was_stale) {
-		if (time_before(jiffies, session->s_cap_ttl)) {
-			pr_info("mds%d caps renewed\n", session->s_mds);
-			wake = 1;
-		} else {
-			pr_info("mds%d caps still stale\n", session->s_mds);
-		}
-	}
-	/* caps are fresh for as long as jiffies is before the ttl */
-	dout("renewed_caps mds%d ttl now %lu, was %s, now %s\n",
-	     session->s_mds, session->s_cap_ttl, was_stale ? "stale" : "fresh",
-	     time_before(jiffies, session->s_cap_ttl) ? "fresh" : "stale");
-	spin_unlock(&session->s_cap_lock);
-
-	if (wake)
-		wake_up_session_caps(session, 0);
-}
-
-/*
- * send a session close request
- *
- * Returns 0 once the message is queued on the session's connection,
- * or -ENOMEM if it cannot be allocated.
- */
-static int request_close_session(struct ceph_mds_client *mdsc,
-				 struct ceph_mds_session *session)
-{
-	struct ceph_msg *close_msg;
-
-	dout("request_close_session mds%d state %s seq %lld\n",
-	     session->s_mds, session_state_name(session->s_state),
-	     session->s_seq);
-
-	close_msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE,
-				       session->s_seq);
-	if (close_msg == NULL)
-		return -ENOMEM;
-
-	ceph_con_send(&session->s_con, close_msg);
-	return 0;
-}
-
-/*
- * Move the session to CLOSING and send a close request, unless it is
- * already in CLOSING or a later state (then nothing is done and 0 is
- * returned).
- *
- * Called with s_mutex held.
- */
-static int __close_session(struct ceph_mds_client *mdsc,
-			 struct ceph_mds_session *session)
-{
-	if (session->s_state < CEPH_MDS_SESSION_CLOSING) {
-		session->s_state = CEPH_MDS_SESSION_CLOSING;
-		return request_close_session(mdsc, session);
-	}
-	return 0;
-}
-
-/*
- * Trim old(er) caps.
- *
- * Because we can't cache an inode without one or more caps, we do
- * this indirectly: if a cap is unused, we prune its aliases, at which
- * point the inode will hopefully get dropped too.
- *
- * Yes, this is a bit sloppy.  Our only real goal here is to respond to
- * memory pressure from the MDS, though, so it needn't be perfect.
- *
- * iterate_session_caps() callback; @arg is the session.  Returns -1
- * (stopping the iteration) once s_trim_caps has reached zero, 0
- * otherwise.
- */
-static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
-{
-	struct ceph_mds_session *session = arg;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int mine, used, oissued;
-
-	if (session->s_trim_caps <= 0)
-		return -1;
-
-	spin_lock(&ci->i_ceph_lock);
-	mine = cap->issued | cap->implemented;
-	used = __ceph_caps_used(ci);
-	oissued = __ceph_caps_issued_other(ci, cap);
-
-	dout("trim_caps_cb %p cap %p mine %s oissued %s used %s\n",
-	     inode, cap, ceph_cap_string(mine), ceph_cap_string(oissued),
-	     ceph_cap_string(used));
-
-	/* keep the cap if it is dirty, or if only it covers caps in use */
-	if (ci->i_dirty_caps)
-		goto out;
-	if ((used & ~oissued) & mine)
-		goto out;
-
-	session->s_trim_caps--;
-	if (oissued) {
-		/* we aren't the only cap.. just remove us */
-		__ceph_remove_cap(cap);
-		goto out;
-	}
-
-	/* try to drop referring dentries */
-	spin_unlock(&ci->i_ceph_lock);
-	d_prune_aliases(inode);
-	dout("trim_caps_cb %p cap %p  pruned, count now %d\n",
-	     inode, cap, atomic_read(&inode->i_count));
-	return 0;
-
-out:
-	spin_unlock(&ci->i_ceph_lock);
-	return 0;
-}
-
-/*
- * Trim session cap count down to some max number.
- *
- * If the session holds more than @max_caps caps, the excess is stored
- * in s_trim_caps and trim_caps_cb() is run over the session's caps;
- * s_trim_caps is reset to 0 afterwards.  Always returns 0.
- */
-static int trim_caps(struct ceph_mds_client *mdsc,
-		     struct ceph_mds_session *session,
-		     int max_caps)
-{
-	int excess = session->s_nr_caps - max_caps;
-
-	dout("trim_caps mds%d start: %d / %d, trim %d\n",
-	     session->s_mds, session->s_nr_caps, max_caps, excess);
-	if (excess <= 0)
-		return 0;
-
-	session->s_trim_caps = excess;
-	iterate_session_caps(session, trim_caps_cb, session);
-	dout("trim_caps mds%d done: %d / %d, trimmed %d\n",
-	     session->s_mds, session->s_nr_caps, max_caps,
-	     excess - session->s_trim_caps);
-	session->s_trim_caps = 0;
-	return 0;
-}
-
-/*
- * Allocate cap_release messages.  If there is a partially full message
- * in the queue, try to allocate enough to cover its remainder, so that
- * we can send it immediately.
- *
- * Empty messages are added to s_cap_releases until s_num_cap_releases
- * covers s_nr_caps plus the cap_release_safety mount option (plus the
- * unused room of a partial message, if there is one).  s_cap_lock is
- * dropped around each allocation.  On success a partial message is
- * moved to s_cap_releases_done.
- *
- * Returns 0 on success, or -ENOMEM if a message cannot be allocated.
- *
- * Called under s_mutex.
- */
-int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
-			  struct ceph_mds_session *session)
-{
-	struct ceph_msg *msg, *partial = NULL;
-	struct ceph_mds_cap_release *head;
-	int err = -ENOMEM;
-	int extra = mdsc->fsc->mount_options->cap_release_safety;
-	int num;
-
-	dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds,
-	     extra);
-
-	spin_lock(&session->s_cap_lock);
-
-	if (!list_empty(&session->s_cap_releases)) {
-		msg = list_first_entry(&session->s_cap_releases,
-				       struct ceph_msg,
-				 list_head);
-		head = msg->front.iov_base;
-		num = le32_to_cpu(head->num);
-		if (num) {	/* first message already holds some releases */
-			dout(" partial %p with (%d/%d)\n", msg, num,
-			     (int)CEPH_CAPS_PER_RELEASE);
-			extra += CEPH_CAPS_PER_RELEASE - num;
-			partial = msg;
-		}
-	}
-	while (session->s_num_cap_releases < session->s_nr_caps + extra) {
-		spin_unlock(&session->s_cap_lock);	/* not held across the allocation */
-		msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE,
-				   GFP_NOFS, false);
-		if (!msg)
-			goto out_unlocked;	/* err is still -ENOMEM; lock is not held */
-		dout("add_cap_releases %p msg %p now %d\n", session, msg,
-		     (int)msg->front.iov_len);
-		head = msg->front.iov_base;
-		head->num = cpu_to_le32(0);
-		msg->front.iov_len = sizeof(*head);	/* empty: header only */
-		spin_lock(&session->s_cap_lock);
-		list_add(&msg->list_head, &session->s_cap_releases);
-		session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE;
-	}
-
-	if (partial) {
-		head = partial->front.iov_base;
-		num = le32_to_cpu(head->num);
-		dout(" queueing partial %p with %d/%d\n", partial, num,
-		     (int)CEPH_CAPS_PER_RELEASE);
-		list_move_tail(&partial->list_head,
-			       &session->s_cap_releases_done);
-		session->s_num_cap_releases -= CEPH_CAPS_PER_RELEASE - num;	/* its unused
-										 * slots no
-										 * longer count */
-	}
-	err = 0;
-	spin_unlock(&session->s_cap_lock);
-out_unlocked:
-	return err;
-}
-
-/*
- * Check whether cap flushes up through want_flush_seq have completed.
- * Nothing is flushed here; this only inspects state.
- *
- * For each registered session, the first inode on the session's
- * s_cap_flushing list is examined under s_mutex; if its
- * i_cap_flush_seq is <= want_flush_seq, a flush is still outstanding.
- * NOTE(review): only the list head is checked, so this presumably
- * relies on the list being ordered by flush seq -- confirm.
- *
- * mdsc->mutex is taken internally and dropped while each session is
- * examined (a session reference is held meanwhile).
- *
- * returns true if we've flushed through want_flush_seq
- */
-static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
-{
-	int mds, ret = 1;
-
-	dout("check_cap_flush want %lld\n", want_flush_seq);
-	mutex_lock(&mdsc->mutex);
-	for (mds = 0; ret && mds < mdsc->max_sessions; mds++) {
-		struct ceph_mds_session *session = mdsc->sessions[mds];
-
-		if (!session)
-			continue;
-		get_session(session);	/* keep it alive while mdsc->mutex is dropped */
-		mutex_unlock(&mdsc->mutex);
-
-		mutex_lock(&session->s_mutex);
-		if (!list_empty(&session->s_cap_flushing)) {
-			struct ceph_inode_info *ci =
-				list_entry(session->s_cap_flushing.next,
-					   struct ceph_inode_info,
-					   i_flushing_item);
-			struct inode *inode = &ci->vfs_inode;
-
-			spin_lock(&ci->i_ceph_lock);
-			if (ci->i_cap_flush_seq <= want_flush_seq) {
-				dout("check_cap_flush still flushing %p "
-				     "seq %lld <= %lld to mds%d\n", inode,
-				     ci->i_cap_flush_seq, want_flush_seq,
-				     session->s_mds);
-				ret = 0;
-			}
-			spin_unlock(&ci->i_ceph_lock);
-		}
-		mutex_unlock(&session->s_mutex);
-		ceph_put_mds_session(session);
-
-		if (!ret)
-			return ret;	/* mdsc->mutex is not held at this point */
-		mutex_lock(&mdsc->mutex);
-	}
-
-	mutex_unlock(&mdsc->mutex);
-	dout("check_cap_flush ok, flushed thru %lld\n", want_flush_seq);
-	return ret;
-}
-
-/*
- * Send every message queued on s_cap_releases_done to the mds.  Each
- * message is unlinked under s_cap_lock, then has its header front_len
- * set from front.iov_len and is handed to the connection with the
- * lock dropped.
- *
- * called under s_mutex
- */
-void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
-			    struct ceph_mds_session *session)
-{
-	struct list_head *done = &session->s_cap_releases_done;
-	struct ceph_msg *m;
-
-	dout("send_cap_releases mds%d\n", session->s_mds);
-	spin_lock(&session->s_cap_lock);
-	for (;;) {
-		if (list_empty(done))
-			break;
-		m = list_first_entry(done, struct ceph_msg, list_head);
-		list_del_init(&m->list_head);
-		spin_unlock(&session->s_cap_lock);
-
-		m->hdr.front_len = cpu_to_le32(m->front.iov_len);
-		dout("send_cap_releases mds%d %p\n", session->s_mds, m);
-		ceph_con_send(&session->s_con, m);
-
-		spin_lock(&session->s_cap_lock);
-	}
-	spin_unlock(&session->s_cap_lock);
-}
-
-/*
- * Throw away all cap releases recorded so far for @session, keeping
- * the preallocated messages for reuse.
- *
- * The in-progress message at the head of s_cap_releases (if there is
- * one) and every message on s_cap_releases_done are reset to empty
- * (num == 0, front length == header only).  The completed messages are
- * moved back onto s_cap_releases, and s_num_cap_releases is credited
- * with the number of entries discarded.  All done under s_cap_lock.
- */
-static void discard_cap_releases(struct ceph_mds_client *mdsc,
-				 struct ceph_mds_session *session)
-{
-	struct ceph_msg *msg;
-	struct ceph_mds_cap_release *head;
-	unsigned num;
-
-	dout("discard_cap_releases mds%d\n", session->s_mds);
-	spin_lock(&session->s_cap_lock);
-
-	/*
-	 * zero out the in-progress message.  list_first_entry() on an
-	 * empty list does not return a real message, so only touch it
-	 * when the list has one.
-	 */
-	if (!list_empty(&session->s_cap_releases)) {
-		msg = list_first_entry(&session->s_cap_releases,
-				       struct ceph_msg, list_head);
-		head = msg->front.iov_base;
-		num = le32_to_cpu(head->num);
-		dout("discard_cap_releases mds%d %p %u\n",
-		     session->s_mds, msg, num);
-		head->num = cpu_to_le32(0);
-		/* back to header-only, same as the requeued messages below */
-		msg->front.iov_len = sizeof(*head);
-		session->s_num_cap_releases += num;
-	}
-
-	/* requeue completed messages */
-	while (!list_empty(&session->s_cap_releases_done)) {
-		msg = list_first_entry(&session->s_cap_releases_done,
-				 struct ceph_msg, list_head);
-		list_del_init(&msg->list_head);
-
-		head = msg->front.iov_base;
-		num = le32_to_cpu(head->num);
-		dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg,
-		     num);
-		session->s_num_cap_releases += num;
-		head->num = cpu_to_le32(0);
-		msg->front.iov_len = sizeof(*head);
-		list_add(&msg->list_head, &session->s_cap_releases);
-	}
-
-	spin_unlock(&session->s_cap_lock);
-}
-
-/*
- * requests
- */
-
-/*
- * Create an mds request.
- *
- * Allocates a zeroed request for operation @op with direct mode @mode,
- * holding one kref.  r_resend_mds and r_fmode start out as -1 and
- * r_started as the current jiffies.  Returns ERR_PTR(-ENOMEM) if the
- * allocation fails.
- */
-struct ceph_mds_request *
-ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
-{
-	struct ceph_mds_request *req;
-
-	req = kzalloc(sizeof(*req), GFP_NOFS);
-	if (req == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	/* what to do, and where to send it */
-	req->r_mdsc = mdsc;
-	req->r_op = op;
-	req->r_direct_mode = mode;
-	req->r_resend_mds = -1;
-	req->r_fmode = -1;
-	req->r_started = jiffies;
-
-	/* refcount and synchronization */
-	kref_init(&req->r_kref);
-	mutex_init(&req->r_fill_mutex);
-	init_completion(&req->r_completion);
-	init_completion(&req->r_safe_completion);
-
-	/* list linkage */
-	INIT_LIST_HEAD(&req->r_unsafe_dir_item);
-	INIT_LIST_HEAD(&req->r_wait);
-	INIT_LIST_HEAD(&req->r_unsafe_item);
-
-	return req;
-}
-
-/*
- * return the oldest (lowest tid) request in the request tree, or NULL
- * if the tree is empty.  No reference is taken.
- *
- * called under mdsc->mutex.
- */
-static struct ceph_mds_request *__get_oldest_req(struct ceph_mds_client *mdsc)
-{
-	struct rb_node *first;
-
-	if (RB_EMPTY_ROOT(&mdsc->request_tree))
-		return NULL;
-
-	first = rb_first(&mdsc->request_tree);
-	return rb_entry(first, struct ceph_mds_request, r_node);
-}
-
-/*
- * Return the tid of the oldest request in the request tree, or 0 if
- * there is none.
- */
-static u64 __get_oldest_tid(struct ceph_mds_client *mdsc)
-{
-	struct ceph_mds_request *oldest = __get_oldest_req(mdsc);
-
-	return oldest ? oldest->r_tid : 0;
-}
-
-/*
- * Build a dentry's path.  Allocate on heap; caller must kfree.  Based
- * on build_path_from_dentry in fs/cifs/dir.c.
- *
- * If @stop_on_nosnap, generate path relative to the first non-snapped
- * inode.
- *
- * Encode hidden .snap dirs as a double /, i.e.
- *   foo/.snap/bar -> foo//bar
- *
- * The path is built in two passes: the first walks towards the root to
- * compute the length, the second fills the buffer from the end
- * backwards.  If the second pass does not end exactly at offset 0, or
- * rename_lock changed in the meantime, the buffer is freed and the
- * whole thing is retried.
- *
- * On success returns the NUL-terminated path, with *plen set to its
- * length (excluding the NUL) and *base set to the ino of the dentry at
- * which the walk stopped.  Returns ERR_PTR(-EINVAL) for a NULL dentry
- * or a NULL d_parent, and ERR_PTR(-ENOMEM) on allocation failure.
- */
-char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
-			   int stop_on_nosnap)
-{
-	struct dentry *temp;
-	char *path;
-	int len, pos;
-	unsigned seq;
-
-	if (dentry == NULL)
-		return ERR_PTR(-EINVAL);
-
-retry:
-	len = 0;
-	seq = read_seqbegin(&rename_lock);	/* rechecked with read_seqretry() below */
-	rcu_read_lock();
-	for (temp = dentry; !IS_ROOT(temp);) {
-		struct inode *inode = temp->d_inode;
-		if (inode && ceph_snap(inode) == CEPH_SNAPDIR)
-			len++;  /* slash only */
-		else if (stop_on_nosnap && inode &&
-			 ceph_snap(inode) == CEPH_NOSNAP)
-			break;
-		else
-			len += 1 + temp->d_name.len;	/* '/' + name */
-		temp = temp->d_parent;
-		if (temp == NULL) {
-			rcu_read_unlock();
-			pr_err("build_path corrupt dentry %p\n", dentry);
-			return ERR_PTR(-EINVAL);
-		}
-	}
-	rcu_read_unlock();
-	if (len)
-		len--;  /* no leading '/' */
-
-	path = kmalloc(len+1, GFP_NOFS);
-	if (path == NULL)
-		return ERR_PTR(-ENOMEM);
-	pos = len;
-	path[pos] = 0;	/* trailing null */
-	rcu_read_lock();
-	for (temp = dentry; !IS_ROOT(temp) && pos != 0; ) {
-		struct inode *inode;
-
-		spin_lock(&temp->d_lock);	/* held while d_inode and d_name are read */
-		inode = temp->d_inode;
-		if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
-			dout("build_path path+%d: %p SNAPDIR\n",
-			     pos, temp);
-		} else if (stop_on_nosnap && inode &&
-			   ceph_snap(inode) == CEPH_NOSNAP) {
-			spin_unlock(&temp->d_lock);
-			break;
-		} else {
-			pos -= temp->d_name.len;
-			if (pos < 0) {	/* name no longer fits; retried below */
-				spin_unlock(&temp->d_lock);
-				break;
-			}
-			strncpy(path + pos, temp->d_name.name,
-				temp->d_name.len);	/* copies at most d_name.len
-							 * bytes; the '/' separators
-							 * and the trailing NUL are
-							 * written separately */
-		}
-		spin_unlock(&temp->d_lock);
-		if (pos)
-			path[--pos] = '/';
-		temp = temp->d_parent;
-		if (temp == NULL) {
-			rcu_read_unlock();
-			pr_err("build_path corrupt dentry\n");
-			kfree(path);
-			return ERR_PTR(-EINVAL);
-		}
-	}
-	rcu_read_unlock();
-	if (pos != 0 || read_seqretry(&rename_lock, seq)) {
-		pr_err("build_path did not end path lookup where "
-		       "expected, namelen is %d, pos is %d\n", len, pos);
-		/* presumably this is only possible if racing with a
-		   rename of one of the parent directories (we can not
-		   lock the dentries above us to prevent this, but
-		   retrying should be harmless) */
-		kfree(path);
-		goto retry;
-	}
-
-	*base = ceph_ino(temp->d_inode);
-	*plen = len;
-	dout("build_path on %p %d built %llx '%.*s'\n",
-	     dentry, dentry->d_count, *base, len, path);
-	return path;
-}
-
-/*
- * Describe @dentry for an mds request as an (ino, path) pair.
- *
- * If the parent directory is not snapped, the pair is the parent's ino
- * plus the dentry's own name.  The name is borrowed, not copied, and
- * *pfreepath is left untouched.  Otherwise the path is built with
- * ceph_mdsc_build_path() and *pfreepath is set to 1, marking *ppath as
- * heap-allocated.
- *
- * Returns 0, or the negative errno from ceph_mdsc_build_path().
- */
-static int build_dentry_path(struct dentry *dentry,
-			     const char **ppath, int *ppathlen, u64 *pino,
-			     int *pfreepath)
-{
-	char *built;
-
-	if (ceph_snap(dentry->d_parent->d_inode) != CEPH_NOSNAP) {
-		built = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
-		if (IS_ERR(built))
-			return PTR_ERR(built);
-		*ppath = built;
-		*pfreepath = 1;
-		return 0;
-	}
-
-	*pino = ceph_ino(dentry->d_parent->d_inode);
-	*ppath = dentry->d_name.name;
-	*ppathlen = dentry->d_name.len;
-	return 0;
-}
-
-/*
- * Describe @inode for an mds request as an (ino, path) pair.
- *
- * A non-snapped inode is identified by its ino alone: *ppathlen is set
- * to 0 and *ppath / *pfreepath are left untouched.  Otherwise a path
- * is built from an alias dentry of the inode with
- * ceph_mdsc_build_path() and *pfreepath is set to 1, marking *ppath as
- * heap-allocated.
- *
- * Returns 0, or the negative errno from ceph_mdsc_build_path().
- */
-static int build_inode_path(struct inode *inode,
-			    const char **ppath, int *ppathlen, u64 *pino,
-			    int *pfreepath)
-{
-	struct dentry *alias;
-	char *built;
-
-	if (ceph_snap(inode) != CEPH_NOSNAP) {
-		alias = d_find_alias(inode);
-		built = ceph_mdsc_build_path(alias, ppathlen, pino, 1);
-		dput(alias);
-		if (IS_ERR(built))
-			return PTR_ERR(built);
-		*ppath = built;
-		*pfreepath = 1;
-		return 0;
-	}
-
-	*pino = ceph_ino(inode);
-	*ppathlen = 0;
-	return 0;
-}
-
-/*
- * request arguments may be specified via an inode *, a dentry *, or
- * an explicit ino+path.
- *
- * The first of @rinode, @rdentry, and (@rpath or @rino) that is set is
- * used to fill in *ino, *ppath and *pathlen.  *freepath is set by the
- * inode/dentry helpers when *ppath was heap-allocated.  If none is
- * set, the outputs are left untouched.
- *
- * An explicit @rino may come without @rpath; the path length is then
- * 0 and *ppath is NULL.
- *
- * Returns 0, or a negative errno from the path-building helpers.
- */
-static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
-				  const char *rpath, u64 rino,
-				  const char **ppath, int *pathlen,
-				  u64 *ino, int *freepath)
-{
-	int r = 0;
-
-	if (rinode) {
-		r = build_inode_path(rinode, ppath, pathlen, ino, freepath);
-		dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
-		     ceph_snap(rinode));
-	} else if (rdentry) {
-		r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath);
-		dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
-		     *ppath);
-	} else if (rpath || rino) {
-		*ino = rino;
-		*ppath = rpath;
-		/* rpath is NULL when only rino was given */
-		*pathlen = rpath ? strlen(rpath) : 0;
-		dout(" path %.*s\n", *pathlen, rpath);
-	}
-
-	return r;
-}
-
-/*
- * called under mdsc->mutex
- */
-static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
-					       struct ceph_mds_request *req,
-					       int mds)
-{
-	struct ceph_msg *msg;
-	struct ceph_mds_request_head *head;
-	const char *path1 = NULL;
-	const char *path2 = NULL;
-	u64 ino1 = 0, ino2 = 0;
-	int pathlen1 = 0, pathlen2 = 0;
-	int freepath1 = 0, freepath2 = 0;
-	int len;
-	u16 releases;
-	void *p, *end;
-	int ret;
-
-	ret = set_request_path_attr(req->r_inode, req->r_dentry,
-			      req->r_path1, req->r_ino1.ino,
-			      &path1, &pathlen1, &ino1, &freepath1);
-	if (ret < 0) {
-		msg = ERR_PTR(ret);
-		goto out;
-	}
-
-	ret = set_request_path_attr(NULL, req->r_old_dentry,
-			      req->r_path2, req->r_ino2.ino,
-			      &path2, &pathlen2, &ino2, &freepath2);
-	if (ret < 0) {
-		msg = ERR_PTR(ret);
-		goto out_free1;
-	}
-
-	len = sizeof(*head) +
-		pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64));
-
-	/* calculate (max) length for cap releases */
-	len += sizeof(struct ceph_mds_request_release) *
-		(!!req->r_inode_drop + !!req->r_dentry_drop +
-		 !!req->r_old_inode_drop + !!req->r_old_dentry_drop);
-	if (req->r_dentry_drop)
-		len += req->r_dentry->d_name.len;
-	if (req->r_old_dentry_drop)
-		len += req->r_old_dentry->d_name.len;
-
-	msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS, false);
-	if (!msg) {
-		msg = ERR_PTR(-ENOMEM);
-		goto out_free2;
-	}
-
-	msg->hdr.tid = cpu_to_le64(req->r_tid);
-
-	head = msg->front.iov_base;
-	p = msg->front.iov_base + sizeof(*head);
-	end = msg->front.iov_base + msg->front.iov_len;
-
-	head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
-	head->op = cpu_to_le32(req->r_op);
-	head->caller_uid = cpu_to_le32(req->r_uid);
-	head->caller_gid = cpu_to_le32(req->r_gid);
-	head->args = req->r_args;
-
-	ceph_encode_filepath(&p, end, ino1, path1);
-	ceph_encode_filepath(&p, end, ino2, path2);
-
-	/* make note of release offset, in case we need to replay */
-	req->r_request_release_offset = p - msg->front.iov_base;
-
-	/* cap releases */
-	releases = 0;
-	if (req->r_inode_drop)
-		releases += ceph_encode_inode_release(&p,
-		      req->r_inode ? req->r_inode : req->r_dentry->d_inode,
-		      mds, req->r_inode_drop, req->r_inode_unless, 0);
-	if (req->r_dentry_drop)
-		releases += ceph_encode_dentry_release(&p, req->r_dentry,
-		       mds, req->r_dentry_drop, req->r_dentry_unless);
-	if (req->r_old_dentry_drop)
-		releases += ceph_encode_dentry_release(&p, req->r_old_dentry,
-		       mds, req->r_old_dentry_drop, req->r_old_dentry_unless);
-	if (req->r_old_inode_drop)
-		releases += ceph_encode_inode_release(&p,
-		      req->r_old_dentry->d_inode,
-		      mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
-	head->num_releases = cpu_to_le16(releases);
-
-	BUG_ON(p > end);
-	msg->front.iov_len = p - msg->front.iov_base;
-	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
-
-	msg->pages = req->r_pages;
-	msg->nr_pages = req->r_num_pages;
-	msg->hdr.data_len = cpu_to_le32(req->r_data_len);
-	msg->hdr.data_off = cpu_to_le16(0);
-
-out_free2:
-	if (freepath2)
-		kfree((char *)path2);
-out_free1:
-	if (freepath1)
-		kfree((char *)path1);
-out:
-	return msg;
-}
-
-/*
- * called under mdsc->mutex if error, under no mutex if
- * success.
- */
-static void complete_request(struct ceph_mds_client *mdsc,
-			     struct ceph_mds_request *req)
-{
-	if (req->r_callback)
-		req->r_callback(mdsc, req);
-	else
-		complete_all(&req->r_completion);
-}
-
-/*
- * called under mdsc->mutex
- */
-static int __prepare_send_request(struct ceph_mds_client *mdsc,
-				  struct ceph_mds_request *req,
-				  int mds)
-{
-	struct ceph_mds_request_head *rhead;
-	struct ceph_msg *msg;
-	int flags = 0;
-
-	req->r_attempts++;
-	if (req->r_inode) {
-		struct ceph_cap *cap =
-			ceph_get_cap_for_mds(ceph_inode(req->r_inode), mds);
-
-		if (cap)
-			req->r_sent_on_mseq = cap->mseq;
-		else
-			req->r_sent_on_mseq = -1;
-	}
-	dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
-	     req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
-
-	if (req->r_got_unsafe) {
-		/*
-		 * Replay.  Do not regenerate message (and rebuild
-		 * paths, etc.); just use the original message.
-		 * Rebuilding paths will break for renames because
-		 * d_move mangles the src name.
-		 */
-		msg = req->r_request;
-		rhead = msg->front.iov_base;
-
-		flags = le32_to_cpu(rhead->flags);
-		flags |= CEPH_MDS_FLAG_REPLAY;
-		rhead->flags = cpu_to_le32(flags);
-
-		if (req->r_target_inode)
-			rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
-
-		rhead->num_retry = req->r_attempts - 1;
-
-		/* remove cap/dentry releases from message */
-		rhead->num_releases = 0;
-		msg->hdr.front_len = cpu_to_le32(req->r_request_release_offset);
-		msg->front.iov_len = req->r_request_release_offset;
-		return 0;
-	}
-
-	if (req->r_request) {
-		ceph_msg_put(req->r_request);
-		req->r_request = NULL;
-	}
-	msg = create_request_message(mdsc, req, mds);
-	if (IS_ERR(msg)) {
-		req->r_err = PTR_ERR(msg);
-		complete_request(mdsc, req);
-		return PTR_ERR(msg);
-	}
-	req->r_request = msg;
-
-	rhead = msg->front.iov_base;
-	rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
-	if (req->r_got_unsafe)
-		flags |= CEPH_MDS_FLAG_REPLAY;
-	if (req->r_locked_dir)
-		flags |= CEPH_MDS_FLAG_WANT_DENTRY;
-	rhead->flags = cpu_to_le32(flags);
-	rhead->num_fwd = req->r_num_fwd;
-	rhead->num_retry = req->r_attempts - 1;
-	rhead->ino = 0;
-
-	dout(" r_locked_dir = %p\n", req->r_locked_dir);
-	return 0;
-}
-
-/*
- * send request, or put it on the appropriate wait list.
- */
-static int __do_request(struct ceph_mds_client *mdsc,
-			struct ceph_mds_request *req)
-{
-	struct ceph_mds_session *session = NULL;
-	int mds = -1;
-	int err = -EAGAIN;
-
-	if (req->r_err || req->r_got_result)
-		goto out;
-
-	if (req->r_timeout &&
-	    time_after_eq(jiffies, req->r_started + req->r_timeout)) {
-		dout("do_request timed out\n");
-		err = -EIO;
-		goto finish;
-	}
-
-	put_request_session(req);
-
-	mds = __choose_mds(mdsc, req);
-	if (mds < 0 ||
-	    ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
-		dout("do_request no mds or not active, waiting for map\n");
-		list_add(&req->r_wait, &mdsc->waiting_for_map);
-		goto out;
-	}
-
-	/* get, open session */
-	session = __ceph_lookup_mds_session(mdsc, mds);
-	if (!session) {
-		session = register_session(mdsc, mds);
-		if (IS_ERR(session)) {
-			err = PTR_ERR(session);
-			goto finish;
-		}
-	}
-	req->r_session = get_session(session);
-
-	dout("do_request mds%d session %p state %s\n", mds, session,
-	     session_state_name(session->s_state));
-	if (session->s_state != CEPH_MDS_SESSION_OPEN &&
-	    session->s_state != CEPH_MDS_SESSION_HUNG) {
-		if (session->s_state == CEPH_MDS_SESSION_NEW ||
-		    session->s_state == CEPH_MDS_SESSION_CLOSING)
-			__open_session(mdsc, session);
-		list_add(&req->r_wait, &session->s_waiting);
-		goto out_session;
-	}
-
-	/* send request */
-	req->r_resend_mds = -1;   /* forget any previous mds hint */
-
-	if (req->r_request_started == 0)   /* note request start time */
-		req->r_request_started = jiffies;
-
-	err = __prepare_send_request(mdsc, req, mds);
-	if (!err) {
-		ceph_msg_get(req->r_request);
-		ceph_con_send(&session->s_con, req->r_request);
-	}
-
-out_session:
-	ceph_put_mds_session(session);
-out:
-	return err;
-
-finish:
-	req->r_err = err;
-	complete_request(mdsc, req);
-	goto out;
-}
-
-/*
- * called under mdsc->mutex
- */
-static void __wake_requests(struct ceph_mds_client *mdsc,
-			    struct list_head *head)
-{
-	struct ceph_mds_request *req, *nreq;
-
-	list_for_each_entry_safe(req, nreq, head, r_wait) {
-		list_del_init(&req->r_wait);
-		__do_request(mdsc, req);
-	}
-}
-
-/*
- * Wake up threads with requests pending for @mds, so that they can
- * resubmit their requests to a possibly different mds.
- */
-static void kick_requests(struct ceph_mds_client *mdsc, int mds)
-{
-	struct ceph_mds_request *req;
-	struct rb_node *p;
-
-	dout("kick_requests mds%d\n", mds);
-	for (p = rb_first(&mdsc->request_tree); p; p = rb_next(p)) {
-		req = rb_entry(p, struct ceph_mds_request, r_node);
-		if (req->r_got_unsafe)
-			continue;
-		if (req->r_session &&
-		    req->r_session->s_mds == mds) {
-			dout(" kicking tid %llu\n", req->r_tid);
-			__do_request(mdsc, req);
-		}
-	}
-}
-
-void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
-			      struct ceph_mds_request *req)
-{
-	dout("submit_request on %p\n", req);
-	mutex_lock(&mdsc->mutex);
-	__register_request(mdsc, req, NULL);
-	__do_request(mdsc, req);
-	mutex_unlock(&mdsc->mutex);
-}
-
-/*
- * Synchrously perform an mds request.  Take care of all of the
- * session setup, forwarding, retry details.
- */
-int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
-			 struct inode *dir,
-			 struct ceph_mds_request *req)
-{
-	int err;
-
-	dout("do_request on %p\n", req);
-
-	/* take CAP_PIN refs for r_inode, r_locked_dir, r_old_dentry */
-	if (req->r_inode)
-		ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
-	if (req->r_locked_dir)
-		ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
-	if (req->r_old_dentry)
-		ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
-				  CEPH_CAP_PIN);
-
-	/* issue */
-	mutex_lock(&mdsc->mutex);
-	__register_request(mdsc, req, dir);
-	__do_request(mdsc, req);
-
-	if (req->r_err) {
-		err = req->r_err;
-		__unregister_request(mdsc, req);
-		dout("do_request early error %d\n", err);
-		goto out;
-	}
-
-	/* wait */
-	mutex_unlock(&mdsc->mutex);
-	dout("do_request waiting\n");
-	if (req->r_timeout) {
-		err = (long)wait_for_completion_killable_timeout(
-			&req->r_completion, req->r_timeout);
-		if (err == 0)
-			err = -EIO;
-	} else {
-		err = wait_for_completion_killable(&req->r_completion);
-	}
-	dout("do_request waited, got %d\n", err);
-	mutex_lock(&mdsc->mutex);
-
-	/* only abort if we didn't race with a real reply */
-	if (req->r_got_result) {
-		err = le32_to_cpu(req->r_reply_info.head->result);
-	} else if (err < 0) {
-		dout("aborted request %lld with %d\n", req->r_tid, err);
-
-		/*
-		 * ensure we aren't running concurrently with
-		 * ceph_fill_trace or ceph_readdir_prepopulate, which
-		 * rely on locks (dir mutex) held by our caller.
-		 */
-		mutex_lock(&req->r_fill_mutex);
-		req->r_err = err;
-		req->r_aborted = true;
-		mutex_unlock(&req->r_fill_mutex);
-
-		if (req->r_locked_dir &&
-		    (req->r_op & CEPH_MDS_OP_WRITE))
-			ceph_invalidate_dir_request(req);
-	} else {
-		err = req->r_err;
-	}
-
-out:
-	mutex_unlock(&mdsc->mutex);
-	dout("do_request %p done, result %d\n", req, err);
-	return err;
-}
-
-/*
- * Invalidate dir D_COMPLETE, dentry lease state on an aborted MDS
- * namespace request.
- */
-void ceph_invalidate_dir_request(struct ceph_mds_request *req)
-{
-	struct inode *inode = req->r_locked_dir;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-
-	dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode);
-	spin_lock(&ci->i_ceph_lock);
-	ceph_dir_clear_complete(inode);
-	ci->i_release_count++;
-	spin_unlock(&ci->i_ceph_lock);
-
-	if (req->r_dentry)
-		ceph_invalidate_dentry_lease(req->r_dentry);
-	if (req->r_old_dentry)
-		ceph_invalidate_dentry_lease(req->r_old_dentry);
-}
-
-/*
- * Handle mds reply.
- *
- * We take the session mutex and parse and process the reply immediately.
- * This preserves the logical ordering of replies, capabilities, etc., sent
- * by the MDS as they are applied to our local cache.
- */
-static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
-{
-	struct ceph_mds_client *mdsc = session->s_mdsc;
-	struct ceph_mds_request *req;
-	struct ceph_mds_reply_head *head = msg->front.iov_base;
-	struct ceph_mds_reply_info_parsed *rinfo;  /* parsed reply info */
-	u64 tid;
-	int err, result;
-	int mds = session->s_mds;
-
-	if (msg->front.iov_len < sizeof(*head)) {
-		pr_err("mdsc_handle_reply got corrupt (short) reply\n");
-		ceph_msg_dump(msg);
-		return;
-	}
-
-	/* get request, session */
-	tid = le64_to_cpu(msg->hdr.tid);
-	mutex_lock(&mdsc->mutex);
-	req = __lookup_request(mdsc, tid);
-	if (!req) {
-		dout("handle_reply on unknown tid %llu\n", tid);
-		mutex_unlock(&mdsc->mutex);
-		return;
-	}
-	dout("handle_reply %p\n", req);
-
-	/* correct session? */
-	if (req->r_session != session) {
-		pr_err("mdsc_handle_reply got %llu on session mds%d"
-		       " not mds%d\n", tid, session->s_mds,
-		       req->r_session ? req->r_session->s_mds : -1);
-		mutex_unlock(&mdsc->mutex);
-		goto out;
-	}
-
-	/* dup? */
-	if ((req->r_got_unsafe && !head->safe) ||
-	    (req->r_got_safe && head->safe)) {
-		pr_warning("got a dup %s reply on %llu from mds%d\n",
-			   head->safe ? "safe" : "unsafe", tid, mds);
-		mutex_unlock(&mdsc->mutex);
-		goto out;
-	}
-	if (req->r_got_safe && !head->safe) {
-		pr_warning("got unsafe after safe on %llu from mds%d\n",
-			   tid, mds);
-		mutex_unlock(&mdsc->mutex);
-		goto out;
-	}
-
-	result = le32_to_cpu(head->result);
-
-	/*
-	 * Handle an ESTALE
-	 * if we're not talking to the authority, send to them
-	 * if the authority has changed while we weren't looking,
-	 * send to new authority
-	 * Otherwise we just have to return an ESTALE
-	 */
-	if (result == -ESTALE) {
-		dout("got ESTALE on request %llu", req->r_tid);
-		if (!req->r_inode) {
-			/* do nothing; not an authority problem */
-		} else if (req->r_direct_mode != USE_AUTH_MDS) {
-			dout("not using auth, setting for that now");
-			req->r_direct_mode = USE_AUTH_MDS;
-			__do_request(mdsc, req);
-			mutex_unlock(&mdsc->mutex);
-			goto out;
-		} else  {
-			struct ceph_inode_info *ci = ceph_inode(req->r_inode);
-			struct ceph_cap *cap = NULL;
-
-			if (req->r_session)
-				cap = ceph_get_cap_for_mds(ci,
-						   req->r_session->s_mds);
-
-			dout("already using auth");
-			if ((!cap || cap != ci->i_auth_cap) ||
-			    (cap->mseq != req->r_sent_on_mseq)) {
-				dout("but cap changed, so resending");
-				__do_request(mdsc, req);
-				mutex_unlock(&mdsc->mutex);
-				goto out;
-			}
-		}
-		dout("have to return ESTALE on request %llu", req->r_tid);
-	}
-
-
-	if (head->safe) {
-		req->r_got_safe = true;
-		__unregister_request(mdsc, req);
-		complete_all(&req->r_safe_completion);
-
-		if (req->r_got_unsafe) {
-			/*
-			 * We already handled the unsafe response, now do the
-			 * cleanup.  No need to examine the response; the MDS
-			 * doesn't include any result info in the safe
-			 * response.  And even if it did, there is nothing
-			 * useful we could do with a revised return value.
-			 */
-			dout("got safe reply %llu, mds%d\n", tid, mds);
-			list_del_init(&req->r_unsafe_item);
-
-			/* last unsafe request during umount? */
-			if (mdsc->stopping && !__get_oldest_req(mdsc))
-				complete_all(&mdsc->safe_umount_waiters);
-			mutex_unlock(&mdsc->mutex);
-			goto out;
-		}
-	} else {
-		req->r_got_unsafe = true;
-		list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe);
-	}
-
-	dout("handle_reply tid %lld result %d\n", tid, result);
-	rinfo = &req->r_reply_info;
-	err = parse_reply_info(msg, rinfo, session->s_con.peer_features);
-	mutex_unlock(&mdsc->mutex);
-
-	mutex_lock(&session->s_mutex);
-	if (err < 0) {
-		pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
-		ceph_msg_dump(msg);
-		goto out_err;
-	}
-
-	/* snap trace */
-	if (rinfo->snapblob_len) {
-		down_write(&mdsc->snap_rwsem);
-		ceph_update_snap_trace(mdsc, rinfo->snapblob,
-			       rinfo->snapblob + rinfo->snapblob_len,
-			       le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP);
-		downgrade_write(&mdsc->snap_rwsem);
-	} else {
-		down_read(&mdsc->snap_rwsem);
-	}
-
-	/* insert trace into our cache */
-	mutex_lock(&req->r_fill_mutex);
-	err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
-	if (err == 0) {
-		if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK &&
-		    rinfo->dir_nr)
-			ceph_readdir_prepopulate(req, req->r_session);
-		ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
-	}
-	mutex_unlock(&req->r_fill_mutex);
-
-	up_read(&mdsc->snap_rwsem);
-out_err:
-	mutex_lock(&mdsc->mutex);
-	if (!req->r_aborted) {
-		if (err) {
-			req->r_err = err;
-		} else {
-			req->r_reply = msg;
-			ceph_msg_get(msg);
-			req->r_got_result = true;
-		}
-	} else {
-		dout("reply arrived after request %lld was aborted\n", tid);
-	}
-	mutex_unlock(&mdsc->mutex);
-
-	ceph_add_cap_releases(mdsc, req->r_session);
-	mutex_unlock(&session->s_mutex);
-
-	/* kick calling process */
-	complete_request(mdsc, req);
-out:
-	ceph_mdsc_put_request(req);
-	return;
-}
-
-
-
-/*
- * handle mds notification that our request has been forwarded.
- */
-static void handle_forward(struct ceph_mds_client *mdsc,
-			   struct ceph_mds_session *session,
-			   struct ceph_msg *msg)
-{
-	struct ceph_mds_request *req;
-	u64 tid = le64_to_cpu(msg->hdr.tid);
-	u32 next_mds;
-	u32 fwd_seq;
-	int err = -EINVAL;
-	void *p = msg->front.iov_base;
-	void *end = p + msg->front.iov_len;
-
-	ceph_decode_need(&p, end, 2*sizeof(u32), bad);
-	next_mds = ceph_decode_32(&p);
-	fwd_seq = ceph_decode_32(&p);
-
-	mutex_lock(&mdsc->mutex);
-	req = __lookup_request(mdsc, tid);
-	if (!req) {
-		dout("forward tid %llu to mds%d - req dne\n", tid, next_mds);
-		goto out;  /* dup reply? */
-	}
-
-	if (req->r_aborted) {
-		dout("forward tid %llu aborted, unregistering\n", tid);
-		__unregister_request(mdsc, req);
-	} else if (fwd_seq <= req->r_num_fwd) {
-		dout("forward tid %llu to mds%d - old seq %d <= %d\n",
-		     tid, next_mds, req->r_num_fwd, fwd_seq);
-	} else {
-		/* resend. forward race not possible; mds would drop */
-		dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
-		BUG_ON(req->r_err);
-		BUG_ON(req->r_got_result);
-		req->r_num_fwd = fwd_seq;
-		req->r_resend_mds = next_mds;
-		put_request_session(req);
-		__do_request(mdsc, req);
-	}
-	ceph_mdsc_put_request(req);
-out:
-	mutex_unlock(&mdsc->mutex);
-	return;
-
-bad:
-	pr_err("mdsc_handle_forward decode error err=%d\n", err);
-}
-
-/*
- * handle a mds session control message
- */
-static void handle_session(struct ceph_mds_session *session,
-			   struct ceph_msg *msg)
-{
-	struct ceph_mds_client *mdsc = session->s_mdsc;
-	u32 op;
-	u64 seq;
-	int mds = session->s_mds;
-	struct ceph_mds_session_head *h = msg->front.iov_base;
-	int wake = 0;
-
-	/* decode */
-	if (msg->front.iov_len != sizeof(*h))
-		goto bad;
-	op = le32_to_cpu(h->op);
-	seq = le64_to_cpu(h->seq);
-
-	mutex_lock(&mdsc->mutex);
-	if (op == CEPH_SESSION_CLOSE)
-		__unregister_session(mdsc, session);
-	/* FIXME: this ttl calculation is generous */
-	session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
-	mutex_unlock(&mdsc->mutex);
-
-	mutex_lock(&session->s_mutex);
-
-	dout("handle_session mds%d %s %p state %s seq %llu\n",
-	     mds, ceph_session_op_name(op), session,
-	     session_state_name(session->s_state), seq);
-
-	if (session->s_state == CEPH_MDS_SESSION_HUNG) {
-		session->s_state = CEPH_MDS_SESSION_OPEN;
-		pr_info("mds%d came back\n", session->s_mds);
-	}
-
-	switch (op) {
-	case CEPH_SESSION_OPEN:
-		if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
-			pr_info("mds%d reconnect success\n", session->s_mds);
-		session->s_state = CEPH_MDS_SESSION_OPEN;
-		renewed_caps(mdsc, session, 0);
-		wake = 1;
-		if (mdsc->stopping)
-			__close_session(mdsc, session);
-		break;
-
-	case CEPH_SESSION_RENEWCAPS:
-		if (session->s_renew_seq == seq)
-			renewed_caps(mdsc, session, 1);
-		break;
-
-	case CEPH_SESSION_CLOSE:
-		if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
-			pr_info("mds%d reconnect denied\n", session->s_mds);
-		remove_session_caps(session);
-		wake = 1; /* for good measure */
-		wake_up_all(&mdsc->session_close_wq);
-		kick_requests(mdsc, mds);
-		break;
-
-	case CEPH_SESSION_STALE:
-		pr_info("mds%d caps went stale, renewing\n",
-			session->s_mds);
-		spin_lock(&session->s_gen_ttl_lock);
-		session->s_cap_gen++;
-		session->s_cap_ttl = jiffies - 1;
-		spin_unlock(&session->s_gen_ttl_lock);
-		send_renew_caps(mdsc, session);
-		break;
-
-	case CEPH_SESSION_RECALL_STATE:
-		trim_caps(mdsc, session, le32_to_cpu(h->max_caps));
-		break;
-
-	default:
-		pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds);
-		WARN_ON(1);
-	}
-
-	mutex_unlock(&session->s_mutex);
-	if (wake) {
-		mutex_lock(&mdsc->mutex);
-		__wake_requests(mdsc, &session->s_waiting);
-		mutex_unlock(&mdsc->mutex);
-	}
-	return;
-
-bad:
-	pr_err("mdsc_handle_session corrupt message mds%d len %d\n", mds,
-	       (int)msg->front.iov_len);
-	ceph_msg_dump(msg);
-	return;
-}
-
-
-/*
- * called under session->mutex.
- */
-static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
-				   struct ceph_mds_session *session)
-{
-	struct ceph_mds_request *req, *nreq;
-	int err;
-
-	dout("replay_unsafe_requests mds%d\n", session->s_mds);
-
-	mutex_lock(&mdsc->mutex);
-	list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) {
-		err = __prepare_send_request(mdsc, req, session->s_mds);
-		if (!err) {
-			ceph_msg_get(req->r_request);
-			ceph_con_send(&session->s_con, req->r_request);
-		}
-	}
-	mutex_unlock(&mdsc->mutex);
-}
-
-/*
- * Encode information about a cap for a reconnect with the MDS.
- */
-static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
-			  void *arg)
-{
-	union {
-		struct ceph_mds_cap_reconnect v2;
-		struct ceph_mds_cap_reconnect_v1 v1;
-	} rec;
-	size_t reclen;
-	struct ceph_inode_info *ci;
-	struct ceph_reconnect_state *recon_state = arg;
-	struct ceph_pagelist *pagelist = recon_state->pagelist;
-	char *path;
-	int pathlen, err;
-	u64 pathbase;
-	struct dentry *dentry;
-
-	ci = cap->ci;
-
-	dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
-	     inode, ceph_vinop(inode), cap, cap->cap_id,
-	     ceph_cap_string(cap->issued));
-	err = ceph_pagelist_encode_64(pagelist, ceph_ino(inode));
-	if (err)
-		return err;
-
-	dentry = d_find_alias(inode);
-	if (dentry) {
-		path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0);
-		if (IS_ERR(path)) {
-			err = PTR_ERR(path);
-			goto out_dput;
-		}
-	} else {
-		path = NULL;
-		pathlen = 0;
-	}
-	err = ceph_pagelist_encode_string(pagelist, path, pathlen);
-	if (err)
-		goto out_free;
-
-	spin_lock(&ci->i_ceph_lock);
-	cap->seq = 0;        /* reset cap seq */
-	cap->issue_seq = 0;  /* and issue_seq */
-
-	if (recon_state->flock) {
-		rec.v2.cap_id = cpu_to_le64(cap->cap_id);
-		rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
-		rec.v2.issued = cpu_to_le32(cap->issued);
-		rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
-		rec.v2.pathbase = cpu_to_le64(pathbase);
-		rec.v2.flock_len = 0;
-		reclen = sizeof(rec.v2);
-	} else {
-		rec.v1.cap_id = cpu_to_le64(cap->cap_id);
-		rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
-		rec.v1.issued = cpu_to_le32(cap->issued);
-		rec.v1.size = cpu_to_le64(inode->i_size);
-		ceph_encode_timespec(&rec.v1.mtime, &inode->i_mtime);
-		ceph_encode_timespec(&rec.v1.atime, &inode->i_atime);
-		rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
-		rec.v1.pathbase = cpu_to_le64(pathbase);
-		reclen = sizeof(rec.v1);
-	}
-	spin_unlock(&ci->i_ceph_lock);
-
-	if (recon_state->flock) {
-		int num_fcntl_locks, num_flock_locks;
-		struct ceph_pagelist_cursor trunc_point;
-
-		ceph_pagelist_set_cursor(pagelist, &trunc_point);
-		do {
-			lock_flocks();
-			ceph_count_locks(inode, &num_fcntl_locks,
-					 &num_flock_locks);
-			rec.v2.flock_len = (2*sizeof(u32) +
-					    (num_fcntl_locks+num_flock_locks) *
-					    sizeof(struct ceph_filelock));
-			unlock_flocks();
-
-			/* pre-alloc pagelist */
-			ceph_pagelist_truncate(pagelist, &trunc_point);
-			err = ceph_pagelist_append(pagelist, &rec, reclen);
-			if (!err)
-				err = ceph_pagelist_reserve(pagelist,
-							    rec.v2.flock_len);
-
-			/* encode locks */
-			if (!err) {
-				lock_flocks();
-				err = ceph_encode_locks(inode,
-							pagelist,
-							num_fcntl_locks,
-							num_flock_locks);
-				unlock_flocks();
-			}
-		} while (err == -ENOSPC);
-	} else {
-		err = ceph_pagelist_append(pagelist, &rec, reclen);
-	}
-
-out_free:
-	kfree(path);
-out_dput:
-	dput(dentry);
-	return err;
-}
-
-
-/*
- * If an MDS fails and recovers, clients need to reconnect in order to
- * reestablish shared state.  This includes all caps issued through
- * this session _and_ the snap_realm hierarchy.  Because it's not
- * clear which snap realms the mds cares about, we send everything we
- * know about.. that ensures we'll then get any new info the
- * recovering MDS might have.
- *
- * This is a relatively heavyweight operation, but it's rare.
- *
- * called with mdsc->mutex held.
- */
-static void send_mds_reconnect(struct ceph_mds_client *mdsc,
-			       struct ceph_mds_session *session)
-{
-	struct ceph_msg *reply;
-	struct rb_node *p;
-	int mds = session->s_mds;
-	int err = -ENOMEM;
-	struct ceph_pagelist *pagelist;
-	struct ceph_reconnect_state recon_state;
-
-	pr_info("mds%d reconnect start\n", mds);
-
-	pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
-	if (!pagelist)
-		goto fail_nopagelist;
-	ceph_pagelist_init(pagelist);
-
-	reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS, false);
-	if (!reply)
-		goto fail_nomsg;
-
-	mutex_lock(&session->s_mutex);
-	session->s_state = CEPH_MDS_SESSION_RECONNECTING;
-	session->s_seq = 0;
-
-	ceph_con_open(&session->s_con,
-		      ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
-
-	/* replay unsafe requests */
-	replay_unsafe_requests(mdsc, session);
-
-	down_read(&mdsc->snap_rwsem);
-
-	dout("session %p state %s\n", session,
-	     session_state_name(session->s_state));
-
-	/* drop old cap expires; we're about to reestablish that state */
-	discard_cap_releases(mdsc, session);
-
-	/* traverse this session's caps */
-	err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps);
-	if (err)
-		goto fail;
-
-	recon_state.pagelist = pagelist;
-	recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK;
-	err = iterate_session_caps(session, encode_caps_cb, &recon_state);
-	if (err < 0)
-		goto fail;
-
-	/*
-	 * snaprealms.  we provide mds with the ino, seq (version), and
-	 * parent for all of our realms.  If the mds has any newer info,
-	 * it will tell us.
-	 */
-	for (p = rb_first(&mdsc->snap_realms); p; p = rb_next(p)) {
-		struct ceph_snap_realm *realm =
-			rb_entry(p, struct ceph_snap_realm, node);
-		struct ceph_mds_snaprealm_reconnect sr_rec;
-
-		dout(" adding snap realm %llx seq %lld parent %llx\n",
-		     realm->ino, realm->seq, realm->parent_ino);
-		sr_rec.ino = cpu_to_le64(realm->ino);
-		sr_rec.seq = cpu_to_le64(realm->seq);
-		sr_rec.parent = cpu_to_le64(realm->parent_ino);
-		err = ceph_pagelist_append(pagelist, &sr_rec, sizeof(sr_rec));
-		if (err)
-			goto fail;
-	}
-
-	reply->pagelist = pagelist;
-	if (recon_state.flock)
-		reply->hdr.version = cpu_to_le16(2);
-	reply->hdr.data_len = cpu_to_le32(pagelist->length);
-	reply->nr_pages = calc_pages_for(0, pagelist->length);
-	ceph_con_send(&session->s_con, reply);
-
-	mutex_unlock(&session->s_mutex);
-
-	mutex_lock(&mdsc->mutex);
-	__wake_requests(mdsc, &session->s_waiting);
-	mutex_unlock(&mdsc->mutex);
-
-	up_read(&mdsc->snap_rwsem);
-	return;
-
-fail:
-	ceph_msg_put(reply);
-	up_read(&mdsc->snap_rwsem);
-	mutex_unlock(&session->s_mutex);
-fail_nomsg:
-	ceph_pagelist_release(pagelist);
-	kfree(pagelist);
-fail_nopagelist:
-	pr_err("error %d preparing reconnect for mds%d\n", err, mds);
-	return;
-}
-
-
-/*
- * compare old and new mdsmaps, kicking requests
- * and closing out old connections as necessary
- *
- * called under mdsc->mutex.
- */
-static void check_new_map(struct ceph_mds_client *mdsc,
-			  struct ceph_mdsmap *newmap,
-			  struct ceph_mdsmap *oldmap)
-{
-	int i;
-	int oldstate, newstate;
-	struct ceph_mds_session *s;
-
-	dout("check_new_map new %u old %u\n",
-	     newmap->m_epoch, oldmap->m_epoch);
-
-	for (i = 0; i < oldmap->m_max_mds && i < mdsc->max_sessions; i++) {
-		if (mdsc->sessions[i] == NULL)
-			continue;
-		s = mdsc->sessions[i];
-		oldstate = ceph_mdsmap_get_state(oldmap, i);
-		newstate = ceph_mdsmap_get_state(newmap, i);
-
-		dout("check_new_map mds%d state %s%s -> %s%s (session %s)\n",
-		     i, ceph_mds_state_name(oldstate),
-		     ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "",
-		     ceph_mds_state_name(newstate),
-		     ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
-		     session_state_name(s->s_state));
-
-		if (memcmp(ceph_mdsmap_get_addr(oldmap, i),
-			   ceph_mdsmap_get_addr(newmap, i),
-			   sizeof(struct ceph_entity_addr))) {
-			if (s->s_state == CEPH_MDS_SESSION_OPENING) {
-				/* the session never opened, just close it
-				 * out now */
-				__wake_requests(mdsc, &s->s_waiting);
-				__unregister_session(mdsc, s);
-			} else {
-				/* just close it */
-				mutex_unlock(&mdsc->mutex);
-				mutex_lock(&s->s_mutex);
-				mutex_lock(&mdsc->mutex);
-				ceph_con_close(&s->s_con);
-				mutex_unlock(&s->s_mutex);
-				s->s_state = CEPH_MDS_SESSION_RESTARTING;
-			}
-
-			/* kick any requests waiting on the recovering mds */
-			kick_requests(mdsc, i);
-		} else if (oldstate == newstate) {
-			continue;  /* nothing new with this mds */
-		}
-
-		/*
-		 * send reconnect?
-		 */
-		if (s->s_state == CEPH_MDS_SESSION_RESTARTING &&
-		    newstate >= CEPH_MDS_STATE_RECONNECT) {
-			mutex_unlock(&mdsc->mutex);
-			send_mds_reconnect(mdsc, s);
-			mutex_lock(&mdsc->mutex);
-		}
-
-		/*
-		 * kick request on any mds that has gone active.
-		 */
-		if (oldstate < CEPH_MDS_STATE_ACTIVE &&
-		    newstate >= CEPH_MDS_STATE_ACTIVE) {
-			if (oldstate != CEPH_MDS_STATE_CREATING &&
-			    oldstate != CEPH_MDS_STATE_STARTING)
-				pr_info("mds%d recovery completed\n", s->s_mds);
-			kick_requests(mdsc, i);
-			ceph_kick_flushing_caps(mdsc, s);
-			wake_up_session_caps(s, 1);
-		}
-	}
-
-	for (i = 0; i < newmap->m_max_mds && i < mdsc->max_sessions; i++) {
-		s = mdsc->sessions[i];
-		if (!s)
-			continue;
-		if (!ceph_mdsmap_is_laggy(newmap, i))
-			continue;
-		if (s->s_state == CEPH_MDS_SESSION_OPEN ||
-		    s->s_state == CEPH_MDS_SESSION_HUNG ||
-		    s->s_state == CEPH_MDS_SESSION_CLOSING) {
-			dout(" connecting to export targets of laggy mds%d\n",
-			     i);
-			__open_export_target_sessions(mdsc, s);
-		}
-	}
-}
-
-
-
-/*
- * leases
- */
-
-/*
- * caller must hold session s_mutex, dentry->d_lock
- */
-void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry)
-{
-	struct ceph_dentry_info *di = ceph_dentry(dentry);
-
-	ceph_put_mds_session(di->lease_session);
-	di->lease_session = NULL;
-}
-
-static void handle_lease(struct ceph_mds_client *mdsc,
-			 struct ceph_mds_session *session,
-			 struct ceph_msg *msg)
-{
-	struct super_block *sb = mdsc->fsc->sb;
-	struct inode *inode;
-	struct dentry *parent, *dentry;
-	struct ceph_dentry_info *di;
-	int mds = session->s_mds;
-	struct ceph_mds_lease *h = msg->front.iov_base;
-	u32 seq;
-	struct ceph_vino vino;
-	struct qstr dname;
-	int release = 0;
-
-	dout("handle_lease from mds%d\n", mds);
-
-	/* decode */
-	if (msg->front.iov_len < sizeof(*h) + sizeof(u32))
-		goto bad;
-	vino.ino = le64_to_cpu(h->ino);
-	vino.snap = CEPH_NOSNAP;
-	seq = le32_to_cpu(h->seq);
-	dname.name = (void *)h + sizeof(*h) + sizeof(u32);
-	dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
-	if (dname.len != get_unaligned_le32(h+1))
-		goto bad;
-
-	mutex_lock(&session->s_mutex);
-	session->s_seq++;
-
-	/* lookup inode */
-	inode = ceph_find_inode(sb, vino);
-	dout("handle_lease %s, ino %llx %p %.*s\n",
-	     ceph_lease_op_name(h->action), vino.ino, inode,
-	     dname.len, dname.name);
-	if (inode == NULL) {
-		dout("handle_lease no inode %llx\n", vino.ino);
-		goto release;
-	}
-
-	/* dentry */
-	parent = d_find_alias(inode);
-	if (!parent) {
-		dout("no parent dentry on inode %p\n", inode);
-		WARN_ON(1);
-		goto release;  /* hrm... */
-	}
-	dname.hash = full_name_hash(dname.name, dname.len);
-	dentry = d_lookup(parent, &dname);
-	dput(parent);
-	if (!dentry)
-		goto release;
-
-	spin_lock(&dentry->d_lock);
-	di = ceph_dentry(dentry);
-	switch (h->action) {
-	case CEPH_MDS_LEASE_REVOKE:
-		if (di->lease_session == session) {
-			if (ceph_seq_cmp(di->lease_seq, seq) > 0)
-				h->seq = cpu_to_le32(di->lease_seq);
-			__ceph_mdsc_drop_dentry_lease(dentry);
-		}
-		release = 1;
-		break;
-
-	case CEPH_MDS_LEASE_RENEW:
-		if (di->lease_session == session &&
-		    di->lease_gen == session->s_cap_gen &&
-		    di->lease_renew_from &&
-		    di->lease_renew_after == 0) {
-			unsigned long duration =
-				le32_to_cpu(h->duration_ms) * HZ / 1000;
-
-			di->lease_seq = seq;
-			dentry->d_time = di->lease_renew_from + duration;
-			di->lease_renew_after = di->lease_renew_from +
-				(duration >> 1);
-			di->lease_renew_from = 0;
-		}
-		break;
-	}
-	spin_unlock(&dentry->d_lock);
-	dput(dentry);
-
-	if (!release)
-		goto out;
-
-release:
-	/* let's just reuse the same message */
-	h->action = CEPH_MDS_LEASE_REVOKE_ACK;
-	ceph_msg_get(msg);
-	ceph_con_send(&session->s_con, msg);
-
-out:
-	iput(inode);
-	mutex_unlock(&session->s_mutex);
-	return;
-
-bad:
-	pr_err("corrupt lease message\n");
-	ceph_msg_dump(msg);
-}
-
-void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
-			      struct inode *inode,
-			      struct dentry *dentry, char action,
-			      u32 seq)
-{
-	struct ceph_msg *msg;
-	struct ceph_mds_lease *lease;
-	int len = sizeof(*lease) + sizeof(u32);
-	int dnamelen = 0;
-
-	dout("lease_send_msg inode %p dentry %p %s to mds%d\n",
-	     inode, dentry, ceph_lease_op_name(action), session->s_mds);
-	dnamelen = dentry->d_name.len;
-	len += dnamelen;
-
-	msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS, false);
-	if (!msg)
-		return;
-	lease = msg->front.iov_base;
-	lease->action = action;
-	lease->ino = cpu_to_le64(ceph_vino(inode).ino);
-	lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap);
-	lease->seq = cpu_to_le32(seq);
-	put_unaligned_le32(dnamelen, lease + 1);
-	memcpy((void *)(lease + 1) + 4, dentry->d_name.name, dnamelen);
-
-	/*
-	 * if this is a preemptive lease RELEASE, no need to
-	 * flush request stream, since the actual request will
-	 * soon follow.
-	 */
-	msg->more_to_follow = (action == CEPH_MDS_LEASE_RELEASE);
-
-	ceph_con_send(&session->s_con, msg);
-}
-
-/*
- * Preemptively release a lease we expect to invalidate anyway.
- * Pass @inode always, @dentry is optional.
- */
-void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
-			     struct dentry *dentry)
-{
-	struct ceph_dentry_info *di;
-	struct ceph_mds_session *session;
-	u32 seq;
-
-	BUG_ON(inode == NULL);
-	BUG_ON(dentry == NULL);
-
-	/* is dentry lease valid? */
-	spin_lock(&dentry->d_lock);
-	di = ceph_dentry(dentry);
-	if (!di || !di->lease_session ||
-	    di->lease_session->s_mds < 0 ||
-	    di->lease_gen != di->lease_session->s_cap_gen ||
-	    !time_before(jiffies, dentry->d_time)) {
-		dout("lease_release inode %p dentry %p -- "
-		     "no lease\n",
-		     inode, dentry);
-		spin_unlock(&dentry->d_lock);
-		return;
-	}
-
-	/* we do have a lease on this dentry; note mds and seq */
-	session = ceph_get_mds_session(di->lease_session);
-	seq = di->lease_seq;
-	__ceph_mdsc_drop_dentry_lease(dentry);
-	spin_unlock(&dentry->d_lock);
-
-	dout("lease_release inode %p dentry %p to mds%d\n",
-	     inode, dentry, session->s_mds);
-	ceph_mdsc_lease_send_msg(session, inode, dentry,
-				 CEPH_MDS_LEASE_RELEASE, seq);
-	ceph_put_mds_session(session);
-}
-
-/*
- * drop all leases (and dentry refs) in preparation for umount
- */
-static void drop_leases(struct ceph_mds_client *mdsc)
-{
-	int i;
-
-	dout("drop_leases\n");
-	mutex_lock(&mdsc->mutex);
-	for (i = 0; i < mdsc->max_sessions; i++) {
-		struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
-		if (!s)
-			continue;
-		mutex_unlock(&mdsc->mutex);
-		mutex_lock(&s->s_mutex);
-		mutex_unlock(&s->s_mutex);
-		ceph_put_mds_session(s);
-		mutex_lock(&mdsc->mutex);
-	}
-	mutex_unlock(&mdsc->mutex);
-}
-
-
-
-/*
- * delayed work -- periodically trim expired leases, renew caps with mds
- */
-static void schedule_delayed(struct ceph_mds_client *mdsc)
-{
-	int delay = 5;
-	unsigned hz = round_jiffies_relative(HZ * delay);
-	schedule_delayed_work(&mdsc->delayed_work, hz);
-}
-
-static void delayed_work(struct work_struct *work)
-{
-	int i;
-	struct ceph_mds_client *mdsc =
-		container_of(work, struct ceph_mds_client, delayed_work.work);
-	int renew_interval;
-	int renew_caps;
-
-	dout("mdsc delayed_work\n");
-	ceph_check_delayed_caps(mdsc);
-
-	mutex_lock(&mdsc->mutex);
-	renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
-	renew_caps = time_after_eq(jiffies, HZ*renew_interval +
-				   mdsc->last_renew_caps);
-	if (renew_caps)
-		mdsc->last_renew_caps = jiffies;
-
-	for (i = 0; i < mdsc->max_sessions; i++) {
-		struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
-		if (s == NULL)
-			continue;
-		if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
-			dout("resending session close request for mds%d\n",
-			     s->s_mds);
-			request_close_session(mdsc, s);
-			ceph_put_mds_session(s);
-			continue;
-		}
-		if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
-			if (s->s_state == CEPH_MDS_SESSION_OPEN) {
-				s->s_state = CEPH_MDS_SESSION_HUNG;
-				pr_info("mds%d hung\n", s->s_mds);
-			}
-		}
-		if (s->s_state < CEPH_MDS_SESSION_OPEN) {
-			/* this mds is failed or recovering, just wait */
-			ceph_put_mds_session(s);
-			continue;
-		}
-		mutex_unlock(&mdsc->mutex);
-
-		mutex_lock(&s->s_mutex);
-		if (renew_caps)
-			send_renew_caps(mdsc, s);
-		else
-			ceph_con_keepalive(&s->s_con);
-		ceph_add_cap_releases(mdsc, s);
-		if (s->s_state == CEPH_MDS_SESSION_OPEN ||
-		    s->s_state == CEPH_MDS_SESSION_HUNG)
-			ceph_send_cap_releases(mdsc, s);
-		mutex_unlock(&s->s_mutex);
-		ceph_put_mds_session(s);
-
-		mutex_lock(&mdsc->mutex);
-	}
-	mutex_unlock(&mdsc->mutex);
-
-	schedule_delayed(mdsc);
-}
-
-int ceph_mdsc_init(struct ceph_fs_client *fsc)
-
-{
-	struct ceph_mds_client *mdsc;
-
-	mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS);
-	if (!mdsc)
-		return -ENOMEM;
-	mdsc->fsc = fsc;
-	fsc->mdsc = mdsc;
-	mutex_init(&mdsc->mutex);
-	mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
-	if (mdsc->mdsmap == NULL)
-		return -ENOMEM;
-
-	init_completion(&mdsc->safe_umount_waiters);
-	init_waitqueue_head(&mdsc->session_close_wq);
-	INIT_LIST_HEAD(&mdsc->waiting_for_map);
-	mdsc->sessions = NULL;
-	mdsc->max_sessions = 0;
-	mdsc->stopping = 0;
-	init_rwsem(&mdsc->snap_rwsem);
-	mdsc->snap_realms = RB_ROOT;
-	INIT_LIST_HEAD(&mdsc->snap_empty);
-	spin_lock_init(&mdsc->snap_empty_lock);
-	mdsc->last_tid = 0;
-	mdsc->request_tree = RB_ROOT;
-	INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
-	mdsc->last_renew_caps = jiffies;
-	INIT_LIST_HEAD(&mdsc->cap_delay_list);
-	spin_lock_init(&mdsc->cap_delay_lock);
-	INIT_LIST_HEAD(&mdsc->snap_flush_list);
-	spin_lock_init(&mdsc->snap_flush_lock);
-	mdsc->cap_flush_seq = 0;
-	INIT_LIST_HEAD(&mdsc->cap_dirty);
-	INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
-	mdsc->num_cap_flushing = 0;
-	spin_lock_init(&mdsc->cap_dirty_lock);
-	init_waitqueue_head(&mdsc->cap_flushing_wq);
-	spin_lock_init(&mdsc->dentry_lru_lock);
-	INIT_LIST_HEAD(&mdsc->dentry_lru);
-
-	ceph_caps_init(mdsc);
-	ceph_adjust_min_caps(mdsc, fsc->min_caps);
-
-	return 0;
-}
-
-/*
- * Wait for safe replies on open mds requests.  If we time out, drop
- * all requests from the tree to avoid dangling dentry refs.
- */
-static void wait_requests(struct ceph_mds_client *mdsc)
-{
-	struct ceph_mds_request *req;
-	struct ceph_fs_client *fsc = mdsc->fsc;
-
-	mutex_lock(&mdsc->mutex);
-	if (__get_oldest_req(mdsc)) {
-		mutex_unlock(&mdsc->mutex);
-
-		dout("wait_requests waiting for requests\n");
-		wait_for_completion_timeout(&mdsc->safe_umount_waiters,
-				    fsc->client->options->mount_timeout * HZ);
-
-		/* tear down remaining requests */
-		mutex_lock(&mdsc->mutex);
-		while ((req = __get_oldest_req(mdsc))) {
-			dout("wait_requests timed out on tid %llu\n",
-			     req->r_tid);
-			__unregister_request(mdsc, req);
-		}
-	}
-	mutex_unlock(&mdsc->mutex);
-	dout("wait_requests done\n");
-}
-
-/*
- * called before mount is ro, and before dentries are torn down.
- * (hmm, does this still race with new lookups?)
- */
-void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
-{
-	dout("pre_umount\n");
-	mdsc->stopping = 1;
-
-	drop_leases(mdsc);
-	ceph_flush_dirty_caps(mdsc);
-	wait_requests(mdsc);
-
-	/*
-	 * wait for reply handlers to drop their request refs and
-	 * their inode/dcache refs
-	 */
-	ceph_msgr_flush();
-}
-
-/*
- * wait for all write mds requests to flush.
- */
-static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
-{
-	struct ceph_mds_request *req = NULL, *nextreq;
-	struct rb_node *n;
-
-	mutex_lock(&mdsc->mutex);
-	dout("wait_unsafe_requests want %lld\n", want_tid);
-restart:
-	req = __get_oldest_req(mdsc);
-	while (req && req->r_tid <= want_tid) {
-		/* find next request */
-		n = rb_next(&req->r_node);
-		if (n)
-			nextreq = rb_entry(n, struct ceph_mds_request, r_node);
-		else
-			nextreq = NULL;
-		if ((req->r_op & CEPH_MDS_OP_WRITE)) {
-			/* write op */
-			ceph_mdsc_get_request(req);
-			if (nextreq)
-				ceph_mdsc_get_request(nextreq);
-			mutex_unlock(&mdsc->mutex);
-			dout("wait_unsafe_requests  wait on %llu (want %llu)\n",
-			     req->r_tid, want_tid);
-			wait_for_completion(&req->r_safe_completion);
-			mutex_lock(&mdsc->mutex);
-			ceph_mdsc_put_request(req);
-			if (!nextreq)
-				break;  /* next dne before, so we're done! */
-			if (RB_EMPTY_NODE(&nextreq->r_node)) {
-				/* next request was removed from tree */
-				ceph_mdsc_put_request(nextreq);
-				goto restart;
-			}
-			ceph_mdsc_put_request(nextreq);  /* won't go away */
-		}
-		req = nextreq;
-	}
-	mutex_unlock(&mdsc->mutex);
-	dout("wait_unsafe_requests done\n");
-}
-
-void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
-{
-	u64 want_tid, want_flush;
-
-	if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
-		return;
-
-	dout("sync\n");
-	mutex_lock(&mdsc->mutex);
-	want_tid = mdsc->last_tid;
-	want_flush = mdsc->cap_flush_seq;
-	mutex_unlock(&mdsc->mutex);
-	dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush);
-
-	ceph_flush_dirty_caps(mdsc);
-
-	wait_unsafe_requests(mdsc, want_tid);
-	wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush));
-}
-
-/*
- * true if all sessions are closed, or we force unmount
- */
-static bool done_closing_sessions(struct ceph_mds_client *mdsc)
-{
-	int i, n = 0;
-
-	if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
-		return true;
-
-	mutex_lock(&mdsc->mutex);
-	for (i = 0; i < mdsc->max_sessions; i++)
-		if (mdsc->sessions[i])
-			n++;
-	mutex_unlock(&mdsc->mutex);
-	return n == 0;
-}
-
-/*
- * called after sb is ro.
- */
-void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
-{
-	struct ceph_mds_session *session;
-	int i;
-	struct ceph_fs_client *fsc = mdsc->fsc;
-	unsigned long timeout = fsc->client->options->mount_timeout * HZ;
-
-	dout("close_sessions\n");
-
-	/* close sessions */
-	mutex_lock(&mdsc->mutex);
-	for (i = 0; i < mdsc->max_sessions; i++) {
-		session = __ceph_lookup_mds_session(mdsc, i);
-		if (!session)
-			continue;
-		mutex_unlock(&mdsc->mutex);
-		mutex_lock(&session->s_mutex);
-		__close_session(mdsc, session);
-		mutex_unlock(&session->s_mutex);
-		ceph_put_mds_session(session);
-		mutex_lock(&mdsc->mutex);
-	}
-	mutex_unlock(&mdsc->mutex);
-
-	dout("waiting for sessions to close\n");
-	wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc),
-			   timeout);
-
-	/* tear down remaining sessions */
-	mutex_lock(&mdsc->mutex);
-	for (i = 0; i < mdsc->max_sessions; i++) {
-		if (mdsc->sessions[i]) {
-			session = get_session(mdsc->sessions[i]);
-			__unregister_session(mdsc, session);
-			mutex_unlock(&mdsc->mutex);
-			mutex_lock(&session->s_mutex);
-			remove_session_caps(session);
-			mutex_unlock(&session->s_mutex);
-			ceph_put_mds_session(session);
-			mutex_lock(&mdsc->mutex);
-		}
-	}
-	WARN_ON(!list_empty(&mdsc->cap_delay_list));
-	mutex_unlock(&mdsc->mutex);
-
-	ceph_cleanup_empty_realms(mdsc);
-
-	cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
-
-	dout("stopped\n");
-}
-
-static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
-{
-	dout("stop\n");
-	cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
-	if (mdsc->mdsmap)
-		ceph_mdsmap_destroy(mdsc->mdsmap);
-	kfree(mdsc->sessions);
-	ceph_caps_finalize(mdsc);
-}
-
-void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
-{
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-
-	dout("mdsc_destroy %p\n", mdsc);
-	ceph_mdsc_stop(mdsc);
-
-	/* flush out any connection work with references to us */
-	ceph_msgr_flush();
-
-	fsc->mdsc = NULL;
-	kfree(mdsc);
-	dout("mdsc_destroy %p done\n", mdsc);
-}
-
-
-/*
- * handle mds map update.
- */
-void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
-{
-	u32 epoch;
-	u32 maplen;
-	void *p = msg->front.iov_base;
-	void *end = p + msg->front.iov_len;
-	struct ceph_mdsmap *newmap, *oldmap;
-	struct ceph_fsid fsid;
-	int err = -EINVAL;
-
-	ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad);
-	ceph_decode_copy(&p, &fsid, sizeof(fsid));
-	if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0)
-		return;
-	epoch = ceph_decode_32(&p);
-	maplen = ceph_decode_32(&p);
-	dout("handle_map epoch %u len %d\n", epoch, (int)maplen);
-
-	/* do we need it? */
-	ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch);
-	mutex_lock(&mdsc->mutex);
-	if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) {
-		dout("handle_map epoch %u <= our %u\n",
-		     epoch, mdsc->mdsmap->m_epoch);
-		mutex_unlock(&mdsc->mutex);
-		return;
-	}
-
-	newmap = ceph_mdsmap_decode(&p, end);
-	if (IS_ERR(newmap)) {
-		err = PTR_ERR(newmap);
-		goto bad_unlock;
-	}
-
-	/* swap into place */
-	if (mdsc->mdsmap) {
-		oldmap = mdsc->mdsmap;
-		mdsc->mdsmap = newmap;
-		check_new_map(mdsc, newmap, oldmap);
-		ceph_mdsmap_destroy(oldmap);
-	} else {
-		mdsc->mdsmap = newmap;  /* first mds map */
-	}
-	mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
-
-	__wake_requests(mdsc, &mdsc->waiting_for_map);
-
-	mutex_unlock(&mdsc->mutex);
-	schedule_delayed(mdsc);
-	return;
-
-bad_unlock:
-	mutex_unlock(&mdsc->mutex);
-bad:
-	pr_err("error decoding mdsmap %d\n", err);
-	return;
-}
-
-static struct ceph_connection *con_get(struct ceph_connection *con)
-{
-	struct ceph_mds_session *s = con->private;
-
-	if (get_session(s)) {
-		dout("mdsc con_get %p ok (%d)\n", s, atomic_read(&s->s_ref));
-		return con;
-	}
-	dout("mdsc con_get %p FAIL\n", s);
-	return NULL;
-}
-
-static void con_put(struct ceph_connection *con)
-{
-	struct ceph_mds_session *s = con->private;
-
-	dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref) - 1);
-	ceph_put_mds_session(s);
-}
-
-/*
- * if the client is unresponsive for long enough, the mds will kill
- * the session entirely.
- */
-static void peer_reset(struct ceph_connection *con)
-{
-	struct ceph_mds_session *s = con->private;
-	struct ceph_mds_client *mdsc = s->s_mdsc;
-
-	pr_warning("mds%d closed our session\n", s->s_mds);
-	send_mds_reconnect(mdsc, s);
-}
-
-static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
-{
-	struct ceph_mds_session *s = con->private;
-	struct ceph_mds_client *mdsc = s->s_mdsc;
-	int type = le16_to_cpu(msg->hdr.type);
-
-	mutex_lock(&mdsc->mutex);
-	if (__verify_registered_session(mdsc, s) < 0) {
-		mutex_unlock(&mdsc->mutex);
-		goto out;
-	}
-	mutex_unlock(&mdsc->mutex);
-
-	switch (type) {
-	case CEPH_MSG_MDS_MAP:
-		ceph_mdsc_handle_map(mdsc, msg);
-		break;
-	case CEPH_MSG_CLIENT_SESSION:
-		handle_session(s, msg);
-		break;
-	case CEPH_MSG_CLIENT_REPLY:
-		handle_reply(s, msg);
-		break;
-	case CEPH_MSG_CLIENT_REQUEST_FORWARD:
-		handle_forward(mdsc, s, msg);
-		break;
-	case CEPH_MSG_CLIENT_CAPS:
-		ceph_handle_caps(s, msg);
-		break;
-	case CEPH_MSG_CLIENT_SNAP:
-		ceph_handle_snap(mdsc, s, msg);
-		break;
-	case CEPH_MSG_CLIENT_LEASE:
-		handle_lease(mdsc, s, msg);
-		break;
-
-	default:
-		pr_err("received unknown message type %d %s\n", type,
-		       ceph_msg_type_name(type));
-	}
-out:
-	ceph_msg_put(msg);
-}
-
-/*
- * authentication
- */
-static int get_authorizer(struct ceph_connection *con,
-			  void **buf, int *len, int *proto,
-			  void **reply_buf, int *reply_len, int force_new)
-{
-	struct ceph_mds_session *s = con->private;
-	struct ceph_mds_client *mdsc = s->s_mdsc;
-	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
-	int ret = 0;
-
-	if (force_new && s->s_authorizer) {
-		ac->ops->destroy_authorizer(ac, s->s_authorizer);
-		s->s_authorizer = NULL;
-	}
-	if (s->s_authorizer == NULL) {
-		if (ac->ops->create_authorizer) {
-			ret = ac->ops->create_authorizer(
-				ac, CEPH_ENTITY_TYPE_MDS,
-				&s->s_authorizer,
-				&s->s_authorizer_buf,
-				&s->s_authorizer_buf_len,
-				&s->s_authorizer_reply_buf,
-				&s->s_authorizer_reply_buf_len);
-			if (ret)
-				return ret;
-		}
-	}
-
-	*proto = ac->protocol;
-	*buf = s->s_authorizer_buf;
-	*len = s->s_authorizer_buf_len;
-	*reply_buf = s->s_authorizer_reply_buf;
-	*reply_len = s->s_authorizer_reply_buf_len;
-	return 0;
-}
-
-
-static int verify_authorizer_reply(struct ceph_connection *con, int len)
-{
-	struct ceph_mds_session *s = con->private;
-	struct ceph_mds_client *mdsc = s->s_mdsc;
-	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
-
-	return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len);
-}
-
-static int invalidate_authorizer(struct ceph_connection *con)
-{
-	struct ceph_mds_session *s = con->private;
-	struct ceph_mds_client *mdsc = s->s_mdsc;
-	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
-
-	if (ac->ops->invalidate_authorizer)
-		ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS);
-
-	return ceph_monc_validate_auth(&mdsc->fsc->client->monc);
-}
-
-static const struct ceph_connection_operations mds_con_ops = {
-	.get = con_get,
-	.put = con_put,
-	.dispatch = dispatch,
-	.get_authorizer = get_authorizer,
-	.verify_authorizer_reply = verify_authorizer_reply,
-	.invalidate_authorizer = invalidate_authorizer,
-	.peer_reset = peer_reset,
-};
-
-/* eof */
diff --git a/ANDROID_3.4.5/fs/ceph/mds_client.h b/ANDROID_3.4.5/fs/ceph/mds_client.h
deleted file mode 100644
index 8c7c04eb..00000000
--- a/ANDROID_3.4.5/fs/ceph/mds_client.h
+++ /dev/null
@@ -1,383 +0,0 @@
-#ifndef _FS_CEPH_MDS_CLIENT_H
-#define _FS_CEPH_MDS_CLIENT_H
-
-#include <linux/completion.h>
-#include <linux/kref.h>
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/rbtree.h>
-#include <linux/spinlock.h>
-
-#include <linux/ceph/types.h>
-#include <linux/ceph/messenger.h>
-#include <linux/ceph/mdsmap.h>
-
-/*
- * Some lock dependencies:
- *
- * session->s_mutex
- *         mdsc->mutex
- *
- *         mdsc->snap_rwsem
- *
- *         ci->i_ceph_lock
- *                 mdsc->snap_flush_lock
- *                 mdsc->cap_delay_lock
- *
- */
-
-struct ceph_fs_client;
-struct ceph_cap;
-
-/*
- * parsed info about a single inode.  pointers are into the encoded
- * on-wire structures within the mds reply message payload.
- */
-struct ceph_mds_reply_info_in {
-	struct ceph_mds_reply_inode *in;
-	struct ceph_dir_layout dir_layout;
-	u32 symlink_len;
-	char *symlink;
-	u32 xattr_len;
-	char *xattr_data;
-};
-
-/*
- * parsed info about an mds reply, including information about
- * either: 1) the target inode and/or its parent directory and dentry,
- * and directory contents (for readdir results), or
- * 2) the file range lock info (for fcntl F_GETLK results).
- */
-struct ceph_mds_reply_info_parsed {
-	struct ceph_mds_reply_head    *head;
-
-	/* trace */
-	struct ceph_mds_reply_info_in diri, targeti;
-	struct ceph_mds_reply_dirfrag *dirfrag;
-	char                          *dname;
-	u32                           dname_len;
-	struct ceph_mds_reply_lease   *dlease;
-
-	/* extra */
-	union {
-		/* for fcntl F_GETLK results */
-		struct ceph_filelock *filelock_reply;
-
-		/* for readdir results */
-		struct {
-			struct ceph_mds_reply_dirfrag *dir_dir;
-			int                           dir_nr;
-			char                          **dir_dname;
-			u32                           *dir_dname_len;
-			struct ceph_mds_reply_lease   **dir_dlease;
-			struct ceph_mds_reply_info_in *dir_in;
-			u8                            dir_complete, dir_end;
-		};
-	};
-
-	/* encoded blob describing snapshot contexts for certain
-	   operations (e.g., open) */
-	void *snapblob;
-	int snapblob_len;
-};
-
-
-/*
- * cap releases are batched and sent to the MDS en masse.
- */
-#define CEPH_CAPS_PER_RELEASE ((PAGE_CACHE_SIZE -			\
-				sizeof(struct ceph_mds_cap_release)) /	\
-			       sizeof(struct ceph_mds_cap_item))
-
-
-/*
- * state associated with each MDS<->client session
- */
-enum {
-	CEPH_MDS_SESSION_NEW = 1,
-	CEPH_MDS_SESSION_OPENING = 2,
-	CEPH_MDS_SESSION_OPEN = 3,
-	CEPH_MDS_SESSION_HUNG = 4,
-	CEPH_MDS_SESSION_CLOSING = 5,
-	CEPH_MDS_SESSION_RESTARTING = 6,
-	CEPH_MDS_SESSION_RECONNECTING = 7,
-};
-
-struct ceph_mds_session {
-	struct ceph_mds_client *s_mdsc;
-	int               s_mds;
-	int               s_state;
-	unsigned long     s_ttl;      /* time until mds kills us */
-	u64               s_seq;      /* incoming msg seq # */
-	struct mutex      s_mutex;    /* serialize session messages */
-
-	struct ceph_connection s_con;
-
-	struct ceph_authorizer *s_authorizer;
-	void             *s_authorizer_buf, *s_authorizer_reply_buf;
-	size_t            s_authorizer_buf_len, s_authorizer_reply_buf_len;
-
-	/* protected by s_gen_ttl_lock */
-	spinlock_t        s_gen_ttl_lock;
-	u32               s_cap_gen;  /* inc each time we get mds stale msg */
-	unsigned long     s_cap_ttl;  /* when session caps expire */
-
-	/* protected by s_cap_lock */
-	spinlock_t        s_cap_lock;
-	struct list_head  s_caps;     /* all caps issued by this session */
-	int               s_nr_caps, s_trim_caps;
-	int               s_num_cap_releases;
-	struct list_head  s_cap_releases; /* waiting cap_release messages */
-	struct list_head  s_cap_releases_done; /* ready to send */
-	struct ceph_cap  *s_cap_iterator;
-
-	/* protected by mutex */
-	struct list_head  s_cap_flushing;     /* inodes w/ flushing caps */
-	struct list_head  s_cap_snaps_flushing;
-	unsigned long     s_renew_requested; /* last time we sent a renew req */
-	u64               s_renew_seq;
-
-	atomic_t          s_ref;
-	struct list_head  s_waiting;  /* waiting requests */
-	struct list_head  s_unsafe;   /* unsafe requests */
-};
-
-/*
- * modes of choosing which MDS to send a request to
- */
-enum {
-	USE_ANY_MDS,
-	USE_RANDOM_MDS,
-	USE_AUTH_MDS,   /* prefer authoritative mds for this metadata item */
-};
-
-struct ceph_mds_request;
-struct ceph_mds_client;
-
-/*
- * request completion callback
- */
-typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc,
-					     struct ceph_mds_request *req);
-
-/*
- * an in-flight mds request
- */
-struct ceph_mds_request {
-	u64 r_tid;                   /* transaction id */
-	struct rb_node r_node;
-	struct ceph_mds_client *r_mdsc;
-
-	int r_op;                    /* mds op code */
-
-	/* operation on what? */
-	struct inode *r_inode;              /* arg1 */
-	struct dentry *r_dentry;            /* arg1 */
-	struct dentry *r_old_dentry;        /* arg2: rename from or link from */
-	struct inode *r_old_dentry_dir;     /* arg2: old dentry's parent dir */
-	char *r_path1, *r_path2;
-	struct ceph_vino r_ino1, r_ino2;
-
-	struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */
-	struct inode *r_target_inode;       /* resulting inode */
-
-	struct mutex r_fill_mutex;
-
-	union ceph_mds_request_args r_args;
-	int r_fmode;        /* file mode, if expecting cap */
-	uid_t r_uid;
-	gid_t r_gid;
-
-	/* for choosing which mds to send this request to */
-	int r_direct_mode;
-	u32 r_direct_hash;      /* choose dir frag based on this dentry hash */
-	bool r_direct_is_hash;  /* true if r_direct_hash is valid */
-
-	/* data payload is used for xattr ops */
-	struct page **r_pages;
-	int r_num_pages;
-	int r_data_len;
-
-	/* what caps shall we drop? */
-	int r_inode_drop, r_inode_unless;
-	int r_dentry_drop, r_dentry_unless;
-	int r_old_dentry_drop, r_old_dentry_unless;
-	struct inode *r_old_inode;
-	int r_old_inode_drop, r_old_inode_unless;
-
-	struct ceph_msg  *r_request;  /* original request */
-	int r_request_release_offset;
-	struct ceph_msg  *r_reply;
-	struct ceph_mds_reply_info_parsed r_reply_info;
-	int r_err;
-	bool r_aborted;
-
-	unsigned long r_timeout;  /* optional.  jiffies */
-	unsigned long r_started;  /* start time to measure timeout against */
-	unsigned long r_request_started; /* start time for mds request only,
-					    used to measure lease durations */
-
-	/* link unsafe requests to parent directory, for fsync */
-	struct inode	*r_unsafe_dir;
-	struct list_head r_unsafe_dir_item;
-
-	struct ceph_mds_session *r_session;
-
-	int               r_attempts;   /* resend attempts */
-	int               r_num_fwd;    /* number of forward attempts */
-	int               r_resend_mds; /* mds to resend to next, if any*/
-	u32               r_sent_on_mseq; /* cap mseq request was sent at*/
-
-	struct kref       r_kref;
-	struct list_head  r_wait;
-	struct completion r_completion;
-	struct completion r_safe_completion;
-	ceph_mds_request_callback_t r_callback;
-	struct list_head  r_unsafe_item;  /* per-session unsafe list item */
-	bool		  r_got_unsafe, r_got_safe, r_got_result;
-
-	bool              r_did_prepopulate;
-	u32               r_readdir_offset;
-
-	struct ceph_cap_reservation r_caps_reservation;
-	int r_num_caps;
-};
-
-/*
- * mds client state
- */
-struct ceph_mds_client {
-	struct ceph_fs_client  *fsc;
-	struct mutex            mutex;         /* all nested structures */
-
-	struct ceph_mdsmap      *mdsmap;
-	struct completion       safe_umount_waiters;
-	wait_queue_head_t       session_close_wq;
-	struct list_head        waiting_for_map;
-
-	struct ceph_mds_session **sessions;    /* NULL for mds if no session */
-	int                     max_sessions;  /* len of s_mds_sessions */
-	int                     stopping;      /* true if shutting down */
-
-	/*
-	 * snap_rwsem will cover cap linkage into snaprealms, and
-	 * realm snap contexts.  (later, we can do per-realm snap
-	 * contexts locks..)  the empty list contains realms with no
-	 * references (implying they contain no inodes with caps) that
-	 * should be destroyed.
-	 */
-	struct rw_semaphore     snap_rwsem;
-	struct rb_root          snap_realms;
-	struct list_head        snap_empty;
-	spinlock_t              snap_empty_lock;  /* protect snap_empty */
-
-	u64                    last_tid;      /* most recent mds request */
-	struct rb_root         request_tree;  /* pending mds requests */
-	struct delayed_work    delayed_work;  /* delayed work */
-	unsigned long    last_renew_caps;  /* last time we renewed our caps */
-	struct list_head cap_delay_list;   /* caps with delayed release */
-	spinlock_t       cap_delay_lock;   /* protects cap_delay_list */
-	struct list_head snap_flush_list;  /* cap_snaps ready to flush */
-	spinlock_t       snap_flush_lock;
-
-	u64               cap_flush_seq;
-	struct list_head  cap_dirty;        /* inodes with dirty caps */
-	struct list_head  cap_dirty_migrating; /* ...that are migration... */
-	int               num_cap_flushing; /* # caps we are flushing */
-	spinlock_t        cap_dirty_lock;   /* protects above items */
-	wait_queue_head_t cap_flushing_wq;
-
-	/*
-	 * Cap reservations
-	 *
-	 * Maintain a global pool of preallocated struct ceph_caps, referenced
-	 * by struct ceph_caps_reservations.  This ensures that we preallocate
-	 * memory needed to successfully process an MDS response.  (If an MDS
-	 * sends us cap information and we fail to process it, we will have
-	 * problems due to the client and MDS being out of sync.)
-	 *
-	 * Reservations are 'owned' by a ceph_cap_reservation context.
-	 */
-	spinlock_t	caps_list_lock;
-	struct		list_head caps_list; /* unused (reserved or
-						unreserved) */
-	int		caps_total_count;    /* total caps allocated */
-	int		caps_use_count;      /* in use */
-	int		caps_reserve_count;  /* unused, reserved */
-	int		caps_avail_count;    /* unused, unreserved */
-	int		caps_min_count;      /* keep at least this many
-						(unreserved) */
-	spinlock_t	  dentry_lru_lock;
-	struct list_head  dentry_lru;
-	int		  num_dentry;
-};
-
-extern const char *ceph_mds_op_name(int op);
-
-extern struct ceph_mds_session *
-__ceph_lookup_mds_session(struct ceph_mds_client *, int mds);
-
-static inline struct ceph_mds_session *
-ceph_get_mds_session(struct ceph_mds_session *s)
-{
-	atomic_inc(&s->s_ref);
-	return s;
-}
-
-extern void ceph_put_mds_session(struct ceph_mds_session *s);
-
-extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc,
-			     struct ceph_msg *msg, int mds);
-
-extern int ceph_mdsc_init(struct ceph_fs_client *fsc);
-extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc);
-extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc);
-
-extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc);
-
-extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc,
-				    struct inode *inode,
-				    struct dentry *dn);
-
-extern void ceph_invalidate_dir_request(struct ceph_mds_request *req);
-
-extern struct ceph_mds_request *
-ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode);
-extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
-				     struct ceph_mds_request *req);
-extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
-				struct inode *dir,
-				struct ceph_mds_request *req);
-static inline void ceph_mdsc_get_request(struct ceph_mds_request *req)
-{
-	kref_get(&req->r_kref);
-}
-extern void ceph_mdsc_release_request(struct kref *kref);
-static inline void ceph_mdsc_put_request(struct ceph_mds_request *req)
-{
-	kref_put(&req->r_kref, ceph_mdsc_release_request);
-}
-
-extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
-				 struct ceph_mds_session *session);
-extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
-				   struct ceph_mds_session *session);
-
-extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
-
-extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
-				  int stop_on_nosnap);
-
-extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry);
-extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
-				     struct inode *inode,
-				     struct dentry *dentry, char action,
-				     u32 seq);
-
-extern void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc,
-				 struct ceph_msg *msg);
-
-extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
-					  struct ceph_mds_session *session);
-
-#endif
diff --git a/ANDROID_3.4.5/fs/ceph/mdsmap.c b/ANDROID_3.4.5/fs/ceph/mdsmap.c
deleted file mode 100644
index 73b7d44e..00000000
--- a/ANDROID_3.4.5/fs/ceph/mdsmap.c
+++ /dev/null
@@ -1,179 +0,0 @@
-#include <linux/ceph/ceph_debug.h>
-
-#include <linux/bug.h>
-#include <linux/err.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-
-#include <linux/ceph/mdsmap.h>
-#include <linux/ceph/messenger.h>
-#include <linux/ceph/decode.h>
-
-#include "super.h"
-
-
-/*
- * choose a random mds that is "up" (i.e. has a state > 0), or -1.
- */
-int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
-{
-	int n = 0;
-	int i;
-	char r;
-
-	/* count */
-	for (i = 0; i < m->m_max_mds; i++)
-		if (m->m_info[i].state > 0)
-			n++;
-	if (n == 0)
-		return -1;
-
-	/* pick */
-	get_random_bytes(&r, 1);
-	n = r % n;
-	i = 0;
-	for (i = 0; n > 0; i++, n--)
-		while (m->m_info[i].state <= 0)
-			i++;
-
-	return i;
-}
-
-/*
- * Decode an MDS map
- *
- * Ignore any fields we don't care about (there are quite a few of
- * them).
- */
-struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
-{
-	struct ceph_mdsmap *m;
-	const void *start = *p;
-	int i, j, n;
-	int err = -EINVAL;
-	u16 version;
-
-	m = kzalloc(sizeof(*m), GFP_NOFS);
-	if (m == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	ceph_decode_16_safe(p, end, version, bad);
-
-	ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad);
-	m->m_epoch = ceph_decode_32(p);
-	m->m_client_epoch = ceph_decode_32(p);
-	m->m_last_failure = ceph_decode_32(p);
-	m->m_root = ceph_decode_32(p);
-	m->m_session_timeout = ceph_decode_32(p);
-	m->m_session_autoclose = ceph_decode_32(p);
-	m->m_max_file_size = ceph_decode_64(p);
-	m->m_max_mds = ceph_decode_32(p);
-
-	m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS);
-	if (m->m_info == NULL)
-		goto badmem;
-
-	/* pick out active nodes from mds_info (state > 0) */
-	n = ceph_decode_32(p);
-	for (i = 0; i < n; i++) {
-		u64 global_id;
-		u32 namelen;
-		s32 mds, inc, state;
-		u64 state_seq;
-		u8 infoversion;
-		struct ceph_entity_addr addr;
-		u32 num_export_targets;
-		void *pexport_targets = NULL;
-		struct ceph_timespec laggy_since;
-
-		ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad);
-		global_id = ceph_decode_64(p);
-		infoversion = ceph_decode_8(p);
-		*p += sizeof(u64);
-		namelen = ceph_decode_32(p);  /* skip mds name */
-		*p += namelen;
-
-		ceph_decode_need(p, end,
-				 4*sizeof(u32) + sizeof(u64) +
-				 sizeof(addr) + sizeof(struct ceph_timespec),
-				 bad);
-		mds = ceph_decode_32(p);
-		inc = ceph_decode_32(p);
-		state = ceph_decode_32(p);
-		state_seq = ceph_decode_64(p);
-		ceph_decode_copy(p, &addr, sizeof(addr));
-		ceph_decode_addr(&addr);
-		ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
-		*p += sizeof(u32);
-		ceph_decode_32_safe(p, end, namelen, bad);
-		*p += namelen;
-		if (infoversion >= 2) {
-			ceph_decode_32_safe(p, end, num_export_targets, bad);
-			pexport_targets = *p;
-			*p += num_export_targets * sizeof(u32);
-		} else {
-			num_export_targets = 0;
-		}
-
-		dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
-		     i+1, n, global_id, mds, inc,
-		     ceph_pr_addr(&addr.in_addr),
-		     ceph_mds_state_name(state));
-		if (mds >= 0 && mds < m->m_max_mds && state > 0) {
-			m->m_info[mds].global_id = global_id;
-			m->m_info[mds].state = state;
-			m->m_info[mds].addr = addr;
-			m->m_info[mds].laggy =
-				(laggy_since.tv_sec != 0 ||
-				 laggy_since.tv_nsec != 0);
-			m->m_info[mds].num_export_targets = num_export_targets;
-			if (num_export_targets) {
-				m->m_info[mds].export_targets =
-					kcalloc(num_export_targets, sizeof(u32),
-						GFP_NOFS);
-				for (j = 0; j < num_export_targets; j++)
-					m->m_info[mds].export_targets[j] =
-					       ceph_decode_32(&pexport_targets);
-			} else {
-				m->m_info[mds].export_targets = NULL;
-			}
-		}
-	}
-
-	/* pg_pools */
-	ceph_decode_32_safe(p, end, n, bad);
-	m->m_num_data_pg_pools = n;
-	m->m_data_pg_pools = kcalloc(n, sizeof(u32), GFP_NOFS);
-	if (!m->m_data_pg_pools)
-		goto badmem;
-	ceph_decode_need(p, end, sizeof(u32)*(n+1), bad);
-	for (i = 0; i < n; i++)
-		m->m_data_pg_pools[i] = ceph_decode_32(p);
-	m->m_cas_pg_pool = ceph_decode_32(p);
-
-	/* ok, we don't care about the rest. */
-	dout("mdsmap_decode success epoch %u\n", m->m_epoch);
-	return m;
-
-badmem:
-	err = -ENOMEM;
-bad:
-	pr_err("corrupt mdsmap\n");
-	print_hex_dump(KERN_DEBUG, "mdsmap: ",
-		       DUMP_PREFIX_OFFSET, 16, 1,
-		       start, end - start, true);
-	ceph_mdsmap_destroy(m);
-	return ERR_PTR(-EINVAL);
-}
-
-void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
-{
-	int i;
-
-	for (i = 0; i < m->m_max_mds; i++)
-		kfree(m->m_info[i].export_targets);
-	kfree(m->m_info);
-	kfree(m->m_data_pg_pools);
-	kfree(m);
-}
diff --git a/ANDROID_3.4.5/fs/ceph/snap.c b/ANDROID_3.4.5/fs/ceph/snap.c
deleted file mode 100644
index f04c0961..00000000
--- a/ANDROID_3.4.5/fs/ceph/snap.c
+++ /dev/null
@@ -1,931 +0,0 @@
-#include <linux/ceph/ceph_debug.h>
-
-#include <linux/sort.h>
-#include <linux/slab.h>
-
-#include "super.h"
-#include "mds_client.h"
-
-#include <linux/ceph/decode.h>
-
-/*
- * Snapshots in ceph are driven in large part by cooperation from the
- * client.  In contrast to local file systems or file servers that
- * implement snapshots at a single point in the system, ceph's
- * distributed access to storage requires clients to help decide
- * whether a write logically occurs before or after a recently created
- * snapshot.
- *
- * This provides a perfect instantanous client-wide snapshot.  Between
- * clients, however, snapshots may appear to be applied at slightly
- * different points in time, depending on delays in delivering the
- * snapshot notification.
- *
- * Snapshots are _not_ file system-wide.  Instead, each snapshot
- * applies to the subdirectory nested beneath some directory.  This
- * effectively divides the hierarchy into multiple "realms," where all
- * of the files contained by each realm share the same set of
- * snapshots.  An individual realm's snap set contains snapshots
- * explicitly created on that realm, as well as any snaps in its
- * parent's snap set _after_ the point at which the parent became it's
- * parent (due to, say, a rename).  Similarly, snaps from prior parents
- * during the time intervals during which they were the parent are included.
- *
- * The client is spared most of this detail, fortunately... it must only
- * maintains a hierarchy of realms reflecting the current parent/child
- * realm relationship, and for each realm has an explicit list of snaps
- * inherited from prior parents.
- *
- * A snap_realm struct is maintained for realms containing every inode
- * with an open cap in the system.  (The needed snap realm information is
- * provided by the MDS whenever a cap is issued, i.e., on open.)  A 'seq'
- * version number is used to ensure that as realm parameters change (new
- * snapshot, new parent, etc.) the client's realm hierarchy is updated.
- *
- * The realm hierarchy drives the generation of a 'snap context' for each
- * realm, which simply lists the resulting set of snaps for the realm.  This
- * is attached to any writes sent to OSDs.
- */
-/*
- * Unfortunately error handling is a bit mixed here.  If we get a snap
- * update, but don't have enough memory to update our realm hierarchy,
- * it's not clear what we can do about it (besides complaining to the
- * console).
- */
-
-
-/*
- * increase ref count for the realm
- *
- * caller must hold snap_rwsem for write.
- */
-void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
-			 struct ceph_snap_realm *realm)
-{
-	dout("get_realm %p %d -> %d\n", realm,
-	     atomic_read(&realm->nref), atomic_read(&realm->nref)+1);
-	/*
-	 * since we _only_ increment realm refs or empty the empty
-	 * list with snap_rwsem held, adjusting the empty list here is
-	 * safe.  we do need to protect against concurrent empty list
-	 * additions, however.
-	 */
-	if (atomic_read(&realm->nref) == 0) {
-		spin_lock(&mdsc->snap_empty_lock);
-		list_del_init(&realm->empty_item);
-		spin_unlock(&mdsc->snap_empty_lock);
-	}
-
-	atomic_inc(&realm->nref);
-}
-
-static void __insert_snap_realm(struct rb_root *root,
-				struct ceph_snap_realm *new)
-{
-	struct rb_node **p = &root->rb_node;
-	struct rb_node *parent = NULL;
-	struct ceph_snap_realm *r = NULL;
-
-	while (*p) {
-		parent = *p;
-		r = rb_entry(parent, struct ceph_snap_realm, node);
-		if (new->ino < r->ino)
-			p = &(*p)->rb_left;
-		else if (new->ino > r->ino)
-			p = &(*p)->rb_right;
-		else
-			BUG();
-	}
-
-	rb_link_node(&new->node, parent, p);
-	rb_insert_color(&new->node, root);
-}
-
-/*
- * create and get the realm rooted at @ino and bump its ref count.
- *
- * caller must hold snap_rwsem for write.
- */
-static struct ceph_snap_realm *ceph_create_snap_realm(
-	struct ceph_mds_client *mdsc,
-	u64 ino)
-{
-	struct ceph_snap_realm *realm;
-
-	realm = kzalloc(sizeof(*realm), GFP_NOFS);
-	if (!realm)
-		return ERR_PTR(-ENOMEM);
-
-	atomic_set(&realm->nref, 0);    /* tree does not take a ref */
-	realm->ino = ino;
-	INIT_LIST_HEAD(&realm->children);
-	INIT_LIST_HEAD(&realm->child_item);
-	INIT_LIST_HEAD(&realm->empty_item);
-	INIT_LIST_HEAD(&realm->dirty_item);
-	INIT_LIST_HEAD(&realm->inodes_with_caps);
-	spin_lock_init(&realm->inodes_with_caps_lock);
-	__insert_snap_realm(&mdsc->snap_realms, realm);
-	dout("create_snap_realm %llx %p\n", realm->ino, realm);
-	return realm;
-}
-
-/*
- * lookup the realm rooted at @ino.
- *
- * caller must hold snap_rwsem for write.
- */
-struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
-					       u64 ino)
-{
-	struct rb_node *n = mdsc->snap_realms.rb_node;
-	struct ceph_snap_realm *r;
-
-	while (n) {
-		r = rb_entry(n, struct ceph_snap_realm, node);
-		if (ino < r->ino)
-			n = n->rb_left;
-		else if (ino > r->ino)
-			n = n->rb_right;
-		else {
-			dout("lookup_snap_realm %llx %p\n", r->ino, r);
-			return r;
-		}
-	}
-	return NULL;
-}
-
-static void __put_snap_realm(struct ceph_mds_client *mdsc,
-			     struct ceph_snap_realm *realm);
-
-/*
- * called with snap_rwsem (write)
- */
-static void __destroy_snap_realm(struct ceph_mds_client *mdsc,
-				 struct ceph_snap_realm *realm)
-{
-	dout("__destroy_snap_realm %p %llx\n", realm, realm->ino);
-
-	rb_erase(&realm->node, &mdsc->snap_realms);
-
-	if (realm->parent) {
-		list_del_init(&realm->child_item);
-		__put_snap_realm(mdsc, realm->parent);
-	}
-
-	kfree(realm->prior_parent_snaps);
-	kfree(realm->snaps);
-	ceph_put_snap_context(realm->cached_context);
-	kfree(realm);
-}
-
-/*
- * caller holds snap_rwsem (write)
- */
-static void __put_snap_realm(struct ceph_mds_client *mdsc,
-			     struct ceph_snap_realm *realm)
-{
-	dout("__put_snap_realm %llx %p %d -> %d\n", realm->ino, realm,
-	     atomic_read(&realm->nref), atomic_read(&realm->nref)-1);
-	if (atomic_dec_and_test(&realm->nref))
-		__destroy_snap_realm(mdsc, realm);
-}
-
-/*
- * caller needn't hold any locks
- */
-void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
-			 struct ceph_snap_realm *realm)
-{
-	dout("put_snap_realm %llx %p %d -> %d\n", realm->ino, realm,
-	     atomic_read(&realm->nref), atomic_read(&realm->nref)-1);
-	if (!atomic_dec_and_test(&realm->nref))
-		return;
-
-	if (down_write_trylock(&mdsc->snap_rwsem)) {
-		__destroy_snap_realm(mdsc, realm);
-		up_write(&mdsc->snap_rwsem);
-	} else {
-		spin_lock(&mdsc->snap_empty_lock);
-		list_add(&realm->empty_item, &mdsc->snap_empty);
-		spin_unlock(&mdsc->snap_empty_lock);
-	}
-}
-
-/*
- * Clean up any realms whose ref counts have dropped to zero.  Note
- * that this does not include realms who were created but not yet
- * used.
- *
- * Called under snap_rwsem (write)
- */
-static void __cleanup_empty_realms(struct ceph_mds_client *mdsc)
-{
-	struct ceph_snap_realm *realm;
-
-	spin_lock(&mdsc->snap_empty_lock);
-	while (!list_empty(&mdsc->snap_empty)) {
-		realm = list_first_entry(&mdsc->snap_empty,
-				   struct ceph_snap_realm, empty_item);
-		list_del(&realm->empty_item);
-		spin_unlock(&mdsc->snap_empty_lock);
-		__destroy_snap_realm(mdsc, realm);
-		spin_lock(&mdsc->snap_empty_lock);
-	}
-	spin_unlock(&mdsc->snap_empty_lock);
-}
-
-void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc)
-{
-	down_write(&mdsc->snap_rwsem);
-	__cleanup_empty_realms(mdsc);
-	up_write(&mdsc->snap_rwsem);
-}
-
-/*
- * adjust the parent realm of a given @realm.  adjust child list, and parent
- * pointers, and ref counts appropriately.
- *
- * return true if parent was changed, 0 if unchanged, <0 on error.
- *
- * caller must hold snap_rwsem for write.
- */
-static int adjust_snap_realm_parent(struct ceph_mds_client *mdsc,
-				    struct ceph_snap_realm *realm,
-				    u64 parentino)
-{
-	struct ceph_snap_realm *parent;
-
-	if (realm->parent_ino == parentino)
-		return 0;
-
-	parent = ceph_lookup_snap_realm(mdsc, parentino);
-	if (!parent) {
-		parent = ceph_create_snap_realm(mdsc, parentino);
-		if (IS_ERR(parent))
-			return PTR_ERR(parent);
-	}
-	dout("adjust_snap_realm_parent %llx %p: %llx %p -> %llx %p\n",
-	     realm->ino, realm, realm->parent_ino, realm->parent,
-	     parentino, parent);
-	if (realm->parent) {
-		list_del_init(&realm->child_item);
-		ceph_put_snap_realm(mdsc, realm->parent);
-	}
-	realm->parent_ino = parentino;
-	realm->parent = parent;
-	ceph_get_snap_realm(mdsc, parent);
-	list_add(&realm->child_item, &parent->children);
-	return 1;
-}
-
-
-static int cmpu64_rev(const void *a, const void *b)
-{
-	if (*(u64 *)a < *(u64 *)b)
-		return 1;
-	if (*(u64 *)a > *(u64 *)b)
-		return -1;
-	return 0;
-}
-
-/*
- * build the snap context for a given realm.
- */
-static int build_snap_context(struct ceph_snap_realm *realm)
-{
-	struct ceph_snap_realm *parent = realm->parent;
-	struct ceph_snap_context *snapc;
-	int err = 0;
-	int i;
-	int num = realm->num_prior_parent_snaps + realm->num_snaps;
-
-	/*
-	 * build parent context, if it hasn't been built.
-	 * conservatively estimate that all parent snaps might be
-	 * included by us.
-	 */
-	if (parent) {
-		if (!parent->cached_context) {
-			err = build_snap_context(parent);
-			if (err)
-				goto fail;
-		}
-		num += parent->cached_context->num_snaps;
-	}
-
-	/* do i actually need to update?  not if my context seq
-	   matches realm seq, and my parents' does to.  (this works
-	   because we rebuild_snap_realms() works _downward_ in
-	   hierarchy after each update.) */
-	if (realm->cached_context &&
-	    realm->cached_context->seq == realm->seq &&
-	    (!parent ||
-	     realm->cached_context->seq >= parent->cached_context->seq)) {
-		dout("build_snap_context %llx %p: %p seq %lld (%d snaps)"
-		     " (unchanged)\n",
-		     realm->ino, realm, realm->cached_context,
-		     realm->cached_context->seq,
-		     realm->cached_context->num_snaps);
-		return 0;
-	}
-
-	/* alloc new snap context */
-	err = -ENOMEM;
-	if (num > (ULONG_MAX - sizeof(*snapc)) / sizeof(u64))
-		goto fail;
-	snapc = kzalloc(sizeof(*snapc) + num*sizeof(u64), GFP_NOFS);
-	if (!snapc)
-		goto fail;
-	atomic_set(&snapc->nref, 1);
-
-	/* build (reverse sorted) snap vector */
-	num = 0;
-	snapc->seq = realm->seq;
-	if (parent) {
-		/* include any of parent's snaps occurring _after_ my
-		   parent became my parent */
-		for (i = 0; i < parent->cached_context->num_snaps; i++)
-			if (parent->cached_context->snaps[i] >=
-			    realm->parent_since)
-				snapc->snaps[num++] =
-					parent->cached_context->snaps[i];
-		if (parent->cached_context->seq > snapc->seq)
-			snapc->seq = parent->cached_context->seq;
-	}
-	memcpy(snapc->snaps + num, realm->snaps,
-	       sizeof(u64)*realm->num_snaps);
-	num += realm->num_snaps;
-	memcpy(snapc->snaps + num, realm->prior_parent_snaps,
-	       sizeof(u64)*realm->num_prior_parent_snaps);
-	num += realm->num_prior_parent_snaps;
-
-	sort(snapc->snaps, num, sizeof(u64), cmpu64_rev, NULL);
-	snapc->num_snaps = num;
-	dout("build_snap_context %llx %p: %p seq %lld (%d snaps)\n",
-	     realm->ino, realm, snapc, snapc->seq, snapc->num_snaps);
-
-	if (realm->cached_context)
-		ceph_put_snap_context(realm->cached_context);
-	realm->cached_context = snapc;
-	return 0;
-
-fail:
-	/*
-	 * if we fail, clear old (incorrect) cached_context... hopefully
-	 * we'll have better luck building it later
-	 */
-	if (realm->cached_context) {
-		ceph_put_snap_context(realm->cached_context);
-		realm->cached_context = NULL;
-	}
-	pr_err("build_snap_context %llx %p fail %d\n", realm->ino,
-	       realm, err);
-	return err;
-}
-
-/*
- * rebuild snap context for the given realm and all of its children.
- */
-static void rebuild_snap_realms(struct ceph_snap_realm *realm)
-{
-	struct ceph_snap_realm *child;
-
-	dout("rebuild_snap_realms %llx %p\n", realm->ino, realm);
-	build_snap_context(realm);
-
-	list_for_each_entry(child, &realm->children, child_item)
-		rebuild_snap_realms(child);
-}
-
-
-/*
- * helper to allocate and decode an array of snapids.  free prior
- * instance, if any.
- */
-static int dup_array(u64 **dst, __le64 *src, int num)
-{
-	int i;
-
-	kfree(*dst);
-	if (num) {
-		*dst = kcalloc(num, sizeof(u64), GFP_NOFS);
-		if (!*dst)
-			return -ENOMEM;
-		for (i = 0; i < num; i++)
-			(*dst)[i] = get_unaligned_le64(src + i);
-	} else {
-		*dst = NULL;
-	}
-	return 0;
-}
-
-
-/*
- * When a snapshot is applied, the size/mtime inode metadata is queued
- * in a ceph_cap_snap (one for each snapshot) until writeback
- * completes and the metadata can be flushed back to the MDS.
- *
- * However, if a (sync) write is currently in-progress when we apply
- * the snapshot, we have to wait until the write succeeds or fails
- * (and a final size/mtime is known).  In this case the
- * cap_snap->writing = 1, and is said to be "pending."  When the write
- * finishes, we __ceph_finish_cap_snap().
- *
- * Caller must hold snap_rwsem for read (i.e., the realm topology won't
- * change).
- */
-void ceph_queue_cap_snap(struct ceph_inode_info *ci)
-{
-	struct inode *inode = &ci->vfs_inode;
-	struct ceph_cap_snap *capsnap;
-	int used, dirty;
-
-	capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS);
-	if (!capsnap) {
-		pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode);
-		return;
-	}
-
-	spin_lock(&ci->i_ceph_lock);
-	used = __ceph_caps_used(ci);
-	dirty = __ceph_caps_dirty(ci);
-
-	/*
-	 * If there is a write in progress, treat that as a dirty Fw,
-	 * even though it hasn't completed yet; by the time we finish
-	 * up this capsnap it will be.
-	 */
-	if (used & CEPH_CAP_FILE_WR)
-		dirty |= CEPH_CAP_FILE_WR;
-
-	if (__ceph_have_pending_cap_snap(ci)) {
-		/* there is no point in queuing multiple "pending" cap_snaps,
-		   as no new writes are allowed to start when pending, so any
-		   writes in progress now were started before the previous
-		   cap_snap.  lucky us. */
-		dout("queue_cap_snap %p already pending\n", inode);
-		kfree(capsnap);
-	} else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
-			    CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) {
-		struct ceph_snap_context *snapc = ci->i_head_snapc;
-
-		/*
-		 * if we are a sync write, we may need to go to the snaprealm
-		 * to get the current snapc.
-		 */
-		if (!snapc)
-			snapc = ci->i_snap_realm->cached_context;
-
-		dout("queue_cap_snap %p cap_snap %p queuing under %p %s\n",
-		     inode, capsnap, snapc, ceph_cap_string(dirty));
-		ihold(inode);
-
-		atomic_set(&capsnap->nref, 1);
-		capsnap->ci = ci;
-		INIT_LIST_HEAD(&capsnap->ci_item);
-		INIT_LIST_HEAD(&capsnap->flushing_item);
-
-		capsnap->follows = snapc->seq;
-		capsnap->issued = __ceph_caps_issued(ci, NULL);
-		capsnap->dirty = dirty;
-
-		capsnap->mode = inode->i_mode;
-		capsnap->uid = inode->i_uid;
-		capsnap->gid = inode->i_gid;
-
-		if (dirty & CEPH_CAP_XATTR_EXCL) {
-			__ceph_build_xattrs_blob(ci);
-			capsnap->xattr_blob =
-				ceph_buffer_get(ci->i_xattrs.blob);
-			capsnap->xattr_version = ci->i_xattrs.version;
-		} else {
-			capsnap->xattr_blob = NULL;
-			capsnap->xattr_version = 0;
-		}
-
-		/* dirty page count moved from _head to this cap_snap;
-		   all subsequent writes page dirties occur _after_ this
-		   snapshot. */
-		capsnap->dirty_pages = ci->i_wrbuffer_ref_head;
-		ci->i_wrbuffer_ref_head = 0;
-		capsnap->context = snapc;
-		ci->i_head_snapc =
-			ceph_get_snap_context(ci->i_snap_realm->cached_context);
-		dout(" new snapc is %p\n", ci->i_head_snapc);
-		list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
-
-		if (used & CEPH_CAP_FILE_WR) {
-			dout("queue_cap_snap %p cap_snap %p snapc %p"
-			     " seq %llu used WR, now pending\n", inode,
-			     capsnap, snapc, snapc->seq);
-			capsnap->writing = 1;
-		} else {
-			/* note mtime, size NOW. */
-			__ceph_finish_cap_snap(ci, capsnap);
-		}
-	} else {
-		dout("queue_cap_snap %p nothing dirty|writing\n", inode);
-		kfree(capsnap);
-	}
-
-	spin_unlock(&ci->i_ceph_lock);
-}
-
-/*
- * Finalize the size, mtime for a cap_snap.. that is, settle on final values
- * to be used for the snapshot, to be flushed back to the mds.
- *
- * If capsnap can now be flushed, add to snap_flush list, and return 1.
- *
- * Caller must hold i_ceph_lock.
- */
-int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
-			    struct ceph_cap_snap *capsnap)
-{
-	struct inode *inode = &ci->vfs_inode;
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
-
-	BUG_ON(capsnap->writing);
-	capsnap->size = inode->i_size;
-	capsnap->mtime = inode->i_mtime;
-	capsnap->atime = inode->i_atime;
-	capsnap->ctime = inode->i_ctime;
-	capsnap->time_warp_seq = ci->i_time_warp_seq;
-	if (capsnap->dirty_pages) {
-		dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu "
-		     "still has %d dirty pages\n", inode, capsnap,
-		     capsnap->context, capsnap->context->seq,
-		     ceph_cap_string(capsnap->dirty), capsnap->size,
-		     capsnap->dirty_pages);
-		return 0;
-	}
-	dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu\n",
-	     inode, capsnap, capsnap->context,
-	     capsnap->context->seq, ceph_cap_string(capsnap->dirty),
-	     capsnap->size);
-
-	spin_lock(&mdsc->snap_flush_lock);
-	list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
-	spin_unlock(&mdsc->snap_flush_lock);
-	return 1;  /* caller may want to ceph_flush_snaps */
-}
-
-/*
- * Queue cap_snaps for snap writeback for this realm and its children.
- * Called under snap_rwsem, so realm topology won't change.
- */
-static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
-{
-	struct ceph_inode_info *ci;
-	struct inode *lastinode = NULL;
-	struct ceph_snap_realm *child;
-
-	dout("queue_realm_cap_snaps %p %llx inodes\n", realm, realm->ino);
-
-	spin_lock(&realm->inodes_with_caps_lock);
-	list_for_each_entry(ci, &realm->inodes_with_caps,
-			    i_snap_realm_item) {
-		struct inode *inode = igrab(&ci->vfs_inode);
-		if (!inode)
-			continue;
-		spin_unlock(&realm->inodes_with_caps_lock);
-		if (lastinode)
-			iput(lastinode);
-		lastinode = inode;
-		ceph_queue_cap_snap(ci);
-		spin_lock(&realm->inodes_with_caps_lock);
-	}
-	spin_unlock(&realm->inodes_with_caps_lock);
-	if (lastinode)
-		iput(lastinode);
-
-	list_for_each_entry(child, &realm->children, child_item) {
-		dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
-		     realm, realm->ino, child, child->ino);
-		list_del_init(&child->dirty_item);
-		list_add(&child->dirty_item, &realm->dirty_item);
-	}
-
-	list_del_init(&realm->dirty_item);
-	dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
-}
-
-/*
- * Parse and apply a snapblob "snap trace" from the MDS.  This specifies
- * the snap realm parameters from a given realm and all of its ancestors,
- * up to the root.
- *
- * Caller must hold snap_rwsem for write.
- */
-int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
-			   void *p, void *e, bool deletion)
-{
-	struct ceph_mds_snap_realm *ri;    /* encoded */
-	__le64 *snaps;                     /* encoded */
-	__le64 *prior_parent_snaps;        /* encoded */
-	struct ceph_snap_realm *realm;
-	int invalidate = 0;
-	int err = -ENOMEM;
-	LIST_HEAD(dirty_realms);
-
-	dout("update_snap_trace deletion=%d\n", deletion);
-more:
-	ceph_decode_need(&p, e, sizeof(*ri), bad);
-	ri = p;
-	p += sizeof(*ri);
-	ceph_decode_need(&p, e, sizeof(u64)*(le32_to_cpu(ri->num_snaps) +
-			    le32_to_cpu(ri->num_prior_parent_snaps)), bad);
-	snaps = p;
-	p += sizeof(u64) * le32_to_cpu(ri->num_snaps);
-	prior_parent_snaps = p;
-	p += sizeof(u64) * le32_to_cpu(ri->num_prior_parent_snaps);
-
-	realm = ceph_lookup_snap_realm(mdsc, le64_to_cpu(ri->ino));
-	if (!realm) {
-		realm = ceph_create_snap_realm(mdsc, le64_to_cpu(ri->ino));
-		if (IS_ERR(realm)) {
-			err = PTR_ERR(realm);
-			goto fail;
-		}
-	}
-
-	/* ensure the parent is correct */
-	err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
-	if (err < 0)
-		goto fail;
-	invalidate += err;
-
-	if (le64_to_cpu(ri->seq) > realm->seq) {
-		dout("update_snap_trace updating %llx %p %lld -> %lld\n",
-		     realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
-		/* update realm parameters, snap lists */
-		realm->seq = le64_to_cpu(ri->seq);
-		realm->created = le64_to_cpu(ri->created);
-		realm->parent_since = le64_to_cpu(ri->parent_since);
-
-		realm->num_snaps = le32_to_cpu(ri->num_snaps);
-		err = dup_array(&realm->snaps, snaps, realm->num_snaps);
-		if (err < 0)
-			goto fail;
-
-		realm->num_prior_parent_snaps =
-			le32_to_cpu(ri->num_prior_parent_snaps);
-		err = dup_array(&realm->prior_parent_snaps, prior_parent_snaps,
-				realm->num_prior_parent_snaps);
-		if (err < 0)
-			goto fail;
-
-		/* queue realm for cap_snap creation */
-		list_add(&realm->dirty_item, &dirty_realms);
-
-		invalidate = 1;
-	} else if (!realm->cached_context) {
-		dout("update_snap_trace %llx %p seq %lld new\n",
-		     realm->ino, realm, realm->seq);
-		invalidate = 1;
-	} else {
-		dout("update_snap_trace %llx %p seq %lld unchanged\n",
-		     realm->ino, realm, realm->seq);
-	}
-
-	dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
-	     realm, invalidate, p, e);
-
-	if (p < e)
-		goto more;
-
-	/* invalidate when we reach the _end_ (root) of the trace */
-	if (invalidate)
-		rebuild_snap_realms(realm);
-
-	/*
-	 * queue cap snaps _after_ we've built the new snap contexts,
-	 * so that i_head_snapc can be set appropriately.
-	 */
-	while (!list_empty(&dirty_realms)) {
-		realm = list_first_entry(&dirty_realms, struct ceph_snap_realm,
-					 dirty_item);
-		queue_realm_cap_snaps(realm);
-	}
-
-	__cleanup_empty_realms(mdsc);
-	return 0;
-
-bad:
-	err = -EINVAL;
-fail:
-	pr_err("update_snap_trace error %d\n", err);
-	return err;
-}
-
-
-/*
- * Send any cap_snaps that are queued for flush.  Try to carry
- * s_mutex across multiple snap flushes to avoid locking overhead.
- *
- * Caller holds no locks.
- */
-static void flush_snaps(struct ceph_mds_client *mdsc)
-{
-	struct ceph_inode_info *ci;
-	struct inode *inode;
-	struct ceph_mds_session *session = NULL;
-
-	dout("flush_snaps\n");
-	spin_lock(&mdsc->snap_flush_lock);
-	while (!list_empty(&mdsc->snap_flush_list)) {
-		ci = list_first_entry(&mdsc->snap_flush_list,
-				struct ceph_inode_info, i_snap_flush_item);
-		inode = &ci->vfs_inode;
-		ihold(inode);
-		spin_unlock(&mdsc->snap_flush_lock);
-		spin_lock(&ci->i_ceph_lock);
-		__ceph_flush_snaps(ci, &session, 0);
-		spin_unlock(&ci->i_ceph_lock);
-		iput(inode);
-		spin_lock(&mdsc->snap_flush_lock);
-	}
-	spin_unlock(&mdsc->snap_flush_lock);
-
-	if (session) {
-		mutex_unlock(&session->s_mutex);
-		ceph_put_mds_session(session);
-	}
-	dout("flush_snaps done\n");
-}
-
-
-/*
- * Handle a snap notification from the MDS.
- *
- * This can take two basic forms: the simplest is just a snap creation
- * or deletion notification on an existing realm.  This should update the
- * realm and its children.
- *
- * The more difficult case is realm creation, due to snap creation at a
- * new point in the file hierarchy, or due to a rename that moves a file or
- * directory into another realm.
- */
-void ceph_handle_snap(struct ceph_mds_client *mdsc,
-		      struct ceph_mds_session *session,
-		      struct ceph_msg *msg)
-{
-	struct super_block *sb = mdsc->fsc->sb;
-	int mds = session->s_mds;
-	u64 split;
-	int op;
-	int trace_len;
-	struct ceph_snap_realm *realm = NULL;
-	void *p = msg->front.iov_base;
-	void *e = p + msg->front.iov_len;
-	struct ceph_mds_snap_head *h;
-	int num_split_inos, num_split_realms;
-	__le64 *split_inos = NULL, *split_realms = NULL;
-	int i;
-	int locked_rwsem = 0;
-
-	/* decode */
-	if (msg->front.iov_len < sizeof(*h))
-		goto bad;
-	h = p;
-	op = le32_to_cpu(h->op);
-	split = le64_to_cpu(h->split);   /* non-zero if we are splitting an
-					  * existing realm */
-	num_split_inos = le32_to_cpu(h->num_split_inos);
-	num_split_realms = le32_to_cpu(h->num_split_realms);
-	trace_len = le32_to_cpu(h->trace_len);
-	p += sizeof(*h);
-
-	dout("handle_snap from mds%d op %s split %llx tracelen %d\n", mds,
-	     ceph_snap_op_name(op), split, trace_len);
-
-	mutex_lock(&session->s_mutex);
-	session->s_seq++;
-	mutex_unlock(&session->s_mutex);
-
-	down_write(&mdsc->snap_rwsem);
-	locked_rwsem = 1;
-
-	if (op == CEPH_SNAP_OP_SPLIT) {
-		struct ceph_mds_snap_realm *ri;
-
-		/*
-		 * A "split" breaks part of an existing realm off into
-		 * a new realm.  The MDS provides a list of inodes
-		 * (with caps) and child realms that belong to the new
-		 * child.
-		 */
-		split_inos = p;
-		p += sizeof(u64) * num_split_inos;
-		split_realms = p;
-		p += sizeof(u64) * num_split_realms;
-		ceph_decode_need(&p, e, sizeof(*ri), bad);
-		/* we will peek at realm info here, but will _not_
-		 * advance p, as the realm update will occur below in
-		 * ceph_update_snap_trace. */
-		ri = p;
-
-		realm = ceph_lookup_snap_realm(mdsc, split);
-		if (!realm) {
-			realm = ceph_create_snap_realm(mdsc, split);
-			if (IS_ERR(realm))
-				goto out;
-		}
-		ceph_get_snap_realm(mdsc, realm);
-
-		dout("splitting snap_realm %llx %p\n", realm->ino, realm);
-		for (i = 0; i < num_split_inos; i++) {
-			struct ceph_vino vino = {
-				.ino = le64_to_cpu(split_inos[i]),
-				.snap = CEPH_NOSNAP,
-			};
-			struct inode *inode = ceph_find_inode(sb, vino);
-			struct ceph_inode_info *ci;
-			struct ceph_snap_realm *oldrealm;
-
-			if (!inode)
-				continue;
-			ci = ceph_inode(inode);
-
-			spin_lock(&ci->i_ceph_lock);
-			if (!ci->i_snap_realm)
-				goto skip_inode;
-			/*
-			 * If this inode belongs to a realm that was
-			 * created after our new realm, we experienced
-			 * a race (due to another split notifications
-			 * arriving from a different MDS).  So skip
-			 * this inode.
-			 */
-			if (ci->i_snap_realm->created >
-			    le64_to_cpu(ri->created)) {
-				dout(" leaving %p in newer realm %llx %p\n",
-				     inode, ci->i_snap_realm->ino,
-				     ci->i_snap_realm);
-				goto skip_inode;
-			}
-			dout(" will move %p to split realm %llx %p\n",
-			     inode, realm->ino, realm);
-			/*
-			 * Move the inode to the new realm
-			 */
-			spin_lock(&realm->inodes_with_caps_lock);
-			list_del_init(&ci->i_snap_realm_item);
-			list_add(&ci->i_snap_realm_item,
-				 &realm->inodes_with_caps);
-			oldrealm = ci->i_snap_realm;
-			ci->i_snap_realm = realm;
-			spin_unlock(&realm->inodes_with_caps_lock);
-			spin_unlock(&ci->i_ceph_lock);
-
-			ceph_get_snap_realm(mdsc, realm);
-			ceph_put_snap_realm(mdsc, oldrealm);
-
-			iput(inode);
-			continue;
-
-skip_inode:
-			spin_unlock(&ci->i_ceph_lock);
-			iput(inode);
-		}
-
-		/* we may have taken some of the old realm's children. */
-		for (i = 0; i < num_split_realms; i++) {
-			struct ceph_snap_realm *child =
-				ceph_lookup_snap_realm(mdsc,
-					   le64_to_cpu(split_realms[i]));
-			if (!child)
-				continue;
-			adjust_snap_realm_parent(mdsc, child, realm->ino);
-		}
-	}
-
-	/*
-	 * update using the provided snap trace. if we are deleting a
-	 * snap, we can avoid queueing cap_snaps.
-	 */
-	ceph_update_snap_trace(mdsc, p, e,
-			       op == CEPH_SNAP_OP_DESTROY);
-
-	if (op == CEPH_SNAP_OP_SPLIT)
-		/* we took a reference when we created the realm, above */
-		ceph_put_snap_realm(mdsc, realm);
-
-	__cleanup_empty_realms(mdsc);
-
-	up_write(&mdsc->snap_rwsem);
-
-	flush_snaps(mdsc);
-	return;
-
-bad:
-	pr_err("corrupt snap message from mds%d\n", mds);
-	ceph_msg_dump(msg);
-out:
-	if (locked_rwsem)
-		up_write(&mdsc->snap_rwsem);
-	return;
-}
-
-
-
diff --git a/ANDROID_3.4.5/fs/ceph/strings.c b/ANDROID_3.4.5/fs/ceph/strings.c
deleted file mode 100644
index cd5097d7..00000000
--- a/ANDROID_3.4.5/fs/ceph/strings.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Ceph fs string constants
- */
-#include <linux/module.h>
-#include <linux/ceph/types.h>
-
-
-const char *ceph_mds_state_name(int s)
-{
-	switch (s) {
-		/* down and out */
-	case CEPH_MDS_STATE_DNE:        return "down:dne";
-	case CEPH_MDS_STATE_STOPPED:    return "down:stopped";
-		/* up and out */
-	case CEPH_MDS_STATE_BOOT:       return "up:boot";
-	case CEPH_MDS_STATE_STANDBY:    return "up:standby";
-	case CEPH_MDS_STATE_STANDBY_REPLAY:    return "up:standby-replay";
-	case CEPH_MDS_STATE_CREATING:   return "up:creating";
-	case CEPH_MDS_STATE_STARTING:   return "up:starting";
-		/* up and in */
-	case CEPH_MDS_STATE_REPLAY:     return "up:replay";
-	case CEPH_MDS_STATE_RESOLVE:    return "up:resolve";
-	case CEPH_MDS_STATE_RECONNECT:  return "up:reconnect";
-	case CEPH_MDS_STATE_REJOIN:     return "up:rejoin";
-	case CEPH_MDS_STATE_CLIENTREPLAY: return "up:clientreplay";
-	case CEPH_MDS_STATE_ACTIVE:     return "up:active";
-	case CEPH_MDS_STATE_STOPPING:   return "up:stopping";
-	}
-	return "???";
-}
-
-const char *ceph_session_op_name(int op)
-{
-	switch (op) {
-	case CEPH_SESSION_REQUEST_OPEN: return "request_open";
-	case CEPH_SESSION_OPEN: return "open";
-	case CEPH_SESSION_REQUEST_CLOSE: return "request_close";
-	case CEPH_SESSION_CLOSE: return "close";
-	case CEPH_SESSION_REQUEST_RENEWCAPS: return "request_renewcaps";
-	case CEPH_SESSION_RENEWCAPS: return "renewcaps";
-	case CEPH_SESSION_STALE: return "stale";
-	case CEPH_SESSION_RECALL_STATE: return "recall_state";
-	}
-	return "???";
-}
-
-const char *ceph_mds_op_name(int op)
-{
-	switch (op) {
-	case CEPH_MDS_OP_LOOKUP:  return "lookup";
-	case CEPH_MDS_OP_LOOKUPHASH:  return "lookuphash";
-	case CEPH_MDS_OP_LOOKUPPARENT:  return "lookupparent";
-	case CEPH_MDS_OP_GETATTR:  return "getattr";
-	case CEPH_MDS_OP_SETXATTR: return "setxattr";
-	case CEPH_MDS_OP_SETATTR: return "setattr";
-	case CEPH_MDS_OP_RMXATTR: return "rmxattr";
-	case CEPH_MDS_OP_READDIR: return "readdir";
-	case CEPH_MDS_OP_MKNOD: return "mknod";
-	case CEPH_MDS_OP_LINK: return "link";
-	case CEPH_MDS_OP_UNLINK: return "unlink";
-	case CEPH_MDS_OP_RENAME: return "rename";
-	case CEPH_MDS_OP_MKDIR: return "mkdir";
-	case CEPH_MDS_OP_RMDIR: return "rmdir";
-	case CEPH_MDS_OP_SYMLINK: return "symlink";
-	case CEPH_MDS_OP_CREATE: return "create";
-	case CEPH_MDS_OP_OPEN: return "open";
-	case CEPH_MDS_OP_LOOKUPSNAP: return "lookupsnap";
-	case CEPH_MDS_OP_LSSNAP: return "lssnap";
-	case CEPH_MDS_OP_MKSNAP: return "mksnap";
-	case CEPH_MDS_OP_RMSNAP: return "rmsnap";
-	case CEPH_MDS_OP_SETFILELOCK: return "setfilelock";
-	case CEPH_MDS_OP_GETFILELOCK: return "getfilelock";
-	}
-	return "???";
-}
-
-const char *ceph_cap_op_name(int op)
-{
-	switch (op) {
-	case CEPH_CAP_OP_GRANT: return "grant";
-	case CEPH_CAP_OP_REVOKE: return "revoke";
-	case CEPH_CAP_OP_TRUNC: return "trunc";
-	case CEPH_CAP_OP_EXPORT: return "export";
-	case CEPH_CAP_OP_IMPORT: return "import";
-	case CEPH_CAP_OP_UPDATE: return "update";
-	case CEPH_CAP_OP_DROP: return "drop";
-	case CEPH_CAP_OP_FLUSH: return "flush";
-	case CEPH_CAP_OP_FLUSH_ACK: return "flush_ack";
-	case CEPH_CAP_OP_FLUSHSNAP: return "flushsnap";
-	case CEPH_CAP_OP_FLUSHSNAP_ACK: return "flushsnap_ack";
-	case CEPH_CAP_OP_RELEASE: return "release";
-	case CEPH_CAP_OP_RENEW: return "renew";
-	}
-	return "???";
-}
-
-const char *ceph_lease_op_name(int o)
-{
-	switch (o) {
-	case CEPH_MDS_LEASE_REVOKE: return "revoke";
-	case CEPH_MDS_LEASE_RELEASE: return "release";
-	case CEPH_MDS_LEASE_RENEW: return "renew";
-	case CEPH_MDS_LEASE_REVOKE_ACK: return "revoke_ack";
-	}
-	return "???";
-}
-
-const char *ceph_snap_op_name(int o)
-{
-	switch (o) {
-	case CEPH_SNAP_OP_UPDATE: return "update";
-	case CEPH_SNAP_OP_CREATE: return "create";
-	case CEPH_SNAP_OP_DESTROY: return "destroy";
-	case CEPH_SNAP_OP_SPLIT: return "split";
-	}
-	return "???";
-}
diff --git a/ANDROID_3.4.5/fs/ceph/super.c b/ANDROID_3.4.5/fs/ceph/super.c
deleted file mode 100644
index 1e67dd73..00000000
--- a/ANDROID_3.4.5/fs/ceph/super.c
+++ /dev/null
@@ -1,972 +0,0 @@
-
-#include <linux/ceph/ceph_debug.h>
-
-#include <linux/backing-dev.h>
-#include <linux/ctype.h>
-#include <linux/fs.h>
-#include <linux/inet.h>
-#include <linux/in6.h>
-#include <linux/module.h>
-#include <linux/mount.h>
-#include <linux/parser.h>
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/statfs.h>
-#include <linux/string.h>
-
-#include "super.h"
-#include "mds_client.h"
-
-#include <linux/ceph/decode.h>
-#include <linux/ceph/mon_client.h>
-#include <linux/ceph/auth.h>
-#include <linux/ceph/debugfs.h>
-
-/*
- * Ceph superblock operations
- *
- * Handle the basics of mounting, unmounting.
- */
-
-/*
- * super ops
- */
-static void ceph_put_super(struct super_block *s)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
-
-	dout("put_super\n");
-	ceph_mdsc_close_sessions(fsc->mdsc);
-
-	/*
-	 * ensure we release the bdi before put_anon_super releases
-	 * the device name.
-	 */
-	if (s->s_bdi == &fsc->backing_dev_info) {
-		bdi_unregister(&fsc->backing_dev_info);
-		s->s_bdi = NULL;
-	}
-
-	return;
-}
-
-static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
-{
-	struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode);
-	struct ceph_monmap *monmap = fsc->client->monc.monmap;
-	struct ceph_statfs st;
-	u64 fsid;
-	int err;
-
-	dout("statfs\n");
-	err = ceph_monc_do_statfs(&fsc->client->monc, &st);
-	if (err < 0)
-		return err;
-
-	/* fill in kstatfs */
-	buf->f_type = CEPH_SUPER_MAGIC;  /* ?? */
-
-	/*
-	 * express utilization in terms of large blocks to avoid
-	 * overflow on 32-bit machines.
-	 */
-	buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
-	buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
-	buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
-	buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
-
-	buf->f_files = le64_to_cpu(st.num_objects);
-	buf->f_ffree = -1;
-	buf->f_namelen = NAME_MAX;
-	buf->f_frsize = PAGE_CACHE_SIZE;
-
-	/* leave fsid little-endian, regardless of host endianness */
-	fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
-	buf->f_fsid.val[0] = fsid & 0xffffffff;
-	buf->f_fsid.val[1] = fsid >> 32;
-
-	return 0;
-}
-
-
-static int ceph_sync_fs(struct super_block *sb, int wait)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
-
-	if (!wait) {
-		dout("sync_fs (non-blocking)\n");
-		ceph_flush_dirty_caps(fsc->mdsc);
-		dout("sync_fs (non-blocking) done\n");
-		return 0;
-	}
-
-	dout("sync_fs (blocking)\n");
-	ceph_osdc_sync(&fsc->client->osdc);
-	ceph_mdsc_sync(fsc->mdsc);
-	dout("sync_fs (blocking) done\n");
-	return 0;
-}
-
-/*
- * mount options
- */
-enum {
-	Opt_wsize,
-	Opt_rsize,
-	Opt_rasize,
-	Opt_caps_wanted_delay_min,
-	Opt_caps_wanted_delay_max,
-	Opt_cap_release_safety,
-	Opt_readdir_max_entries,
-	Opt_readdir_max_bytes,
-	Opt_congestion_kb,
-	Opt_last_int,
-	/* int args above */
-	Opt_snapdirname,
-	Opt_last_string,
-	/* string args above */
-	Opt_dirstat,
-	Opt_nodirstat,
-	Opt_rbytes,
-	Opt_norbytes,
-	Opt_asyncreaddir,
-	Opt_noasyncreaddir,
-	Opt_dcache,
-	Opt_nodcache,
-	Opt_ino32,
-	Opt_noino32,
-};
-
-static match_table_t fsopt_tokens = {
-	{Opt_wsize, "wsize=%d"},
-	{Opt_rsize, "rsize=%d"},
-	{Opt_rasize, "rasize=%d"},
-	{Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
-	{Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
-	{Opt_cap_release_safety, "cap_release_safety=%d"},
-	{Opt_readdir_max_entries, "readdir_max_entries=%d"},
-	{Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
-	{Opt_congestion_kb, "write_congestion_kb=%d"},
-	/* int args above */
-	{Opt_snapdirname, "snapdirname=%s"},
-	/* string args above */
-	{Opt_dirstat, "dirstat"},
-	{Opt_nodirstat, "nodirstat"},
-	{Opt_rbytes, "rbytes"},
-	{Opt_norbytes, "norbytes"},
-	{Opt_asyncreaddir, "asyncreaddir"},
-	{Opt_noasyncreaddir, "noasyncreaddir"},
-	{Opt_dcache, "dcache"},
-	{Opt_nodcache, "nodcache"},
-	{Opt_ino32, "ino32"},
-	{Opt_noino32, "noino32"},
-	{-1, NULL}
-};
-
-static int parse_fsopt_token(char *c, void *private)
-{
-	struct ceph_mount_options *fsopt = private;
-	substring_t argstr[MAX_OPT_ARGS];
-	int token, intval, ret;
-
-	token = match_token((char *)c, fsopt_tokens, argstr);
-	if (token < 0)
-		return -EINVAL;
-
-	if (token < Opt_last_int) {
-		ret = match_int(&argstr[0], &intval);
-		if (ret < 0) {
-			pr_err("bad mount option arg (not int) "
-			       "at '%s'\n", c);
-			return ret;
-		}
-		dout("got int token %d val %d\n", token, intval);
-	} else if (token > Opt_last_int && token < Opt_last_string) {
-		dout("got string token %d val %s\n", token,
-		     argstr[0].from);
-	} else {
-		dout("got token %d\n", token);
-	}
-
-	switch (token) {
-	case Opt_snapdirname:
-		kfree(fsopt->snapdir_name);
-		fsopt->snapdir_name = kstrndup(argstr[0].from,
-					       argstr[0].to-argstr[0].from,
-					       GFP_KERNEL);
-		if (!fsopt->snapdir_name)
-			return -ENOMEM;
-		break;
-
-		/* misc */
-	case Opt_wsize:
-		fsopt->wsize = intval;
-		break;
-	case Opt_rsize:
-		fsopt->rsize = intval;
-		break;
-	case Opt_rasize:
-		fsopt->rasize = intval;
-		break;
-	case Opt_caps_wanted_delay_min:
-		fsopt->caps_wanted_delay_min = intval;
-		break;
-	case Opt_caps_wanted_delay_max:
-		fsopt->caps_wanted_delay_max = intval;
-		break;
-	case Opt_readdir_max_entries:
-		fsopt->max_readdir = intval;
-		break;
-	case Opt_readdir_max_bytes:
-		fsopt->max_readdir_bytes = intval;
-		break;
-	case Opt_congestion_kb:
-		fsopt->congestion_kb = intval;
-		break;
-	case Opt_dirstat:
-		fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
-		break;
-	case Opt_nodirstat:
-		fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
-		break;
-	case Opt_rbytes:
-		fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
-		break;
-	case Opt_norbytes:
-		fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
-		break;
-	case Opt_asyncreaddir:
-		fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
-		break;
-	case Opt_noasyncreaddir:
-		fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
-		break;
-	case Opt_dcache:
-		fsopt->flags |= CEPH_MOUNT_OPT_DCACHE;
-		break;
-	case Opt_nodcache:
-		fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
-		break;
-	case Opt_ino32:
-		fsopt->flags |= CEPH_MOUNT_OPT_INO32;
-		break;
-	case Opt_noino32:
-		fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
-		break;
-	default:
-		BUG_ON(token);
-	}
-	return 0;
-}
-
-static void destroy_mount_options(struct ceph_mount_options *args)
-{
-	dout("destroy_mount_options %p\n", args);
-	kfree(args->snapdir_name);
-	kfree(args);
-}
-
-static int strcmp_null(const char *s1, const char *s2)
-{
-	if (!s1 && !s2)
-		return 0;
-	if (s1 && !s2)
-		return -1;
-	if (!s1 && s2)
-		return 1;
-	return strcmp(s1, s2);
-}
-
-static int compare_mount_options(struct ceph_mount_options *new_fsopt,
-				 struct ceph_options *new_opt,
-				 struct ceph_fs_client *fsc)
-{
-	struct ceph_mount_options *fsopt1 = new_fsopt;
-	struct ceph_mount_options *fsopt2 = fsc->mount_options;
-	int ofs = offsetof(struct ceph_mount_options, snapdir_name);
-	int ret;
-
-	ret = memcmp(fsopt1, fsopt2, ofs);
-	if (ret)
-		return ret;
-
-	ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
-	if (ret)
-		return ret;
-
-	return ceph_compare_options(new_opt, fsc->client);
-}
-
-static int parse_mount_options(struct ceph_mount_options **pfsopt,
-			       struct ceph_options **popt,
-			       int flags, char *options,
-			       const char *dev_name,
-			       const char **path)
-{
-	struct ceph_mount_options *fsopt;
-	const char *dev_name_end;
-	int err = -ENOMEM;
-
-	fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
-	if (!fsopt)
-		return -ENOMEM;
-
-	dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
-
-	fsopt->sb_flags = flags;
-	fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
-
-	fsopt->rsize = CEPH_RSIZE_DEFAULT;
-	fsopt->rasize = CEPH_RASIZE_DEFAULT;
-	fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
-	fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
-	fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
-	fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
-	fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
-	fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
-	fsopt->congestion_kb = default_congestion_kb();
-
-	/* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
-	err = -EINVAL;
-	if (!dev_name)
-		goto out;
-	*path = strstr(dev_name, ":/");
-	if (*path == NULL) {
-		pr_err("device name is missing path (no :/ in %s)\n",
-				dev_name);
-		goto out;
-	}
-	dev_name_end = *path;
-	dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
-
-	/* path on server */
-	*path += 2;
-	dout("server path '%s'\n", *path);
-
-	*popt = ceph_parse_options(options, dev_name, dev_name_end,
-				 parse_fsopt_token, (void *)fsopt);
-	if (IS_ERR(*popt)) {
-		err = PTR_ERR(*popt);
-		goto out;
-	}
-
-	/* success */
-	*pfsopt = fsopt;
-	return 0;
-
-out:
-	destroy_mount_options(fsopt);
-	return err;
-}
-
-/**
- * ceph_show_options - Show mount options in /proc/mounts
- * @m: seq_file to write to
- * @root: root of that (sub)tree
- */
-static int ceph_show_options(struct seq_file *m, struct dentry *root)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
-	struct ceph_mount_options *fsopt = fsc->mount_options;
-	struct ceph_options *opt = fsc->client->options;
-
-	if (opt->flags & CEPH_OPT_FSID)
-		seq_printf(m, ",fsid=%pU", &opt->fsid);
-	if (opt->flags & CEPH_OPT_NOSHARE)
-		seq_puts(m, ",noshare");
-	if (opt->flags & CEPH_OPT_NOCRC)
-		seq_puts(m, ",nocrc");
-
-	if (opt->name)
-		seq_printf(m, ",name=%s", opt->name);
-	if (opt->key)
-		seq_puts(m, ",secret=<hidden>");
-
-	if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
-		seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
-	if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
-		seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
-	if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
-		seq_printf(m, ",osdtimeout=%d", opt->osd_timeout);
-	if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
-		seq_printf(m, ",osdkeepalivetimeout=%d",
-			   opt->osd_keepalive_timeout);
-
-	if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
-		seq_puts(m, ",dirstat");
-	if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0)
-		seq_puts(m, ",norbytes");
-	if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
-		seq_puts(m, ",noasyncreaddir");
-	if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE)
-		seq_puts(m, ",dcache");
-	else
-		seq_puts(m, ",nodcache");
-
-	if (fsopt->wsize)
-		seq_printf(m, ",wsize=%d", fsopt->wsize);
-	if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
-		seq_printf(m, ",rsize=%d", fsopt->rsize);
-	if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
-		seq_printf(m, ",rasize=%d", fsopt->rasize);
-	if (fsopt->congestion_kb != default_congestion_kb())
-		seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
-	if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
-		seq_printf(m, ",caps_wanted_delay_min=%d",
-			 fsopt->caps_wanted_delay_min);
-	if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
-		seq_printf(m, ",caps_wanted_delay_max=%d",
-			   fsopt->caps_wanted_delay_max);
-	if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
-		seq_printf(m, ",cap_release_safety=%d",
-			   fsopt->cap_release_safety);
-	if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
-		seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
-	if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
-		seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
-	if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
-		seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
-	return 0;
-}
-
-/*
- * handle any mon messages the standard library doesn't understand.
- * return error if we don't either.
- */
-static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
-{
-	struct ceph_fs_client *fsc = client->private;
-	int type = le16_to_cpu(msg->hdr.type);
-
-	switch (type) {
-	case CEPH_MSG_MDS_MAP:
-		ceph_mdsc_handle_map(fsc->mdsc, msg);
-		return 0;
-
-	default:
-		return -1;
-	}
-}
-
-/*
- * create a new fs client
- */
-static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
-					struct ceph_options *opt)
-{
-	struct ceph_fs_client *fsc;
-	const unsigned supported_features =
-		CEPH_FEATURE_FLOCK |
-		CEPH_FEATURE_DIRLAYOUTHASH;
-	const unsigned required_features = 0;
-	int err = -ENOMEM;
-
-	fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
-	if (!fsc)
-		return ERR_PTR(-ENOMEM);
-
-	fsc->client = ceph_create_client(opt, fsc, supported_features,
-					 required_features);
-	if (IS_ERR(fsc->client)) {
-		err = PTR_ERR(fsc->client);
-		goto fail;
-	}
-	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
-	fsc->client->monc.want_mdsmap = 1;
-
-	fsc->mount_options = fsopt;
-
-	fsc->sb = NULL;
-	fsc->mount_state = CEPH_MOUNT_MOUNTING;
-
-	atomic_long_set(&fsc->writeback_count, 0);
-
-	err = bdi_init(&fsc->backing_dev_info);
-	if (err < 0)
-		goto fail_client;
-
-	err = -ENOMEM;
-	/*
-	 * The number of concurrent works can be high but they don't need
-	 * to be processed in parallel, limit concurrency.
-	 */
-	fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
-	if (fsc->wb_wq == NULL)
-		goto fail_bdi;
-	fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
-	if (fsc->pg_inv_wq == NULL)
-		goto fail_wb_wq;
-	fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
-	if (fsc->trunc_wq == NULL)
-		goto fail_pg_inv_wq;
-
-	/* set up mempools */
-	err = -ENOMEM;
-	fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10,
-			      fsc->mount_options->wsize >> PAGE_CACHE_SHIFT);
-	if (!fsc->wb_pagevec_pool)
-		goto fail_trunc_wq;
-
-	/* caps */
-	fsc->min_caps = fsopt->max_readdir;
-
-	return fsc;
-
-fail_trunc_wq:
-	destroy_workqueue(fsc->trunc_wq);
-fail_pg_inv_wq:
-	destroy_workqueue(fsc->pg_inv_wq);
-fail_wb_wq:
-	destroy_workqueue(fsc->wb_wq);
-fail_bdi:
-	bdi_destroy(&fsc->backing_dev_info);
-fail_client:
-	ceph_destroy_client(fsc->client);
-fail:
-	kfree(fsc);
-	return ERR_PTR(err);
-}
-
-static void destroy_fs_client(struct ceph_fs_client *fsc)
-{
-	dout("destroy_fs_client %p\n", fsc);
-
-	destroy_workqueue(fsc->wb_wq);
-	destroy_workqueue(fsc->pg_inv_wq);
-	destroy_workqueue(fsc->trunc_wq);
-
-	bdi_destroy(&fsc->backing_dev_info);
-
-	mempool_destroy(fsc->wb_pagevec_pool);
-
-	destroy_mount_options(fsc->mount_options);
-
-	ceph_fs_debugfs_cleanup(fsc);
-
-	ceph_destroy_client(fsc->client);
-
-	kfree(fsc);
-	dout("destroy_fs_client %p done\n", fsc);
-}
-
-/*
- * caches
- */
-struct kmem_cache *ceph_inode_cachep;
-struct kmem_cache *ceph_cap_cachep;
-struct kmem_cache *ceph_dentry_cachep;
-struct kmem_cache *ceph_file_cachep;
-
-static void ceph_inode_init_once(void *foo)
-{
-	struct ceph_inode_info *ci = foo;
-	inode_init_once(&ci->vfs_inode);
-}
-
-static int __init init_caches(void)
-{
-	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
-				      sizeof(struct ceph_inode_info),
-				      __alignof__(struct ceph_inode_info),
-				      (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
-				      ceph_inode_init_once);
-	if (ceph_inode_cachep == NULL)
-		return -ENOMEM;
-
-	ceph_cap_cachep = KMEM_CACHE(ceph_cap,
-				     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
-	if (ceph_cap_cachep == NULL)
-		goto bad_cap;
-
-	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
-					SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
-	if (ceph_dentry_cachep == NULL)
-		goto bad_dentry;
-
-	ceph_file_cachep = KMEM_CACHE(ceph_file_info,
-				      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
-	if (ceph_file_cachep == NULL)
-		goto bad_file;
-
-	return 0;
-
-bad_file:
-	kmem_cache_destroy(ceph_dentry_cachep);
-bad_dentry:
-	kmem_cache_destroy(ceph_cap_cachep);
-bad_cap:
-	kmem_cache_destroy(ceph_inode_cachep);
-	return -ENOMEM;
-}
-
-static void destroy_caches(void)
-{
-	kmem_cache_destroy(ceph_inode_cachep);
-	kmem_cache_destroy(ceph_cap_cachep);
-	kmem_cache_destroy(ceph_dentry_cachep);
-	kmem_cache_destroy(ceph_file_cachep);
-}
-
-
-/*
- * ceph_umount_begin - initiate forced umount.  Tear down down the
- * mount, skipping steps that may hang while waiting for server(s).
- */
-static void ceph_umount_begin(struct super_block *sb)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
-
-	dout("ceph_umount_begin - starting forced umount\n");
-	if (!fsc)
-		return;
-	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
-	return;
-}
-
-static const struct super_operations ceph_super_ops = {
-	.alloc_inode	= ceph_alloc_inode,
-	.destroy_inode	= ceph_destroy_inode,
-	.write_inode    = ceph_write_inode,
-	.sync_fs        = ceph_sync_fs,
-	.put_super	= ceph_put_super,
-	.show_options   = ceph_show_options,
-	.statfs		= ceph_statfs,
-	.umount_begin   = ceph_umount_begin,
-};
-
-/*
- * Bootstrap mount by opening the root directory.  Note the mount
- * @started time from caller, and time out if this takes too long.
- */
-static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
-				       const char *path,
-				       unsigned long started)
-{
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct ceph_mds_request *req = NULL;
-	int err;
-	struct dentry *root;
-
-	/* open dir */
-	dout("open_root_inode opening '%s'\n", path);
-	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
-	if (IS_ERR(req))
-		return ERR_CAST(req);
-	req->r_path1 = kstrdup(path, GFP_NOFS);
-	req->r_ino1.ino = CEPH_INO_ROOT;
-	req->r_ino1.snap = CEPH_NOSNAP;
-	req->r_started = started;
-	req->r_timeout = fsc->client->options->mount_timeout * HZ;
-	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
-	req->r_num_caps = 2;
-	err = ceph_mdsc_do_request(mdsc, NULL, req);
-	if (err == 0) {
-		struct inode *inode = req->r_target_inode;
-		req->r_target_inode = NULL;
-		dout("open_root_inode success\n");
-		if (ceph_ino(inode) == CEPH_INO_ROOT &&
-		    fsc->sb->s_root == NULL) {
-			root = d_make_root(inode);
-			if (!root) {
-				root = ERR_PTR(-ENOMEM);
-				goto out;
-			}
-		} else {
-			root = d_obtain_alias(inode);
-		}
-		ceph_init_dentry(root);
-		dout("open_root_inode success, root dentry is %p\n", root);
-	} else {
-		root = ERR_PTR(err);
-	}
-out:
-	ceph_mdsc_put_request(req);
-	return root;
-}
-
-
-
-
-/*
- * mount: join the ceph cluster, and open root directory.
- */
-static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
-		      const char *path)
-{
-	int err;
-	unsigned long started = jiffies;  /* note the start time */
-	struct dentry *root;
-	int first = 0;   /* first vfsmount for this super_block */
-
-	dout("mount start\n");
-	mutex_lock(&fsc->client->mount_mutex);
-
-	err = __ceph_open_session(fsc->client, started);
-	if (err < 0)
-		goto out;
-
-	dout("mount opening root\n");
-	root = open_root_dentry(fsc, "", started);
-	if (IS_ERR(root)) {
-		err = PTR_ERR(root);
-		goto out;
-	}
-	if (fsc->sb->s_root) {
-		dput(root);
-	} else {
-		fsc->sb->s_root = root;
-		first = 1;
-
-		err = ceph_fs_debugfs_init(fsc);
-		if (err < 0)
-			goto fail;
-	}
-
-	if (path[0] == 0) {
-		dget(root);
-	} else {
-		dout("mount opening base mountpoint\n");
-		root = open_root_dentry(fsc, path, started);
-		if (IS_ERR(root)) {
-			err = PTR_ERR(root);
-			goto fail;
-		}
-	}
-
-	fsc->mount_state = CEPH_MOUNT_MOUNTED;
-	dout("mount success\n");
-	mutex_unlock(&fsc->client->mount_mutex);
-	return root;
-
-out:
-	mutex_unlock(&fsc->client->mount_mutex);
-	return ERR_PTR(err);
-
-fail:
-	if (first) {
-		dput(fsc->sb->s_root);
-		fsc->sb->s_root = NULL;
-	}
-	goto out;
-}
-
-static int ceph_set_super(struct super_block *s, void *data)
-{
-	struct ceph_fs_client *fsc = data;
-	int ret;
-
-	dout("set_super %p data %p\n", s, data);
-
-	s->s_flags = fsc->mount_options->sb_flags;
-	s->s_maxbytes = 1ULL << 40;  /* temp value until we get mdsmap */
-
-	s->s_fs_info = fsc;
-	fsc->sb = s;
-
-	s->s_op = &ceph_super_ops;
-	s->s_export_op = &ceph_export_ops;
-
-	s->s_time_gran = 1000;  /* 1000 ns == 1 us */
-
-	ret = set_anon_super(s, NULL);  /* what is that second arg for? */
-	if (ret != 0)
-		goto fail;
-
-	return ret;
-
-fail:
-	s->s_fs_info = NULL;
-	fsc->sb = NULL;
-	return ret;
-}
-
-/*
- * share superblock if same fs AND options
- */
-static int ceph_compare_super(struct super_block *sb, void *data)
-{
-	struct ceph_fs_client *new = data;
-	struct ceph_mount_options *fsopt = new->mount_options;
-	struct ceph_options *opt = new->client->options;
-	struct ceph_fs_client *other = ceph_sb_to_client(sb);
-
-	dout("ceph_compare_super %p\n", sb);
-
-	if (compare_mount_options(fsopt, opt, other)) {
-		dout("monitor(s)/mount options don't match\n");
-		return 0;
-	}
-	if ((opt->flags & CEPH_OPT_FSID) &&
-	    ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
-		dout("fsid doesn't match\n");
-		return 0;
-	}
-	if (fsopt->sb_flags != other->mount_options->sb_flags) {
-		dout("flags differ\n");
-		return 0;
-	}
-	return 1;
-}
-
-/*
- * construct our own bdi so we can control readahead, etc.
- */
-static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
-
-static int ceph_register_bdi(struct super_block *sb,
-			     struct ceph_fs_client *fsc)
-{
-	int err;
-
-	/* set ra_pages based on rasize mount option? */
-	if (fsc->mount_options->rasize >= PAGE_CACHE_SIZE)
-		fsc->backing_dev_info.ra_pages =
-			(fsc->mount_options->rasize + PAGE_CACHE_SIZE - 1)
-			>> PAGE_SHIFT;
-	else
-		fsc->backing_dev_info.ra_pages =
-			default_backing_dev_info.ra_pages;
-
-	err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d",
-			   atomic_long_inc_return(&bdi_seq));
-	if (!err)
-		sb->s_bdi = &fsc->backing_dev_info;
-	return err;
-}
-
-static struct dentry *ceph_mount(struct file_system_type *fs_type,
-		       int flags, const char *dev_name, void *data)
-{
-	struct super_block *sb;
-	struct ceph_fs_client *fsc;
-	struct dentry *res;
-	int err;
-	int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
-	const char *path = NULL;
-	struct ceph_mount_options *fsopt = NULL;
-	struct ceph_options *opt = NULL;
-
-	dout("ceph_mount\n");
-	err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
-	if (err < 0) {
-		res = ERR_PTR(err);
-		goto out_final;
-	}
-
-	/* create client (which we may/may not use) */
-	fsc = create_fs_client(fsopt, opt);
-	if (IS_ERR(fsc)) {
-		res = ERR_CAST(fsc);
-		destroy_mount_options(fsopt);
-		ceph_destroy_options(opt);
-		goto out_final;
-	}
-
-	err = ceph_mdsc_init(fsc);
-	if (err < 0) {
-		res = ERR_PTR(err);
-		goto out;
-	}
-
-	if (ceph_test_opt(fsc->client, NOSHARE))
-		compare_super = NULL;
-	sb = sget(fs_type, compare_super, ceph_set_super, fsc);
-	if (IS_ERR(sb)) {
-		res = ERR_CAST(sb);
-		goto out;
-	}
-
-	if (ceph_sb_to_client(sb) != fsc) {
-		ceph_mdsc_destroy(fsc);
-		destroy_fs_client(fsc);
-		fsc = ceph_sb_to_client(sb);
-		dout("get_sb got existing client %p\n", fsc);
-	} else {
-		dout("get_sb using new client %p\n", fsc);
-		err = ceph_register_bdi(sb, fsc);
-		if (err < 0) {
-			res = ERR_PTR(err);
-			goto out_splat;
-		}
-	}
-
-	res = ceph_real_mount(fsc, path);
-	if (IS_ERR(res))
-		goto out_splat;
-	dout("root %p inode %p ino %llx.%llx\n", res,
-	     res->d_inode, ceph_vinop(res->d_inode));
-	return res;
-
-out_splat:
-	ceph_mdsc_close_sessions(fsc->mdsc);
-	deactivate_locked_super(sb);
-	goto out_final;
-
-out:
-	ceph_mdsc_destroy(fsc);
-	destroy_fs_client(fsc);
-out_final:
-	dout("ceph_mount fail %ld\n", PTR_ERR(res));
-	return res;
-}
-
-static void ceph_kill_sb(struct super_block *s)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
-	dout("kill_sb %p\n", s);
-	ceph_mdsc_pre_umount(fsc->mdsc);
-	kill_anon_super(s);    /* will call put_super after sb is r/o */
-	ceph_mdsc_destroy(fsc);
-	destroy_fs_client(fsc);
-}
-
-static struct file_system_type ceph_fs_type = {
-	.owner		= THIS_MODULE,
-	.name		= "ceph",
-	.mount		= ceph_mount,
-	.kill_sb	= ceph_kill_sb,
-	.fs_flags	= FS_RENAME_DOES_D_MOVE,
-};
-
-#define _STRINGIFY(x) #x
-#define STRINGIFY(x) _STRINGIFY(x)
-
-static int __init init_ceph(void)
-{
-	int ret = init_caches();
-	if (ret)
-		goto out;
-
-	ceph_xattr_init();
-	ret = register_filesystem(&ceph_fs_type);
-	if (ret)
-		goto out_icache;
-
-	pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
-
-	return 0;
-
-out_icache:
-	ceph_xattr_exit();
-	destroy_caches();
-out:
-	return ret;
-}
-
-static void __exit exit_ceph(void)
-{
-	dout("exit_ceph\n");
-	unregister_filesystem(&ceph_fs_type);
-	ceph_xattr_exit();
-	destroy_caches();
-}
-
-module_init(init_ceph);
-module_exit(exit_ceph);
-
-MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
-MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
-MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
-MODULE_DESCRIPTION("Ceph filesystem for Linux");
-MODULE_LICENSE("GPL");
diff --git a/ANDROID_3.4.5/fs/ceph/super.h b/ANDROID_3.4.5/fs/ceph/super.h
deleted file mode 100644
index fc35036d..00000000
--- a/ANDROID_3.4.5/fs/ceph/super.h
+++ /dev/null
@@ -1,858 +0,0 @@
-#ifndef _FS_CEPH_SUPER_H
-#define _FS_CEPH_SUPER_H
-
-#include <linux/ceph/ceph_debug.h>
-
-#include <asm/unaligned.h>
-#include <linux/backing-dev.h>
-#include <linux/completion.h>
-#include <linux/exportfs.h>
-#include <linux/fs.h>
-#include <linux/mempool.h>
-#include <linux/pagemap.h>
-#include <linux/wait.h>
-#include <linux/writeback.h>
-#include <linux/slab.h>
-
-#include <linux/ceph/libceph.h>
-
-/* f_type in struct statfs */
-#define CEPH_SUPER_MAGIC 0x00c36400
-
-/* large granularity for statfs utilization stats to facilitate
- * large volume sizes on 32-bit machines. */
-#define CEPH_BLOCK_SHIFT   20  /* 1 MB */
-#define CEPH_BLOCK         (1 << CEPH_BLOCK_SHIFT)
-
-#define CEPH_MOUNT_OPT_DIRSTAT         (1<<4) /* `cat dirname` for stats */
-#define CEPH_MOUNT_OPT_RBYTES          (1<<5) /* dir st_bytes = rbytes */
-#define CEPH_MOUNT_OPT_NOASYNCREADDIR  (1<<7) /* no dcache readdir */
-#define CEPH_MOUNT_OPT_INO32           (1<<8) /* 32 bit inos */
-#define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
-
-#define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES)
-
-#define ceph_set_mount_opt(fsc, opt) \
-	(fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
-#define ceph_test_mount_opt(fsc, opt) \
-	(!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
-
-#define CEPH_RSIZE_DEFAULT             0           /* max read size */
-#define CEPH_RASIZE_DEFAULT            (8192*1024) /* readahead */
-#define CEPH_MAX_READDIR_DEFAULT        1024
-#define CEPH_MAX_READDIR_BYTES_DEFAULT  (512*1024)
-#define CEPH_SNAPDIRNAME_DEFAULT        ".snap"
-
-struct ceph_mount_options {
-	int flags;
-	int sb_flags;
-
-	int wsize;            /* max write size */
-	int rsize;            /* max read size */
-	int rasize;           /* max readahead */
-	int congestion_kb;    /* max writeback in flight */
-	int caps_wanted_delay_min, caps_wanted_delay_max;
-	int cap_release_safety;
-	int max_readdir;       /* max readdir result (entires) */
-	int max_readdir_bytes; /* max readdir result (bytes) */
-
-	/*
-	 * everything above this point can be memcmp'd; everything below
-	 * is handled in compare_mount_options()
-	 */
-
-	char *snapdir_name;   /* default ".snap" */
-};
-
-struct ceph_fs_client {
-	struct super_block *sb;
-
-	struct ceph_mount_options *mount_options;
-	struct ceph_client *client;
-
-	unsigned long mount_state;
-	int min_caps;                  /* min caps i added */
-
-	struct ceph_mds_client *mdsc;
-
-	/* writeback */
-	mempool_t *wb_pagevec_pool;
-	struct workqueue_struct *wb_wq;
-	struct workqueue_struct *pg_inv_wq;
-	struct workqueue_struct *trunc_wq;
-	atomic_long_t writeback_count;
-
-	struct backing_dev_info backing_dev_info;
-
-#ifdef CONFIG_DEBUG_FS
-	struct dentry *debugfs_dentry_lru, *debugfs_caps;
-	struct dentry *debugfs_congestion_kb;
-	struct dentry *debugfs_bdi;
-	struct dentry *debugfs_mdsc, *debugfs_mdsmap;
-#endif
-};
-
-
-/*
- * File i/o capability.  This tracks shared state with the metadata
- * server that allows us to cache or writeback attributes or to read
- * and write data.  For any given inode, we should have one or more
- * capabilities, one issued by each metadata server, and our
- * cumulative access is the OR of all issued capabilities.
- *
- * Each cap is referenced by the inode's i_caps rbtree and by per-mds
- * session capability lists.
- */
-struct ceph_cap {
-	struct ceph_inode_info *ci;
-	struct rb_node ci_node;          /* per-ci cap tree */
-	struct ceph_mds_session *session;
-	struct list_head session_caps;   /* per-session caplist */
-	int mds;
-	u64 cap_id;       /* unique cap id (mds provided) */
-	int issued;       /* latest, from the mds */
-	int implemented;  /* implemented superset of issued (for revocation) */
-	int mds_wanted;
-	u32 seq, issue_seq, mseq;
-	u32 cap_gen;      /* active/stale cycle */
-	unsigned long last_used;
-	struct list_head caps_item;
-};
-
-#define CHECK_CAPS_NODELAY    1  /* do not delay any further */
-#define CHECK_CAPS_AUTHONLY   2  /* only check auth cap */
-#define CHECK_CAPS_FLUSH      4  /* flush any dirty caps */
-
-/*
- * Snapped cap state that is pending flush to mds.  When a snapshot occurs,
- * we first complete any in-process sync writes and writeback any dirty
- * data before flushing the snapped state (tracked here) back to the MDS.
- */
-struct ceph_cap_snap {
-	atomic_t nref;
-	struct ceph_inode_info *ci;
-	struct list_head ci_item, flushing_item;
-
-	u64 follows, flush_tid;
-	int issued, dirty;
-	struct ceph_snap_context *context;
-
-	umode_t mode;
-	uid_t uid;
-	gid_t gid;
-
-	struct ceph_buffer *xattr_blob;
-	u64 xattr_version;
-
-	u64 size;
-	struct timespec mtime, atime, ctime;
-	u64 time_warp_seq;
-	int writing;   /* a sync write is still in progress */
-	int dirty_pages;     /* dirty pages awaiting writeback */
-};
-
-static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
-{
-	if (atomic_dec_and_test(&capsnap->nref)) {
-		if (capsnap->xattr_blob)
-			ceph_buffer_put(capsnap->xattr_blob);
-		kfree(capsnap);
-	}
-}
-
-/*
- * The frag tree describes how a directory is fragmented, potentially across
- * multiple metadata servers.  It is also used to indicate points where
- * metadata authority is delegated, and whether/where metadata is replicated.
- *
- * A _leaf_ frag will be present in the i_fragtree IFF there is
- * delegation info.  That is, if mds >= 0 || ndist > 0.
- */
-#define CEPH_MAX_DIRFRAG_REP 4
-
-struct ceph_inode_frag {
-	struct rb_node node;
-
-	/* fragtree state */
-	u32 frag;
-	int split_by;         /* i.e. 2^(split_by) children */
-
-	/* delegation and replication info */
-	int mds;              /* -1 if same authority as parent */
-	int ndist;            /* >0 if replicated */
-	int dist[CEPH_MAX_DIRFRAG_REP];
-};
-
-/*
- * We cache inode xattrs as an encoded blob until they are first used,
- * at which point we parse them into an rbtree.
- */
-struct ceph_inode_xattr {
-	struct rb_node node;
-
-	const char *name;
-	int name_len;
-	const char *val;
-	int val_len;
-	int dirty;
-
-	int should_free_name;
-	int should_free_val;
-};
-
-/*
- * Ceph dentry state
- */
-struct ceph_dentry_info {
-	unsigned long flags;
-	struct ceph_mds_session *lease_session;
-	u32 lease_gen, lease_shared_gen;
-	u32 lease_seq;
-	unsigned long lease_renew_after, lease_renew_from;
-	struct list_head lru;
-	struct dentry *dentry;
-	u64 time;
-	u64 offset;
-};
-
-/*
- * dentry flags
- *
- * The locking for D_COMPLETE is a bit odd:
- *  - we can clear it at almost any time (see ceph_d_prune)
- *  - it is only meaningful if:
- *    - we hold dir inode i_ceph_lock
- *    - we hold dir FILE_SHARED caps
- *    - the dentry D_COMPLETE is set
- */
-#define CEPH_D_COMPLETE 1  /* if set, d_u.d_subdirs is complete directory */
-
-struct ceph_inode_xattrs_info {
-	/*
-	 * (still encoded) xattr blob. we avoid the overhead of parsing
-	 * this until someone actually calls getxattr, etc.
-	 *
-	 * blob->vec.iov_len == 4 implies there are no xattrs; blob ==
-	 * NULL means we don't know.
-	*/
-	struct ceph_buffer *blob, *prealloc_blob;
-
-	struct rb_root index;
-	bool dirty;
-	int count;
-	int names_size;
-	int vals_size;
-	u64 version, index_version;
-};
-
-/*
- * Ceph inode.
- */
-struct ceph_inode_info {
-	struct ceph_vino i_vino;   /* ceph ino + snap */
-
-	spinlock_t i_ceph_lock;
-
-	u64 i_version;
-	u32 i_time_warp_seq;
-
-	unsigned i_ceph_flags;
-	unsigned long i_release_count;
-
-	struct ceph_dir_layout i_dir_layout;
-	struct ceph_file_layout i_layout;
-	char *i_symlink;
-
-	/* for dirs */
-	struct timespec i_rctime;
-	u64 i_rbytes, i_rfiles, i_rsubdirs;
-	u64 i_files, i_subdirs;
-	u64 i_max_offset;  /* largest readdir offset, set with D_COMPLETE */
-
-	struct rb_root i_fragtree;
-	struct mutex i_fragtree_mutex;
-
-	struct ceph_inode_xattrs_info i_xattrs;
-
-	/* capabilities.  protected _both_ by i_ceph_lock and cap->session's
-	 * s_mutex. */
-	struct rb_root i_caps;           /* cap list */
-	struct ceph_cap *i_auth_cap;     /* authoritative cap, if any */
-	unsigned i_dirty_caps, i_flushing_caps;     /* mask of dirtied fields */
-	struct list_head i_dirty_item, i_flushing_item;
-	u64 i_cap_flush_seq;
-	/* we need to track cap writeback on a per-cap-bit basis, to allow
-	 * overlapping, pipelined cap flushes to the mds.  we can probably
-	 * reduce the tid to 8 bits if we're concerned about inode size. */
-	u16 i_cap_flush_last_tid, i_cap_flush_tid[CEPH_CAP_BITS];
-	wait_queue_head_t i_cap_wq;      /* threads waiting on a capability */
-	unsigned long i_hold_caps_min; /* jiffies */
-	unsigned long i_hold_caps_max; /* jiffies */
-	struct list_head i_cap_delay_list;  /* for delayed cap release to mds */
-	int i_cap_exporting_mds;         /* to handle cap migration between */
-	unsigned i_cap_exporting_mseq;   /*  mds's. */
-	unsigned i_cap_exporting_issued;
-	struct ceph_cap_reservation i_cap_migration_resv;
-	struct list_head i_cap_snaps;   /* snapped state pending flush to mds */
-	struct ceph_snap_context *i_head_snapc;  /* set if wr_buffer_head > 0 or
-						    dirty|flushing caps */
-	unsigned i_snap_caps;           /* cap bits for snapped files */
-
-	int i_nr_by_mode[CEPH_FILE_MODE_NUM];  /* open file counts */
-
-	u32 i_truncate_seq;        /* last truncate to smaller size */
-	u64 i_truncate_size;       /*  and the size we last truncated down to */
-	int i_truncate_pending;    /*  still need to call vmtruncate */
-
-	u64 i_max_size;            /* max file size authorized by mds */
-	u64 i_reported_size; /* (max_)size reported to or requested of mds */
-	u64 i_wanted_max_size;     /* offset we'd like to write too */
-	u64 i_requested_max_size;  /* max_size we've requested */
-
-	/* held references to caps */
-	int i_pin_ref;
-	int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
-	int i_wrbuffer_ref, i_wrbuffer_ref_head;
-	u32 i_shared_gen;       /* increment each time we get FILE_SHARED */
-	u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
-	u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
-
-	struct list_head i_unsafe_writes; /* uncommitted sync writes */
-	struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */
-	spinlock_t i_unsafe_lock;
-
-	struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */
-	int i_snap_realm_counter; /* snap realm (if caps) */
-	struct list_head i_snap_realm_item;
-	struct list_head i_snap_flush_item;
-
-	struct work_struct i_wb_work;  /* writeback work */
-	struct work_struct i_pg_inv_work;  /* page invalidation work */
-
-	struct work_struct i_vmtruncate_work;
-
-	struct inode vfs_inode; /* at end */
-};
-
-static inline struct ceph_inode_info *ceph_inode(struct inode *inode)
-{
-	return container_of(inode, struct ceph_inode_info, vfs_inode);
-}
-
-static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
-{
-	return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
-}
-
-static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
-{
-	return (struct ceph_fs_client *)sb->s_fs_info;
-}
-
-static inline struct ceph_vino ceph_vino(struct inode *inode)
-{
-	return ceph_inode(inode)->i_vino;
-}
-
-/*
- * ino_t is <64 bits on many architectures, blech.
- *
- *               i_ino (kernel inode)   st_ino (userspace)
- * i386          32                     32
- * x86_64+ino32  64                     32
- * x86_64        64                     64
- */
-static inline u32 ceph_ino_to_ino32(__u64 vino)
-{
-	u32 ino = vino & 0xffffffff;
-	ino ^= vino >> 32;
-	if (!ino)
-		ino = 2;
-	return ino;
-}
-
-/*
- * kernel i_ino value
- */
-static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
-{
-#if BITS_PER_LONG == 32
-	return ceph_ino_to_ino32(vino.ino);
-#else
-	return (ino_t)vino.ino;
-#endif
-}
-
-/*
- * user-visible ino (stat, filldir)
- */
-#if BITS_PER_LONG == 32
-static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
-{
-	return ino;
-}
-#else
-static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
-{
-	if (ceph_test_mount_opt(ceph_sb_to_client(sb), INO32))
-		ino = ceph_ino_to_ino32(ino);
-	return ino;
-}
-#endif
-
-
-/* for printf-style formatting */
-#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
-
-static inline u64 ceph_ino(struct inode *inode)
-{
-	return ceph_inode(inode)->i_vino.ino;
-}
-static inline u64 ceph_snap(struct inode *inode)
-{
-	return ceph_inode(inode)->i_vino.snap;
-}
-
-static inline int ceph_ino_compare(struct inode *inode, void *data)
-{
-	struct ceph_vino *pvino = (struct ceph_vino *)data;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	return ci->i_vino.ino == pvino->ino &&
-		ci->i_vino.snap == pvino->snap;
-}
-
-static inline struct inode *ceph_find_inode(struct super_block *sb,
-					    struct ceph_vino vino)
-{
-	ino_t t = ceph_vino_to_ino(vino);
-	return ilookup5(sb, t, ceph_ino_compare, &vino);
-}
-
-
-/*
- * Ceph inode.
- */
-#define CEPH_I_NODELAY   4  /* do not delay cap release */
-#define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */
-#define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */
-
-static inline void ceph_i_clear(struct inode *inode, unsigned mask)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-
-	spin_lock(&ci->i_ceph_lock);
-	ci->i_ceph_flags &= ~mask;
-	spin_unlock(&ci->i_ceph_lock);
-}
-
-static inline void ceph_i_set(struct inode *inode, unsigned mask)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-
-	spin_lock(&ci->i_ceph_lock);
-	ci->i_ceph_flags |= mask;
-	spin_unlock(&ci->i_ceph_lock);
-}
-
-static inline bool ceph_i_test(struct inode *inode, unsigned mask)
-{
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	bool r;
-
-	spin_lock(&ci->i_ceph_lock);
-	r = (ci->i_ceph_flags & mask) == mask;
-	spin_unlock(&ci->i_ceph_lock);
-	return r;
-}
-
-
-/* find a specific frag @f */
-extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci,
-						u32 f);
-
-/*
- * choose fragment for value @v.  copy frag content to pfrag, if leaf
- * exists
- */
-extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v,
-			    struct ceph_inode_frag *pfrag,
-			    int *found);
-
-static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry)
-{
-	return (struct ceph_dentry_info *)dentry->d_fsdata;
-}
-
-static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
-{
-	return ((loff_t)frag << 32) | (loff_t)off;
-}
-
-/*
- * set/clear directory D_COMPLETE flag
- */
-void ceph_dir_set_complete(struct inode *inode);
-void ceph_dir_clear_complete(struct inode *inode);
-bool ceph_dir_test_complete(struct inode *inode);
-
-/*
- * caps helpers
- */
-static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci)
-{
-	return !RB_EMPTY_ROOT(&ci->i_caps);
-}
-
-extern int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented);
-extern int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int t);
-extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
-				    struct ceph_cap *cap);
-
-static inline int ceph_caps_issued(struct ceph_inode_info *ci)
-{
-	int issued;
-	spin_lock(&ci->i_ceph_lock);
-	issued = __ceph_caps_issued(ci, NULL);
-	spin_unlock(&ci->i_ceph_lock);
-	return issued;
-}
-
-static inline int ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask,
-					int touch)
-{
-	int r;
-	spin_lock(&ci->i_ceph_lock);
-	r = __ceph_caps_issued_mask(ci, mask, touch);
-	spin_unlock(&ci->i_ceph_lock);
-	return r;
-}
-
-static inline int __ceph_caps_dirty(struct ceph_inode_info *ci)
-{
-	return ci->i_dirty_caps | ci->i_flushing_caps;
-}
-extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
-
-extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
-extern int __ceph_caps_used(struct ceph_inode_info *ci);
-
-extern int __ceph_caps_file_wanted(struct ceph_inode_info *ci);
-
-/*
- * wanted, by virtue of open file modes AND cap refs (buffered/cached data)
- */
-static inline int __ceph_caps_wanted(struct ceph_inode_info *ci)
-{
-	int w = __ceph_caps_file_wanted(ci) | __ceph_caps_used(ci);
-	if (w & CEPH_CAP_FILE_BUFFER)
-		w |= CEPH_CAP_FILE_EXCL;  /* we want EXCL if dirty data */
-	return w;
-}
-
-/* what the mds thinks we want */
-extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci);
-
-extern void ceph_caps_init(struct ceph_mds_client *mdsc);
-extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
-extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta);
-extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
-			     struct ceph_cap_reservation *ctx, int need);
-extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
-			       struct ceph_cap_reservation *ctx);
-extern void ceph_reservation_status(struct ceph_fs_client *client,
-				    int *total, int *avail, int *used,
-				    int *reserved, int *min);
-
-
-
-/*
- * we keep buffered readdir results attached to file->private_data
- */
-#define CEPH_F_SYNC     1
-#define CEPH_F_ATEND    2
-
-struct ceph_file_info {
-	short fmode;     /* initialized on open */
-	short flags;     /* CEPH_F_* */
-
-	/* readdir: position within the dir */
-	u32 frag;
-	struct ceph_mds_request *last_readdir;
-
-	/* readdir: position within a frag */
-	unsigned offset;       /* offset of last chunk, adjusted for . and .. */
-	u64 next_offset;       /* offset of next chunk (last_name's + 1) */
-	char *last_name;       /* last entry in previous chunk */
-	struct dentry *dentry; /* next dentry (for dcache readdir) */
-	unsigned long dir_release_count;
-
-	/* used for -o dirstat read() on directory thing */
-	char *dir_info;
-	int dir_info_len;
-};
-
-
-
-/*
- * A "snap realm" describes a subset of the file hierarchy sharing
- * the same set of snapshots that apply to it.  The realms themselves
- * are organized into a hierarchy, such that children inherit (some of)
- * the snapshots of their parents.
- *
- * All inodes within the realm that have capabilities are linked into a
- * per-realm list.
- */
-struct ceph_snap_realm {
-	u64 ino;
-	atomic_t nref;
-	struct rb_node node;
-
-	u64 created, seq;
-	u64 parent_ino;
-	u64 parent_since;   /* snapid when our current parent became so */
-
-	u64 *prior_parent_snaps;      /* snaps inherited from any parents we */
-	int num_prior_parent_snaps;   /*  had prior to parent_since */
-	u64 *snaps;                   /* snaps specific to this realm */
-	int num_snaps;
-
-	struct ceph_snap_realm *parent;
-	struct list_head children;       /* list of child realms */
-	struct list_head child_item;
-
-	struct list_head empty_item;     /* if i have ref==0 */
-
-	struct list_head dirty_item;     /* if realm needs new context */
-
-	/* the current set of snaps for this realm */
-	struct ceph_snap_context *cached_context;
-
-	struct list_head inodes_with_caps;
-	spinlock_t inodes_with_caps_lock;
-};
-
-static inline int default_congestion_kb(void)
-{
-	int congestion_kb;
-
-	/*
-	 * Copied from NFS
-	 *
-	 * congestion size, scale with available memory.
-	 *
-	 *  64MB:    8192k
-	 * 128MB:   11585k
-	 * 256MB:   16384k
-	 * 512MB:   23170k
-	 *   1GB:   32768k
-	 *   2GB:   46340k
-	 *   4GB:   65536k
-	 *   8GB:   92681k
-	 *  16GB:  131072k
-	 *
-	 * This allows larger machines to have larger/more transfers.
-	 * Limit the default to 256M
-	 */
-	congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
-	if (congestion_kb > 256*1024)
-		congestion_kb = 256*1024;
-
-	return congestion_kb;
-}
-
-
-
-/* snap.c */
-struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
-					       u64 ino);
-extern void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
-				struct ceph_snap_realm *realm);
-extern void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
-				struct ceph_snap_realm *realm);
-extern int ceph_update_snap_trace(struct ceph_mds_client *m,
-				  void *p, void *e, bool deletion);
-extern void ceph_handle_snap(struct ceph_mds_client *mdsc,
-			     struct ceph_mds_session *session,
-			     struct ceph_msg *msg);
-extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
-extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
-				  struct ceph_cap_snap *capsnap);
-extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
-
-/*
- * a cap_snap is "pending" if it is still awaiting an in-progress
- * sync write (that may/may not still update size, mtime, etc.).
- */
-static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci)
-{
-	return !list_empty(&ci->i_cap_snaps) &&
-		list_entry(ci->i_cap_snaps.prev, struct ceph_cap_snap,
-			   ci_item)->writing;
-}
-
-/* inode.c */
-extern const struct inode_operations ceph_file_iops;
-
-extern struct inode *ceph_alloc_inode(struct super_block *sb);
-extern void ceph_destroy_inode(struct inode *inode);
-
-extern struct inode *ceph_get_inode(struct super_block *sb,
-				    struct ceph_vino vino);
-extern struct inode *ceph_get_snapdir(struct inode *parent);
-extern int ceph_fill_file_size(struct inode *inode, int issued,
-			       u32 truncate_seq, u64 truncate_size, u64 size);
-extern void ceph_fill_file_time(struct inode *inode, int issued,
-				u64 time_warp_seq, struct timespec *ctime,
-				struct timespec *mtime, struct timespec *atime);
-extern int ceph_fill_trace(struct super_block *sb,
-			   struct ceph_mds_request *req,
-			   struct ceph_mds_session *session);
-extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
-				    struct ceph_mds_session *session);
-
-extern int ceph_inode_holds_cap(struct inode *inode, int mask);
-
-extern int ceph_inode_set_size(struct inode *inode, loff_t size);
-extern void __ceph_do_pending_vmtruncate(struct inode *inode);
-extern void ceph_queue_vmtruncate(struct inode *inode);
-
-extern void ceph_queue_invalidate(struct inode *inode);
-extern void ceph_queue_writeback(struct inode *inode);
-
-extern int ceph_do_getattr(struct inode *inode, int mask);
-extern int ceph_permission(struct inode *inode, int mask);
-extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
-extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
-			struct kstat *stat);
-
-/* xattr.c */
-extern int ceph_setxattr(struct dentry *, const char *, const void *,
-			 size_t, int);
-extern ssize_t ceph_getxattr(struct dentry *, const char *, void *, size_t);
-extern ssize_t ceph_listxattr(struct dentry *, char *, size_t);
-extern int ceph_removexattr(struct dentry *, const char *);
-extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci);
-extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci);
-extern void __init ceph_xattr_init(void);
-extern void ceph_xattr_exit(void);
-
-/* caps.c */
-extern const char *ceph_cap_string(int c);
-extern void ceph_handle_caps(struct ceph_mds_session *session,
-			     struct ceph_msg *msg);
-extern int ceph_add_cap(struct inode *inode,
-			struct ceph_mds_session *session, u64 cap_id,
-			int fmode, unsigned issued, unsigned wanted,
-			unsigned cap, unsigned seq, u64 realmino, int flags,
-			struct ceph_cap_reservation *caps_reservation);
-extern void __ceph_remove_cap(struct ceph_cap *cap);
-static inline void ceph_remove_cap(struct ceph_cap *cap)
-{
-	spin_lock(&cap->ci->i_ceph_lock);
-	__ceph_remove_cap(cap);
-	spin_unlock(&cap->ci->i_ceph_lock);
-}
-extern void ceph_put_cap(struct ceph_mds_client *mdsc,
-			 struct ceph_cap *cap);
-
-extern void ceph_queue_caps_release(struct inode *inode);
-extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
-extern int ceph_fsync(struct file *file, loff_t start, loff_t end,
-		      int datasync);
-extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
-				    struct ceph_mds_session *session);
-extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
-					     int mds);
-extern int ceph_get_cap_mds(struct inode *inode);
-extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
-extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
-extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
-				       struct ceph_snap_context *snapc);
-extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
-			       struct ceph_mds_session **psession,
-			       int again);
-extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
-			    struct ceph_mds_session *session);
-extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
-extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
-
-extern int ceph_encode_inode_release(void **p, struct inode *inode,
-				     int mds, int drop, int unless, int force);
-extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
-				      int mds, int drop, int unless);
-
-extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
-			 int *got, loff_t endoff);
-
-/* for counting open files by mode */
-static inline void __ceph_get_fmode(struct ceph_inode_info *ci, int mode)
-{
-	ci->i_nr_by_mode[mode]++;
-}
-extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode);
-
-/* addr.c */
-extern const struct address_space_operations ceph_aops;
-extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
-
-/* file.c */
-extern const struct file_operations ceph_file_fops;
-extern const struct address_space_operations ceph_aops;
-extern int ceph_copy_to_page_vector(struct page **pages,
-				    const char *data,
-				    loff_t off, size_t len);
-extern int ceph_copy_from_page_vector(struct page **pages,
-				    char *data,
-				    loff_t off, size_t len);
-extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
-extern int ceph_open(struct inode *inode, struct file *file);
-extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
-				       struct nameidata *nd, int mode,
-				       int locked_dir);
-extern int ceph_release(struct inode *inode, struct file *filp);
-
-/* dir.c */
-extern const struct file_operations ceph_dir_fops;
-extern const struct inode_operations ceph_dir_iops;
-extern const struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops,
-	ceph_snapdir_dentry_ops;
-
-extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry);
-extern int ceph_handle_snapdir(struct ceph_mds_request *req,
-			       struct dentry *dentry, int err);
-extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
-					 struct dentry *dentry, int err);
-
-extern void ceph_dentry_lru_add(struct dentry *dn);
-extern void ceph_dentry_lru_touch(struct dentry *dn);
-extern void ceph_dentry_lru_del(struct dentry *dn);
-extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
-extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn);
-extern struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry);
-
-/*
- * our d_ops vary depending on whether the inode is live,
- * snapshotted (read-only), or a virtual ".snap" directory.
- */
-int ceph_init_dentry(struct dentry *dentry);
-
-
-/* ioctl.c */
-extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-
-/* export.c */
-extern const struct export_operations ceph_export_ops;
-
-/* locks.c */
-extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl);
-extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl);
-extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num);
-extern int ceph_encode_locks(struct inode *i, struct ceph_pagelist *p,
-			     int p_locks, int f_locks);
-extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c);
-
-/* debugfs.c */
-extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
-extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
-
-#endif /* _FS_CEPH_SUPER_H */
diff --git a/ANDROID_3.4.5/fs/ceph/xattr.c b/ANDROID_3.4.5/fs/ceph/xattr.c
deleted file mode 100644
index 35b86331..00000000
--- a/ANDROID_3.4.5/fs/ceph/xattr.c
+++ /dev/null
@@ -1,946 +0,0 @@
-#include <linux/ceph/ceph_debug.h>
-
-#include "super.h"
-#include "mds_client.h"
-
-#include <linux/ceph/decode.h>
-
-#include <linux/xattr.h>
-#include <linux/slab.h>
-
-#define XATTR_CEPH_PREFIX "ceph."
-#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
-
-static bool ceph_is_valid_xattr(const char *name)
-{
-	return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
-	       !strncmp(name, XATTR_SECURITY_PREFIX,
-			XATTR_SECURITY_PREFIX_LEN) ||
-	       !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
-	       !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
-}
-
-/*
- * These define virtual xattrs exposing the recursive directory
- * statistics and layout metadata.
- */
-struct ceph_vxattr {
-	char *name;
-	size_t name_size;	/* strlen(name) + 1 (for '\0') */
-	size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
-			      size_t size);
-	bool readonly;
-};
-
-/* directories */
-
-static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
-					size_t size)
-{
-	return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
-}
-
-static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
-				      size_t size)
-{
-	return snprintf(val, size, "%lld", ci->i_files);
-}
-
-static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
-					size_t size)
-{
-	return snprintf(val, size, "%lld", ci->i_subdirs);
-}
-
-static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
-					 size_t size)
-{
-	return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
-}
-
-static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
-				       size_t size)
-{
-	return snprintf(val, size, "%lld", ci->i_rfiles);
-}
-
-static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
-					 size_t size)
-{
-	return snprintf(val, size, "%lld", ci->i_rsubdirs);
-}
-
-static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
-				       size_t size)
-{
-	return snprintf(val, size, "%lld", ci->i_rbytes);
-}
-
-static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
-				       size_t size)
-{
-	return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec,
-			(long)ci->i_rctime.tv_nsec);
-}
-
-#define CEPH_XATTR_NAME(_type, _name)	XATTR_CEPH_PREFIX #_type "." #_name
-
-#define XATTR_NAME_CEPH(_type, _name) \
-		{ \
-			.name = CEPH_XATTR_NAME(_type, _name), \
-			.name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
-			.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
-			.readonly = true, \
-		}
-
-static struct ceph_vxattr ceph_dir_vxattrs[] = {
-	XATTR_NAME_CEPH(dir, entries),
-	XATTR_NAME_CEPH(dir, files),
-	XATTR_NAME_CEPH(dir, subdirs),
-	XATTR_NAME_CEPH(dir, rentries),
-	XATTR_NAME_CEPH(dir, rfiles),
-	XATTR_NAME_CEPH(dir, rsubdirs),
-	XATTR_NAME_CEPH(dir, rbytes),
-	XATTR_NAME_CEPH(dir, rctime),
-	{ 0 }	/* Required table terminator */
-};
-static size_t ceph_dir_vxattrs_name_size;	/* total size of all names */
-
-/* files */
-
-static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val,
-				   size_t size)
-{
-	int ret;
-
-	ret = snprintf(val, size,
-		"chunk_bytes=%lld\nstripe_count=%lld\nobject_size=%lld\n",
-		(unsigned long long)ceph_file_layout_su(ci->i_layout),
-		(unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
-		(unsigned long long)ceph_file_layout_object_size(ci->i_layout));
-
-	if (ceph_file_layout_pg_preferred(ci->i_layout) >= 0) {
-		val += ret;
-		size -= ret;
-		ret += snprintf(val, size, "preferred_osd=%lld\n",
-			    (unsigned long long)ceph_file_layout_pg_preferred(
-				    ci->i_layout));
-	}
-
-	return ret;
-}
-
-static struct ceph_vxattr ceph_file_vxattrs[] = {
-	XATTR_NAME_CEPH(file, layout),
-	/* The following extended attribute name is deprecated */
-	{
-		.name = XATTR_CEPH_PREFIX "layout",
-		.name_size = sizeof (XATTR_CEPH_PREFIX "layout"),
-		.getxattr_cb = ceph_vxattrcb_file_layout,
-		.readonly = true,
-	},
-	{ 0 }	/* Required table terminator */
-};
-static size_t ceph_file_vxattrs_name_size;	/* total size of all names */
-
-static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
-{
-	if (S_ISDIR(inode->i_mode))
-		return ceph_dir_vxattrs;
-	else if (S_ISREG(inode->i_mode))
-		return ceph_file_vxattrs;
-	return NULL;
-}
-
-static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
-{
-	if (vxattrs == ceph_dir_vxattrs)
-		return ceph_dir_vxattrs_name_size;
-	if (vxattrs == ceph_file_vxattrs)
-		return ceph_file_vxattrs_name_size;
-	BUG();
-
-	return 0;
-}
-
-/*
- * Compute the aggregate size (including terminating '\0') of all
- * virtual extended attribute names in the given vxattr table.
- */
-static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
-{
-	struct ceph_vxattr *vxattr;
-	size_t size = 0;
-
-	for (vxattr = vxattrs; vxattr->name; vxattr++)
-		size += vxattr->name_size;
-
-	return size;
-}
-
-/* Routines called at initialization and exit time */
-
-void __init ceph_xattr_init(void)
-{
-	ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
-	ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
-}
-
-void ceph_xattr_exit(void)
-{
-	ceph_dir_vxattrs_name_size = 0;
-	ceph_file_vxattrs_name_size = 0;
-}
-
-static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
-						const char *name)
-{
-	struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
-
-	if (vxattr) {
-		while (vxattr->name) {
-			if (!strcmp(vxattr->name, name))
-				return vxattr;
-			vxattr++;
-		}
-	}
-
-	return NULL;
-}
-
-static int __set_xattr(struct ceph_inode_info *ci,
-			   const char *name, int name_len,
-			   const char *val, int val_len,
-			   int dirty,
-			   int should_free_name, int should_free_val,
-			   struct ceph_inode_xattr **newxattr)
-{
-	struct rb_node **p;
-	struct rb_node *parent = NULL;
-	struct ceph_inode_xattr *xattr = NULL;
-	int c;
-	int new = 0;
-
-	p = &ci->i_xattrs.index.rb_node;
-	while (*p) {
-		parent = *p;
-		xattr = rb_entry(parent, struct ceph_inode_xattr, node);
-		c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
-		if (c < 0)
-			p = &(*p)->rb_left;
-		else if (c > 0)
-			p = &(*p)->rb_right;
-		else {
-			if (name_len == xattr->name_len)
-				break;
-			else if (name_len < xattr->name_len)
-				p = &(*p)->rb_left;
-			else
-				p = &(*p)->rb_right;
-		}
-		xattr = NULL;
-	}
-
-	if (!xattr) {
-		new = 1;
-		xattr = *newxattr;
-		xattr->name = name;
-		xattr->name_len = name_len;
-		xattr->should_free_name = should_free_name;
-
-		ci->i_xattrs.count++;
-		dout("__set_xattr count=%d\n", ci->i_xattrs.count);
-	} else {
-		kfree(*newxattr);
-		*newxattr = NULL;
-		if (xattr->should_free_val)
-			kfree((void *)xattr->val);
-
-		if (should_free_name) {
-			kfree((void *)name);
-			name = xattr->name;
-		}
-		ci->i_xattrs.names_size -= xattr->name_len;
-		ci->i_xattrs.vals_size -= xattr->val_len;
-	}
-	ci->i_xattrs.names_size += name_len;
-	ci->i_xattrs.vals_size += val_len;
-	if (val)
-		xattr->val = val;
-	else
-		xattr->val = "";
-
-	xattr->val_len = val_len;
-	xattr->dirty = dirty;
-	xattr->should_free_val = (val && should_free_val);
-
-	if (new) {
-		rb_link_node(&xattr->node, parent, p);
-		rb_insert_color(&xattr->node, &ci->i_xattrs.index);
-		dout("__set_xattr_val p=%p\n", p);
-	}
-
-	dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
-	     ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);
-
-	return 0;
-}
-
-static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
-			   const char *name)
-{
-	struct rb_node **p;
-	struct rb_node *parent = NULL;
-	struct ceph_inode_xattr *xattr = NULL;
-	int name_len = strlen(name);
-	int c;
-
-	p = &ci->i_xattrs.index.rb_node;
-	while (*p) {
-		parent = *p;
-		xattr = rb_entry(parent, struct ceph_inode_xattr, node);
-		c = strncmp(name, xattr->name, xattr->name_len);
-		if (c == 0 && name_len > xattr->name_len)
-			c = 1;
-		if (c < 0)
-			p = &(*p)->rb_left;
-		else if (c > 0)
-			p = &(*p)->rb_right;
-		else {
-			dout("__get_xattr %s: found %.*s\n", name,
-			     xattr->val_len, xattr->val);
-			return xattr;
-		}
-	}
-
-	dout("__get_xattr %s: not found\n", name);
-
-	return NULL;
-}
-
-static void __free_xattr(struct ceph_inode_xattr *xattr)
-{
-	BUG_ON(!xattr);
-
-	if (xattr->should_free_name)
-		kfree((void *)xattr->name);
-	if (xattr->should_free_val)
-		kfree((void *)xattr->val);
-
-	kfree(xattr);
-}
-
-static int __remove_xattr(struct ceph_inode_info *ci,
-			  struct ceph_inode_xattr *xattr)
-{
-	if (!xattr)
-		return -EOPNOTSUPP;
-
-	rb_erase(&xattr->node, &ci->i_xattrs.index);
-
-	if (xattr->should_free_name)
-		kfree((void *)xattr->name);
-	if (xattr->should_free_val)
-		kfree((void *)xattr->val);
-
-	ci->i_xattrs.names_size -= xattr->name_len;
-	ci->i_xattrs.vals_size -= xattr->val_len;
-	ci->i_xattrs.count--;
-	kfree(xattr);
-
-	return 0;
-}
-
-static int __remove_xattr_by_name(struct ceph_inode_info *ci,
-			   const char *name)
-{
-	struct rb_node **p;
-	struct ceph_inode_xattr *xattr;
-	int err;
-
-	p = &ci->i_xattrs.index.rb_node;
-	xattr = __get_xattr(ci, name);
-	err = __remove_xattr(ci, xattr);
-	return err;
-}
-
-static char *__copy_xattr_names(struct ceph_inode_info *ci,
-				char *dest)
-{
-	struct rb_node *p;
-	struct ceph_inode_xattr *xattr = NULL;
-
-	p = rb_first(&ci->i_xattrs.index);
-	dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
-
-	while (p) {
-		xattr = rb_entry(p, struct ceph_inode_xattr, node);
-		memcpy(dest, xattr->name, xattr->name_len);
-		dest[xattr->name_len] = '\0';
-
-		dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
-		     xattr->name_len, ci->i_xattrs.names_size);
-
-		dest += xattr->name_len + 1;
-		p = rb_next(p);
-	}
-
-	return dest;
-}
-
-void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
-{
-	struct rb_node *p, *tmp;
-	struct ceph_inode_xattr *xattr = NULL;
-
-	p = rb_first(&ci->i_xattrs.index);
-
-	dout("__ceph_destroy_xattrs p=%p\n", p);
-
-	while (p) {
-		xattr = rb_entry(p, struct ceph_inode_xattr, node);
-		tmp = p;
-		p = rb_next(tmp);
-		dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
-		     xattr->name_len, xattr->name);
-		rb_erase(tmp, &ci->i_xattrs.index);
-
-		__free_xattr(xattr);
-	}
-
-	ci->i_xattrs.names_size = 0;
-	ci->i_xattrs.vals_size = 0;
-	ci->i_xattrs.index_version = 0;
-	ci->i_xattrs.count = 0;
-	ci->i_xattrs.index = RB_ROOT;
-}
-
-static int __build_xattrs(struct inode *inode)
-	__releases(ci->i_ceph_lock)
-	__acquires(ci->i_ceph_lock)
-{
-	u32 namelen;
-	u32 numattr = 0;
-	void *p, *end;
-	u32 len;
-	const char *name, *val;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int xattr_version;
-	struct ceph_inode_xattr **xattrs = NULL;
-	int err = 0;
-	int i;
-
-	dout("__build_xattrs() len=%d\n",
-	     ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
-
-	if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
-		return 0; /* already built */
-
-	__ceph_destroy_xattrs(ci);
-
-start:
-	/* updated internal xattr rb tree */
-	if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
-		p = ci->i_xattrs.blob->vec.iov_base;
-		end = p + ci->i_xattrs.blob->vec.iov_len;
-		ceph_decode_32_safe(&p, end, numattr, bad);
-		xattr_version = ci->i_xattrs.version;
-		spin_unlock(&ci->i_ceph_lock);
-
-		xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
-				 GFP_NOFS);
-		err = -ENOMEM;
-		if (!xattrs)
-			goto bad_lock;
-		memset(xattrs, 0, numattr*sizeof(struct ceph_xattr *));
-		for (i = 0; i < numattr; i++) {
-			xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
-					    GFP_NOFS);
-			if (!xattrs[i])
-				goto bad_lock;
-		}
-
-		spin_lock(&ci->i_ceph_lock);
-		if (ci->i_xattrs.version != xattr_version) {
-			/* lost a race, retry */
-			for (i = 0; i < numattr; i++)
-				kfree(xattrs[i]);
-			kfree(xattrs);
-			goto start;
-		}
-		err = -EIO;
-		while (numattr--) {
-			ceph_decode_32_safe(&p, end, len, bad);
-			namelen = len;
-			name = p;
-			p += len;
-			ceph_decode_32_safe(&p, end, len, bad);
-			val = p;
-			p += len;
-
-			err = __set_xattr(ci, name, namelen, val, len,
-					  0, 0, 0, &xattrs[numattr]);
-
-			if (err < 0)
-				goto bad;
-		}
-		kfree(xattrs);
-	}
-	ci->i_xattrs.index_version = ci->i_xattrs.version;
-	ci->i_xattrs.dirty = false;
-
-	return err;
-bad_lock:
-	spin_lock(&ci->i_ceph_lock);
-bad:
-	if (xattrs) {
-		for (i = 0; i < numattr; i++)
-			kfree(xattrs[i]);
-		kfree(xattrs);
-	}
-	ci->i_xattrs.names_size = 0;
-	return err;
-}
-
-static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
-				    int val_size)
-{
-	/*
-	 * 4 bytes for the length, and additional 4 bytes per each xattr name,
-	 * 4 bytes per each value
-	 */
-	int size = 4 + ci->i_xattrs.count*(4 + 4) +
-			     ci->i_xattrs.names_size +
-			     ci->i_xattrs.vals_size;
-	dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
-	     ci->i_xattrs.count, ci->i_xattrs.names_size,
-	     ci->i_xattrs.vals_size);
-
-	if (name_size)
-		size += 4 + 4 + name_size + val_size;
-
-	return size;
-}
-
-/*
- * If there are dirty xattrs, reencode xattrs into the prealloc_blob
- * and swap into place.
- */
-void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
-{
-	struct rb_node *p;
-	struct ceph_inode_xattr *xattr = NULL;
-	void *dest;
-
-	dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
-	if (ci->i_xattrs.dirty) {
-		int need = __get_required_blob_size(ci, 0, 0);
-
-		BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
-
-		p = rb_first(&ci->i_xattrs.index);
-		dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
-
-		ceph_encode_32(&dest, ci->i_xattrs.count);
-		while (p) {
-			xattr = rb_entry(p, struct ceph_inode_xattr, node);
-
-			ceph_encode_32(&dest, xattr->name_len);
-			memcpy(dest, xattr->name, xattr->name_len);
-			dest += xattr->name_len;
-			ceph_encode_32(&dest, xattr->val_len);
-			memcpy(dest, xattr->val, xattr->val_len);
-			dest += xattr->val_len;
-
-			p = rb_next(p);
-		}
-
-		/* adjust buffer len; it may be larger than we need */
-		ci->i_xattrs.prealloc_blob->vec.iov_len =
-			dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
-
-		if (ci->i_xattrs.blob)
-			ceph_buffer_put(ci->i_xattrs.blob);
-		ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
-		ci->i_xattrs.prealloc_blob = NULL;
-		ci->i_xattrs.dirty = false;
-		ci->i_xattrs.version++;
-	}
-}
-
-ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
-		      size_t size)
-{
-	struct inode *inode = dentry->d_inode;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int err;
-	struct ceph_inode_xattr *xattr;
-	struct ceph_vxattr *vxattr = NULL;
-
-	if (!ceph_is_valid_xattr(name))
-		return -ENODATA;
-
-	/* let's see if a virtual xattr was requested */
-	vxattr = ceph_match_vxattr(inode, name);
-
-	spin_lock(&ci->i_ceph_lock);
-	dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
-	     ci->i_xattrs.version, ci->i_xattrs.index_version);
-
-	if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
-	    (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
-		goto get_xattr;
-	} else {
-		spin_unlock(&ci->i_ceph_lock);
-		/* get xattrs from mds (if we don't already have them) */
-		err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
-		if (err)
-			return err;
-	}
-
-	spin_lock(&ci->i_ceph_lock);
-
-	if (vxattr && vxattr->readonly) {
-		err = vxattr->getxattr_cb(ci, value, size);
-		goto out;
-	}
-
-	err = __build_xattrs(inode);
-	if (err < 0)
-		goto out;
-
-get_xattr:
-	err = -ENODATA;  /* == ENOATTR */
-	xattr = __get_xattr(ci, name);
-	if (!xattr) {
-		if (vxattr)
-			err = vxattr->getxattr_cb(ci, value, size);
-		goto out;
-	}
-
-	err = -ERANGE;
-	if (size && size < xattr->val_len)
-		goto out;
-
-	err = xattr->val_len;
-	if (size == 0)
-		goto out;
-
-	memcpy(value, xattr->val, xattr->val_len);
-
-out:
-	spin_unlock(&ci->i_ceph_lock);
-	return err;
-}
-
-ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
-{
-	struct inode *inode = dentry->d_inode;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
-	u32 vir_namelen = 0;
-	u32 namelen;
-	int err;
-	u32 len;
-	int i;
-
-	spin_lock(&ci->i_ceph_lock);
-	dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
-	     ci->i_xattrs.version, ci->i_xattrs.index_version);
-
-	if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
-	    (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
-		goto list_xattr;
-	} else {
-		spin_unlock(&ci->i_ceph_lock);
-		err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
-		if (err)
-			return err;
-	}
-
-	spin_lock(&ci->i_ceph_lock);
-
-	err = __build_xattrs(inode);
-	if (err < 0)
-		goto out;
-
-list_xattr:
-	/*
-	 * Start with virtual dir xattr names (if any) (including
-	 * terminating '\0' characters for each).
-	 */
-	vir_namelen = ceph_vxattrs_name_size(vxattrs);
-
-	/* adding 1 byte per each variable due to the null termination */
-	namelen = vir_namelen + ci->i_xattrs.names_size + ci->i_xattrs.count;
-	err = -ERANGE;
-	if (size && namelen > size)
-		goto out;
-
-	err = namelen;
-	if (size == 0)
-		goto out;
-
-	names = __copy_xattr_names(ci, names);
-
-	/* virtual xattr names, too */
-	if (vxattrs)
-		for (i = 0; vxattrs[i].name; i++) {
-			len = sprintf(names, "%s", vxattrs[i].name);
-			names += len + 1;
-		}
-
-out:
-	spin_unlock(&ci->i_ceph_lock);
-	return err;
-}
-
-static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
-			      const char *value, size_t size, int flags)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
-	struct inode *inode = dentry->d_inode;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct inode *parent_inode;
-	struct ceph_mds_request *req;
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	int err;
-	int i, nr_pages;
-	struct page **pages = NULL;
-	void *kaddr;
-
-	/* copy value into some pages */
-	nr_pages = calc_pages_for(0, size);
-	if (nr_pages) {
-		pages = kmalloc(sizeof(pages[0])*nr_pages, GFP_NOFS);
-		if (!pages)
-			return -ENOMEM;
-		err = -ENOMEM;
-		for (i = 0; i < nr_pages; i++) {
-			pages[i] = __page_cache_alloc(GFP_NOFS);
-			if (!pages[i]) {
-				nr_pages = i;
-				goto out;
-			}
-			kaddr = kmap(pages[i]);
-			memcpy(kaddr, value + i*PAGE_CACHE_SIZE,
-			       min(PAGE_CACHE_SIZE, size-i*PAGE_CACHE_SIZE));
-		}
-	}
-
-	dout("setxattr value=%.*s\n", (int)size, value);
-
-	/* do request */
-	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR,
-				       USE_AUTH_MDS);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		goto out;
-	}
-	req->r_inode = inode;
-	ihold(inode);
-	req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
-	req->r_num_caps = 1;
-	req->r_args.setxattr.flags = cpu_to_le32(flags);
-	req->r_path2 = kstrdup(name, GFP_NOFS);
-
-	req->r_pages = pages;
-	req->r_num_pages = nr_pages;
-	req->r_data_len = size;
-
-	dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
-	parent_inode = ceph_get_dentry_parent_inode(dentry);
-	err = ceph_mdsc_do_request(mdsc, parent_inode, req);
-	iput(parent_inode);
-	ceph_mdsc_put_request(req);
-	dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
-
-out:
-	if (pages) {
-		for (i = 0; i < nr_pages; i++)
-			__free_page(pages[i]);
-		kfree(pages);
-	}
-	return err;
-}
-
-int ceph_setxattr(struct dentry *dentry, const char *name,
-		  const void *value, size_t size, int flags)
-{
-	struct inode *inode = dentry->d_inode;
-	struct ceph_vxattr *vxattr;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int issued;
-	int err;
-	int dirty;
-	int name_len = strlen(name);
-	int val_len = size;
-	char *newname = NULL;
-	char *newval = NULL;
-	struct ceph_inode_xattr *xattr = NULL;
-	int required_blob_size;
-
-	if (ceph_snap(inode) != CEPH_NOSNAP)
-		return -EROFS;
-
-	if (!ceph_is_valid_xattr(name))
-		return -EOPNOTSUPP;
-
-	vxattr = ceph_match_vxattr(inode, name);
-	if (vxattr && vxattr->readonly)
-		return -EOPNOTSUPP;
-
-	/* preallocate memory for xattr name, value, index node */
-	err = -ENOMEM;
-	newname = kmemdup(name, name_len + 1, GFP_NOFS);
-	if (!newname)
-		goto out;
-
-	if (val_len) {
-		newval = kmemdup(value, val_len, GFP_NOFS);
-		if (!newval)
-			goto out;
-	}
-
-	xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
-	if (!xattr)
-		goto out;
-
-	spin_lock(&ci->i_ceph_lock);
-retry:
-	issued = __ceph_caps_issued(ci, NULL);
-	dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
-	if (!(issued & CEPH_CAP_XATTR_EXCL))
-		goto do_sync;
-	__build_xattrs(inode);
-
-	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
-
-	if (!ci->i_xattrs.prealloc_blob ||
-	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
-		struct ceph_buffer *blob;
-
-		spin_unlock(&ci->i_ceph_lock);
-		dout(" preaallocating new blob size=%d\n", required_blob_size);
-		blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
-		if (!blob)
-			goto out;
-		spin_lock(&ci->i_ceph_lock);
-		if (ci->i_xattrs.prealloc_blob)
-			ceph_buffer_put(ci->i_xattrs.prealloc_blob);
-		ci->i_xattrs.prealloc_blob = blob;
-		goto retry;
-	}
-
-	err = __set_xattr(ci, newname, name_len, newval,
-			  val_len, 1, 1, 1, &xattr);
-
-	dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
-	ci->i_xattrs.dirty = true;
-	inode->i_ctime = CURRENT_TIME;
-
-	spin_unlock(&ci->i_ceph_lock);
-	if (dirty)
-		__mark_inode_dirty(inode, dirty);
-	return err;
-
-do_sync:
-	spin_unlock(&ci->i_ceph_lock);
-	err = ceph_sync_setxattr(dentry, name, value, size, flags);
-out:
-	kfree(newname);
-	kfree(newval);
-	kfree(xattr);
-	return err;
-}
-
-static int ceph_send_removexattr(struct dentry *dentry, const char *name)
-{
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct inode *inode = dentry->d_inode;
-	struct inode *parent_inode;
-	struct ceph_mds_request *req;
-	int err;
-
-	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RMXATTR,
-				       USE_AUTH_MDS);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-	req->r_inode = inode;
-	ihold(inode);
-	req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
-	req->r_num_caps = 1;
-	req->r_path2 = kstrdup(name, GFP_NOFS);
-
-	parent_inode = ceph_get_dentry_parent_inode(dentry);
-	err = ceph_mdsc_do_request(mdsc, parent_inode, req);
-	iput(parent_inode);
-	ceph_mdsc_put_request(req);
-	return err;
-}
-
-int ceph_removexattr(struct dentry *dentry, const char *name)
-{
-	struct inode *inode = dentry->d_inode;
-	struct ceph_vxattr *vxattr;
-	struct ceph_inode_info *ci = ceph_inode(inode);
-	int issued;
-	int err;
-	int required_blob_size;
-	int dirty;
-
-	if (ceph_snap(inode) != CEPH_NOSNAP)
-		return -EROFS;
-
-	if (!ceph_is_valid_xattr(name))
-		return -EOPNOTSUPP;
-
-	vxattr = ceph_match_vxattr(inode, name);
-	if (vxattr && vxattr->readonly)
-		return -EOPNOTSUPP;
-
-	err = -ENOMEM;
-	spin_lock(&ci->i_ceph_lock);
-retry:
-	issued = __ceph_caps_issued(ci, NULL);
-	dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
-
-	if (!(issued & CEPH_CAP_XATTR_EXCL))
-		goto do_sync;
-	__build_xattrs(inode);
-
-	required_blob_size = __get_required_blob_size(ci, 0, 0);
-
-	if (!ci->i_xattrs.prealloc_blob ||
-	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
-		struct ceph_buffer *blob;
-
-		spin_unlock(&ci->i_ceph_lock);
-		dout(" preaallocating new blob size=%d\n", required_blob_size);
-		blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
-		if (!blob)
-			goto out;
-		spin_lock(&ci->i_ceph_lock);
-		if (ci->i_xattrs.prealloc_blob)
-			ceph_buffer_put(ci->i_xattrs.prealloc_blob);
-		ci->i_xattrs.prealloc_blob = blob;
-		goto retry;
-	}
-
-	err = __remove_xattr_by_name(ceph_inode(inode), name);
-
-	dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
-	ci->i_xattrs.dirty = true;
-	inode->i_ctime = CURRENT_TIME;
-	spin_unlock(&ci->i_ceph_lock);
-	if (dirty)
-		__mark_inode_dirty(inode, dirty);
-	return err;
-do_sync:
-	spin_unlock(&ci->i_ceph_lock);
-	err = ceph_send_removexattr(dentry, name);
-out:
-	return err;
-}
-