Diffstat (limited to 'ANDROID_3.4.5/fs/btrfs')
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/Kconfig  52
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/Makefile  14
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/acl.c  273
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/async-thread.c  707
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/async-thread.h  119
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/backref.c  1432
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/backref.h  68
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/btrfs_inode.h  205
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/check-integrity.c  3068
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/check-integrity.h  36
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/compat.h  7
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/compression.c  1038
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/compression.h  83
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/ctree.c  4382
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/ctree.h  3101
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/delayed-inode.c  1881
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/delayed-inode.h  145
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/delayed-ref.c  759
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/delayed-ref.h  283
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/dir-item.c  422
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/disk-io.c  3693
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/disk-io.h  106
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/export.c  317
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/export.h  19
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/extent-tree.c  8025
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/extent_io.c  4891
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/extent_io.h  331
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/extent_map.c  363
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/extent_map.h  66
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/file-item.c  861
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/file.c  1908
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/free-space-cache.c  2943
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/free-space-cache.h  113
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/hash.h  27
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/inode-item.c  236
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/inode-map.c  576
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/inode-map.h  13
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/inode.c  7681
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/ioctl.c  3430
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/ioctl.h  334
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/locking.c  267
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/locking.h  61
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/lzo.c  427
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/ordered-data.c  977
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/ordered-data.h  179
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/orphan.c  91
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/print-tree.c  342
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/print-tree.h  23
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/reada.c  961
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/relocation.c  4464
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/root-tree.c  456
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/scrub.c  2440
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/struct-funcs.c  140
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/super.c  1578
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/sysfs.c  46
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/transaction.c  1539
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/transaction.h  120
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/tree-defrag.c  145
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/tree-log.c  3398
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/tree-log.h  52
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/ulist.c  220
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/ulist.h  68
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/version.h  4
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/volumes.c  4585
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/volumes.h  284
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/xattr.c  429
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/xattr.h  43
-rw-r--r--  ANDROID_3.4.5/fs/btrfs/zlib.c  399
68 files changed, 0 insertions(+), 77746 deletions(-)
diff --git a/ANDROID_3.4.5/fs/btrfs/Kconfig b/ANDROID_3.4.5/fs/btrfs/Kconfig
deleted file mode 100644
index d33f01c0..00000000
--- a/ANDROID_3.4.5/fs/btrfs/Kconfig
+++ /dev/null
@@ -1,52 +0,0 @@
-config BTRFS_FS
- tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format"
- depends on EXPERIMENTAL
- select LIBCRC32C
- select ZLIB_INFLATE
- select ZLIB_DEFLATE
- select LZO_COMPRESS
- select LZO_DECOMPRESS
- help
- Btrfs is a new filesystem with extents, writable snapshotting,
- support for multiple devices and many more features.
-
- Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET
- FINALIZED. You should say N here unless you are interested in
- testing Btrfs with non-critical data.
-
- To compile this file system support as a module, choose M here. The
- module will be called btrfs.
-
- If unsure, say N.
-
-config BTRFS_FS_POSIX_ACL
- bool "Btrfs POSIX Access Control Lists"
- depends on BTRFS_FS
- select FS_POSIX_ACL
- help
- POSIX Access Control Lists (ACLs) support permissions for users and
- groups beyond the owner/group/world scheme.
-
- To learn more about Access Control Lists, visit the POSIX ACLs for
- Linux website <http://acl.bestbits.at/>.
-
- If you don't know what Access Control Lists are, say N
-
-config BTRFS_FS_CHECK_INTEGRITY
- bool "Btrfs with integrity check tool compiled in (DANGEROUS)"
- depends on BTRFS_FS
- help
- Adds code that examines all block write requests (including
- writes of the super block). The goal is to verify that the
- state of the filesystem on disk is always consistent, i.e.,
- after a power-loss or kernel panic event the filesystem is
- in a consistent state.
-
- If the integrity check tool is included and activated in
- the mount options, plenty of kernel memory is used, and
- plenty of additional CPU cycles are spent. Enabling this
- functionality is not intended for normal use.
-
- In most cases, unless you are a btrfs developer who needs
- to verify the integrity of (super)-block write requests
- during the run of a regression test, say N
diff --git a/ANDROID_3.4.5/fs/btrfs/Makefile b/ANDROID_3.4.5/fs/btrfs/Makefile
deleted file mode 100644
index 0c4fa2be..00000000
--- a/ANDROID_3.4.5/fs/btrfs/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-
-obj-$(CONFIG_BTRFS_FS) := btrfs.o
-
-btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
- file-item.o inode-item.o inode-map.o disk-io.o \
- transaction.o inode.o file.o tree-defrag.o \
- extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
- extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
- export.o tree-log.o free-space-cache.o zlib.o lzo.o \
- compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
- reada.o backref.o ulist.o
-
-btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
-btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/ANDROID_3.4.5/fs/btrfs/acl.c b/ANDROID_3.4.5/fs/btrfs/acl.c
deleted file mode 100644
index 89b156d8..00000000
--- a/ANDROID_3.4.5/fs/btrfs/acl.c
+++ /dev/null
@@ -1,273 +0,0 @@
-/*
- * Copyright (C) 2007 Red Hat. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/fs.h>
-#include <linux/string.h>
-#include <linux/xattr.h>
-#include <linux/posix_acl_xattr.h>
-#include <linux/posix_acl.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-
-#include "ctree.h"
-#include "btrfs_inode.h"
-#include "xattr.h"
-
-struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
-{
- int size;
- const char *name;
- char *value = NULL;
- struct posix_acl *acl;
-
- if (!IS_POSIXACL(inode))
- return NULL;
-
- acl = get_cached_acl(inode, type);
- if (acl != ACL_NOT_CACHED)
- return acl;
-
- switch (type) {
- case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
- break;
- case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
- break;
- default:
- BUG();
- }
-
- size = __btrfs_getxattr(inode, name, "", 0);
- if (size > 0) {
- value = kzalloc(size, GFP_NOFS);
- if (!value)
- return ERR_PTR(-ENOMEM);
- size = __btrfs_getxattr(inode, name, value, size);
- }
- if (size > 0) {
- acl = posix_acl_from_xattr(value, size);
- } else if (size == -ENOENT || size == -ENODATA || size == 0) {
- /* FIXME, who returns -ENOENT? I think nobody */
- acl = NULL;
- } else {
- acl = ERR_PTR(-EIO);
- }
- kfree(value);
-
- if (!IS_ERR(acl))
- set_cached_acl(inode, type, acl);
-
- return acl;
-}
-
-static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
- void *value, size_t size, int type)
-{
- struct posix_acl *acl;
- int ret = 0;
-
- if (!IS_POSIXACL(dentry->d_inode))
- return -EOPNOTSUPP;
-
- acl = btrfs_get_acl(dentry->d_inode, type);
-
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl == NULL)
- return -ENODATA;
- ret = posix_acl_to_xattr(acl, value, size);
- posix_acl_release(acl);
-
- return ret;
-}
-
-/*
- * Needs to be called with fs_mutex held
- */
-static int btrfs_set_acl(struct btrfs_trans_handle *trans,
- struct inode *inode, struct posix_acl *acl, int type)
-{
- int ret, size = 0;
- const char *name;
- char *value = NULL;
-
- if (acl) {
- ret = posix_acl_valid(acl);
- if (ret < 0)
- return ret;
- ret = 0;
- }
-
- switch (type) {
- case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
- if (acl) {
- ret = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (ret < 0)
- return ret;
- }
- ret = 0;
- break;
- case ACL_TYPE_DEFAULT:
- if (!S_ISDIR(inode->i_mode))
- return acl ? -EINVAL : 0;
- name = POSIX_ACL_XATTR_DEFAULT;
- break;
- default:
- return -EINVAL;
- }
-
- if (acl) {
- size = posix_acl_xattr_size(acl->a_count);
- value = kmalloc(size, GFP_NOFS);
- if (!value) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = posix_acl_to_xattr(acl, value, size);
- if (ret < 0)
- goto out;
- }
-
- ret = __btrfs_setxattr(trans, inode, name, value, size, 0);
-out:
- kfree(value);
-
- if (!ret)
- set_cached_acl(inode, type, acl);
-
- return ret;
-}
-
-static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags, int type)
-{
- int ret;
- struct posix_acl *acl = NULL;
-
- if (!inode_owner_or_capable(dentry->d_inode))
- return -EPERM;
-
- if (!IS_POSIXACL(dentry->d_inode))
- return -EOPNOTSUPP;
-
- if (value) {
- acl = posix_acl_from_xattr(value, size);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
-
- if (acl) {
- ret = posix_acl_valid(acl);
- if (ret)
- goto out;
- }
- }
-
- ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
-out:
- posix_acl_release(acl);
-
- return ret;
-}
-
-/*
- * btrfs_init_acl is already generally called under fs_mutex, so the locking
- * stuff has been fixed to work with that. If the locking stuff changes, we
- * need to re-evaluate the acl locking stuff.
- */
-int btrfs_init_acl(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir)
-{
- struct posix_acl *acl = NULL;
- int ret = 0;
-
- /* this happens with subvols */
- if (!dir)
- return 0;
-
- if (!S_ISLNK(inode->i_mode)) {
- if (IS_POSIXACL(dir)) {
- acl = btrfs_get_acl(dir, ACL_TYPE_DEFAULT);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- }
-
- if (!acl)
- inode->i_mode &= ~current_umask();
- }
-
- if (IS_POSIXACL(dir) && acl) {
- if (S_ISDIR(inode->i_mode)) {
- ret = btrfs_set_acl(trans, inode, acl,
- ACL_TYPE_DEFAULT);
- if (ret)
- goto failed;
- }
- ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
- if (ret < 0)
- return ret;
-
- if (ret > 0) {
- /* we need an acl */
- ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
- }
- }
-failed:
- posix_acl_release(acl);
-
- return ret;
-}
-
-int btrfs_acl_chmod(struct inode *inode)
-{
- struct posix_acl *acl;
- int ret = 0;
-
- if (S_ISLNK(inode->i_mode))
- return -EOPNOTSUPP;
-
- if (!IS_POSIXACL(inode))
- return 0;
-
- acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR_OR_NULL(acl))
- return PTR_ERR(acl);
-
- ret = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
- if (ret)
- return ret;
- ret = btrfs_set_acl(NULL, inode, acl, ACL_TYPE_ACCESS);
- posix_acl_release(acl);
- return ret;
-}
-
-const struct xattr_handler btrfs_xattr_acl_default_handler = {
- .prefix = POSIX_ACL_XATTR_DEFAULT,
- .flags = ACL_TYPE_DEFAULT,
- .get = btrfs_xattr_acl_get,
- .set = btrfs_xattr_acl_set,
-};
-
-const struct xattr_handler btrfs_xattr_acl_access_handler = {
- .prefix = POSIX_ACL_XATTR_ACCESS,
- .flags = ACL_TYPE_ACCESS,
- .get = btrfs_xattr_acl_get,
- .set = btrfs_xattr_acl_set,
-};
diff --git a/ANDROID_3.4.5/fs/btrfs/async-thread.c b/ANDROID_3.4.5/fs/btrfs/async-thread.c
deleted file mode 100644
index 42704149..00000000
--- a/ANDROID_3.4.5/fs/btrfs/async-thread.c
+++ /dev/null
@@ -1,707 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/kthread.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/freezer.h>
-#include "async-thread.h"
-
-#define WORK_QUEUED_BIT 0
-#define WORK_DONE_BIT 1
-#define WORK_ORDER_DONE_BIT 2
-#define WORK_HIGH_PRIO_BIT 3
-
-/*
- * container for the kthread task pointer and the list of pending work
- * One of these is allocated per thread.
- */
-struct btrfs_worker_thread {
- /* pool we belong to */
- struct btrfs_workers *workers;
-
- /* list of struct btrfs_work that are waiting for service */
- struct list_head pending;
- struct list_head prio_pending;
-
- /* list of worker threads from struct btrfs_workers */
- struct list_head worker_list;
-
- /* kthread */
- struct task_struct *task;
-
- /* number of things on the pending list */
- atomic_t num_pending;
-
- /* reference counter for this struct */
- atomic_t refs;
-
- unsigned long sequence;
-
- /* protects the pending list. */
- spinlock_t lock;
-
- /* set to non-zero when this thread is already awake and kicking */
- int working;
-
- /* are we currently idle */
- int idle;
-};
-
-static int __btrfs_start_workers(struct btrfs_workers *workers);
-
-/*
- * btrfs_start_workers uses kthread_run, which can block waiting for memory
- * for a very long time. It will actually throttle on page writeback,
- * and so it may not make progress until after our btrfs worker threads
- * process all of the pending work structs in their queue
- *
- * This means we can't use btrfs_start_workers from inside a btrfs worker
- * thread that is used as part of cleaning dirty memory, which pretty much
- * involves all of the worker threads.
- *
- * Instead we have a helper queue who never has more than one thread
- * where we scheduler thread start operations. This worker_start struct
- * is used to contain the work and hold a pointer to the queue that needs
- * another worker.
- */
-struct worker_start {
- struct btrfs_work work;
- struct btrfs_workers *queue;
-};
-
-static void start_new_worker_func(struct btrfs_work *work)
-{
- struct worker_start *start;
- start = container_of(work, struct worker_start, work);
- __btrfs_start_workers(start->queue);
- kfree(start);
-}
-
-/*
- * helper function to move a thread onto the idle list after it
- * has finished some requests.
- */
-static void check_idle_worker(struct btrfs_worker_thread *worker)
-{
- if (!worker->idle && atomic_read(&worker->num_pending) <
- worker->workers->idle_thresh / 2) {
- unsigned long flags;
- spin_lock_irqsave(&worker->workers->lock, flags);
- worker->idle = 1;
-
- /* the list may be empty if the worker is just starting */
- if (!list_empty(&worker->worker_list)) {
- list_move(&worker->worker_list,
- &worker->workers->idle_list);
- }
- spin_unlock_irqrestore(&worker->workers->lock, flags);
- }
-}
-
-/*
- * helper function to move a thread off the idle list after new
- * pending work is added.
- */
-static void check_busy_worker(struct btrfs_worker_thread *worker)
-{
- if (worker->idle && atomic_read(&worker->num_pending) >=
- worker->workers->idle_thresh) {
- unsigned long flags;
- spin_lock_irqsave(&worker->workers->lock, flags);
- worker->idle = 0;
-
- if (!list_empty(&worker->worker_list)) {
- list_move_tail(&worker->worker_list,
- &worker->workers->worker_list);
- }
- spin_unlock_irqrestore(&worker->workers->lock, flags);
- }
-}
-
-static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
-{
- struct btrfs_workers *workers = worker->workers;
- struct worker_start *start;
- unsigned long flags;
-
- rmb();
- if (!workers->atomic_start_pending)
- return;
-
- start = kzalloc(sizeof(*start), GFP_NOFS);
- if (!start)
- return;
-
- start->work.func = start_new_worker_func;
- start->queue = workers;
-
- spin_lock_irqsave(&workers->lock, flags);
- if (!workers->atomic_start_pending)
- goto out;
-
- workers->atomic_start_pending = 0;
- if (workers->num_workers + workers->num_workers_starting >=
- workers->max_workers)
- goto out;
-
- workers->num_workers_starting += 1;
- spin_unlock_irqrestore(&workers->lock, flags);
- btrfs_queue_worker(workers->atomic_worker_start, &start->work);
- return;
-
-out:
- kfree(start);
- spin_unlock_irqrestore(&workers->lock, flags);
-}
-
-static noinline void run_ordered_completions(struct btrfs_workers *workers,
- struct btrfs_work *work)
-{
- if (!workers->ordered)
- return;
-
- set_bit(WORK_DONE_BIT, &work->flags);
-
- spin_lock(&workers->order_lock);
-
- while (1) {
- if (!list_empty(&workers->prio_order_list)) {
- work = list_entry(workers->prio_order_list.next,
- struct btrfs_work, order_list);
- } else if (!list_empty(&workers->order_list)) {
- work = list_entry(workers->order_list.next,
- struct btrfs_work, order_list);
- } else {
- break;
- }
- if (!test_bit(WORK_DONE_BIT, &work->flags))
- break;
-
- /* we are going to call the ordered done function, but
- * we leave the work item on the list as a barrier so
- * that later work items that are done don't have their
- * functions called before this one returns
- */
- if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
- break;
-
- spin_unlock(&workers->order_lock);
-
- work->ordered_func(work);
-
- /* now take the lock again and call the freeing code */
- spin_lock(&workers->order_lock);
- list_del(&work->order_list);
- work->ordered_free(work);
- }
-
- spin_unlock(&workers->order_lock);
-}
-
-static void put_worker(struct btrfs_worker_thread *worker)
-{
- if (atomic_dec_and_test(&worker->refs))
- kfree(worker);
-}
-
-static int try_worker_shutdown(struct btrfs_worker_thread *worker)
-{
- int freeit = 0;
-
- spin_lock_irq(&worker->lock);
- spin_lock(&worker->workers->lock);
- if (worker->workers->num_workers > 1 &&
- worker->idle &&
- !worker->working &&
- !list_empty(&worker->worker_list) &&
- list_empty(&worker->prio_pending) &&
- list_empty(&worker->pending) &&
- atomic_read(&worker->num_pending) == 0) {
- freeit = 1;
- list_del_init(&worker->worker_list);
- worker->workers->num_workers--;
- }
- spin_unlock(&worker->workers->lock);
- spin_unlock_irq(&worker->lock);
-
- if (freeit)
- put_worker(worker);
- return freeit;
-}
-
-static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
- struct list_head *prio_head,
- struct list_head *head)
-{
- struct btrfs_work *work = NULL;
- struct list_head *cur = NULL;
-
- if(!list_empty(prio_head))
- cur = prio_head->next;
-
- smp_mb();
- if (!list_empty(&worker->prio_pending))
- goto refill;
-
- if (!list_empty(head))
- cur = head->next;
-
- if (cur)
- goto out;
-
-refill:
- spin_lock_irq(&worker->lock);
- list_splice_tail_init(&worker->prio_pending, prio_head);
- list_splice_tail_init(&worker->pending, head);
-
- if (!list_empty(prio_head))
- cur = prio_head->next;
- else if (!list_empty(head))
- cur = head->next;
- spin_unlock_irq(&worker->lock);
-
- if (!cur)
- goto out_fail;
-
-out:
- work = list_entry(cur, struct btrfs_work, list);
-
-out_fail:
- return work;
-}
-
-/*
- * main loop for servicing work items
- */
-static int worker_loop(void *arg)
-{
- struct btrfs_worker_thread *worker = arg;
- struct list_head head;
- struct list_head prio_head;
- struct btrfs_work *work;
-
- INIT_LIST_HEAD(&head);
- INIT_LIST_HEAD(&prio_head);
-
- do {
-again:
- while (1) {
-
-
- work = get_next_work(worker, &prio_head, &head);
- if (!work)
- break;
-
- list_del(&work->list);
- clear_bit(WORK_QUEUED_BIT, &work->flags);
-
- work->worker = worker;
-
- work->func(work);
-
- atomic_dec(&worker->num_pending);
- /*
- * unless this is an ordered work queue,
- * 'work' was probably freed by func above.
- */
- run_ordered_completions(worker->workers, work);
-
- check_pending_worker_creates(worker);
- cond_resched();
- }
-
- spin_lock_irq(&worker->lock);
- check_idle_worker(worker);
-
- if (freezing(current)) {
- worker->working = 0;
- spin_unlock_irq(&worker->lock);
- try_to_freeze();
- } else {
- spin_unlock_irq(&worker->lock);
- if (!kthread_should_stop()) {
- cpu_relax();
- /*
- * we've dropped the lock, did someone else
- * jump_in?
- */
- smp_mb();
- if (!list_empty(&worker->pending) ||
- !list_empty(&worker->prio_pending))
- continue;
-
- /*
- * this short schedule allows more work to
- * come in without the queue functions
- * needing to go through wake_up_process()
- *
- * worker->working is still 1, so nobody
- * is going to try and wake us up
- */
- schedule_timeout(1);
- smp_mb();
- if (!list_empty(&worker->pending) ||
- !list_empty(&worker->prio_pending))
- continue;
-
- if (kthread_should_stop())
- break;
-
- /* still no more work?, sleep for real */
- spin_lock_irq(&worker->lock);
- set_current_state(TASK_INTERRUPTIBLE);
- if (!list_empty(&worker->pending) ||
- !list_empty(&worker->prio_pending)) {
- spin_unlock_irq(&worker->lock);
- set_current_state(TASK_RUNNING);
- goto again;
- }
-
- /*
- * this makes sure we get a wakeup when someone
- * adds something new to the queue
- */
- worker->working = 0;
- spin_unlock_irq(&worker->lock);
-
- if (!kthread_should_stop()) {
- schedule_timeout(HZ * 120);
- if (!worker->working &&
- try_worker_shutdown(worker)) {
- return 0;
- }
- }
- }
- __set_current_state(TASK_RUNNING);
- }
- } while (!kthread_should_stop());
- return 0;
-}
-
-/*
- * this will wait for all the worker threads to shutdown
- */
-void btrfs_stop_workers(struct btrfs_workers *workers)
-{
- struct list_head *cur;
- struct btrfs_worker_thread *worker;
- int can_stop;
-
- spin_lock_irq(&workers->lock);
- list_splice_init(&workers->idle_list, &workers->worker_list);
- while (!list_empty(&workers->worker_list)) {
- cur = workers->worker_list.next;
- worker = list_entry(cur, struct btrfs_worker_thread,
- worker_list);
-
- atomic_inc(&worker->refs);
- workers->num_workers -= 1;
- if (!list_empty(&worker->worker_list)) {
- list_del_init(&worker->worker_list);
- put_worker(worker);
- can_stop = 1;
- } else
- can_stop = 0;
- spin_unlock_irq(&workers->lock);
- if (can_stop)
- kthread_stop(worker->task);
- spin_lock_irq(&workers->lock);
- put_worker(worker);
- }
- spin_unlock_irq(&workers->lock);
-}
-
-/*
- * simple init on struct btrfs_workers
- */
-void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
- struct btrfs_workers *async_helper)
-{
- workers->num_workers = 0;
- workers->num_workers_starting = 0;
- INIT_LIST_HEAD(&workers->worker_list);
- INIT_LIST_HEAD(&workers->idle_list);
- INIT_LIST_HEAD(&workers->order_list);
- INIT_LIST_HEAD(&workers->prio_order_list);
- spin_lock_init(&workers->lock);
- spin_lock_init(&workers->order_lock);
- workers->max_workers = max;
- workers->idle_thresh = 32;
- workers->name = name;
- workers->ordered = 0;
- workers->atomic_start_pending = 0;
- workers->atomic_worker_start = async_helper;
-}
-
-/*
- * starts new worker threads. This does not enforce the max worker
- * count in case you need to temporarily go past it.
- */
-static int __btrfs_start_workers(struct btrfs_workers *workers)
-{
- struct btrfs_worker_thread *worker;
- int ret = 0;
-
- worker = kzalloc(sizeof(*worker), GFP_NOFS);
- if (!worker) {
- ret = -ENOMEM;
- goto fail;
- }
-
- INIT_LIST_HEAD(&worker->pending);
- INIT_LIST_HEAD(&worker->prio_pending);
- INIT_LIST_HEAD(&worker->worker_list);
- spin_lock_init(&worker->lock);
-
- atomic_set(&worker->num_pending, 0);
- atomic_set(&worker->refs, 1);
- worker->workers = workers;
- worker->task = kthread_run(worker_loop, worker,
- "btrfs-%s-%d", workers->name,
- workers->num_workers + 1);
- if (IS_ERR(worker->task)) {
- ret = PTR_ERR(worker->task);
- kfree(worker);
- goto fail;
- }
- spin_lock_irq(&workers->lock);
- list_add_tail(&worker->worker_list, &workers->idle_list);
- worker->idle = 1;
- workers->num_workers++;
- workers->num_workers_starting--;
- WARN_ON(workers->num_workers_starting < 0);
- spin_unlock_irq(&workers->lock);
-
- return 0;
-fail:
- spin_lock_irq(&workers->lock);
- workers->num_workers_starting--;
- spin_unlock_irq(&workers->lock);
- return ret;
-}
-
-int btrfs_start_workers(struct btrfs_workers *workers)
-{
- spin_lock_irq(&workers->lock);
- workers->num_workers_starting++;
- spin_unlock_irq(&workers->lock);
- return __btrfs_start_workers(workers);
-}
-
-/*
- * run through the list and find a worker thread that doesn't have a lot
- * to do right now. This can return null if we aren't yet at the thread
- * count limit and all of the threads are busy.
- */
-static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
-{
- struct btrfs_worker_thread *worker;
- struct list_head *next;
- int enforce_min;
-
- enforce_min = (workers->num_workers + workers->num_workers_starting) <
- workers->max_workers;
-
- /*
- * if we find an idle thread, don't move it to the end of the
- * idle list. This improves the chance that the next submission
- * will reuse the same thread, and maybe catch it while it is still
- * working
- */
- if (!list_empty(&workers->idle_list)) {
- next = workers->idle_list.next;
- worker = list_entry(next, struct btrfs_worker_thread,
- worker_list);
- return worker;
- }
- if (enforce_min || list_empty(&workers->worker_list))
- return NULL;
-
- /*
- * if we pick a busy task, move the task to the end of the list.
- * hopefully this will keep things somewhat evenly balanced.
- * Do the move in batches based on the sequence number. This groups
- * requests submitted at roughly the same time onto the same worker.
- */
- next = workers->worker_list.next;
- worker = list_entry(next, struct btrfs_worker_thread, worker_list);
- worker->sequence++;
-
- if (worker->sequence % workers->idle_thresh == 0)
- list_move_tail(next, &workers->worker_list);
- return worker;
-}
-
-/*
- * selects a worker thread to take the next job. This will either find
- * an idle worker, start a new worker up to the max count, or just return
- * one of the existing busy workers.
- */
-static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
-{
- struct btrfs_worker_thread *worker;
- unsigned long flags;
- struct list_head *fallback;
- int ret;
-
- spin_lock_irqsave(&workers->lock, flags);
-again:
- worker = next_worker(workers);
-
- if (!worker) {
- if (workers->num_workers + workers->num_workers_starting >=
- workers->max_workers) {
- goto fallback;
- } else if (workers->atomic_worker_start) {
- workers->atomic_start_pending = 1;
- goto fallback;
- } else {
- workers->num_workers_starting++;
- spin_unlock_irqrestore(&workers->lock, flags);
- /* we're below the limit, start another worker */
- ret = __btrfs_start_workers(workers);
- spin_lock_irqsave(&workers->lock, flags);
- if (ret)
- goto fallback;
- goto again;
- }
- }
- goto found;
-
-fallback:
- fallback = NULL;
- /*
- * we have failed to find any workers, just
- * return the first one we can find.
- */
- if (!list_empty(&workers->worker_list))
- fallback = workers->worker_list.next;
- if (!list_empty(&workers->idle_list))
- fallback = workers->idle_list.next;
- BUG_ON(!fallback);
- worker = list_entry(fallback,
- struct btrfs_worker_thread, worker_list);
-found:
- /*
- * this makes sure the worker doesn't exit before it is placed
- * onto a busy/idle list
- */
- atomic_inc(&worker->num_pending);
- spin_unlock_irqrestore(&workers->lock, flags);
- return worker;
-}
-
-/*
- * btrfs_requeue_work just puts the work item back on the tail of the list
- * it was taken from. It is intended for use with long running work functions
- * that make some progress and want to give the cpu up for others.
- */
-void btrfs_requeue_work(struct btrfs_work *work)
-{
- struct btrfs_worker_thread *worker = work->worker;
- unsigned long flags;
- int wake = 0;
-
- if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
- return;
-
- spin_lock_irqsave(&worker->lock, flags);
- if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
- list_add_tail(&work->list, &worker->prio_pending);
- else
- list_add_tail(&work->list, &worker->pending);
- atomic_inc(&worker->num_pending);
-
- /* by definition we're busy, take ourselves off the idle
- * list
- */
- if (worker->idle) {
- spin_lock(&worker->workers->lock);
- worker->idle = 0;
- list_move_tail(&worker->worker_list,
- &worker->workers->worker_list);
- spin_unlock(&worker->workers->lock);
- }
- if (!worker->working) {
- wake = 1;
- worker->working = 1;
- }
-
- if (wake)
- wake_up_process(worker->task);
- spin_unlock_irqrestore(&worker->lock, flags);
-}
-
-void btrfs_set_work_high_prio(struct btrfs_work *work)
-{
- set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
-}
-
-/*
- * places a struct btrfs_work into the pending queue of one of the kthreads
- */
-void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
-{
- struct btrfs_worker_thread *worker;
- unsigned long flags;
- int wake = 0;
-
- /* don't requeue something already on a list */
- if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
- return;
-
- worker = find_worker(workers);
- if (workers->ordered) {
- /*
- * you're not allowed to do ordered queues from an
- * interrupt handler
- */
- spin_lock(&workers->order_lock);
- if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
- list_add_tail(&work->order_list,
- &workers->prio_order_list);
- } else {
- list_add_tail(&work->order_list, &workers->order_list);
- }
- spin_unlock(&workers->order_lock);
- } else {
- INIT_LIST_HEAD(&work->order_list);
- }
-
- spin_lock_irqsave(&worker->lock, flags);
-
- if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
- list_add_tail(&work->list, &worker->prio_pending);
- else
- list_add_tail(&work->list, &worker->pending);
- check_busy_worker(worker);
-
- /*
- * avoid calling into wake_up_process if this thread has already
- * been kicked
- */
- if (!worker->working)
- wake = 1;
- worker->working = 1;
-
- if (wake)
- wake_up_process(worker->task);
- spin_unlock_irqrestore(&worker->lock, flags);
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/async-thread.h b/ANDROID_3.4.5/fs/btrfs/async-thread.h
deleted file mode 100644
index 063698b9..00000000
--- a/ANDROID_3.4.5/fs/btrfs/async-thread.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __BTRFS_ASYNC_THREAD_
-#define __BTRFS_ASYNC_THREAD_
-
-struct btrfs_worker_thread;
-
-/*
- * This is similar to a workqueue, but it is meant to spread the operations
- * across all available cpus instead of just the CPU that was used to
- * queue the work. There is also some batching introduced to try and
- * cut down on context switches.
- *
- * By default threads are added on demand up to 2 * the number of cpus.
- * Changing struct btrfs_workers->max_workers is one way to prevent
- * demand creation of kthreads.
- *
- * the basic model of these worker threads is to embed a btrfs_work
- * structure in your own data struct, and use container_of in a
- * work function to get back to your data struct.
- */
-struct btrfs_work {
- /*
- * func should be set to the function you want called
- * your work struct is passed as the only arg
- *
- * ordered_func must be set for work sent to an ordered work queue,
- * and it is called to complete a given work item in the same
- * order they were sent to the queue.
- */
- void (*func)(struct btrfs_work *work);
- void (*ordered_func)(struct btrfs_work *work);
- void (*ordered_free)(struct btrfs_work *work);
-
- /*
- * flags should be set to zero. It is used to make sure the
- * struct is only inserted once into the list.
- */
- unsigned long flags;
-
- /* don't touch these */
- struct btrfs_worker_thread *worker;
- struct list_head list;
- struct list_head order_list;
-};
-
-struct btrfs_workers {
- /* current number of running workers */
- int num_workers;
-
- int num_workers_starting;
-
- /* max number of workers allowed. changed by btrfs_start_workers */
- int max_workers;
-
- /* once a worker has this many requests or fewer, it is idle */
- int idle_thresh;
-
- /* force completions in the order they were queued */
- int ordered;
-
- /* more workers required, but in an interrupt handler */
- int atomic_start_pending;
-
- /*
- * are we allowed to sleep while starting workers or are we required
- * to start them at a later time? If we can't sleep, this indicates
- * which queue we need to use to schedule thread creation.
- */
- struct btrfs_workers *atomic_worker_start;
-
- /* list with all the work threads. The workers on the idle thread
- * may be actively servicing jobs, but they haven't yet hit the
- * idle thresh limit above.
- */
- struct list_head worker_list;
- struct list_head idle_list;
-
- /*
- * when operating in ordered mode, this maintains the list
- * of work items waiting for completion
- */
- struct list_head order_list;
- struct list_head prio_order_list;
-
- /* lock for finding the next worker thread to queue on */
- spinlock_t lock;
-
- /* lock for the ordered lists */
- spinlock_t order_lock;
-
- /* extra name for this worker, used for current->name */
- char *name;
-};
-
-void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
-int btrfs_start_workers(struct btrfs_workers *workers);
-void btrfs_stop_workers(struct btrfs_workers *workers);
-void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
- struct btrfs_workers *async_starter);
-void btrfs_requeue_work(struct btrfs_work *work);
-void btrfs_set_work_high_prio(struct btrfs_work *work);
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/backref.c b/ANDROID_3.4.5/fs/btrfs/backref.c
deleted file mode 100644
index bcec0675..00000000
--- a/ANDROID_3.4.5/fs/btrfs/backref.c
+++ /dev/null
@@ -1,1432 +0,0 @@
-/*
- * Copyright (C) 2011 STRATO. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include "ctree.h"
-#include "disk-io.h"
-#include "backref.h"
-#include "ulist.h"
-#include "transaction.h"
-#include "delayed-ref.h"
-#include "locking.h"
-
-/*
- * this structure records all encountered refs on the way up to the root
- */
-struct __prelim_ref {
- struct list_head list;
- u64 root_id;
- struct btrfs_key key;
- int level;
- int count;
- u64 parent;
- u64 wanted_disk_byte;
-};
-
-static int __add_prelim_ref(struct list_head *head, u64 root_id,
- struct btrfs_key *key, int level, u64 parent,
- u64 wanted_disk_byte, int count)
-{
- struct __prelim_ref *ref;
-
- /* in case we're adding delayed refs, we're holding the refs spinlock */
- ref = kmalloc(sizeof(*ref), GFP_ATOMIC);
- if (!ref)
- return -ENOMEM;
-
- ref->root_id = root_id;
- if (key)
- ref->key = *key;
- else
- memset(&ref->key, 0, sizeof(ref->key));
-
- ref->level = level;
- ref->count = count;
- ref->parent = parent;
- ref->wanted_disk_byte = wanted_disk_byte;
- list_add_tail(&ref->list, head);
-
- return 0;
-}
-
-static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
- struct ulist *parents,
- struct extent_buffer *eb, int level,
- u64 wanted_objectid, u64 wanted_disk_byte)
-{
- int ret;
- int slot;
- struct btrfs_file_extent_item *fi;
- struct btrfs_key key;
- u64 disk_byte;
-
-add_parent:
- ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
- if (ret < 0)
- return ret;
-
- if (level != 0)
- return 0;
-
- /*
- * if the current leaf is full with EXTENT_DATA items, we must
- * check the next one if that holds a reference as well.
- * ref->count cannot be used to skip this check.
- * repeat this until we don't find any additional EXTENT_DATA items.
- */
- while (1) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- return ret;
- if (ret)
- return 0;
-
- eb = path->nodes[0];
- for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) {
- btrfs_item_key_to_cpu(eb, &key, slot);
- if (key.objectid != wanted_objectid ||
- key.type != BTRFS_EXTENT_DATA_KEY)
- return 0;
- fi = btrfs_item_ptr(eb, slot,
- struct btrfs_file_extent_item);
- disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
- if (disk_byte == wanted_disk_byte)
- goto add_parent;
- }
- }
-
- return 0;
-}
-
-/*
- * resolve an indirect backref in the form (root_id, key, level)
- * to a logical address
- */
-static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
- int search_commit_root,
- struct __prelim_ref *ref,
- struct ulist *parents)
-{
- struct btrfs_path *path;
- struct btrfs_root *root;
- struct btrfs_key root_key;
- struct btrfs_key key = {0};
- struct extent_buffer *eb;
- int ret = 0;
- int root_level;
- int level = ref->level;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->search_commit_root = !!search_commit_root;
-
- root_key.objectid = ref->root_id;
- root_key.type = BTRFS_ROOT_ITEM_KEY;
- root_key.offset = (u64)-1;
- root = btrfs_read_fs_root_no_name(fs_info, &root_key);
- if (IS_ERR(root)) {
- ret = PTR_ERR(root);
- goto out;
- }
-
- rcu_read_lock();
- root_level = btrfs_header_level(root->node);
- rcu_read_unlock();
-
- if (root_level + 1 == level)
- goto out;
-
- path->lowest_level = level;
- ret = btrfs_search_slot(NULL, root, &ref->key, path, 0, 0);
- pr_debug("search slot in root %llu (level %d, ref count %d) returned "
- "%d for key (%llu %u %llu)\n",
- (unsigned long long)ref->root_id, level, ref->count, ret,
- (unsigned long long)ref->key.objectid, ref->key.type,
- (unsigned long long)ref->key.offset);
- if (ret < 0)
- goto out;
-
- eb = path->nodes[level];
- if (!eb) {
- WARN_ON(1);
- ret = 1;
- goto out;
- }
-
- if (level == 0) {
- if (ret == 1 && path->slots[0] >= btrfs_header_nritems(eb)) {
- ret = btrfs_next_leaf(root, path);
- if (ret)
- goto out;
- eb = path->nodes[0];
- }
-
- btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
- }
-
- /* the last two parameters will only be used for level == 0 */
- ret = add_all_parents(root, path, parents, eb, level, key.objectid,
- ref->wanted_disk_byte);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-/*
- * resolve all indirect backrefs from the list
- */
-static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
- int search_commit_root,
- struct list_head *head)
-{
- int err;
- int ret = 0;
- struct __prelim_ref *ref;
- struct __prelim_ref *ref_safe;
- struct __prelim_ref *new_ref;
- struct ulist *parents;
- struct ulist_node *node;
-
- parents = ulist_alloc(GFP_NOFS);
- if (!parents)
- return -ENOMEM;
-
- /*
- * _safe allows us to insert directly after the current item without
- * iterating over the newly inserted items.
- * we're also allowed to re-assign ref during iteration.
- */
- list_for_each_entry_safe(ref, ref_safe, head, list) {
- if (ref->parent) /* already direct */
- continue;
- if (ref->count == 0)
- continue;
- err = __resolve_indirect_ref(fs_info, search_commit_root,
- ref, parents);
- if (err) {
- if (ret == 0)
- ret = err;
- continue;
- }
-
- /* we put the first parent into the ref at hand */
- node = ulist_next(parents, NULL);
- ref->parent = node ? node->val : 0;
-
- /* additional parents require new refs being added here */
- while ((node = ulist_next(parents, node))) {
- new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
- if (!new_ref) {
- ret = -ENOMEM;
- break;
- }
- memcpy(new_ref, ref, sizeof(*ref));
- new_ref->parent = node->val;
- list_add(&new_ref->list, &ref->list);
- }
- ulist_reinit(parents);
- }
-
- ulist_free(parents);
- return ret;
-}
-
-/*
- * merge two lists of backrefs and adjust counts accordingly
- *
- * mode = 1: merge identical keys, if key is set
- * mode = 2: merge identical parents
- */
-static int __merge_refs(struct list_head *head, int mode)
-{
- struct list_head *pos1;
-
- list_for_each(pos1, head) {
- struct list_head *n2;
- struct list_head *pos2;
- struct __prelim_ref *ref1;
-
- ref1 = list_entry(pos1, struct __prelim_ref, list);
-
- if (mode == 1 && ref1->key.type == 0)
- continue;
- for (pos2 = pos1->next, n2 = pos2->next; pos2 != head;
- pos2 = n2, n2 = pos2->next) {
- struct __prelim_ref *ref2;
-
- ref2 = list_entry(pos2, struct __prelim_ref, list);
-
- if (mode == 1) {
- if (memcmp(&ref1->key, &ref2->key,
- sizeof(ref1->key)) ||
- ref1->level != ref2->level ||
- ref1->root_id != ref2->root_id)
- continue;
- ref1->count += ref2->count;
- } else {
- if (ref1->parent != ref2->parent)
- continue;
- ref1->count += ref2->count;
- }
- list_del(&ref2->list);
- kfree(ref2);
- }
-
- }
- return 0;
-}
-
-/*
- * add all currently queued delayed refs from this head whose seq nr is
- * smaller or equal that seq to the list
- */
-static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
- struct btrfs_key *info_key,
- struct list_head *prefs)
-{
- struct btrfs_delayed_extent_op *extent_op = head->extent_op;
- struct rb_node *n = &head->node.rb_node;
- int sgn;
- int ret = 0;
-
- if (extent_op && extent_op->update_key)
- btrfs_disk_key_to_cpu(info_key, &extent_op->key);
-
- while ((n = rb_prev(n))) {
- struct btrfs_delayed_ref_node *node;
- node = rb_entry(n, struct btrfs_delayed_ref_node,
- rb_node);
- if (node->bytenr != head->node.bytenr)
- break;
- WARN_ON(node->is_head);
-
- if (node->seq > seq)
- continue;
-
- switch (node->action) {
- case BTRFS_ADD_DELAYED_EXTENT:
- case BTRFS_UPDATE_DELAYED_HEAD:
- WARN_ON(1);
- continue;
- case BTRFS_ADD_DELAYED_REF:
- sgn = 1;
- break;
- case BTRFS_DROP_DELAYED_REF:
- sgn = -1;
- break;
- default:
- BUG_ON(1);
- }
- switch (node->type) {
- case BTRFS_TREE_BLOCK_REF_KEY: {
- struct btrfs_delayed_tree_ref *ref;
-
- ref = btrfs_delayed_node_to_tree_ref(node);
- ret = __add_prelim_ref(prefs, ref->root, info_key,
- ref->level + 1, 0, node->bytenr,
- node->ref_mod * sgn);
- break;
- }
- case BTRFS_SHARED_BLOCK_REF_KEY: {
- struct btrfs_delayed_tree_ref *ref;
-
- ref = btrfs_delayed_node_to_tree_ref(node);
- ret = __add_prelim_ref(prefs, ref->root, info_key,
- ref->level + 1, ref->parent,
- node->bytenr,
- node->ref_mod * sgn);
- break;
- }
- case BTRFS_EXTENT_DATA_REF_KEY: {
- struct btrfs_delayed_data_ref *ref;
- struct btrfs_key key;
-
- ref = btrfs_delayed_node_to_data_ref(node);
-
- key.objectid = ref->objectid;
- key.type = BTRFS_EXTENT_DATA_KEY;
- key.offset = ref->offset;
- ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0,
- node->bytenr,
- node->ref_mod * sgn);
- break;
- }
- case BTRFS_SHARED_DATA_REF_KEY: {
- struct btrfs_delayed_data_ref *ref;
- struct btrfs_key key;
-
- ref = btrfs_delayed_node_to_data_ref(node);
-
- key.objectid = ref->objectid;
- key.type = BTRFS_EXTENT_DATA_KEY;
- key.offset = ref->offset;
- ret = __add_prelim_ref(prefs, ref->root, &key, 0,
- ref->parent, node->bytenr,
- node->ref_mod * sgn);
- break;
- }
- default:
- WARN_ON(1);
- }
- BUG_ON(ret);
- }
-
- return 0;
-}
-
-/*
- * add all inline backrefs for bytenr to the list
- */
-static int __add_inline_refs(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path, u64 bytenr,
- struct btrfs_key *info_key, int *info_level,
- struct list_head *prefs)
-{
- int ret = 0;
- int slot;
- struct extent_buffer *leaf;
- struct btrfs_key key;
- unsigned long ptr;
- unsigned long end;
- struct btrfs_extent_item *ei;
- u64 flags;
- u64 item_size;
-
- /*
- * enumerate all inline refs
- */
- leaf = path->nodes[0];
- slot = path->slots[0] - 1;
-
- item_size = btrfs_item_size_nr(leaf, slot);
- BUG_ON(item_size < sizeof(*ei));
-
- ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
- flags = btrfs_extent_flags(leaf, ei);
-
- ptr = (unsigned long)(ei + 1);
- end = (unsigned long)ei + item_size;
-
- if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- struct btrfs_tree_block_info *info;
- struct btrfs_disk_key disk_key;
-
- info = (struct btrfs_tree_block_info *)ptr;
- *info_level = btrfs_tree_block_level(leaf, info);
- btrfs_tree_block_key(leaf, info, &disk_key);
- btrfs_disk_key_to_cpu(info_key, &disk_key);
- ptr += sizeof(struct btrfs_tree_block_info);
- BUG_ON(ptr > end);
- } else {
- BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
- }
-
- while (ptr < end) {
- struct btrfs_extent_inline_ref *iref;
- u64 offset;
- int type;
-
- iref = (struct btrfs_extent_inline_ref *)ptr;
- type = btrfs_extent_inline_ref_type(leaf, iref);
- offset = btrfs_extent_inline_ref_offset(leaf, iref);
-
- switch (type) {
- case BTRFS_SHARED_BLOCK_REF_KEY:
- ret = __add_prelim_ref(prefs, 0, info_key,
- *info_level + 1, offset,
- bytenr, 1);
- break;
- case BTRFS_SHARED_DATA_REF_KEY: {
- struct btrfs_shared_data_ref *sdref;
- int count;
-
- sdref = (struct btrfs_shared_data_ref *)(iref + 1);
- count = btrfs_shared_data_ref_count(leaf, sdref);
- ret = __add_prelim_ref(prefs, 0, NULL, 0, offset,
- bytenr, count);
- break;
- }
- case BTRFS_TREE_BLOCK_REF_KEY:
- ret = __add_prelim_ref(prefs, offset, info_key,
- *info_level + 1, 0, bytenr, 1);
- break;
- case BTRFS_EXTENT_DATA_REF_KEY: {
- struct btrfs_extent_data_ref *dref;
- int count;
- u64 root;
-
- dref = (struct btrfs_extent_data_ref *)(&iref->offset);
- count = btrfs_extent_data_ref_count(leaf, dref);
- key.objectid = btrfs_extent_data_ref_objectid(leaf,
- dref);
- key.type = BTRFS_EXTENT_DATA_KEY;
- key.offset = btrfs_extent_data_ref_offset(leaf, dref);
- root = btrfs_extent_data_ref_root(leaf, dref);
- ret = __add_prelim_ref(prefs, root, &key, 0, 0, bytenr,
- count);
- break;
- }
- default:
- WARN_ON(1);
- }
- BUG_ON(ret);
- ptr += btrfs_extent_inline_ref_size(type);
- }
-
- return 0;
-}
-
-/*
- * add all non-inline backrefs for bytenr to the list
- */
-static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
- struct btrfs_path *path, u64 bytenr,
- struct btrfs_key *info_key, int info_level,
- struct list_head *prefs)
-{
- struct btrfs_root *extent_root = fs_info->extent_root;
- int ret;
- int slot;
- struct extent_buffer *leaf;
- struct btrfs_key key;
-
- while (1) {
- ret = btrfs_next_item(extent_root, path);
- if (ret < 0)
- break;
- if (ret) {
- ret = 0;
- break;
- }
-
- slot = path->slots[0];
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, slot);
-
- if (key.objectid != bytenr)
- break;
- if (key.type < BTRFS_TREE_BLOCK_REF_KEY)
- continue;
- if (key.type > BTRFS_SHARED_DATA_REF_KEY)
- break;
-
- switch (key.type) {
- case BTRFS_SHARED_BLOCK_REF_KEY:
- ret = __add_prelim_ref(prefs, 0, info_key,
- info_level + 1, key.offset,
- bytenr, 1);
- break;
- case BTRFS_SHARED_DATA_REF_KEY: {
- struct btrfs_shared_data_ref *sdref;
- int count;
-
- sdref = btrfs_item_ptr(leaf, slot,
- struct btrfs_shared_data_ref);
- count = btrfs_shared_data_ref_count(leaf, sdref);
- ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset,
- bytenr, count);
- break;
- }
- case BTRFS_TREE_BLOCK_REF_KEY:
- ret = __add_prelim_ref(prefs, key.offset, info_key,
- info_level + 1, 0, bytenr, 1);
- break;
- case BTRFS_EXTENT_DATA_REF_KEY: {
- struct btrfs_extent_data_ref *dref;
- int count;
- u64 root;
-
- dref = btrfs_item_ptr(leaf, slot,
- struct btrfs_extent_data_ref);
- count = btrfs_extent_data_ref_count(leaf, dref);
- key.objectid = btrfs_extent_data_ref_objectid(leaf,
- dref);
- key.type = BTRFS_EXTENT_DATA_KEY;
- key.offset = btrfs_extent_data_ref_offset(leaf, dref);
- root = btrfs_extent_data_ref_root(leaf, dref);
- ret = __add_prelim_ref(prefs, root, &key, 0, 0,
- bytenr, count);
- break;
- }
- default:
- WARN_ON(1);
- }
- BUG_ON(ret);
- }
-
- return ret;
-}
-
-/*
- * this adds all existing backrefs (inline backrefs, backrefs and delayed
- * refs) for the given bytenr to the refs list, merges duplicates and resolves
- * indirect refs to their parent bytenr.
- * When roots are found, they're added to the roots list
- *
- * FIXME some caching might speed things up
- */
-static int find_parent_nodes(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 seq, struct ulist *refs, struct ulist *roots)
-{
- struct btrfs_key key;
- struct btrfs_path *path;
- struct btrfs_key info_key = { 0 };
- struct btrfs_delayed_ref_root *delayed_refs = NULL;
- struct btrfs_delayed_ref_head *head;
- int info_level = 0;
- int ret;
- int search_commit_root = (trans == BTRFS_BACKREF_SEARCH_COMMIT_ROOT);
- struct list_head prefs_delayed;
- struct list_head prefs;
- struct __prelim_ref *ref;
-
- INIT_LIST_HEAD(&prefs);
- INIT_LIST_HEAD(&prefs_delayed);
-
- key.objectid = bytenr;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = (u64)-1;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->search_commit_root = !!search_commit_root;
-
- /*
- * grab both a lock on the path and a lock on the delayed ref head.
- * We need both to get a consistent picture of how the refs look
- * at a specified point in time
- */
-again:
- head = NULL;
-
- ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- BUG_ON(ret == 0);
-
- if (trans != BTRFS_BACKREF_SEARCH_COMMIT_ROOT) {
- /*
- * look if there are updates for this ref queued and lock the
- * head
- */
- delayed_refs = &trans->transaction->delayed_refs;
- spin_lock(&delayed_refs->lock);
- head = btrfs_find_delayed_ref_head(trans, bytenr);
- if (head) {
- if (!mutex_trylock(&head->mutex)) {
- atomic_inc(&head->node.refs);
- spin_unlock(&delayed_refs->lock);
-
- btrfs_release_path(path);
-
- /*
- * Mutex was contended, block until it's
- * released and try again
- */
- mutex_lock(&head->mutex);
- mutex_unlock(&head->mutex);
- btrfs_put_delayed_ref(&head->node);
- goto again;
- }
- ret = __add_delayed_refs(head, seq, &info_key,
- &prefs_delayed);
- if (ret) {
- spin_unlock(&delayed_refs->lock);
- goto out;
- }
- }
- spin_unlock(&delayed_refs->lock);
- }
-
- if (path->slots[0]) {
- struct extent_buffer *leaf;
- int slot;
-
- leaf = path->nodes[0];
- slot = path->slots[0] - 1;
- btrfs_item_key_to_cpu(leaf, &key, slot);
- if (key.objectid == bytenr &&
- key.type == BTRFS_EXTENT_ITEM_KEY) {
- ret = __add_inline_refs(fs_info, path, bytenr,
- &info_key, &info_level, &prefs);
- if (ret)
- goto out;
- ret = __add_keyed_refs(fs_info, path, bytenr, &info_key,
- info_level, &prefs);
- if (ret)
- goto out;
- }
- }
- btrfs_release_path(path);
-
- /*
- * when adding the delayed refs above, the info_key might not have
- * been known yet. Go over the list and replace the missing keys
- */
- list_for_each_entry(ref, &prefs_delayed, list) {
- if ((ref->key.offset | ref->key.type | ref->key.objectid) == 0)
- memcpy(&ref->key, &info_key, sizeof(ref->key));
- }
- list_splice_init(&prefs_delayed, &prefs);
-
- ret = __merge_refs(&prefs, 1);
- if (ret)
- goto out;
-
- ret = __resolve_indirect_refs(fs_info, search_commit_root, &prefs);
- if (ret)
- goto out;
-
- ret = __merge_refs(&prefs, 2);
- if (ret)
- goto out;
-
- while (!list_empty(&prefs)) {
- ref = list_first_entry(&prefs, struct __prelim_ref, list);
- list_del(&ref->list);
- if (ref->count < 0)
- WARN_ON(1);
- if (ref->count && ref->root_id && ref->parent == 0) {
- /* no parent == root of tree */
- ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
- BUG_ON(ret < 0);
- }
- if (ref->count && ref->parent) {
- ret = ulist_add(refs, ref->parent, 0, GFP_NOFS);
- BUG_ON(ret < 0);
- }
- kfree(ref);
- }
-
-out:
- if (head)
- mutex_unlock(&head->mutex);
- btrfs_free_path(path);
- while (!list_empty(&prefs)) {
- ref = list_first_entry(&prefs, struct __prelim_ref, list);
- list_del(&ref->list);
- kfree(ref);
- }
- while (!list_empty(&prefs_delayed)) {
- ref = list_first_entry(&prefs_delayed, struct __prelim_ref,
- list);
- list_del(&ref->list);
- kfree(ref);
- }
-
- return ret;
-}
-
-/*
- * Finds all leafs with a reference to the specified combination of bytenr and
- * offset. key_list_head will point to a list of corresponding keys (caller must
- * free each list element). The leafs will be stored in the leafs ulist, which
- * must be freed with ulist_free.
- *
- * returns 0 on success, <0 on error
- */
-static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 num_bytes, u64 seq, struct ulist **leafs)
-{
- struct ulist *tmp;
- int ret;
-
- tmp = ulist_alloc(GFP_NOFS);
- if (!tmp)
- return -ENOMEM;
- *leafs = ulist_alloc(GFP_NOFS);
- if (!*leafs) {
- ulist_free(tmp);
- return -ENOMEM;
- }
-
- ret = find_parent_nodes(trans, fs_info, bytenr, seq, *leafs, tmp);
- ulist_free(tmp);
-
- if (ret < 0 && ret != -ENOENT) {
- ulist_free(*leafs);
- return ret;
- }
-
- return 0;
-}
-
-/*
- * walk all backrefs for a given extent to find all roots that reference this
- * extent. Walking a backref means finding all extents that reference this
- * extent and in turn walk the backrefs of those, too. Naturally this is a
- * recursive process, but here it is implemented in an iterative fashion: We
- * find all referencing extents for the extent in question and put them on a
- * list. In turn, we find all referencing extents for those, further appending
- * to the list. The way we iterate the list allows adding more elements after
- * the current while iterating. The process stops when we reach the end of the
- * list. Found roots are added to the roots list.
- *
- * returns 0 on success, < 0 on error.
- */
-int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 num_bytes, u64 seq, struct ulist **roots)
-{
- struct ulist *tmp;
- struct ulist_node *node = NULL;
- int ret;
-
- tmp = ulist_alloc(GFP_NOFS);
- if (!tmp)
- return -ENOMEM;
- *roots = ulist_alloc(GFP_NOFS);
- if (!*roots) {
- ulist_free(tmp);
- return -ENOMEM;
- }
-
- while (1) {
- ret = find_parent_nodes(trans, fs_info, bytenr, seq,
- tmp, *roots);
- if (ret < 0 && ret != -ENOENT) {
- ulist_free(tmp);
- ulist_free(*roots);
- return ret;
- }
- node = ulist_next(tmp, node);
- if (!node)
- break;
- bytenr = node->val;
- }
-
- ulist_free(tmp);
- return 0;
-}
-
-
-static int __inode_info(u64 inum, u64 ioff, u8 key_type,
- struct btrfs_root *fs_root, struct btrfs_path *path,
- struct btrfs_key *found_key)
-{
- int ret;
- struct btrfs_key key;
- struct extent_buffer *eb;
-
- key.type = key_type;
- key.objectid = inum;
- key.offset = ioff;
-
- ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
- if (ret < 0)
- return ret;
-
- eb = path->nodes[0];
- if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
- ret = btrfs_next_leaf(fs_root, path);
- if (ret)
- return ret;
- eb = path->nodes[0];
- }
-
- btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
- if (found_key->type != key.type || found_key->objectid != key.objectid)
- return 1;
-
- return 0;
-}
-
-/*
- * this makes the path point to (inum INODE_ITEM ioff)
- */
-int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
- struct btrfs_path *path)
-{
- struct btrfs_key key;
- return __inode_info(inum, ioff, BTRFS_INODE_ITEM_KEY, fs_root, path,
- &key);
-}
-
-static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
- struct btrfs_path *path,
- struct btrfs_key *found_key)
-{
- return __inode_info(inum, ioff, BTRFS_INODE_REF_KEY, fs_root, path,
- found_key);
-}
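A short sketch of how inode_item_info() is typically consumed (hypothetical helper; the inode item accessors come from ctree.h):

	static int example_read_inode_size(struct btrfs_root *fs_root,
					   u64 inum, u64 *size)
	{
		struct btrfs_path *path;
		struct btrfs_inode_item *ii;
		int ret;

		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;

		/* position the path at (inum INODE_ITEM 0) */
		ret = inode_item_info(inum, 0, fs_root, path);
		if (ret) {
			btrfs_free_path(path);
			return ret < 0 ? ret : -ENOENT;
		}

		ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_inode_item);
		*size = btrfs_inode_size(path->nodes[0], ii);

		btrfs_free_path(path);
		return 0;
	}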
-
-/*
- * this iterates to turn a btrfs_inode_ref into a full filesystem path.
- * elements of the path are separated by '/' and the path is guaranteed to be
- * 0-terminated. the path is only given within the current file system and
- * therefore never starts with a '/'. the caller is responsible for providing
- * "size" bytes in "dest". the dest buffer is filled backwards and the start
- * of the resulting string is returned. this pointer is normally within dest.
- * in case the path buffer would overflow, the pointer is decremented further
- * as if output had been written to the buffer, though no more output is
- * actually generated. that way, the caller can determine how much space would
- * be required for the path to fit into the buffer. in that case, the returned
- * value will be smaller than dest. callers must check this!
- */
-static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
- struct btrfs_inode_ref *iref,
- struct extent_buffer *eb_in, u64 parent,
- char *dest, u32 size)
-{
- u32 len;
- int slot;
- u64 next_inum;
- int ret;
- s64 bytes_left = size - 1;
- struct extent_buffer *eb = eb_in;
- struct btrfs_key found_key;
- int leave_spinning = path->leave_spinning;
-
- if (bytes_left >= 0)
- dest[bytes_left] = '\0';
-
- path->leave_spinning = 1;
- while (1) {
- len = btrfs_inode_ref_name_len(eb, iref);
- bytes_left -= len;
- if (bytes_left >= 0)
- read_extent_buffer(eb, dest + bytes_left,
- (unsigned long)(iref + 1), len);
- if (eb != eb_in) {
- btrfs_tree_read_unlock_blocking(eb);
- free_extent_buffer(eb);
- }
- ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
- if (ret > 0)
- ret = -ENOENT;
- if (ret)
- break;
- next_inum = found_key.offset;
-
- /* regular exit ahead */
- if (parent == next_inum)
- break;
-
- slot = path->slots[0];
- eb = path->nodes[0];
- /* make sure we can use eb after releasing the path */
- if (eb != eb_in) {
- atomic_inc(&eb->refs);
- btrfs_tree_read_lock(eb);
- btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
- }
- btrfs_release_path(path);
-
- iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
- parent = next_inum;
- --bytes_left;
- if (bytes_left >= 0)
- dest[bytes_left] = '/';
- }
-
- btrfs_release_path(path);
- path->leave_spinning = leave_spinning;
-
- if (ret)
- return ERR_PTR(ret);
-
- return dest + bytes_left;
-}
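A sketch of how a caller might honor the truncation contract described above (hypothetical helper; the buffer size is arbitrary):

	static int example_resolve_name(struct btrfs_root *fs_root,
					struct btrfs_path *path,
					struct btrfs_inode_ref *iref,
					struct extent_buffer *eb, u64 parent)
	{
		char buf[128];
		char *name;

		name = iref_to_path(fs_root, path, iref, eb, parent,
				    buf, sizeof(buf));
		if (IS_ERR(name))
			return PTR_ERR(name);
		/* name < buf means (buf - name) more bytes would have been needed */
		if (name < buf)
			return -ENAMETOOLONG;

		pr_debug("resolved path: %s\n", name);
		return 0;
	}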
-
-/*
- * this makes the path point to (logical EXTENT_ITEM *)
- * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for
- * tree blocks and <0 on error.
- */
-int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
- struct btrfs_path *path, struct btrfs_key *found_key)
-{
- int ret;
- u64 flags;
- u32 item_size;
- struct extent_buffer *eb;
- struct btrfs_extent_item *ei;
- struct btrfs_key key;
-
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.objectid = logical;
- key.offset = (u64)-1;
-
- ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
- if (ret < 0)
- return ret;
- ret = btrfs_previous_item(fs_info->extent_root, path,
- 0, BTRFS_EXTENT_ITEM_KEY);
- if (ret < 0)
- return ret;
-
- btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
- if (found_key->type != BTRFS_EXTENT_ITEM_KEY ||
- found_key->objectid > logical ||
- found_key->objectid + found_key->offset <= logical) {
- pr_debug("logical %llu is not within any extent\n",
- (unsigned long long)logical);
- return -ENOENT;
- }
-
- eb = path->nodes[0];
- item_size = btrfs_item_size_nr(eb, path->slots[0]);
- BUG_ON(item_size < sizeof(*ei));
-
- ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
- flags = btrfs_extent_flags(eb, ei);
-
- pr_debug("logical %llu is at position %llu within the extent (%llu "
- "EXTENT_ITEM %llu) flags %#llx size %u\n",
- (unsigned long long)logical,
- (unsigned long long)(logical - found_key->objectid),
- (unsigned long long)found_key->objectid,
- (unsigned long long)found_key->offset,
- (unsigned long long)flags, item_size);
- if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
- return BTRFS_EXTENT_FLAG_TREE_BLOCK;
- if (flags & BTRFS_EXTENT_FLAG_DATA)
- return BTRFS_EXTENT_FLAG_DATA;
-
- return -EIO;
-}
-
-/*
- * helper function to iterate extent inline refs. ptr must point to a 0 value
- * for the first call and may be modified. it is used to track state.
- * if more refs exist, 0 is returned and the next call to
- * __get_extent_inline_ref must pass the modified ptr parameter to get the
- * next ref. after the last ref was processed, 1 is returned.
- * returns <0 on error
- */
-static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
- struct btrfs_extent_item *ei, u32 item_size,
- struct btrfs_extent_inline_ref **out_eiref,
- int *out_type)
-{
- unsigned long end;
- u64 flags;
- struct btrfs_tree_block_info *info;
-
- if (!*ptr) {
- /* first call */
- flags = btrfs_extent_flags(eb, ei);
- if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- info = (struct btrfs_tree_block_info *)(ei + 1);
- *out_eiref =
- (struct btrfs_extent_inline_ref *)(info + 1);
- } else {
- *out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
- }
- *ptr = (unsigned long)*out_eiref;
- if ((void *)*ptr >= (void *)ei + item_size)
- return -ENOENT;
- }
-
- end = (unsigned long)ei + item_size;
- *out_eiref = (struct btrfs_extent_inline_ref *)*ptr;
- *out_type = btrfs_extent_inline_ref_type(eb, *out_eiref);
-
- *ptr += btrfs_extent_inline_ref_size(*out_type);
- WARN_ON(*ptr > end);
- if (*ptr == end)
- return 1; /* last */
-
- return 0;
-}
-
-/*
- * reads the tree block backref for an extent. tree level and root are returned
- * through out_level and out_root. ptr must point to a 0 value for the first
- * call and may be modified (see __get_extent_inline_ref comment).
- * returns 0 if data was provided, 1 if there was no more data to provide or
- * <0 on error.
- */
-int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
- struct btrfs_extent_item *ei, u32 item_size,
- u64 *out_root, u8 *out_level)
-{
- int ret;
- int type;
- struct btrfs_tree_block_info *info;
- struct btrfs_extent_inline_ref *eiref;
-
- if (*ptr == (unsigned long)-1)
- return 1;
-
- while (1) {
- ret = __get_extent_inline_ref(ptr, eb, ei, item_size,
- &eiref, &type);
- if (ret < 0)
- return ret;
-
- if (type == BTRFS_TREE_BLOCK_REF_KEY ||
- type == BTRFS_SHARED_BLOCK_REF_KEY)
- break;
-
- if (ret == 1)
- return 1;
- }
-
- /* we can treat both ref types equally here */
- info = (struct btrfs_tree_block_info *)(ei + 1);
- *out_root = btrfs_extent_inline_ref_offset(eb, eiref);
- *out_level = btrfs_tree_block_level(eb, info);
-
- if (ret == 1)
- *ptr = (unsigned long)-1;
-
- return 0;
-}
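An illustrative consumption loop for this iterator (hypothetical; scrub-style error reporting is the intended user):

	static void example_walk_tree_backrefs(struct extent_buffer *eb,
					       struct btrfs_extent_item *ei,
					       u32 item_size)
	{
		unsigned long ptr = 0;	/* must start at 0, see above */
		u64 root;
		u8 level;
		int ret;

		while (1) {
			ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
						      &root, &level);
			if (ret < 0)
				break;		/* error */
			if (ret > 0)
				break;		/* no more tree backrefs */
			pr_debug("referenced from root %llu at level %d\n",
				 (unsigned long long)root, level);
		}
	}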
-
-static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, u64 logical,
- u64 orig_extent_item_objectid,
- u64 extent_item_pos, u64 root,
- iterate_extent_inodes_t *iterate, void *ctx)
-{
- u64 disk_byte;
- struct btrfs_key key;
- struct btrfs_file_extent_item *fi;
- struct extent_buffer *eb;
- int slot;
- int nritems;
- int ret = 0;
- int extent_type;
- u64 data_offset;
- u64 data_len;
-
- eb = read_tree_block(fs_info->tree_root, logical,
- fs_info->tree_root->leafsize, 0);
- if (!eb)
- return -EIO;
-
-	/*
-	 * from the shared data ref, we only have the leaf but we need
-	 * the key. thus, we must look into all items and check whether
-	 * we find one (or more) with a reference to our extent item.
-	 */
- nritems = btrfs_header_nritems(eb);
- for (slot = 0; slot < nritems; ++slot) {
- btrfs_item_key_to_cpu(eb, &key, slot);
- if (key.type != BTRFS_EXTENT_DATA_KEY)
- continue;
- fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
- extent_type = btrfs_file_extent_type(eb, fi);
- if (extent_type == BTRFS_FILE_EXTENT_INLINE)
- continue;
- /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */
- disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
- if (disk_byte != orig_extent_item_objectid)
- continue;
-
- data_offset = btrfs_file_extent_offset(eb, fi);
- data_len = btrfs_file_extent_num_bytes(eb, fi);
-
- if (extent_item_pos < data_offset ||
- extent_item_pos >= data_offset + data_len)
- continue;
-
-		pr_debug("ref for %llu resolved, key (%llu EXTENT_DATA %llu), "
- "root %llu\n", orig_extent_item_objectid,
- key.objectid, key.offset, root);
- ret = iterate(key.objectid,
- key.offset + (extent_item_pos - data_offset),
- root, ctx);
- if (ret) {
- pr_debug("stopping iteration because ret=%d\n", ret);
- break;
- }
- }
-
- free_extent_buffer(eb);
-
- return ret;
-}
-
-/*
- * calls iterate() for every inode that references the extent identified by
- * the given parameters.
- * when the iterator function returns a non-zero value, iteration stops.
- */
-int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
- u64 extent_item_objectid, u64 extent_item_pos,
- int search_commit_root,
- iterate_extent_inodes_t *iterate, void *ctx)
-{
- int ret;
- struct list_head data_refs = LIST_HEAD_INIT(data_refs);
- struct list_head shared_refs = LIST_HEAD_INIT(shared_refs);
- struct btrfs_trans_handle *trans;
- struct ulist *refs = NULL;
- struct ulist *roots = NULL;
- struct ulist_node *ref_node = NULL;
- struct ulist_node *root_node = NULL;
- struct seq_list seq_elem;
- struct btrfs_delayed_ref_root *delayed_refs = NULL;
-
- pr_debug("resolving all inodes for extent %llu\n",
- extent_item_objectid);
-
- if (search_commit_root) {
- trans = BTRFS_BACKREF_SEARCH_COMMIT_ROOT;
- } else {
- trans = btrfs_join_transaction(fs_info->extent_root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- delayed_refs = &trans->transaction->delayed_refs;
- spin_lock(&delayed_refs->lock);
- btrfs_get_delayed_seq(delayed_refs, &seq_elem);
- spin_unlock(&delayed_refs->lock);
- }
-
- ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
- extent_item_pos, seq_elem.seq,
- &refs);
-
- if (ret)
- goto out;
-
- while (!ret && (ref_node = ulist_next(refs, ref_node))) {
- ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, -1,
- seq_elem.seq, &roots);
- if (ret)
- break;
- while (!ret && (root_node = ulist_next(roots, root_node))) {
- pr_debug("root %llu references leaf %llu\n",
- root_node->val, ref_node->val);
- ret = iterate_leaf_refs(fs_info, ref_node->val,
- extent_item_objectid,
- extent_item_pos, root_node->val,
- iterate, ctx);
- }
- }
-
- ulist_free(refs);
- ulist_free(roots);
-out:
- if (!search_commit_root) {
- btrfs_put_delayed_seq(delayed_refs, &seq_elem);
- btrfs_end_transaction(trans, fs_info->extent_root);
- }
-
- return ret;
-}
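A minimal callback matching the iterate_extent_inodes_t signature (illustrative; the logical-resolve ioctl wires up a real one):

	static int example_inode_cb(u64 inum, u64 offset, u64 root, void *ctx)
	{
		pr_debug("inode %llu offset %llu root %llu\n",
			 (unsigned long long)inum,
			 (unsigned long long)offset,
			 (unsigned long long)root);
		/* returning non-zero would stop the iteration */
		return 0;
	}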
-
-int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
- iterate_extent_inodes_t *iterate, void *ctx)
-{
- int ret;
- u64 extent_item_pos;
- struct btrfs_key found_key;
- int search_commit_root = path->search_commit_root;
-
- ret = extent_from_logical(fs_info, logical, path,
- &found_key);
- btrfs_release_path(path);
- if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
- ret = -EINVAL;
- if (ret < 0)
- return ret;
-
- extent_item_pos = logical - found_key.objectid;
- ret = iterate_extent_inodes(fs_info, found_key.objectid,
- extent_item_pos, search_commit_root,
- iterate, ctx);
-
- return ret;
-}
-
-static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
- struct btrfs_path *path,
- iterate_irefs_t *iterate, void *ctx)
-{
- int ret = 0;
- int slot;
- u32 cur;
- u32 len;
- u32 name_len;
- u64 parent = 0;
- int found = 0;
- struct extent_buffer *eb;
- struct btrfs_item *item;
- struct btrfs_inode_ref *iref;
- struct btrfs_key found_key;
-
- while (!ret) {
- path->leave_spinning = 1;
- ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
- &found_key);
- if (ret < 0)
- break;
- if (ret) {
- ret = found ? 0 : -ENOENT;
- break;
- }
- ++found;
-
- parent = found_key.offset;
- slot = path->slots[0];
- eb = path->nodes[0];
- /* make sure we can use eb after releasing the path */
- atomic_inc(&eb->refs);
- btrfs_tree_read_lock(eb);
- btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
- btrfs_release_path(path);
-
- item = btrfs_item_nr(eb, slot);
- iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
-
- for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) {
- name_len = btrfs_inode_ref_name_len(eb, iref);
- /* path must be released before calling iterate()! */
- pr_debug("following ref at offset %u for inode %llu in "
- "tree %llu\n", cur,
- (unsigned long long)found_key.objectid,
- (unsigned long long)fs_root->objectid);
- ret = iterate(parent, iref, eb, ctx);
- if (ret)
- break;
- len = sizeof(*iref) + name_len;
- iref = (struct btrfs_inode_ref *)((char *)iref + len);
- }
- btrfs_tree_read_unlock_blocking(eb);
- free_extent_buffer(eb);
- }
-
- btrfs_release_path(path);
-
- return ret;
-}
-
-/*
- * returns 0 if the path could be dumped (possibly truncated)
- * returns <0 in case of an error
- */
-static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref,
- struct extent_buffer *eb, void *ctx)
-{
- struct inode_fs_paths *ipath = ctx;
- char *fspath;
- char *fspath_min;
- int i = ipath->fspath->elem_cnt;
- const int s_ptr = sizeof(char *);
- u32 bytes_left;
-
- bytes_left = ipath->fspath->bytes_left > s_ptr ?
- ipath->fspath->bytes_left - s_ptr : 0;
-
- fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr;
- fspath = iref_to_path(ipath->fs_root, ipath->btrfs_path, iref, eb,
- inum, fspath_min, bytes_left);
- if (IS_ERR(fspath))
- return PTR_ERR(fspath);
-
- if (fspath > fspath_min) {
- pr_debug("path resolved: %s\n", fspath);
- ipath->fspath->val[i] = (u64)(unsigned long)fspath;
- ++ipath->fspath->elem_cnt;
- ipath->fspath->bytes_left = fspath - fspath_min;
- } else {
- pr_debug("missed path, not enough space. missing bytes: %lu, "
- "constructed so far: %s\n",
- (unsigned long)(fspath_min - fspath), fspath_min);
- ++ipath->fspath->elem_missed;
- ipath->fspath->bytes_missing += fspath_min - fspath;
- ipath->fspath->bytes_left = 0;
- }
-
- return 0;
-}
-
-/*
- * this dumps all file system paths to the inode into the ipath struct,
- * provided it has been created large enough. each path is zero-terminated
- * and accessed from ipath->fspath->val[i].
- * when it returns, there are ipath->fspath->elem_cnt number of paths available
- * in ipath->fspath->val[]. when the allocated space wasn't sufficient, the
- * number of missed paths is recorded in ipath->fspath->elem_missed, otherwise
- * it's zero. ipath->fspath->bytes_missing holds the number of bytes that would
- * have been needed to return all paths.
- */
-int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
-{
- return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path,
- inode_to_path, ipath);
-}
-
-struct btrfs_data_container *init_data_container(u32 total_bytes)
-{
- struct btrfs_data_container *data;
- size_t alloc_bytes;
-
- alloc_bytes = max_t(size_t, total_bytes, sizeof(*data));
- data = kmalloc(alloc_bytes, GFP_NOFS);
- if (!data)
- return ERR_PTR(-ENOMEM);
-
- if (total_bytes >= sizeof(*data)) {
- data->bytes_left = total_bytes - sizeof(*data);
- data->bytes_missing = 0;
- } else {
- data->bytes_missing = sizeof(*data) - total_bytes;
- data->bytes_left = 0;
- }
-
- data->elem_cnt = 0;
- data->elem_missed = 0;
-
- return data;
-}
-
-/*
- * allocates space to return multiple file system paths for an inode.
- * total_bytes to allocate is passed; note that the space usable for actual
- * path information will be total_bytes - sizeof(struct btrfs_data_container).
- * the returned pointer must be freed with free_ipath() in the end.
- */
-struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
- struct btrfs_path *path)
-{
- struct inode_fs_paths *ifp;
- struct btrfs_data_container *fspath;
-
- fspath = init_data_container(total_bytes);
- if (IS_ERR(fspath))
- return (void *)fspath;
-
- ifp = kmalloc(sizeof(*ifp), GFP_NOFS);
- if (!ifp) {
- kfree(fspath);
- return ERR_PTR(-ENOMEM);
- }
-
- ifp->btrfs_path = path;
- ifp->fspath = fspath;
- ifp->fs_root = fs_root;
-
- return ifp;
-}
-
-void free_ipath(struct inode_fs_paths *ipath)
-{
- if (!ipath)
- return;
- kfree(ipath->fspath);
- kfree(ipath);
-}
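A sketch tying the ipath helpers together (hypothetical caller, loosely modeled on an ino-to-path style lookup; the 4096-byte container size is arbitrary):

	static int example_print_inode_paths(struct btrfs_root *fs_root, u64 inum)
	{
		struct btrfs_path *path;
		struct inode_fs_paths *ipath;
		u32 i;
		int ret;

		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;

		ipath = init_ipath(4096, fs_root, path);
		if (IS_ERR(ipath)) {
			btrfs_free_path(path);
			return PTR_ERR(ipath);
		}

		ret = paths_from_inode(inum, ipath);
		if (ret < 0)
			goto out;

		for (i = 0; i < ipath->fspath->elem_cnt; ++i)
			pr_debug("path %u: %s\n", i,
				 (char *)(unsigned long)ipath->fspath->val[i]);

		if (ipath->fspath->elem_missed)
			pr_debug("%u paths did not fit, %u bytes missing\n",
				 ipath->fspath->elem_missed,
				 ipath->fspath->bytes_missing);
	out:
		free_ipath(ipath);
		btrfs_free_path(path);
		return ret;
	}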
diff --git a/ANDROID_3.4.5/fs/btrfs/backref.h b/ANDROID_3.4.5/fs/btrfs/backref.h
deleted file mode 100644
index 57ea2e95..00000000
--- a/ANDROID_3.4.5/fs/btrfs/backref.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2011 STRATO. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __BTRFS_BACKREF__
-#define __BTRFS_BACKREF__
-
-#include "ioctl.h"
-#include "ulist.h"
-
-#define BTRFS_BACKREF_SEARCH_COMMIT_ROOT ((struct btrfs_trans_handle *)0)
-
-struct inode_fs_paths {
- struct btrfs_path *btrfs_path;
- struct btrfs_root *fs_root;
- struct btrfs_data_container *fspath;
-};
-
-typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
- void *ctx);
-typedef int (iterate_irefs_t)(u64 parent, struct btrfs_inode_ref *iref,
- struct extent_buffer *eb, void *ctx);
-
-int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
- struct btrfs_path *path);
-
-int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
- struct btrfs_path *path, struct btrfs_key *found_key);
-
-int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
- struct btrfs_extent_item *ei, u32 item_size,
- u64 *out_root, u8 *out_level);
-
-int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
- u64 extent_item_objectid,
- u64 extent_offset, int search_commit_root,
- iterate_extent_inodes_t *iterate, void *ctx);
-
-int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
- struct btrfs_path *path,
- iterate_extent_inodes_t *iterate, void *ctx);
-
-int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
-
-int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 num_bytes, u64 seq, struct ulist **roots);
-
-struct btrfs_data_container *init_data_container(u32 total_bytes);
-struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
- struct btrfs_path *path);
-void free_ipath(struct inode_fs_paths *ipath);
-
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/btrfs_inode.h b/ANDROID_3.4.5/fs/btrfs/btrfs_inode.h
deleted file mode 100644
index 9b9b15fd..00000000
--- a/ANDROID_3.4.5/fs/btrfs/btrfs_inode.h
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __BTRFS_I__
-#define __BTRFS_I__
-
-#include "extent_map.h"
-#include "extent_io.h"
-#include "ordered-data.h"
-#include "delayed-inode.h"
-
-/* in memory btrfs inode */
-struct btrfs_inode {
- /* which subvolume this inode belongs to */
- struct btrfs_root *root;
-
- /* key used to find this inode on disk. This is used by the code
- * to read in roots of subvolumes
- */
- struct btrfs_key location;
-
- /* Lock for counters */
- spinlock_t lock;
-
- /* the extent_tree has caches of all the extent mappings to disk */
- struct extent_map_tree extent_tree;
-
- /* the io_tree does range state (DIRTY, LOCKED etc) */
- struct extent_io_tree io_tree;
-
- /* special utility tree used to record which mirrors have already been
- * tried when checksums fail for a given block
- */
- struct extent_io_tree io_failure_tree;
-
- /* held while logging the inode in tree-log.c */
- struct mutex log_mutex;
-
- /* held while doing delalloc reservations */
- struct mutex delalloc_mutex;
-
- /* used to order data wrt metadata */
- struct btrfs_ordered_inode_tree ordered_tree;
-
- /* for keeping track of orphaned inodes */
- struct list_head i_orphan;
-
- /* list of all the delalloc inodes in the FS. There are times we need
- * to write all the delalloc pages to disk, and this list is used
- * to walk them all.
- */
- struct list_head delalloc_inodes;
-
- /*
- * list for tracking inodes that must be sent to disk before a
- * rename or truncate commit
- */
- struct list_head ordered_operations;
-
- /* node for the red-black tree that links inodes in subvolume root */
- struct rb_node rb_node;
-
- /* the space_info for where this inode's data allocations are done */
- struct btrfs_space_info *space_info;
-
- /* full 64 bit generation number, struct vfs_inode doesn't have a big
- * enough field for this.
- */
- u64 generation;
-
- /* sequence number for NFS changes */
- u64 sequence;
-
- /*
- * transid of the trans_handle that last modified this inode
- */
- u64 last_trans;
-
- /*
- * log transid when this inode was last modified
- */
- u64 last_sub_trans;
-
- /*
- * transid that last logged this inode
- */
- u64 logged_trans;
-
- /* total number of bytes pending delalloc, used by stat to calc the
- * real block usage of the file
- */
- u64 delalloc_bytes;
-
- /*
- * the size of the file stored in the metadata on disk. data=ordered
- * means the in-memory i_size might be larger than the size on disk
- * because not all the blocks are written yet.
- */
- u64 disk_i_size;
-
- /*
- * if this is a directory then index_cnt is the counter for the index
- * number for new files that are created
- */
- u64 index_cnt;
-
- /* the fsync log has some corner cases that mean we have to check
- * directories to see if any unlinks have been done before
- * the directory was logged. See tree-log.c for all the
- * details
- */
- u64 last_unlink_trans;
-
- /*
- * Number of bytes outstanding that are going to need csums. This is
- * used in ENOSPC accounting.
- */
- u64 csum_bytes;
-
- /* flags field from the on disk inode */
- u32 flags;
-
- /*
- * Counters to keep track of the number of extent item's we may use due
- * to delalloc and such. outstanding_extents is the number of extent
- * items we think we'll end up using, and reserved_extents is the number
- * of extent items we've reserved metadata for.
- */
- unsigned outstanding_extents;
- unsigned reserved_extents;
-
- /*
- * ordered_data_close is set by truncate when a file that used
- * to have good data has been truncated to zero. When it is set
- * the btrfs file release call will add this inode to the
- * ordered operations list so that we make sure to flush out any
- * new data the application may have written before commit.
- */
- unsigned ordered_data_close:1;
- unsigned orphan_meta_reserved:1;
- unsigned dummy_inode:1;
- unsigned in_defrag:1;
- unsigned delalloc_meta_reserved:1;
-
- /*
- * always compress this one file
- */
- unsigned force_compress:4;
-
- struct btrfs_delayed_node *delayed_node;
-
- struct inode vfs_inode;
-};
-
-extern unsigned char btrfs_filetype_table[];
-
-static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
-{
- return container_of(inode, struct btrfs_inode, vfs_inode);
-}
-
-static inline u64 btrfs_ino(struct inode *inode)
-{
- u64 ino = BTRFS_I(inode)->location.objectid;
-
- /*
- * !ino: btree_inode
- * type == BTRFS_ROOT_ITEM_KEY: subvol dir
- */
- if (!ino || BTRFS_I(inode)->location.type == BTRFS_ROOT_ITEM_KEY)
- ino = inode->i_ino;
- return ino;
-}
-
-static inline void btrfs_i_size_write(struct inode *inode, u64 size)
-{
- i_size_write(inode, size);
- BTRFS_I(inode)->disk_i_size = size;
-}
-
-static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
- struct inode *inode)
-{
- if (root == root->fs_info->tree_root ||
- BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
- return true;
- return false;
-}
-
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/check-integrity.c b/ANDROID_3.4.5/fs/btrfs/check-integrity.c
deleted file mode 100644
index c053e90f..00000000
--- a/ANDROID_3.4.5/fs/btrfs/check-integrity.c
+++ /dev/null
@@ -1,3068 +0,0 @@
-/*
- * Copyright (C) STRATO AG 2011. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-/*
- * This module can be used to catch cases when the btrfs kernel
- * code executes write requests to the disk that bring the file
- * system in an inconsistent state. In such a state, a power-loss
- * or kernel panic event would cause that the data on disk is
- * lost or at least damaged.
- *
- * Code is added that examines all block write requests during
- * runtime (including writes of the super block). Three rules
- * are verified and an error is printed on violation of the
- * rules:
- * 1. It is not allowed to write a disk block which is
- * currently referenced by the super block (either directly
- * or indirectly).
- * 2. When a super block is written, it is verified that all
- * referenced (directly or indirectly) blocks fulfill the
- * following requirements:
- * 2a. All referenced blocks have either been present when
- * the file system was mounted, (i.e., they have been
- * referenced by the super block) or they have been
- * written since then and the write completion callback
- * was called and a FLUSH request to the device where
- * these blocks are located was received and completed.
- * 2b. All referenced blocks need to have a generation
- * number which is equal to the parent's number.
- *
- * One issue that was found using this module was that the log
- * tree on disk became temporarily corrupted because disk blocks
- * that had been in use for the log tree had been freed and
- * reused too early, while being referenced by the written super
- * block.
- *
- * The search term in the kernel log that can be used to filter
- * on the existence of detected integrity issues is
- * "btrfs: attempt".
- *
- * The integrity check is enabled via mount options. These
- * mount options are only supported if the integrity check
- * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
- *
- * Example #1, apply integrity checks to all metadata:
- * mount /dev/sdb1 /mnt -o check_int
- *
- * Example #2, apply integrity checks to all metadata and
- * to data extents:
- * mount /dev/sdb1 /mnt -o check_int_data
- *
- * Example #3, apply integrity checks to all metadata and dump
- * the tree that the super block references to kernel messages
- * each time after a super block was written:
- * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
- *
- * If the integrity check tool is included and activated in
- * the mount options, plenty of kernel memory is used, and
- * plenty of additional CPU cycles are spent. Enabling this
- * functionality is not intended for normal use. In most
- * cases, unless you are a btrfs developer who needs to verify
- * the integrity of (super)-block write requests, do not
- * enable the config option BTRFS_FS_CHECK_INTEGRITY to
- * include and compile the integrity check tool.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/buffer_head.h>
-#include <linux/mutex.h>
-#include <linux/crc32c.h>
-#include <linux/genhd.h>
-#include <linux/blkdev.h>
-#include "ctree.h"
-#include "disk-io.h"
-#include "transaction.h"
-#include "extent_io.h"
-#include "volumes.h"
-#include "print-tree.h"
-#include "locking.h"
-#include "check-integrity.h"
-
-#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
-#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
-#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
-#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
-#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
-#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
-#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
-#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters,
- * excluding " [...]" */
-#define BTRFSIC_BLOCK_SIZE PAGE_SIZE
-
-#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
-
-/*
- * The definition of the bitmask fields for the print_mask.
- * They are specified with the mount option check_integrity_print_mask.
- */
-#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE 0x00000001
-#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION 0x00000002
-#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE 0x00000004
-#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE 0x00000008
-#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH 0x00000010
-#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH 0x00000020
-#define BTRFSIC_PRINT_MASK_VERBOSE 0x00000040
-#define BTRFSIC_PRINT_MASK_VERY_VERBOSE 0x00000080
-#define BTRFSIC_PRINT_MASK_INITIAL_TREE 0x00000100
-#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES 0x00000200
-#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400
-#define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800
-#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000
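A worked decoding of the check_int_print_mask=263 example given in the header comment above (263 == 0x107):

	/*
	 * 0x107 = BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE              (0x00000001)
	 *       | BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION  (0x00000002)
	 *       | BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE           (0x00000004)
	 *       | BTRFSIC_PRINT_MASK_INITIAL_TREE                  (0x00000100)
	 */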
-
-struct btrfsic_dev_state;
-struct btrfsic_state;
-
-struct btrfsic_block {
- u32 magic_num; /* only used for debug purposes */
- unsigned int is_metadata:1; /* if it is meta-data, not data-data */
- unsigned int is_superblock:1; /* if it is one of the superblocks */
- unsigned int is_iodone:1; /* if is done by lower subsystem */
- unsigned int iodone_w_error:1; /* error was indicated to endio */
- unsigned int never_written:1; /* block was added because it was
- * referenced, not because it was
- * written */
- unsigned int mirror_num:2; /* large enough to hold
- * BTRFS_SUPER_MIRROR_MAX */
- struct btrfsic_dev_state *dev_state;
- u64 dev_bytenr; /* key, physical byte num on disk */
- u64 logical_bytenr; /* logical byte num on disk */
- u64 generation;
- struct btrfs_disk_key disk_key; /* extra info to print in case of
- * issues, will not always be correct */
- struct list_head collision_resolving_node; /* list node */
- struct list_head all_blocks_node; /* list node */
-
- /* the following two lists contain block_link items */
- struct list_head ref_to_list; /* list */
- struct list_head ref_from_list; /* list */
- struct btrfsic_block *next_in_same_bio;
- void *orig_bio_bh_private;
- union {
- bio_end_io_t *bio;
- bh_end_io_t *bh;
- } orig_bio_bh_end_io;
- int submit_bio_bh_rw;
- u64 flush_gen; /* only valid if !never_written */
-};
-
-/*
- * Elements of this type are allocated dynamically and required because
- * each block object can refer to and can be referred to from multiple
- * blocks. The key to look them up in the hashtable is the dev_bytenr of
- * the block referred to plus the one of the block referring to it.
- * The fact that they are searchable via a hashtable and that a
- * ref_cnt is maintained is not required for the btrfs integrity
- * check algorithm itself, it is only used to make the output more
- * readable in case that an error is detected (an error is defined
- * as a write operation to a block while that block is still referenced).
- */
-struct btrfsic_block_link {
- u32 magic_num; /* only used for debug purposes */
- u32 ref_cnt;
- struct list_head node_ref_to; /* list node */
- struct list_head node_ref_from; /* list node */
- struct list_head collision_resolving_node; /* list node */
- struct btrfsic_block *block_ref_to;
- struct btrfsic_block *block_ref_from;
- u64 parent_generation;
-};
-
-struct btrfsic_dev_state {
- u32 magic_num; /* only used for debug purposes */
- struct block_device *bdev;
- struct btrfsic_state *state;
- struct list_head collision_resolving_node; /* list node */
- struct btrfsic_block dummy_block_for_bio_bh_flush;
- u64 last_flush_gen;
- char name[BDEVNAME_SIZE];
-};
-
-struct btrfsic_block_hashtable {
- struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
-};
-
-struct btrfsic_block_link_hashtable {
- struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
-};
-
-struct btrfsic_dev_state_hashtable {
- struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
-};
-
-struct btrfsic_block_data_ctx {
- u64 start; /* virtual bytenr */
- u64 dev_bytenr; /* physical bytenr on device */
- u32 len;
- struct btrfsic_dev_state *dev;
- char *data;
- struct buffer_head *bh; /* do not use if set to NULL */
-};
-
-/* This structure is used to implement recursion without occupying
- * any stack space, refer to btrfsic_process_metablock() */
-struct btrfsic_stack_frame {
- u32 magic;
- u32 nr;
- int error;
- int i;
- int limit_nesting;
- int num_copies;
- int mirror_num;
- struct btrfsic_block *block;
- struct btrfsic_block_data_ctx *block_ctx;
- struct btrfsic_block *next_block;
- struct btrfsic_block_data_ctx next_block_ctx;
- struct btrfs_header *hdr;
- struct btrfsic_stack_frame *prev;
-};
-
-/* Some state per mounted filesystem */
-struct btrfsic_state {
- u32 print_mask;
- int include_extent_data;
- int csum_size;
- struct list_head all_blocks_list;
- struct btrfsic_block_hashtable block_hashtable;
- struct btrfsic_block_link_hashtable block_link_hashtable;
- struct btrfs_root *root;
- u64 max_superblock_generation;
- struct btrfsic_block *latest_superblock;
-};
-
-static void btrfsic_block_init(struct btrfsic_block *b);
-static struct btrfsic_block *btrfsic_block_alloc(void);
-static void btrfsic_block_free(struct btrfsic_block *b);
-static void btrfsic_block_link_init(struct btrfsic_block_link *n);
-static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
-static void btrfsic_block_link_free(struct btrfsic_block_link *n);
-static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
-static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
-static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
-static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
-static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
- struct btrfsic_block_hashtable *h);
-static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
-static struct btrfsic_block *btrfsic_block_hashtable_lookup(
- struct block_device *bdev,
- u64 dev_bytenr,
- struct btrfsic_block_hashtable *h);
-static void btrfsic_block_link_hashtable_init(
- struct btrfsic_block_link_hashtable *h);
-static void btrfsic_block_link_hashtable_add(
- struct btrfsic_block_link *l,
- struct btrfsic_block_link_hashtable *h);
-static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
-static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
- struct block_device *bdev_ref_to,
- u64 dev_bytenr_ref_to,
- struct block_device *bdev_ref_from,
- u64 dev_bytenr_ref_from,
- struct btrfsic_block_link_hashtable *h);
-static void btrfsic_dev_state_hashtable_init(
- struct btrfsic_dev_state_hashtable *h);
-static void btrfsic_dev_state_hashtable_add(
- struct btrfsic_dev_state *ds,
- struct btrfsic_dev_state_hashtable *h);
-static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
-static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
- struct block_device *bdev,
- struct btrfsic_dev_state_hashtable *h);
-static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
-static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
-static int btrfsic_process_superblock(struct btrfsic_state *state,
- struct btrfs_fs_devices *fs_devices);
-static int btrfsic_process_metablock(struct btrfsic_state *state,
- struct btrfsic_block *block,
- struct btrfsic_block_data_ctx *block_ctx,
- struct btrfs_header *hdr,
- int limit_nesting, int force_iodone_flag);
-static int btrfsic_create_link_to_next_block(
- struct btrfsic_state *state,
- struct btrfsic_block *block,
- struct btrfsic_block_data_ctx
- *block_ctx, u64 next_bytenr,
- int limit_nesting,
- struct btrfsic_block_data_ctx *next_block_ctx,
- struct btrfsic_block **next_blockp,
- int force_iodone_flag,
- int *num_copiesp, int *mirror_nump,
- struct btrfs_disk_key *disk_key,
- u64 parent_generation);
-static int btrfsic_handle_extent_data(struct btrfsic_state *state,
- struct btrfsic_block *block,
- struct btrfsic_block_data_ctx *block_ctx,
- u32 item_offset, int force_iodone_flag);
-static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
- struct btrfsic_block_data_ctx *block_ctx_out,
- int mirror_num);
-static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
- u32 len, struct block_device *bdev,
- struct btrfsic_block_data_ctx *block_ctx_out);
-static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
-static int btrfsic_read_block(struct btrfsic_state *state,
- struct btrfsic_block_data_ctx *block_ctx);
-static void btrfsic_dump_database(struct btrfsic_state *state);
-static int btrfsic_test_for_metadata(struct btrfsic_state *state,
- const u8 *data, unsigned int size);
-static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
- u64 dev_bytenr, u8 *mapped_data,
- unsigned int len, struct bio *bio,
- int *bio_is_patched,
- struct buffer_head *bh,
- int submit_bio_bh_rw);
-static int btrfsic_process_written_superblock(
- struct btrfsic_state *state,
- struct btrfsic_block *const block,
- struct btrfs_super_block *const super_hdr);
-static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status);
-static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
-static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
- const struct btrfsic_block *block,
- int recursion_level);
-static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
- struct btrfsic_block *const block,
- int recursion_level);
-static void btrfsic_print_add_link(const struct btrfsic_state *state,
- const struct btrfsic_block_link *l);
-static void btrfsic_print_rem_link(const struct btrfsic_state *state,
- const struct btrfsic_block_link *l);
-static char btrfsic_get_block_type(const struct btrfsic_state *state,
- const struct btrfsic_block *block);
-static void btrfsic_dump_tree(const struct btrfsic_state *state);
-static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
- const struct btrfsic_block *block,
- int indent_level);
-static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
- struct btrfsic_state *state,
- struct btrfsic_block_data_ctx *next_block_ctx,
- struct btrfsic_block *next_block,
- struct btrfsic_block *from_block,
- u64 parent_generation);
-static struct btrfsic_block *btrfsic_block_lookup_or_add(
- struct btrfsic_state *state,
- struct btrfsic_block_data_ctx *block_ctx,
- const char *additional_string,
- int is_metadata,
- int is_iodone,
- int never_written,
- int mirror_num,
- int *was_created);
-static int btrfsic_process_superblock_dev_mirror(
- struct btrfsic_state *state,
- struct btrfsic_dev_state *dev_state,
- struct btrfs_device *device,
- int superblock_mirror_num,
- struct btrfsic_dev_state **selected_dev_state,
- struct btrfs_super_block *selected_super);
-static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
- struct block_device *bdev);
-static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
- u64 bytenr,
- struct btrfsic_dev_state *dev_state,
- u64 dev_bytenr, char *data);
-
-static struct mutex btrfsic_mutex;
-static int btrfsic_is_initialized;
-static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
-
-
-static void btrfsic_block_init(struct btrfsic_block *b)
-{
- b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
- b->dev_state = NULL;
- b->dev_bytenr = 0;
- b->logical_bytenr = 0;
- b->generation = BTRFSIC_GENERATION_UNKNOWN;
- b->disk_key.objectid = 0;
- b->disk_key.type = 0;
- b->disk_key.offset = 0;
- b->is_metadata = 0;
- b->is_superblock = 0;
- b->is_iodone = 0;
- b->iodone_w_error = 0;
- b->never_written = 0;
- b->mirror_num = 0;
- b->next_in_same_bio = NULL;
- b->orig_bio_bh_private = NULL;
- b->orig_bio_bh_end_io.bio = NULL;
- INIT_LIST_HEAD(&b->collision_resolving_node);
- INIT_LIST_HEAD(&b->all_blocks_node);
- INIT_LIST_HEAD(&b->ref_to_list);
- INIT_LIST_HEAD(&b->ref_from_list);
- b->submit_bio_bh_rw = 0;
- b->flush_gen = 0;
-}
-
-static struct btrfsic_block *btrfsic_block_alloc(void)
-{
- struct btrfsic_block *b;
-
- b = kzalloc(sizeof(*b), GFP_NOFS);
- if (NULL != b)
- btrfsic_block_init(b);
-
- return b;
-}
-
-static void btrfsic_block_free(struct btrfsic_block *b)
-{
- BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
- kfree(b);
-}
-
-static void btrfsic_block_link_init(struct btrfsic_block_link *l)
-{
- l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
- l->ref_cnt = 1;
- INIT_LIST_HEAD(&l->node_ref_to);
- INIT_LIST_HEAD(&l->node_ref_from);
- INIT_LIST_HEAD(&l->collision_resolving_node);
- l->block_ref_to = NULL;
- l->block_ref_from = NULL;
-}
-
-static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
-{
- struct btrfsic_block_link *l;
-
- l = kzalloc(sizeof(*l), GFP_NOFS);
- if (NULL != l)
- btrfsic_block_link_init(l);
-
- return l;
-}
-
-static void btrfsic_block_link_free(struct btrfsic_block_link *l)
-{
- BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
- kfree(l);
-}
-
-static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
-{
- ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
- ds->bdev = NULL;
- ds->state = NULL;
- ds->name[0] = '\0';
- INIT_LIST_HEAD(&ds->collision_resolving_node);
- ds->last_flush_gen = 0;
- btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
- ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
- ds->dummy_block_for_bio_bh_flush.dev_state = ds;
-}
-
-static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
-{
- struct btrfsic_dev_state *ds;
-
- ds = kzalloc(sizeof(*ds), GFP_NOFS);
- if (NULL != ds)
- btrfsic_dev_state_init(ds);
-
- return ds;
-}
-
-static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
-{
- BUG_ON(!(NULL == ds ||
- BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
- kfree(ds);
-}
-
-static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
-{
- int i;
-
- for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
- INIT_LIST_HEAD(h->table + i);
-}
-
-static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
- struct btrfsic_block_hashtable *h)
-{
- const unsigned int hashval =
- (((unsigned int)(b->dev_bytenr >> 16)) ^
- ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
- (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
-
- list_add(&b->collision_resolving_node, h->table + hashval);
-}
-
-static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
-{
- list_del(&b->collision_resolving_node);
-}
-
-static struct btrfsic_block *btrfsic_block_hashtable_lookup(
- struct block_device *bdev,
- u64 dev_bytenr,
- struct btrfsic_block_hashtable *h)
-{
- const unsigned int hashval =
- (((unsigned int)(dev_bytenr >> 16)) ^
- ((unsigned int)((uintptr_t)bdev))) &
- (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
- struct list_head *elem;
-
- list_for_each(elem, h->table + hashval) {
- struct btrfsic_block *const b =
- list_entry(elem, struct btrfsic_block,
- collision_resolving_node);
-
- if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
- return b;
- }
-
- return NULL;
-}
-
-static void btrfsic_block_link_hashtable_init(
- struct btrfsic_block_link_hashtable *h)
-{
- int i;
-
- for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
- INIT_LIST_HEAD(h->table + i);
-}
-
-static void btrfsic_block_link_hashtable_add(
- struct btrfsic_block_link *l,
- struct btrfsic_block_link_hashtable *h)
-{
- const unsigned int hashval =
- (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
- ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
- ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
- ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
- & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
-
- BUG_ON(NULL == l->block_ref_to);
- BUG_ON(NULL == l->block_ref_from);
- list_add(&l->collision_resolving_node, h->table + hashval);
-}
-
-static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
-{
- list_del(&l->collision_resolving_node);
-}
-
-static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
- struct block_device *bdev_ref_to,
- u64 dev_bytenr_ref_to,
- struct block_device *bdev_ref_from,
- u64 dev_bytenr_ref_from,
- struct btrfsic_block_link_hashtable *h)
-{
- const unsigned int hashval =
- (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
- ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
- ((unsigned int)((uintptr_t)bdev_ref_to)) ^
- ((unsigned int)((uintptr_t)bdev_ref_from))) &
- (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
- struct list_head *elem;
-
- list_for_each(elem, h->table + hashval) {
- struct btrfsic_block_link *const l =
- list_entry(elem, struct btrfsic_block_link,
- collision_resolving_node);
-
- BUG_ON(NULL == l->block_ref_to);
- BUG_ON(NULL == l->block_ref_from);
- if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
- l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
- l->block_ref_from->dev_state->bdev == bdev_ref_from &&
- l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
- return l;
- }
-
- return NULL;
-}
-
-static void btrfsic_dev_state_hashtable_init(
- struct btrfsic_dev_state_hashtable *h)
-{
- int i;
-
- for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
- INIT_LIST_HEAD(h->table + i);
-}
-
-static void btrfsic_dev_state_hashtable_add(
- struct btrfsic_dev_state *ds,
- struct btrfsic_dev_state_hashtable *h)
-{
- const unsigned int hashval =
- (((unsigned int)((uintptr_t)ds->bdev)) &
- (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
-
- list_add(&ds->collision_resolving_node, h->table + hashval);
-}
-
-static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
-{
- list_del(&ds->collision_resolving_node);
-}
-
-static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
- struct block_device *bdev,
- struct btrfsic_dev_state_hashtable *h)
-{
- const unsigned int hashval =
- (((unsigned int)((uintptr_t)bdev)) &
- (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
- struct list_head *elem;
-
- list_for_each(elem, h->table + hashval) {
- struct btrfsic_dev_state *const ds =
- list_entry(elem, struct btrfsic_dev_state,
- collision_resolving_node);
-
- if (ds->bdev == bdev)
- return ds;
- }
-
- return NULL;
-}
-
-static int btrfsic_process_superblock(struct btrfsic_state *state,
- struct btrfs_fs_devices *fs_devices)
-{
- int ret = 0;
- struct btrfs_super_block *selected_super;
- struct list_head *dev_head = &fs_devices->devices;
- struct btrfs_device *device;
- struct btrfsic_dev_state *selected_dev_state = NULL;
- int pass;
-
- BUG_ON(NULL == state);
- selected_super = kmalloc(sizeof(*selected_super), GFP_NOFS);
- if (NULL == selected_super) {
- printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
- return -1;
- }
-
- list_for_each_entry(device, dev_head, dev_list) {
- int i;
- struct btrfsic_dev_state *dev_state;
-
- if (!device->bdev || !device->name)
- continue;
-
- dev_state = btrfsic_dev_state_lookup(device->bdev);
- BUG_ON(NULL == dev_state);
- for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
- ret = btrfsic_process_superblock_dev_mirror(
- state, dev_state, device, i,
- &selected_dev_state, selected_super);
- if (0 != ret && 0 == i) {
- kfree(selected_super);
- return ret;
- }
- }
- }
-
- if (NULL == state->latest_superblock) {
- printk(KERN_INFO "btrfsic: no superblock found!\n");
- kfree(selected_super);
- return -1;
- }
-
- state->csum_size = btrfs_super_csum_size(selected_super);
-
- for (pass = 0; pass < 3; pass++) {
- int num_copies;
- int mirror_num;
- u64 next_bytenr;
-
- switch (pass) {
- case 0:
- next_bytenr = btrfs_super_root(selected_super);
- if (state->print_mask &
- BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
- printk(KERN_INFO "root@%llu\n",
- (unsigned long long)next_bytenr);
- break;
- case 1:
- next_bytenr = btrfs_super_chunk_root(selected_super);
- if (state->print_mask &
- BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
- printk(KERN_INFO "chunk@%llu\n",
- (unsigned long long)next_bytenr);
- break;
- case 2:
- next_bytenr = btrfs_super_log_root(selected_super);
- if (0 == next_bytenr)
- continue;
- if (state->print_mask &
- BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
- printk(KERN_INFO "log@%llu\n",
- (unsigned long long)next_bytenr);
- break;
- }
-
- num_copies =
- btrfs_num_copies(&state->root->fs_info->mapping_tree,
- next_bytenr, PAGE_SIZE);
- if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
- printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
- (unsigned long long)next_bytenr, num_copies);
-
- for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
- struct btrfsic_block *next_block;
- struct btrfsic_block_data_ctx tmp_next_block_ctx;
- struct btrfsic_block_link *l;
- struct btrfs_header *hdr;
-
- ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE,
- &tmp_next_block_ctx,
- mirror_num);
- if (ret) {
- printk(KERN_INFO "btrfsic:"
- " btrfsic_map_block(root @%llu,"
- " mirror %d) failed!\n",
- (unsigned long long)next_bytenr,
- mirror_num);
- kfree(selected_super);
- return -1;
- }
-
- next_block = btrfsic_block_hashtable_lookup(
- tmp_next_block_ctx.dev->bdev,
- tmp_next_block_ctx.dev_bytenr,
- &state->block_hashtable);
- BUG_ON(NULL == next_block);
-
- l = btrfsic_block_link_hashtable_lookup(
- tmp_next_block_ctx.dev->bdev,
- tmp_next_block_ctx.dev_bytenr,
- state->latest_superblock->dev_state->
- bdev,
- state->latest_superblock->dev_bytenr,
- &state->block_link_hashtable);
- BUG_ON(NULL == l);
-
- ret = btrfsic_read_block(state, &tmp_next_block_ctx);
- if (ret < (int)BTRFSIC_BLOCK_SIZE) {
- printk(KERN_INFO
- "btrfsic: read @logical %llu failed!\n",
- (unsigned long long)
- tmp_next_block_ctx.start);
- btrfsic_release_block_ctx(&tmp_next_block_ctx);
- kfree(selected_super);
- return -1;
- }
-
- hdr = (struct btrfs_header *)tmp_next_block_ctx.data;
- ret = btrfsic_process_metablock(state,
- next_block,
- &tmp_next_block_ctx,
- hdr,
- BTRFS_MAX_LEVEL + 3, 1);
- btrfsic_release_block_ctx(&tmp_next_block_ctx);
- }
- }
-
- kfree(selected_super);
- return ret;
-}
-
-static int btrfsic_process_superblock_dev_mirror(
- struct btrfsic_state *state,
- struct btrfsic_dev_state *dev_state,
- struct btrfs_device *device,
- int superblock_mirror_num,
- struct btrfsic_dev_state **selected_dev_state,
- struct btrfs_super_block *selected_super)
-{
- struct btrfs_super_block *super_tmp;
- u64 dev_bytenr;
- struct buffer_head *bh;
- struct btrfsic_block *superblock_tmp;
- int pass;
- struct block_device *const superblock_bdev = device->bdev;
-
- /* super block bytenr is always the unmapped device bytenr */
- dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
- bh = __bread(superblock_bdev, dev_bytenr / 4096, 4096);
- if (NULL == bh)
- return -1;
- super_tmp = (struct btrfs_super_block *)
- (bh->b_data + (dev_bytenr & 4095));
-
- if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
- strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
- sizeof(super_tmp->magic)) ||
- memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE)) {
- brelse(bh);
- return 0;
- }
-
- superblock_tmp =
- btrfsic_block_hashtable_lookup(superblock_bdev,
- dev_bytenr,
- &state->block_hashtable);
- if (NULL == superblock_tmp) {
- superblock_tmp = btrfsic_block_alloc();
- if (NULL == superblock_tmp) {
- printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
- brelse(bh);
- return -1;
- }
- /* for superblock, only the dev_bytenr makes sense */
- superblock_tmp->dev_bytenr = dev_bytenr;
- superblock_tmp->dev_state = dev_state;
- superblock_tmp->logical_bytenr = dev_bytenr;
- superblock_tmp->generation = btrfs_super_generation(super_tmp);
- superblock_tmp->is_metadata = 1;
- superblock_tmp->is_superblock = 1;
- superblock_tmp->is_iodone = 1;
- superblock_tmp->never_written = 0;
- superblock_tmp->mirror_num = 1 + superblock_mirror_num;
- if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
- printk(KERN_INFO "New initial S-block (bdev %p, %s)"
- " @%llu (%s/%llu/%d)\n",
- superblock_bdev, device->name,
- (unsigned long long)dev_bytenr,
- dev_state->name,
- (unsigned long long)dev_bytenr,
- superblock_mirror_num);
- list_add(&superblock_tmp->all_blocks_node,
- &state->all_blocks_list);
- btrfsic_block_hashtable_add(superblock_tmp,
- &state->block_hashtable);
- }
-
- /* select the one with the highest generation field */
- if (btrfs_super_generation(super_tmp) >
- state->max_superblock_generation ||
- 0 == state->max_superblock_generation) {
- memcpy(selected_super, super_tmp, sizeof(*selected_super));
- *selected_dev_state = dev_state;
- state->max_superblock_generation =
- btrfs_super_generation(super_tmp);
- state->latest_superblock = superblock_tmp;
- }
-
- for (pass = 0; pass < 3; pass++) {
- u64 next_bytenr;
- int num_copies;
- int mirror_num;
- const char *additional_string = NULL;
- struct btrfs_disk_key tmp_disk_key;
-
- tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
- tmp_disk_key.offset = 0;
- switch (pass) {
- case 0:
- tmp_disk_key.objectid =
- cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
- additional_string = "initial root ";
- next_bytenr = btrfs_super_root(super_tmp);
- break;
- case 1:
- tmp_disk_key.objectid =
- cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
- additional_string = "initial chunk ";
- next_bytenr = btrfs_super_chunk_root(super_tmp);
- break;
- case 2:
- tmp_disk_key.objectid =
- cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
- additional_string = "initial log ";
- next_bytenr = btrfs_super_log_root(super_tmp);
- if (0 == next_bytenr)
- continue;
- break;
- }
-
- num_copies =
- btrfs_num_copies(&state->root->fs_info->mapping_tree,
- next_bytenr, PAGE_SIZE);
- if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
- printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
- (unsigned long long)next_bytenr, num_copies);
- for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
- struct btrfsic_block *next_block;
- struct btrfsic_block_data_ctx tmp_next_block_ctx;
- struct btrfsic_block_link *l;
-
- if (btrfsic_map_block(state, next_bytenr, PAGE_SIZE,
- &tmp_next_block_ctx,
- mirror_num)) {
- printk(KERN_INFO "btrfsic: btrfsic_map_block("
- "bytenr @%llu, mirror %d) failed!\n",
- (unsigned long long)next_bytenr,
- mirror_num);
- brelse(bh);
- return -1;
- }
-
- next_block = btrfsic_block_lookup_or_add(
- state, &tmp_next_block_ctx,
- additional_string, 1, 1, 0,
- mirror_num, NULL);
- if (NULL == next_block) {
- btrfsic_release_block_ctx(&tmp_next_block_ctx);
- brelse(bh);
- return -1;
- }
-
- next_block->disk_key = tmp_disk_key;
- next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
- l = btrfsic_block_link_lookup_or_add(
- state, &tmp_next_block_ctx,
- next_block, superblock_tmp,
- BTRFSIC_GENERATION_UNKNOWN);
- btrfsic_release_block_ctx(&tmp_next_block_ctx);
- if (NULL == l) {
- brelse(bh);
- return -1;
- }
- }
- }
- if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
- btrfsic_dump_tree_sub(state, superblock_tmp, 0);
-
- brelse(bh);
- return 0;
-}
-
-static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
-{
- struct btrfsic_stack_frame *sf;
-
- sf = kzalloc(sizeof(*sf), GFP_NOFS);
- if (NULL == sf)
- printk(KERN_INFO "btrfsic: alloc memory failed!\n");
- else
- sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
- return sf;
-}
-
-static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
-{
- BUG_ON(!(NULL == sf ||
- BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
- kfree(sf);
-}
-
-static int btrfsic_process_metablock(
- struct btrfsic_state *state,
- struct btrfsic_block *const first_block,
- struct btrfsic_block_data_ctx *const first_block_ctx,
- struct btrfs_header *const first_hdr,
- int first_limit_nesting, int force_iodone_flag)
-{
- struct btrfsic_stack_frame initial_stack_frame = { 0 };
- struct btrfsic_stack_frame *sf;
- struct btrfsic_stack_frame *next_stack;
-
- sf = &initial_stack_frame;
- sf->error = 0;
- sf->i = -1;
- sf->limit_nesting = first_limit_nesting;
- sf->block = first_block;
- sf->block_ctx = first_block_ctx;
- sf->next_block = NULL;
- sf->hdr = first_hdr;
- sf->prev = NULL;
-
-continue_with_new_stack_frame:
- sf->block->generation = le64_to_cpu(sf->hdr->generation);
- if (0 == sf->hdr->level) {
- struct btrfs_leaf *const leafhdr =
- (struct btrfs_leaf *)sf->hdr;
-
- if (-1 == sf->i) {
- sf->nr = le32_to_cpu(leafhdr->header.nritems);
-
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO
- "leaf %llu items %d generation %llu"
- " owner %llu\n",
- (unsigned long long)
- sf->block_ctx->start,
- sf->nr,
- (unsigned long long)
- le64_to_cpu(leafhdr->header.generation),
- (unsigned long long)
- le64_to_cpu(leafhdr->header.owner));
- }
-
-continue_with_current_leaf_stack_frame:
- if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
- sf->i++;
- sf->num_copies = 0;
- }
-
- if (sf->i < sf->nr) {
- struct btrfs_item *disk_item = leafhdr->items + sf->i;
- struct btrfs_disk_key *disk_key = &disk_item->key;
- u8 type;
- const u32 item_offset = le32_to_cpu(disk_item->offset);
-
- type = disk_key->type;
-
- if (BTRFS_ROOT_ITEM_KEY == type) {
- const struct btrfs_root_item *const root_item =
- (struct btrfs_root_item *)
- (sf->block_ctx->data +
- offsetof(struct btrfs_leaf, items) +
- item_offset);
- const u64 next_bytenr =
- le64_to_cpu(root_item->bytenr);
-
- sf->error =
- btrfsic_create_link_to_next_block(
- state,
- sf->block,
- sf->block_ctx,
- next_bytenr,
- sf->limit_nesting,
- &sf->next_block_ctx,
- &sf->next_block,
- force_iodone_flag,
- &sf->num_copies,
- &sf->mirror_num,
- disk_key,
- le64_to_cpu(root_item->
- generation));
- if (sf->error)
- goto one_stack_frame_backwards;
-
- if (NULL != sf->next_block) {
- struct btrfs_header *const next_hdr =
- (struct btrfs_header *)
- sf->next_block_ctx.data;
-
- next_stack =
- btrfsic_stack_frame_alloc();
- if (NULL == next_stack) {
- btrfsic_release_block_ctx(
- &sf->
- next_block_ctx);
- goto one_stack_frame_backwards;
- }
-
- next_stack->i = -1;
- next_stack->block = sf->next_block;
- next_stack->block_ctx =
- &sf->next_block_ctx;
- next_stack->next_block = NULL;
- next_stack->hdr = next_hdr;
- next_stack->limit_nesting =
- sf->limit_nesting - 1;
- next_stack->prev = sf;
- sf = next_stack;
- goto continue_with_new_stack_frame;
- }
- } else if (BTRFS_EXTENT_DATA_KEY == type &&
- state->include_extent_data) {
- sf->error = btrfsic_handle_extent_data(
- state,
- sf->block,
- sf->block_ctx,
- item_offset,
- force_iodone_flag);
- if (sf->error)
- goto one_stack_frame_backwards;
- }
-
- goto continue_with_current_leaf_stack_frame;
- }
- } else {
- struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
-
- if (-1 == sf->i) {
- sf->nr = le32_to_cpu(nodehdr->header.nritems);
-
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO "node %llu level %d items %d"
- " generation %llu owner %llu\n",
- (unsigned long long)
- sf->block_ctx->start,
- nodehdr->header.level, sf->nr,
- (unsigned long long)
- le64_to_cpu(nodehdr->header.generation),
- (unsigned long long)
- le64_to_cpu(nodehdr->header.owner));
- }
-
-continue_with_current_node_stack_frame:
- if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
- sf->i++;
- sf->num_copies = 0;
- }
-
- if (sf->i < sf->nr) {
- struct btrfs_key_ptr *disk_key_ptr =
- nodehdr->ptrs + sf->i;
- const u64 next_bytenr =
- le64_to_cpu(disk_key_ptr->blockptr);
-
- sf->error = btrfsic_create_link_to_next_block(
- state,
- sf->block,
- sf->block_ctx,
- next_bytenr,
- sf->limit_nesting,
- &sf->next_block_ctx,
- &sf->next_block,
- force_iodone_flag,
- &sf->num_copies,
- &sf->mirror_num,
- &disk_key_ptr->key,
- le64_to_cpu(disk_key_ptr->generation));
- if (sf->error)
- goto one_stack_frame_backwards;
-
- if (NULL != sf->next_block) {
- struct btrfs_header *const next_hdr =
- (struct btrfs_header *)
- sf->next_block_ctx.data;
-
- next_stack = btrfsic_stack_frame_alloc();
- if (NULL == next_stack)
- goto one_stack_frame_backwards;
-
- next_stack->i = -1;
- next_stack->block = sf->next_block;
- next_stack->block_ctx = &sf->next_block_ctx;
- next_stack->next_block = NULL;
- next_stack->hdr = next_hdr;
- next_stack->limit_nesting =
- sf->limit_nesting - 1;
- next_stack->prev = sf;
- sf = next_stack;
- goto continue_with_new_stack_frame;
- }
-
- goto continue_with_current_node_stack_frame;
- }
- }
-
-one_stack_frame_backwards:
- if (NULL != sf->prev) {
- struct btrfsic_stack_frame *const prev = sf->prev;
-
- /* the one for the initial block is freed in the caller */
- btrfsic_release_block_ctx(sf->block_ctx);
-
- if (sf->error) {
- prev->error = sf->error;
- btrfsic_stack_frame_free(sf);
- sf = prev;
- goto one_stack_frame_backwards;
- }
-
- btrfsic_stack_frame_free(sf);
- sf = prev;
- goto continue_with_new_stack_frame;
- } else {
- BUG_ON(&initial_stack_frame != sf);
- }
-
- return sf->error;
-}
-
-static int btrfsic_create_link_to_next_block(
- struct btrfsic_state *state,
- struct btrfsic_block *block,
- struct btrfsic_block_data_ctx *block_ctx,
- u64 next_bytenr,
- int limit_nesting,
- struct btrfsic_block_data_ctx *next_block_ctx,
- struct btrfsic_block **next_blockp,
- int force_iodone_flag,
- int *num_copiesp, int *mirror_nump,
- struct btrfs_disk_key *disk_key,
- u64 parent_generation)
-{
- struct btrfsic_block *next_block = NULL;
- int ret;
- struct btrfsic_block_link *l;
- int did_alloc_block_link;
- int block_was_created;
-
- *next_blockp = NULL;
- if (0 == *num_copiesp) {
- *num_copiesp =
- btrfs_num_copies(&state->root->fs_info->mapping_tree,
- next_bytenr, PAGE_SIZE);
- if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
- printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
- (unsigned long long)next_bytenr, *num_copiesp);
- *mirror_nump = 1;
- }
-
- if (*mirror_nump > *num_copiesp)
- return 0;
-
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO
- "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
- *mirror_nump);
- ret = btrfsic_map_block(state, next_bytenr,
- BTRFSIC_BLOCK_SIZE,
- next_block_ctx, *mirror_nump);
- if (ret) {
- printk(KERN_INFO
- "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
- (unsigned long long)next_bytenr, *mirror_nump);
- btrfsic_release_block_ctx(next_block_ctx);
- *next_blockp = NULL;
- return -1;
- }
-
- next_block = btrfsic_block_lookup_or_add(state,
- next_block_ctx, "referenced ",
- 1, force_iodone_flag,
- !force_iodone_flag,
- *mirror_nump,
- &block_was_created);
- if (NULL == next_block) {
- btrfsic_release_block_ctx(next_block_ctx);
- *next_blockp = NULL;
- return -1;
- }
- if (block_was_created) {
- l = NULL;
- next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
- } else {
- if (next_block->logical_bytenr != next_bytenr &&
- !(!next_block->is_metadata &&
- 0 == next_block->logical_bytenr)) {
- printk(KERN_INFO
- "Referenced block @%llu (%s/%llu/%d)"
- " found in hash table, %c,"
- " bytenr mismatch (!= stored %llu).\n",
- (unsigned long long)next_bytenr,
- next_block_ctx->dev->name,
- (unsigned long long)next_block_ctx->dev_bytenr,
- *mirror_nump,
- btrfsic_get_block_type(state, next_block),
- (unsigned long long)next_block->logical_bytenr);
- } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO
- "Referenced block @%llu (%s/%llu/%d)"
- " found in hash table, %c.\n",
- (unsigned long long)next_bytenr,
- next_block_ctx->dev->name,
- (unsigned long long)next_block_ctx->dev_bytenr,
- *mirror_nump,
- btrfsic_get_block_type(state, next_block));
- next_block->logical_bytenr = next_bytenr;
-
- next_block->mirror_num = *mirror_nump;
- l = btrfsic_block_link_hashtable_lookup(
- next_block_ctx->dev->bdev,
- next_block_ctx->dev_bytenr,
- block_ctx->dev->bdev,
- block_ctx->dev_bytenr,
- &state->block_link_hashtable);
- }
-
- next_block->disk_key = *disk_key;
- if (NULL == l) {
- l = btrfsic_block_link_alloc();
- if (NULL == l) {
- printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
- btrfsic_release_block_ctx(next_block_ctx);
- *next_blockp = NULL;
- return -1;
- }
-
- did_alloc_block_link = 1;
- l->block_ref_to = next_block;
- l->block_ref_from = block;
- l->ref_cnt = 1;
- l->parent_generation = parent_generation;
-
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- btrfsic_print_add_link(state, l);
-
- list_add(&l->node_ref_to, &block->ref_to_list);
- list_add(&l->node_ref_from, &next_block->ref_from_list);
-
- btrfsic_block_link_hashtable_add(l,
- &state->block_link_hashtable);
- } else {
- did_alloc_block_link = 0;
- if (0 == limit_nesting) {
- l->ref_cnt++;
- l->parent_generation = parent_generation;
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- btrfsic_print_add_link(state, l);
- }
- }
-
- if (limit_nesting > 0 && did_alloc_block_link) {
- ret = btrfsic_read_block(state, next_block_ctx);
- if (ret < (int)BTRFSIC_BLOCK_SIZE) {
- printk(KERN_INFO
- "btrfsic: read block @logical %llu failed!\n",
- (unsigned long long)next_bytenr);
- btrfsic_release_block_ctx(next_block_ctx);
- *next_blockp = NULL;
- return -1;
- }
-
- *next_blockp = next_block;
- } else {
- *next_blockp = NULL;
- }
- (*mirror_nump)++;
-
- return 0;
-}
-
-static int btrfsic_handle_extent_data(
- struct btrfsic_state *state,
- struct btrfsic_block *block,
- struct btrfsic_block_data_ctx *block_ctx,
- u32 item_offset, int force_iodone_flag)
-{
- int ret;
- struct btrfs_file_extent_item *file_extent_item =
- (struct btrfs_file_extent_item *)(block_ctx->data +
- offsetof(struct btrfs_leaf,
- items) + item_offset);
- u64 next_bytenr =
- le64_to_cpu(file_extent_item->disk_bytenr) +
- le64_to_cpu(file_extent_item->offset);
- u64 num_bytes = le64_to_cpu(file_extent_item->num_bytes);
- u64 generation = le64_to_cpu(file_extent_item->generation);
- struct btrfsic_block_link *l;
-
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
- printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
- " offset = %llu, num_bytes = %llu\n",
- file_extent_item->type,
- (unsigned long long)
- le64_to_cpu(file_extent_item->disk_bytenr),
- (unsigned long long)
- le64_to_cpu(file_extent_item->offset),
- (unsigned long long)
- le64_to_cpu(file_extent_item->num_bytes));
- if (BTRFS_FILE_EXTENT_REG != file_extent_item->type ||
- ((u64)0) == le64_to_cpu(file_extent_item->disk_bytenr))
- return 0;
- while (num_bytes > 0) {
- u32 chunk_len;
- int num_copies;
- int mirror_num;
-
- if (num_bytes > BTRFSIC_BLOCK_SIZE)
- chunk_len = BTRFSIC_BLOCK_SIZE;
- else
- chunk_len = num_bytes;
-
- num_copies =
- btrfs_num_copies(&state->root->fs_info->mapping_tree,
- next_bytenr, PAGE_SIZE);
- if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
- printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
- (unsigned long long)next_bytenr, num_copies);
- for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
- struct btrfsic_block_data_ctx next_block_ctx;
- struct btrfsic_block *next_block;
- int block_was_created;
-
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO "btrfsic_handle_extent_data("
- "mirror_num=%d)\n", mirror_num);
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
- printk(KERN_INFO
- "\tdisk_bytenr = %llu, num_bytes %u\n",
- (unsigned long long)next_bytenr,
- chunk_len);
- ret = btrfsic_map_block(state, next_bytenr,
- chunk_len, &next_block_ctx,
- mirror_num);
- if (ret) {
- printk(KERN_INFO
- "btrfsic: btrfsic_map_block(@%llu,"
- " mirror=%d) failed!\n",
- (unsigned long long)next_bytenr,
- mirror_num);
- return -1;
- }
-
- next_block = btrfsic_block_lookup_or_add(
- state,
- &next_block_ctx,
- "referenced ",
- 0,
- force_iodone_flag,
- !force_iodone_flag,
- mirror_num,
- &block_was_created);
- if (NULL == next_block) {
- printk(KERN_INFO
- "btrfsic: error, kmalloc failed!\n");
- btrfsic_release_block_ctx(&next_block_ctx);
- return -1;
- }
- if (!block_was_created) {
- if (next_block->logical_bytenr != next_bytenr &&
- !(!next_block->is_metadata &&
- 0 == next_block->logical_bytenr)) {
- printk(KERN_INFO
- "Referenced block"
- " @%llu (%s/%llu/%d)"
- " found in hash table, D,"
- " bytenr mismatch"
- " (!= stored %llu).\n",
- (unsigned long long)next_bytenr,
- next_block_ctx.dev->name,
- (unsigned long long)
- next_block_ctx.dev_bytenr,
- mirror_num,
- (unsigned long long)
- next_block->logical_bytenr);
- }
- next_block->logical_bytenr = next_bytenr;
- next_block->mirror_num = mirror_num;
- }
-
- l = btrfsic_block_link_lookup_or_add(state,
- &next_block_ctx,
- next_block, block,
- generation);
- btrfsic_release_block_ctx(&next_block_ctx);
- if (NULL == l)
- return -1;
- }
-
- next_bytenr += chunk_len;
- num_bytes -= chunk_len;
- }
-
- return 0;
-}
-
-static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
- struct btrfsic_block_data_ctx *block_ctx_out,
- int mirror_num)
-{
- int ret;
- u64 length;
- struct btrfs_bio *multi = NULL;
- struct btrfs_device *device;
-
- length = len;
- ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ,
- bytenr, &length, &multi, mirror_num);
-
- device = multi->stripes[0].dev;
- block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
- block_ctx_out->dev_bytenr = multi->stripes[0].physical;
- block_ctx_out->start = bytenr;
- block_ctx_out->len = len;
- block_ctx_out->data = NULL;
- block_ctx_out->bh = NULL;
-
- if (0 == ret)
- kfree(multi);
- if (NULL == block_ctx_out->dev) {
- ret = -ENXIO;
- printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
- }
-
- return ret;
-}
-
-static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
- u32 len, struct block_device *bdev,
- struct btrfsic_block_data_ctx *block_ctx_out)
-{
- block_ctx_out->dev = btrfsic_dev_state_lookup(bdev);
- block_ctx_out->dev_bytenr = bytenr;
- block_ctx_out->start = bytenr;
- block_ctx_out->len = len;
- block_ctx_out->data = NULL;
- block_ctx_out->bh = NULL;
- if (NULL != block_ctx_out->dev) {
- return 0;
- } else {
- printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n");
- return -ENXIO;
- }
-}
-
-static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
-{
- if (NULL != block_ctx->bh) {
- brelse(block_ctx->bh);
- block_ctx->bh = NULL;
- }
-}
-
-static int btrfsic_read_block(struct btrfsic_state *state,
- struct btrfsic_block_data_ctx *block_ctx)
-{
- block_ctx->bh = NULL;
- if (block_ctx->dev_bytenr & 4095) {
- printk(KERN_INFO
- "btrfsic: read_block() with unaligned bytenr %llu\n",
- (unsigned long long)block_ctx->dev_bytenr);
- return -1;
- }
- if (block_ctx->len > 4096) {
- printk(KERN_INFO
- "btrfsic: read_block() with too huge size %d\n",
- block_ctx->len);
- return -1;
- }
-
- block_ctx->bh = __bread(block_ctx->dev->bdev,
- block_ctx->dev_bytenr >> 12, 4096);
- if (NULL == block_ctx->bh)
- return -1;
- block_ctx->data = block_ctx->bh->b_data;
-
- return block_ctx->len;
-}
-
-static void btrfsic_dump_database(struct btrfsic_state *state)
-{
- struct list_head *elem_all;
-
- BUG_ON(NULL == state);
-
- printk(KERN_INFO "all_blocks_list:\n");
- list_for_each(elem_all, &state->all_blocks_list) {
- const struct btrfsic_block *const b_all =
- list_entry(elem_all, struct btrfsic_block,
- all_blocks_node);
- struct list_head *elem_ref_to;
- struct list_head *elem_ref_from;
-
- printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
- btrfsic_get_block_type(state, b_all),
- (unsigned long long)b_all->logical_bytenr,
- b_all->dev_state->name,
- (unsigned long long)b_all->dev_bytenr,
- b_all->mirror_num);
-
- list_for_each(elem_ref_to, &b_all->ref_to_list) {
- const struct btrfsic_block_link *const l =
- list_entry(elem_ref_to,
- struct btrfsic_block_link,
- node_ref_to);
-
- printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
- " refers %u* to"
- " %c @%llu (%s/%llu/%d)\n",
- btrfsic_get_block_type(state, b_all),
- (unsigned long long)b_all->logical_bytenr,
- b_all->dev_state->name,
- (unsigned long long)b_all->dev_bytenr,
- b_all->mirror_num,
- l->ref_cnt,
- btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)
- l->block_ref_to->logical_bytenr,
- l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
- l->block_ref_to->mirror_num);
- }
-
- list_for_each(elem_ref_from, &b_all->ref_from_list) {
- const struct btrfsic_block_link *const l =
- list_entry(elem_ref_from,
- struct btrfsic_block_link,
- node_ref_from);
-
- printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
- " is ref %u* from"
- " %c @%llu (%s/%llu/%d)\n",
- btrfsic_get_block_type(state, b_all),
- (unsigned long long)b_all->logical_bytenr,
- b_all->dev_state->name,
- (unsigned long long)b_all->dev_bytenr,
- b_all->mirror_num,
- l->ref_cnt,
- btrfsic_get_block_type(state, l->block_ref_from),
- (unsigned long long)
- l->block_ref_from->logical_bytenr,
- l->block_ref_from->dev_state->name,
- (unsigned long long)
- l->block_ref_from->dev_bytenr,
- l->block_ref_from->mirror_num);
- }
-
- printk(KERN_INFO "\n");
- }
-}
-
-/*
- * Test whether the disk block contains a tree block (leaf or node)
- * (note that this test fails for the super block)
- */
-static int btrfsic_test_for_metadata(struct btrfsic_state *state,
- const u8 *data, unsigned int size)
-{
- struct btrfs_header *h;
- u8 csum[BTRFS_CSUM_SIZE];
- u32 crc = ~(u32)0;
- int fail = 0;
- int crc_fail = 0;
-
- h = (struct btrfs_header *)data;
-
- if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
- fail++;
-
- crc = crc32c(crc, data + BTRFS_CSUM_SIZE, PAGE_SIZE - BTRFS_CSUM_SIZE);
- btrfs_csum_final(crc, csum);
- if (memcmp(csum, h->csum, state->csum_size))
- crc_fail++;
-
- return fail || crc_fail;
-}
-
-static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
- u64 dev_bytenr,
- u8 *mapped_data, unsigned int len,
- struct bio *bio,
- int *bio_is_patched,
- struct buffer_head *bh,
- int submit_bio_bh_rw)
-{
- int is_metadata;
- struct btrfsic_block *block;
- struct btrfsic_block_data_ctx block_ctx;
- int ret;
- struct btrfsic_state *state = dev_state->state;
- struct block_device *bdev = dev_state->bdev;
-
- WARN_ON(len > PAGE_SIZE);
- is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_data, len));
- if (NULL != bio_is_patched)
- *bio_is_patched = 0;
-
- block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
- &state->block_hashtable);
- if (NULL != block) {
- u64 bytenr = 0;
- struct list_head *elem_ref_to;
- struct list_head *tmp_ref_to;
-
- if (block->is_superblock) {
- bytenr = le64_to_cpu(((struct btrfs_super_block *)
- mapped_data)->bytenr);
- is_metadata = 1;
- if (state->print_mask &
- BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
- printk(KERN_INFO
- "[before new superblock is written]:\n");
- btrfsic_dump_tree_sub(state, block, 0);
- }
- }
- if (is_metadata) {
- if (!block->is_superblock) {
- bytenr = le64_to_cpu(((struct btrfs_header *)
- mapped_data)->bytenr);
- btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
- dev_state,
- dev_bytenr,
- mapped_data);
- }
- if (block->logical_bytenr != bytenr) {
- printk(KERN_INFO
- "Written block @%llu (%s/%llu/%d)"
- " found in hash table, %c,"
- " bytenr mismatch"
- " (!= stored %llu).\n",
- (unsigned long long)bytenr,
- dev_state->name,
- (unsigned long long)dev_bytenr,
- block->mirror_num,
- btrfsic_get_block_type(state, block),
- (unsigned long long)
- block->logical_bytenr);
- block->logical_bytenr = bytenr;
- } else if (state->print_mask &
- BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO
- "Written block @%llu (%s/%llu/%d)"
- " found in hash table, %c.\n",
- (unsigned long long)bytenr,
- dev_state->name,
- (unsigned long long)dev_bytenr,
- block->mirror_num,
- btrfsic_get_block_type(state, block));
- } else {
- bytenr = block->logical_bytenr;
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO
- "Written block @%llu (%s/%llu/%d)"
- " found in hash table, %c.\n",
- (unsigned long long)bytenr,
- dev_state->name,
- (unsigned long long)dev_bytenr,
- block->mirror_num,
- btrfsic_get_block_type(state, block));
- }
-
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO
- "ref_to_list: %cE, ref_from_list: %cE\n",
- list_empty(&block->ref_to_list) ? ' ' : '!',
- list_empty(&block->ref_from_list) ? ' ' : '!');
- if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
- printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
- " @%llu (%s/%llu/%d), old(gen=%llu,"
- " objectid=%llu, type=%d, offset=%llu),"
- " new(gen=%llu),"
- " which is referenced by most recent superblock"
- " (superblockgen=%llu)!\n",
- btrfsic_get_block_type(state, block),
- (unsigned long long)bytenr,
- dev_state->name,
- (unsigned long long)dev_bytenr,
- block->mirror_num,
- (unsigned long long)block->generation,
- (unsigned long long)
- le64_to_cpu(block->disk_key.objectid),
- block->disk_key.type,
- (unsigned long long)
- le64_to_cpu(block->disk_key.offset),
- (unsigned long long)
- le64_to_cpu(((struct btrfs_header *)
- mapped_data)->generation),
- (unsigned long long)
- state->max_superblock_generation);
- btrfsic_dump_tree(state);
- }
-
- if (!block->is_iodone && !block->never_written) {
- printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
- " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
- " which is not yet iodone!\n",
- btrfsic_get_block_type(state, block),
- (unsigned long long)bytenr,
- dev_state->name,
- (unsigned long long)dev_bytenr,
- block->mirror_num,
- (unsigned long long)block->generation,
- (unsigned long long)
- le64_to_cpu(((struct btrfs_header *)
- mapped_data)->generation));
- /* it would not be safe to go on */
- btrfsic_dump_tree(state);
- return;
- }
-
-		/*
-		 * Clear all references of this block. Do not free
-		 * the block itself even if it is not referenced anymore
-		 * because it still carries valuable information
-		 * like whether it was ever written and IO completed.
-		 */
- list_for_each_safe(elem_ref_to, tmp_ref_to,
- &block->ref_to_list) {
- struct btrfsic_block_link *const l =
- list_entry(elem_ref_to,
- struct btrfsic_block_link,
- node_ref_to);
-
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- btrfsic_print_rem_link(state, l);
- l->ref_cnt--;
- if (0 == l->ref_cnt) {
- list_del(&l->node_ref_to);
- list_del(&l->node_ref_from);
- btrfsic_block_link_hashtable_remove(l);
- btrfsic_block_link_free(l);
- }
- }
-
- if (block->is_superblock)
- ret = btrfsic_map_superblock(state, bytenr, len,
- bdev, &block_ctx);
- else
- ret = btrfsic_map_block(state, bytenr, len,
- &block_ctx, 0);
- if (ret) {
- printk(KERN_INFO
- "btrfsic: btrfsic_map_block(root @%llu)"
- " failed!\n", (unsigned long long)bytenr);
- return;
- }
- block_ctx.data = mapped_data;
-		/* the following is required in case of writes to mirrors,
-		 * use the same device state and dev_bytenr that were used
-		 * for the lookup */
- block_ctx.dev = dev_state;
- block_ctx.dev_bytenr = dev_bytenr;
-
- if (is_metadata || state->include_extent_data) {
- block->never_written = 0;
- block->iodone_w_error = 0;
- if (NULL != bio) {
- block->is_iodone = 0;
- BUG_ON(NULL == bio_is_patched);
- if (!*bio_is_patched) {
- block->orig_bio_bh_private =
- bio->bi_private;
- block->orig_bio_bh_end_io.bio =
- bio->bi_end_io;
- block->next_in_same_bio = NULL;
- bio->bi_private = block;
- bio->bi_end_io = btrfsic_bio_end_io;
- *bio_is_patched = 1;
- } else {
- struct btrfsic_block *chained_block =
- (struct btrfsic_block *)
- bio->bi_private;
-
- BUG_ON(NULL == chained_block);
- block->orig_bio_bh_private =
- chained_block->orig_bio_bh_private;
- block->orig_bio_bh_end_io.bio =
- chained_block->orig_bio_bh_end_io.
- bio;
- block->next_in_same_bio = chained_block;
- bio->bi_private = block;
- }
- } else if (NULL != bh) {
- block->is_iodone = 0;
- block->orig_bio_bh_private = bh->b_private;
- block->orig_bio_bh_end_io.bh = bh->b_end_io;
- block->next_in_same_bio = NULL;
- bh->b_private = block;
- bh->b_end_io = btrfsic_bh_end_io;
- } else {
- block->is_iodone = 1;
- block->orig_bio_bh_private = NULL;
- block->orig_bio_bh_end_io.bio = NULL;
- block->next_in_same_bio = NULL;
- }
- }
-
- block->flush_gen = dev_state->last_flush_gen + 1;
- block->submit_bio_bh_rw = submit_bio_bh_rw;
- if (is_metadata) {
- block->logical_bytenr = bytenr;
- block->is_metadata = 1;
- if (block->is_superblock) {
- ret = btrfsic_process_written_superblock(
- state,
- block,
- (struct btrfs_super_block *)
- mapped_data);
- if (state->print_mask &
- BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
- printk(KERN_INFO
- "[after new superblock is written]:\n");
- btrfsic_dump_tree_sub(state, block, 0);
- }
- } else {
- block->mirror_num = 0; /* unknown */
- ret = btrfsic_process_metablock(
- state,
- block,
- &block_ctx,
- (struct btrfs_header *)
- block_ctx.data,
- 0, 0);
- }
- if (ret)
- printk(KERN_INFO
- "btrfsic: btrfsic_process_metablock"
- "(root @%llu) failed!\n",
- (unsigned long long)dev_bytenr);
- } else {
- block->is_metadata = 0;
- block->mirror_num = 0; /* unknown */
- block->generation = BTRFSIC_GENERATION_UNKNOWN;
- if (!state->include_extent_data
- && list_empty(&block->ref_from_list)) {
- /*
- * disk block is overwritten with extent
- * data (not meta data) and we are configured
- * to not include extent data: take the
- * chance and free the block's memory
- */
- btrfsic_block_hashtable_remove(block);
- list_del(&block->all_blocks_node);
- btrfsic_block_free(block);
- }
- }
- btrfsic_release_block_ctx(&block_ctx);
- } else {
- /* block has not been found in hash table */
- u64 bytenr;
-
- if (!is_metadata) {
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO "Written block (%s/%llu/?)"
- " !found in hash table, D.\n",
- dev_state->name,
- (unsigned long long)dev_bytenr);
- if (!state->include_extent_data)
- return; /* ignore that written D block */
-
- /* this is getting ugly for the
- * include_extent_data case... */
- bytenr = 0; /* unknown */
- block_ctx.start = bytenr;
- block_ctx.len = len;
- block_ctx.bh = NULL;
- } else {
- bytenr = le64_to_cpu(((struct btrfs_header *)
- mapped_data)->bytenr);
- btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
- dev_bytenr,
- mapped_data);
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO
- "Written block @%llu (%s/%llu/?)"
- " !found in hash table, M.\n",
- (unsigned long long)bytenr,
- dev_state->name,
- (unsigned long long)dev_bytenr);
-
- ret = btrfsic_map_block(state, bytenr, len, &block_ctx,
- 0);
- if (ret) {
- printk(KERN_INFO
- "btrfsic: btrfsic_map_block(root @%llu)"
- " failed!\n",
- (unsigned long long)dev_bytenr);
- return;
- }
- }
- block_ctx.data = mapped_data;
-		/* the following is required in case of writes to mirrors,
-		 * use the same device state and dev_bytenr that were used
-		 * for the lookup */
- block_ctx.dev = dev_state;
- block_ctx.dev_bytenr = dev_bytenr;
-
- block = btrfsic_block_alloc();
- if (NULL == block) {
- printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
- btrfsic_release_block_ctx(&block_ctx);
- return;
- }
- block->dev_state = dev_state;
- block->dev_bytenr = dev_bytenr;
- block->logical_bytenr = bytenr;
- block->is_metadata = is_metadata;
- block->never_written = 0;
- block->iodone_w_error = 0;
- block->mirror_num = 0; /* unknown */
- block->flush_gen = dev_state->last_flush_gen + 1;
- block->submit_bio_bh_rw = submit_bio_bh_rw;
- if (NULL != bio) {
- block->is_iodone = 0;
- BUG_ON(NULL == bio_is_patched);
- if (!*bio_is_patched) {
- block->orig_bio_bh_private = bio->bi_private;
- block->orig_bio_bh_end_io.bio = bio->bi_end_io;
- block->next_in_same_bio = NULL;
- bio->bi_private = block;
- bio->bi_end_io = btrfsic_bio_end_io;
- *bio_is_patched = 1;
- } else {
- struct btrfsic_block *chained_block =
- (struct btrfsic_block *)
- bio->bi_private;
-
- BUG_ON(NULL == chained_block);
- block->orig_bio_bh_private =
- chained_block->orig_bio_bh_private;
- block->orig_bio_bh_end_io.bio =
- chained_block->orig_bio_bh_end_io.bio;
- block->next_in_same_bio = chained_block;
- bio->bi_private = block;
- }
- } else if (NULL != bh) {
- block->is_iodone = 0;
- block->orig_bio_bh_private = bh->b_private;
- block->orig_bio_bh_end_io.bh = bh->b_end_io;
- block->next_in_same_bio = NULL;
- bh->b_private = block;
- bh->b_end_io = btrfsic_bh_end_io;
- } else {
- block->is_iodone = 1;
- block->orig_bio_bh_private = NULL;
- block->orig_bio_bh_end_io.bio = NULL;
- block->next_in_same_bio = NULL;
- }
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO
- "New written %c-block @%llu (%s/%llu/%d)\n",
- is_metadata ? 'M' : 'D',
- (unsigned long long)block->logical_bytenr,
- block->dev_state->name,
- (unsigned long long)block->dev_bytenr,
- block->mirror_num);
- list_add(&block->all_blocks_node, &state->all_blocks_list);
- btrfsic_block_hashtable_add(block, &state->block_hashtable);
-
- if (is_metadata) {
- ret = btrfsic_process_metablock(state, block,
- &block_ctx,
- (struct btrfs_header *)
- block_ctx.data, 0, 0);
- if (ret)
- printk(KERN_INFO
- "btrfsic: process_metablock(root @%llu)"
- " failed!\n",
- (unsigned long long)dev_bytenr);
- }
- btrfsic_release_block_ctx(&block_ctx);
- }
-}
-
-static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
-{
- struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
- int iodone_w_error;
-
-	/* mutex is not held! This is not safe if IO is not yet completed
-	 * on umount */
- iodone_w_error = 0;
- if (bio_error_status)
- iodone_w_error = 1;
-
- BUG_ON(NULL == block);
- bp->bi_private = block->orig_bio_bh_private;
- bp->bi_end_io = block->orig_bio_bh_end_io.bio;
-
- do {
- struct btrfsic_block *next_block;
- struct btrfsic_dev_state *const dev_state = block->dev_state;
-
- if ((dev_state->state->print_mask &
- BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
- printk(KERN_INFO
- "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
- bio_error_status,
- btrfsic_get_block_type(dev_state->state, block),
- (unsigned long long)block->logical_bytenr,
- dev_state->name,
- (unsigned long long)block->dev_bytenr,
- block->mirror_num);
- next_block = block->next_in_same_bio;
- block->iodone_w_error = iodone_w_error;
- if (block->submit_bio_bh_rw & REQ_FLUSH) {
- dev_state->last_flush_gen++;
- if ((dev_state->state->print_mask &
- BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
- printk(KERN_INFO
- "bio_end_io() new %s flush_gen=%llu\n",
- dev_state->name,
- (unsigned long long)
- dev_state->last_flush_gen);
- }
- if (block->submit_bio_bh_rw & REQ_FUA)
- block->flush_gen = 0; /* FUA completed means block is
- * on disk */
- block->is_iodone = 1; /* for FLUSH, this releases the block */
- block = next_block;
- } while (NULL != block);
-
- bp->bi_end_io(bp, bio_error_status);
-}
-
-static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
-{
- struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
- int iodone_w_error = !uptodate;
- struct btrfsic_dev_state *dev_state;
-
- BUG_ON(NULL == block);
- dev_state = block->dev_state;
- if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
- printk(KERN_INFO
- "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
- iodone_w_error,
- btrfsic_get_block_type(dev_state->state, block),
- (unsigned long long)block->logical_bytenr,
- block->dev_state->name,
- (unsigned long long)block->dev_bytenr,
- block->mirror_num);
-
- block->iodone_w_error = iodone_w_error;
- if (block->submit_bio_bh_rw & REQ_FLUSH) {
- dev_state->last_flush_gen++;
- if ((dev_state->state->print_mask &
- BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
- printk(KERN_INFO
- "bh_end_io() new %s flush_gen=%llu\n",
- dev_state->name,
- (unsigned long long)dev_state->last_flush_gen);
- }
- if (block->submit_bio_bh_rw & REQ_FUA)
- block->flush_gen = 0; /* FUA completed means block is on disk */
-
- bh->b_private = block->orig_bio_bh_private;
- bh->b_end_io = block->orig_bio_bh_end_io.bh;
- block->is_iodone = 1; /* for FLUSH, this releases the block */
- bh->b_end_io(bh, uptodate);
-}
-
-static int btrfsic_process_written_superblock(
- struct btrfsic_state *state,
- struct btrfsic_block *const superblock,
- struct btrfs_super_block *const super_hdr)
-{
- int pass;
-
- superblock->generation = btrfs_super_generation(super_hdr);
- if (!(superblock->generation > state->max_superblock_generation ||
- 0 == state->max_superblock_generation)) {
- if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
- printk(KERN_INFO
- "btrfsic: superblock @%llu (%s/%llu/%d)"
- " with old gen %llu <= %llu\n",
- (unsigned long long)superblock->logical_bytenr,
- superblock->dev_state->name,
- (unsigned long long)superblock->dev_bytenr,
- superblock->mirror_num,
- (unsigned long long)
- btrfs_super_generation(super_hdr),
- (unsigned long long)
- state->max_superblock_generation);
- } else {
- if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
- printk(KERN_INFO
- "btrfsic: got new superblock @%llu (%s/%llu/%d)"
- " with new gen %llu > %llu\n",
- (unsigned long long)superblock->logical_bytenr,
- superblock->dev_state->name,
- (unsigned long long)superblock->dev_bytenr,
- superblock->mirror_num,
- (unsigned long long)
- btrfs_super_generation(super_hdr),
- (unsigned long long)
- state->max_superblock_generation);
-
- state->max_superblock_generation =
- btrfs_super_generation(super_hdr);
- state->latest_superblock = superblock;
- }
-
- for (pass = 0; pass < 3; pass++) {
- int ret;
- u64 next_bytenr;
- struct btrfsic_block *next_block;
- struct btrfsic_block_data_ctx tmp_next_block_ctx;
- struct btrfsic_block_link *l;
- int num_copies;
- int mirror_num;
- const char *additional_string = NULL;
- struct btrfs_disk_key tmp_disk_key;
-
- tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
- tmp_disk_key.offset = 0;
-
- switch (pass) {
- case 0:
- tmp_disk_key.objectid =
- cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
- additional_string = "root ";
- next_bytenr = btrfs_super_root(super_hdr);
- if (state->print_mask &
- BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
- printk(KERN_INFO "root@%llu\n",
- (unsigned long long)next_bytenr);
- break;
- case 1:
- tmp_disk_key.objectid =
- cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
- additional_string = "chunk ";
- next_bytenr = btrfs_super_chunk_root(super_hdr);
- if (state->print_mask &
- BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
- printk(KERN_INFO "chunk@%llu\n",
- (unsigned long long)next_bytenr);
- break;
- case 2:
- tmp_disk_key.objectid =
- cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
- additional_string = "log ";
- next_bytenr = btrfs_super_log_root(super_hdr);
- if (0 == next_bytenr)
- continue;
- if (state->print_mask &
- BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
- printk(KERN_INFO "log@%llu\n",
- (unsigned long long)next_bytenr);
- break;
- }
-
- num_copies =
- btrfs_num_copies(&state->root->fs_info->mapping_tree,
- next_bytenr, PAGE_SIZE);
- if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
- printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
- (unsigned long long)next_bytenr, num_copies);
- for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
- int was_created;
-
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO
- "btrfsic_process_written_superblock("
- "mirror_num=%d)\n", mirror_num);
- ret = btrfsic_map_block(state, next_bytenr, PAGE_SIZE,
- &tmp_next_block_ctx,
- mirror_num);
- if (ret) {
- printk(KERN_INFO
- "btrfsic: btrfsic_map_block(@%llu,"
- " mirror=%d) failed!\n",
- (unsigned long long)next_bytenr,
- mirror_num);
- return -1;
- }
-
- next_block = btrfsic_block_lookup_or_add(
- state,
- &tmp_next_block_ctx,
- additional_string,
- 1, 0, 1,
- mirror_num,
- &was_created);
- if (NULL == next_block) {
- printk(KERN_INFO
- "btrfsic: error, kmalloc failed!\n");
- btrfsic_release_block_ctx(&tmp_next_block_ctx);
- return -1;
- }
-
- next_block->disk_key = tmp_disk_key;
- if (was_created)
- next_block->generation =
- BTRFSIC_GENERATION_UNKNOWN;
- l = btrfsic_block_link_lookup_or_add(
- state,
- &tmp_next_block_ctx,
- next_block,
- superblock,
- BTRFSIC_GENERATION_UNKNOWN);
- btrfsic_release_block_ctx(&tmp_next_block_ctx);
- if (NULL == l)
- return -1;
- }
- }
-
- if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) {
- WARN_ON(1);
- btrfsic_dump_tree(state);
- }
-
- return 0;
-}
-
-static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
- struct btrfsic_block *const block,
- int recursion_level)
-{
- struct list_head *elem_ref_to;
- int ret = 0;
-
- if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
- /*
- * Note that this situation can happen and does not
- * indicate an error in regular cases. It happens
- * when disk blocks are freed and later reused.
- * The check-integrity module is not aware of any
- * block free operations, it just recognizes block
- * write operations. Therefore it keeps the linkage
- * information for a block until a block is
- * rewritten. This can temporarily cause incorrect
-		 * and even circular linkage information. This
- * causes no harm unless such blocks are referenced
- * by the most recent super block.
- */
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO
- "btrfsic: abort cyclic linkage (case 1).\n");
-
- return ret;
- }
-
- /*
-	 * Recursion is used here because the stack space used per level
-	 * is very small and the max recursion depth is limited.
- */
- list_for_each(elem_ref_to, &block->ref_to_list) {
- const struct btrfsic_block_link *const l =
- list_entry(elem_ref_to, struct btrfsic_block_link,
- node_ref_to);
-
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO
- "rl=%d, %c @%llu (%s/%llu/%d)"
- " %u* refers to %c @%llu (%s/%llu/%d)\n",
- recursion_level,
- btrfsic_get_block_type(state, block),
- (unsigned long long)block->logical_bytenr,
- block->dev_state->name,
- (unsigned long long)block->dev_bytenr,
- block->mirror_num,
- l->ref_cnt,
- btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)
- l->block_ref_to->logical_bytenr,
- l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
- l->block_ref_to->mirror_num);
- if (l->block_ref_to->never_written) {
- printk(KERN_INFO "btrfs: attempt to write superblock"
- " which references block %c @%llu (%s/%llu/%d)"
- " which is never written!\n",
- btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)
- l->block_ref_to->logical_bytenr,
- l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
- l->block_ref_to->mirror_num);
- ret = -1;
- } else if (!l->block_ref_to->is_iodone) {
- printk(KERN_INFO "btrfs: attempt to write superblock"
- " which references block %c @%llu (%s/%llu/%d)"
- " which is not yet iodone!\n",
- btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)
- l->block_ref_to->logical_bytenr,
- l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
- l->block_ref_to->mirror_num);
- ret = -1;
- } else if (l->parent_generation !=
- l->block_ref_to->generation &&
- BTRFSIC_GENERATION_UNKNOWN !=
- l->parent_generation &&
- BTRFSIC_GENERATION_UNKNOWN !=
- l->block_ref_to->generation) {
- printk(KERN_INFO "btrfs: attempt to write superblock"
- " which references block %c @%llu (%s/%llu/%d)"
- " with generation %llu !="
- " parent generation %llu!\n",
- btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)
- l->block_ref_to->logical_bytenr,
- l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
- l->block_ref_to->mirror_num,
- (unsigned long long)l->block_ref_to->generation,
- (unsigned long long)l->parent_generation);
- ret = -1;
- } else if (l->block_ref_to->flush_gen >
- l->block_ref_to->dev_state->last_flush_gen) {
- printk(KERN_INFO "btrfs: attempt to write superblock"
- " which references block %c @%llu (%s/%llu/%d)"
- " which is not flushed out of disk's write cache"
- " (block flush_gen=%llu,"
- " dev->flush_gen=%llu)!\n",
- btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)
- l->block_ref_to->logical_bytenr,
- l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
- l->block_ref_to->mirror_num,
- (unsigned long long)block->flush_gen,
- (unsigned long long)
- l->block_ref_to->dev_state->last_flush_gen);
- ret = -1;
- } else if (-1 == btrfsic_check_all_ref_blocks(state,
- l->block_ref_to,
- recursion_level +
- 1)) {
- ret = -1;
- }
- }
-
- return ret;
-}
-
-static int btrfsic_is_block_ref_by_superblock(
- const struct btrfsic_state *state,
- const struct btrfsic_block *block,
- int recursion_level)
-{
- struct list_head *elem_ref_from;
-
- if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
- /* refer to comment at "abort cyclic linkage (case 1)" */
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO
- "btrfsic: abort cyclic linkage (case 2).\n");
-
- return 0;
- }
-
- /*
-	 * Recursion is used here because the stack space used per level
-	 * is very small and the max recursion depth is limited.
- */
- list_for_each(elem_ref_from, &block->ref_from_list) {
- const struct btrfsic_block_link *const l =
- list_entry(elem_ref_from, struct btrfsic_block_link,
- node_ref_from);
-
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO
- "rl=%d, %c @%llu (%s/%llu/%d)"
- " is ref %u* from %c @%llu (%s/%llu/%d)\n",
- recursion_level,
- btrfsic_get_block_type(state, block),
- (unsigned long long)block->logical_bytenr,
- block->dev_state->name,
- (unsigned long long)block->dev_bytenr,
- block->mirror_num,
- l->ref_cnt,
- btrfsic_get_block_type(state, l->block_ref_from),
- (unsigned long long)
- l->block_ref_from->logical_bytenr,
- l->block_ref_from->dev_state->name,
- (unsigned long long)
- l->block_ref_from->dev_bytenr,
- l->block_ref_from->mirror_num);
- if (l->block_ref_from->is_superblock &&
- state->latest_superblock->dev_bytenr ==
- l->block_ref_from->dev_bytenr &&
- state->latest_superblock->dev_state->bdev ==
- l->block_ref_from->dev_state->bdev)
- return 1;
- else if (btrfsic_is_block_ref_by_superblock(state,
- l->block_ref_from,
- recursion_level +
- 1))
- return 1;
- }
-
- return 0;
-}
-
-static void btrfsic_print_add_link(const struct btrfsic_state *state,
- const struct btrfsic_block_link *l)
-{
- printk(KERN_INFO
- "Add %u* link from %c @%llu (%s/%llu/%d)"
- " to %c @%llu (%s/%llu/%d).\n",
- l->ref_cnt,
- btrfsic_get_block_type(state, l->block_ref_from),
- (unsigned long long)l->block_ref_from->logical_bytenr,
- l->block_ref_from->dev_state->name,
- (unsigned long long)l->block_ref_from->dev_bytenr,
- l->block_ref_from->mirror_num,
- btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)l->block_ref_to->logical_bytenr,
- l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
- l->block_ref_to->mirror_num);
-}
-
-static void btrfsic_print_rem_link(const struct btrfsic_state *state,
- const struct btrfsic_block_link *l)
-{
- printk(KERN_INFO
- "Rem %u* link from %c @%llu (%s/%llu/%d)"
- " to %c @%llu (%s/%llu/%d).\n",
- l->ref_cnt,
- btrfsic_get_block_type(state, l->block_ref_from),
- (unsigned long long)l->block_ref_from->logical_bytenr,
- l->block_ref_from->dev_state->name,
- (unsigned long long)l->block_ref_from->dev_bytenr,
- l->block_ref_from->mirror_num,
- btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)l->block_ref_to->logical_bytenr,
- l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
- l->block_ref_to->mirror_num);
-}
-
-static char btrfsic_get_block_type(const struct btrfsic_state *state,
- const struct btrfsic_block *block)
-{
- if (block->is_superblock &&
- state->latest_superblock->dev_bytenr == block->dev_bytenr &&
- state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
- return 'S';
- else if (block->is_superblock)
- return 's';
- else if (block->is_metadata)
- return 'M';
- else
- return 'D';
-}
-
-static void btrfsic_dump_tree(const struct btrfsic_state *state)
-{
- btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
-}
-
-static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
- const struct btrfsic_block *block,
- int indent_level)
-{
- struct list_head *elem_ref_to;
- int indent_add;
- static char buf[80];
- int cursor_position;
-
- /*
-	 * It would be better to fill an on-stack buffer with a complete line
-	 * and dump it at once when it is time to print a newline character.
- */
-
- /*
-	 * Recursion is used here because the stack space used per level
-	 * is very small and the max recursion depth is limited.
- */
- indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
- btrfsic_get_block_type(state, block),
- (unsigned long long)block->logical_bytenr,
- block->dev_state->name,
- (unsigned long long)block->dev_bytenr,
- block->mirror_num);
- if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
- printk("[...]\n");
- return;
- }
- printk(buf);
- indent_level += indent_add;
- if (list_empty(&block->ref_to_list)) {
- printk("\n");
- return;
- }
- if (block->mirror_num > 1 &&
- !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
- printk(" [...]\n");
- return;
- }
-
- cursor_position = indent_level;
- list_for_each(elem_ref_to, &block->ref_to_list) {
- const struct btrfsic_block_link *const l =
- list_entry(elem_ref_to, struct btrfsic_block_link,
- node_ref_to);
-
- while (cursor_position < indent_level) {
- printk(" ");
- cursor_position++;
- }
- if (l->ref_cnt > 1)
- indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
- else
- indent_add = sprintf(buf, " --> ");
- if (indent_level + indent_add >
- BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
- printk("[...]\n");
- cursor_position = 0;
- continue;
- }
-
- printk(buf);
-
- btrfsic_dump_tree_sub(state, l->block_ref_to,
- indent_level + indent_add);
- cursor_position = 0;
- }
-}
-
-static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
- struct btrfsic_state *state,
- struct btrfsic_block_data_ctx *next_block_ctx,
- struct btrfsic_block *next_block,
- struct btrfsic_block *from_block,
- u64 parent_generation)
-{
- struct btrfsic_block_link *l;
-
- l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
- next_block_ctx->dev_bytenr,
- from_block->dev_state->bdev,
- from_block->dev_bytenr,
- &state->block_link_hashtable);
- if (NULL == l) {
- l = btrfsic_block_link_alloc();
- if (NULL == l) {
- printk(KERN_INFO
- "btrfsic: error, kmalloc" " failed!\n");
- return NULL;
- }
-
- l->block_ref_to = next_block;
- l->block_ref_from = from_block;
- l->ref_cnt = 1;
- l->parent_generation = parent_generation;
-
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- btrfsic_print_add_link(state, l);
-
- list_add(&l->node_ref_to, &from_block->ref_to_list);
- list_add(&l->node_ref_from, &next_block->ref_from_list);
-
- btrfsic_block_link_hashtable_add(l,
- &state->block_link_hashtable);
- } else {
- l->ref_cnt++;
- l->parent_generation = parent_generation;
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- btrfsic_print_add_link(state, l);
- }
-
- return l;
-}
-
-static struct btrfsic_block *btrfsic_block_lookup_or_add(
- struct btrfsic_state *state,
- struct btrfsic_block_data_ctx *block_ctx,
- const char *additional_string,
- int is_metadata,
- int is_iodone,
- int never_written,
- int mirror_num,
- int *was_created)
-{
- struct btrfsic_block *block;
-
- block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
- block_ctx->dev_bytenr,
- &state->block_hashtable);
- if (NULL == block) {
- struct btrfsic_dev_state *dev_state;
-
- block = btrfsic_block_alloc();
- if (NULL == block) {
- printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
- return NULL;
- }
- dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
- if (NULL == dev_state) {
- printk(KERN_INFO
- "btrfsic: error, lookup dev_state failed!\n");
- btrfsic_block_free(block);
- return NULL;
- }
- block->dev_state = dev_state;
- block->dev_bytenr = block_ctx->dev_bytenr;
- block->logical_bytenr = block_ctx->start;
- block->is_metadata = is_metadata;
- block->is_iodone = is_iodone;
- block->never_written = never_written;
- block->mirror_num = mirror_num;
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- printk(KERN_INFO
- "New %s%c-block @%llu (%s/%llu/%d)\n",
- additional_string,
- btrfsic_get_block_type(state, block),
- (unsigned long long)block->logical_bytenr,
- dev_state->name,
- (unsigned long long)block->dev_bytenr,
- mirror_num);
- list_add(&block->all_blocks_node, &state->all_blocks_list);
- btrfsic_block_hashtable_add(block, &state->block_hashtable);
- if (NULL != was_created)
- *was_created = 1;
- } else {
- if (NULL != was_created)
- *was_created = 0;
- }
-
- return block;
-}
-
-static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
- u64 bytenr,
- struct btrfsic_dev_state *dev_state,
- u64 dev_bytenr, char *data)
-{
- int num_copies;
- int mirror_num;
- int ret;
- struct btrfsic_block_data_ctx block_ctx;
- int match = 0;
-
- num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree,
- bytenr, PAGE_SIZE);
-
- for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
- ret = btrfsic_map_block(state, bytenr, PAGE_SIZE,
- &block_ctx, mirror_num);
- if (ret) {
- printk(KERN_INFO "btrfsic:"
- " btrfsic_map_block(logical @%llu,"
- " mirror %d) failed!\n",
- (unsigned long long)bytenr, mirror_num);
- continue;
- }
-
- if (dev_state->bdev == block_ctx.dev->bdev &&
- dev_bytenr == block_ctx.dev_bytenr) {
- match++;
- btrfsic_release_block_ctx(&block_ctx);
- break;
- }
- btrfsic_release_block_ctx(&block_ctx);
- }
-
- if (!match) {
- printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
- " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
- " phys_bytenr=%llu)!\n",
- (unsigned long long)bytenr, dev_state->name,
- (unsigned long long)dev_bytenr);
- for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
- ret = btrfsic_map_block(state, bytenr, PAGE_SIZE,
- &block_ctx, mirror_num);
- if (ret)
- continue;
-
- printk(KERN_INFO "Read logical bytenr @%llu maps to"
- " (%s/%llu/%d)\n",
- (unsigned long long)bytenr,
- block_ctx.dev->name,
- (unsigned long long)block_ctx.dev_bytenr,
- mirror_num);
- }
- WARN_ON(1);
- }
-}
-
-static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
- struct block_device *bdev)
-{
- struct btrfsic_dev_state *ds;
-
- ds = btrfsic_dev_state_hashtable_lookup(bdev,
- &btrfsic_dev_state_hashtable);
- return ds;
-}
-
-int btrfsic_submit_bh(int rw, struct buffer_head *bh)
-{
- struct btrfsic_dev_state *dev_state;
-
- if (!btrfsic_is_initialized)
- return submit_bh(rw, bh);
-
- mutex_lock(&btrfsic_mutex);
- /* since btrfsic_submit_bh() might also be called before
- * btrfsic_mount(), this might return NULL */
- dev_state = btrfsic_dev_state_lookup(bh->b_bdev);
-
- /* Only called to write the superblock (incl. FLUSH/FUA) */
- if (NULL != dev_state &&
- (rw & WRITE) && bh->b_size > 0) {
- u64 dev_bytenr;
-
- dev_bytenr = 4096 * bh->b_blocknr;
- if (dev_state->state->print_mask &
- BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
- printk(KERN_INFO
- "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
- " size=%lu, data=%p, bdev=%p)\n",
- rw, (unsigned long)bh->b_blocknr,
- (unsigned long long)dev_bytenr,
- (unsigned long)bh->b_size, bh->b_data,
- bh->b_bdev);
- btrfsic_process_written_block(dev_state, dev_bytenr,
- bh->b_data, bh->b_size, NULL,
- NULL, bh, rw);
- } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
- if (dev_state->state->print_mask &
- BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
- printk(KERN_INFO
- "submit_bh(rw=0x%x) FLUSH, bdev=%p)\n",
- rw, bh->b_bdev);
- if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
- if ((dev_state->state->print_mask &
- (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
- BTRFSIC_PRINT_MASK_VERBOSE)))
- printk(KERN_INFO
- "btrfsic_submit_bh(%s) with FLUSH"
- " but dummy block already in use"
- " (ignored)!\n",
- dev_state->name);
- } else {
- struct btrfsic_block *const block =
- &dev_state->dummy_block_for_bio_bh_flush;
-
- block->is_iodone = 0;
- block->never_written = 0;
- block->iodone_w_error = 0;
- block->flush_gen = dev_state->last_flush_gen + 1;
- block->submit_bio_bh_rw = rw;
- block->orig_bio_bh_private = bh->b_private;
- block->orig_bio_bh_end_io.bh = bh->b_end_io;
- block->next_in_same_bio = NULL;
- bh->b_private = block;
- bh->b_end_io = btrfsic_bh_end_io;
- }
- }
- mutex_unlock(&btrfsic_mutex);
- return submit_bh(rw, bh);
-}
-
-void btrfsic_submit_bio(int rw, struct bio *bio)
-{
- struct btrfsic_dev_state *dev_state;
-
- if (!btrfsic_is_initialized) {
- submit_bio(rw, bio);
- return;
- }
-
- mutex_lock(&btrfsic_mutex);
- /* since btrfsic_submit_bio() is also called before
- * btrfsic_mount(), this might return NULL */
- dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
- if (NULL != dev_state &&
- (rw & WRITE) && NULL != bio->bi_io_vec) {
- unsigned int i;
- u64 dev_bytenr;
- int bio_is_patched;
-
- dev_bytenr = 512 * bio->bi_sector;
- bio_is_patched = 0;
- if (dev_state->state->print_mask &
- BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
- printk(KERN_INFO
- "submit_bio(rw=0x%x, bi_vcnt=%u,"
- " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n",
- rw, bio->bi_vcnt, (unsigned long)bio->bi_sector,
- (unsigned long long)dev_bytenr,
- bio->bi_bdev);
-
- for (i = 0; i < bio->bi_vcnt; i++) {
- u8 *mapped_data;
-
- mapped_data = kmap(bio->bi_io_vec[i].bv_page);
- if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
- BTRFSIC_PRINT_MASK_VERBOSE) ==
- (dev_state->state->print_mask &
- (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
- BTRFSIC_PRINT_MASK_VERBOSE)))
- printk(KERN_INFO
- "#%u: page=%p, mapped=%p, len=%u,"
- " offset=%u\n",
- i, bio->bi_io_vec[i].bv_page,
- mapped_data,
- bio->bi_io_vec[i].bv_len,
- bio->bi_io_vec[i].bv_offset);
- btrfsic_process_written_block(dev_state, dev_bytenr,
- mapped_data,
- bio->bi_io_vec[i].bv_len,
- bio, &bio_is_patched,
- NULL, rw);
- kunmap(bio->bi_io_vec[i].bv_page);
- dev_bytenr += bio->bi_io_vec[i].bv_len;
- }
- } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
- if (dev_state->state->print_mask &
- BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
- printk(KERN_INFO
- "submit_bio(rw=0x%x) FLUSH, bdev=%p)\n",
- rw, bio->bi_bdev);
- if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
- if ((dev_state->state->print_mask &
- (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
- BTRFSIC_PRINT_MASK_VERBOSE)))
- printk(KERN_INFO
- "btrfsic_submit_bio(%s) with FLUSH"
- " but dummy block already in use"
- " (ignored)!\n",
- dev_state->name);
- } else {
- struct btrfsic_block *const block =
- &dev_state->dummy_block_for_bio_bh_flush;
-
- block->is_iodone = 0;
- block->never_written = 0;
- block->iodone_w_error = 0;
- block->flush_gen = dev_state->last_flush_gen + 1;
- block->submit_bio_bh_rw = rw;
- block->orig_bio_bh_private = bio->bi_private;
- block->orig_bio_bh_end_io.bio = bio->bi_end_io;
- block->next_in_same_bio = NULL;
- bio->bi_private = block;
- bio->bi_end_io = btrfsic_bio_end_io;
- }
- }
- mutex_unlock(&btrfsic_mutex);
-
- submit_bio(rw, bio);
-}
-
-int btrfsic_mount(struct btrfs_root *root,
- struct btrfs_fs_devices *fs_devices,
- int including_extent_data, u32 print_mask)
-{
- int ret;
- struct btrfsic_state *state;
- struct list_head *dev_head = &fs_devices->devices;
- struct btrfs_device *device;
-
- state = kzalloc(sizeof(*state), GFP_NOFS);
- if (NULL == state) {
- printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n");
- return -1;
- }
-
- if (!btrfsic_is_initialized) {
- mutex_init(&btrfsic_mutex);
- btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
- btrfsic_is_initialized = 1;
- }
- mutex_lock(&btrfsic_mutex);
- state->root = root;
- state->print_mask = print_mask;
- state->include_extent_data = including_extent_data;
- state->csum_size = 0;
- INIT_LIST_HEAD(&state->all_blocks_list);
- btrfsic_block_hashtable_init(&state->block_hashtable);
- btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
- state->max_superblock_generation = 0;
- state->latest_superblock = NULL;
-
- list_for_each_entry(device, dev_head, dev_list) {
- struct btrfsic_dev_state *ds;
- char *p;
-
- if (!device->bdev || !device->name)
- continue;
-
- ds = btrfsic_dev_state_alloc();
- if (NULL == ds) {
- printk(KERN_INFO
- "btrfs check-integrity: kmalloc() failed!\n");
- mutex_unlock(&btrfsic_mutex);
- return -1;
- }
- ds->bdev = device->bdev;
- ds->state = state;
- bdevname(ds->bdev, ds->name);
- ds->name[BDEVNAME_SIZE - 1] = '\0';
- for (p = ds->name; *p != '\0'; p++);
- while (p > ds->name && *p != '/')
- p--;
- if (*p == '/')
- p++;
- strlcpy(ds->name, p, sizeof(ds->name));
- btrfsic_dev_state_hashtable_add(ds,
- &btrfsic_dev_state_hashtable);
- }
-
- ret = btrfsic_process_superblock(state, fs_devices);
- if (0 != ret) {
- mutex_unlock(&btrfsic_mutex);
- btrfsic_unmount(root, fs_devices);
- return ret;
- }
-
- if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
- btrfsic_dump_database(state);
- if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
- btrfsic_dump_tree(state);
-
- mutex_unlock(&btrfsic_mutex);
- return 0;
-}
-
-void btrfsic_unmount(struct btrfs_root *root,
- struct btrfs_fs_devices *fs_devices)
-{
- struct list_head *elem_all;
- struct list_head *tmp_all;
- struct btrfsic_state *state;
- struct list_head *dev_head = &fs_devices->devices;
- struct btrfs_device *device;
-
- if (!btrfsic_is_initialized)
- return;
-
- mutex_lock(&btrfsic_mutex);
-
- state = NULL;
- list_for_each_entry(device, dev_head, dev_list) {
- struct btrfsic_dev_state *ds;
-
- if (!device->bdev || !device->name)
- continue;
-
- ds = btrfsic_dev_state_hashtable_lookup(
- device->bdev,
- &btrfsic_dev_state_hashtable);
- if (NULL != ds) {
- state = ds->state;
- btrfsic_dev_state_hashtable_remove(ds);
- btrfsic_dev_state_free(ds);
- }
- }
-
- if (NULL == state) {
- printk(KERN_INFO
- "btrfsic: error, cannot find state information"
- " on umount!\n");
- mutex_unlock(&btrfsic_mutex);
- return;
- }
-
- /*
- * Don't care about keeping the lists' state up to date,
- * just free all memory that was allocated dynamically.
- * Free the blocks and the block_links.
- */
- list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) {
- struct btrfsic_block *const b_all =
- list_entry(elem_all, struct btrfsic_block,
- all_blocks_node);
- struct list_head *elem_ref_to;
- struct list_head *tmp_ref_to;
-
- list_for_each_safe(elem_ref_to, tmp_ref_to,
- &b_all->ref_to_list) {
- struct btrfsic_block_link *const l =
- list_entry(elem_ref_to,
- struct btrfsic_block_link,
- node_ref_to);
-
- if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
- btrfsic_print_rem_link(state, l);
-
- l->ref_cnt--;
- if (0 == l->ref_cnt)
- btrfsic_block_link_free(l);
- }
-
- if (b_all->is_iodone)
- btrfsic_block_free(b_all);
- else
- printk(KERN_INFO "btrfs: attempt to free %c-block"
- " @%llu (%s/%llu/%d) on umount which is"
- " not yet iodone!\n",
- btrfsic_get_block_type(state, b_all),
- (unsigned long long)b_all->logical_bytenr,
- b_all->dev_state->name,
- (unsigned long long)b_all->dev_bytenr,
- b_all->mirror_num);
- }
-
- mutex_unlock(&btrfsic_mutex);
-
- kfree(state);
-}
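
Editor's note: the FLUSH handling deleted above works by interposing on the bio's completion path: the checker stashes the submitter's bi_private and bi_end_io in its per-device dummy block, points the bio at itself, and restores the original callback when the flush completes. A minimal user-space sketch of that interposition pattern, with hypothetical stand-in types (not the kernel's struct bio):

    #include <stdio.h>

    struct fake_bio {
        void *bi_private;
        void (*bi_end_io)(struct fake_bio *bio, int err);
    };

    struct tracking_block {
        int is_iodone;
        void *orig_private;
        void (*orig_end_io)(struct fake_bio *bio, int err);
    };

    static void checker_end_io(struct fake_bio *bio, int err)
    {
        struct tracking_block *block = bio->bi_private;

        block->is_iodone = 1;                   /* record completion first */
        bio->bi_private = block->orig_private;  /* then hand back to the submitter */
        bio->bi_end_io = block->orig_end_io;
        bio->bi_end_io(bio, err);
    }

    static void submitter_end_io(struct fake_bio *bio, int err)
    {
        (void)bio;
        printf("original end_io ran, err=%d\n", err);
    }

    int main(void)
    {
        struct fake_bio bio = { .bi_private = NULL, .bi_end_io = submitter_end_io };
        struct tracking_block block = { 0 };

        /* interpose, as the FLUSH branch above does with its dummy block */
        block.orig_private = bio.bi_private;
        block.orig_end_io = bio.bi_end_io;
        bio.bi_private = &block;
        bio.bi_end_io = checker_end_io;

        /* pretend the device finished the flush */
        bio.bi_end_io(&bio, 0);
        return 0;
    }
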
diff --git a/ANDROID_3.4.5/fs/btrfs/check-integrity.h b/ANDROID_3.4.5/fs/btrfs/check-integrity.h
deleted file mode 100644
index 8b59175c..00000000
--- a/ANDROID_3.4.5/fs/btrfs/check-integrity.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (C) STRATO AG 2011. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#if !defined(__BTRFS_CHECK_INTEGRITY__)
-#define __BTRFS_CHECK_INTEGRITY__
-
-#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
-int btrfsic_submit_bh(int rw, struct buffer_head *bh);
-void btrfsic_submit_bio(int rw, struct bio *bio);
-#else
-#define btrfsic_submit_bh submit_bh
-#define btrfsic_submit_bio submit_bio
-#endif
-
-int btrfsic_mount(struct btrfs_root *root,
- struct btrfs_fs_devices *fs_devices,
- int including_extent_data, u32 print_mask);
-void btrfsic_unmount(struct btrfs_root *root,
- struct btrfs_fs_devices *fs_devices);
-
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/compat.h b/ANDROID_3.4.5/fs/btrfs/compat.h
deleted file mode 100644
index 7c4503ef..00000000
--- a/ANDROID_3.4.5/fs/btrfs/compat.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _COMPAT_H_
-#define _COMPAT_H_
-
-#define btrfs_drop_nlink(inode) drop_nlink(inode)
-#define btrfs_inc_nlink(inode) inc_nlink(inode)
-
-#endif /* _COMPAT_H_ */
diff --git a/ANDROID_3.4.5/fs/btrfs/compression.c b/ANDROID_3.4.5/fs/btrfs/compression.c
deleted file mode 100644
index 86eff48d..00000000
--- a/ANDROID_3.4.5/fs/btrfs/compression.c
+++ /dev/null
@@ -1,1038 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/bio.h>
-#include <linux/buffer_head.h>
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-#include <linux/highmem.h>
-#include <linux/time.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/backing-dev.h>
-#include <linux/mpage.h>
-#include <linux/swap.h>
-#include <linux/writeback.h>
-#include <linux/bit_spinlock.h>
-#include <linux/slab.h>
-#include "compat.h"
-#include "ctree.h"
-#include "disk-io.h"
-#include "transaction.h"
-#include "btrfs_inode.h"
-#include "volumes.h"
-#include "ordered-data.h"
-#include "compression.h"
-#include "extent_io.h"
-#include "extent_map.h"
-
-struct compressed_bio {
- /* number of bios pending for this compressed extent */
- atomic_t pending_bios;
-
- /* the pages with the compressed data on them */
- struct page **compressed_pages;
-
- /* inode that owns this data */
- struct inode *inode;
-
- /* starting offset in the inode for our pages */
- u64 start;
-
- /* number of bytes in the inode we're working on */
- unsigned long len;
-
- /* number of bytes on disk */
- unsigned long compressed_len;
-
- /* the compression algorithm for this bio */
- int compress_type;
-
- /* number of compressed pages in the array */
- unsigned long nr_pages;
-
- /* IO errors */
- int errors;
- int mirror_num;
-
- /* for reads, this is the bio we are copying the data into */
- struct bio *orig_bio;
-
- /*
- * the start of a variable length array of checksums only
- * used by reads
- */
- u32 sums;
-};
-
-static inline int compressed_bio_size(struct btrfs_root *root,
- unsigned long disk_size)
-{
- u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
-
- return sizeof(struct compressed_bio) +
- ((disk_size + root->sectorsize - 1) / root->sectorsize) *
- csum_size;
-}
-
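
Editor's note: compressed_bio_size() above allocates the struct plus one checksum slot per on-disk sector of the compressed extent. A worked-arithmetic sketch with illustrative numbers (4 KiB sectors, 4-byte crc32c, and a made-up 160-byte struct size standing in for sizeof(struct compressed_bio)):

    #include <stdio.h>

    int main(void)
    {
        unsigned long sectorsize = 4096;
        unsigned long csum_size  = 4;
        unsigned long hdr        = 160;         /* stand-in for the struct size */
        unsigned long disk_size  = 12 * 1024;   /* compressed extent length on disk */

        unsigned long sectors = (disk_size + sectorsize - 1) / sectorsize;  /* = 3 */
        printf("allocate %lu bytes (%lu header + %lu for checksums)\n",
               hdr + sectors * csum_size, hdr, sectors * csum_size);
        return 0;
    }
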
-static struct bio *compressed_bio_alloc(struct block_device *bdev,
- u64 first_byte, gfp_t gfp_flags)
-{
- int nr_vecs;
-
- nr_vecs = bio_get_nr_vecs(bdev);
- return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags);
-}
-
-static int check_compressed_csum(struct inode *inode,
- struct compressed_bio *cb,
- u64 disk_start)
-{
- int ret;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct page *page;
- unsigned long i;
- char *kaddr;
- u32 csum;
- u32 *cb_sum = &cb->sums;
-
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
- return 0;
-
- for (i = 0; i < cb->nr_pages; i++) {
- page = cb->compressed_pages[i];
- csum = ~(u32)0;
-
- kaddr = kmap_atomic(page);
- csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
- btrfs_csum_final(csum, (char *)&csum);
- kunmap_atomic(kaddr);
-
- if (csum != *cb_sum) {
- printk(KERN_INFO "btrfs csum failed ino %llu "
- "extent %llu csum %u "
- "wanted %u mirror %d\n",
- (unsigned long long)btrfs_ino(inode),
- (unsigned long long)disk_start,
- csum, *cb_sum, cb->mirror_num);
- ret = -EIO;
- goto fail;
- }
- cb_sum++;
-
- }
- ret = 0;
-fail:
- return ret;
-}
-
-/* when we finish reading compressed pages from the disk, we
- * decompress them and then run the bio end_io routines on the
- * decompressed pages (in the inode address space).
- *
- * This allows the checksumming and other IO error handling routines
- * to work normally
- *
- * The compressed pages are freed here, and it must be run
- * in process context
- */
-static void end_compressed_bio_read(struct bio *bio, int err)
-{
- struct compressed_bio *cb = bio->bi_private;
- struct inode *inode;
- struct page *page;
- unsigned long index;
- int ret;
-
- if (err)
- cb->errors = 1;
-
- /* if there are more bios still pending for this compressed
- * extent, just exit
- */
- if (!atomic_dec_and_test(&cb->pending_bios))
- goto out;
-
- inode = cb->inode;
- ret = check_compressed_csum(inode, cb, (u64)bio->bi_sector << 9);
- if (ret)
- goto csum_failed;
-
- /* ok, we're the last bio for this extent, lets start
- * the decompression.
- */
- ret = btrfs_decompress_biovec(cb->compress_type,
- cb->compressed_pages,
- cb->start,
- cb->orig_bio->bi_io_vec,
- cb->orig_bio->bi_vcnt,
- cb->compressed_len);
-csum_failed:
- if (ret)
- cb->errors = 1;
-
- /* release the compressed pages */
- index = 0;
- for (index = 0; index < cb->nr_pages; index++) {
- page = cb->compressed_pages[index];
- page->mapping = NULL;
- page_cache_release(page);
- }
-
- /* do io completion on the original bio */
- if (cb->errors) {
- bio_io_error(cb->orig_bio);
- } else {
- int bio_index = 0;
- struct bio_vec *bvec = cb->orig_bio->bi_io_vec;
-
- /*
- * we have verified the checksum already, set page
- * checked so the end_io handlers know about it
- */
- while (bio_index < cb->orig_bio->bi_vcnt) {
- SetPageChecked(bvec->bv_page);
- bvec++;
- bio_index++;
- }
- bio_endio(cb->orig_bio, 0);
- }
-
- /* finally free the cb struct */
- kfree(cb->compressed_pages);
- kfree(cb);
-out:
- bio_put(bio);
-}
-
-/*
- * Clear the writeback bits on all of the file
- * pages for a compressed write
- */
-static noinline void end_compressed_writeback(struct inode *inode, u64 start,
- unsigned long ram_size)
-{
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT;
- struct page *pages[16];
- unsigned long nr_pages = end_index - index + 1;
- int i;
- int ret;
-
- while (nr_pages > 0) {
- ret = find_get_pages_contig(inode->i_mapping, index,
- min_t(unsigned long,
- nr_pages, ARRAY_SIZE(pages)), pages);
- if (ret == 0) {
- nr_pages -= 1;
- index += 1;
- continue;
- }
- for (i = 0; i < ret; i++) {
- end_page_writeback(pages[i]);
- page_cache_release(pages[i]);
- }
- nr_pages -= ret;
- index += ret;
- }
- /* the inode may be gone now */
-}
-
-/*
- * do the cleanup once all the compressed pages hit the disk.
- * This will clear writeback on the file pages and free the compressed
- * pages.
- *
- * This also calls the writeback end hooks for the file pages so that
- * metadata and checksums can be updated in the file.
- */
-static void end_compressed_bio_write(struct bio *bio, int err)
-{
- struct extent_io_tree *tree;
- struct compressed_bio *cb = bio->bi_private;
- struct inode *inode;
- struct page *page;
- unsigned long index;
-
- if (err)
- cb->errors = 1;
-
- /* if there are more bios still pending for this compressed
- * extent, just exit
- */
- if (!atomic_dec_and_test(&cb->pending_bios))
- goto out;
-
- /* ok, we're the last bio for this extent, step one is to
- * call back into the FS and do all the end_io operations
- */
- inode = cb->inode;
- tree = &BTRFS_I(inode)->io_tree;
- cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
- tree->ops->writepage_end_io_hook(cb->compressed_pages[0],
- cb->start,
- cb->start + cb->len - 1,
- NULL, 1);
- cb->compressed_pages[0]->mapping = NULL;
-
- end_compressed_writeback(inode, cb->start, cb->len);
- /* note, our inode could be gone now */
-
- /*
- * release the compressed pages, these came from alloc_page and
- * are not attached to the inode at all
- */
- index = 0;
- for (index = 0; index < cb->nr_pages; index++) {
- page = cb->compressed_pages[index];
- page->mapping = NULL;
- page_cache_release(page);
- }
-
- /* finally free the cb struct */
- kfree(cb->compressed_pages);
- kfree(cb);
-out:
- bio_put(bio);
-}
-
-/*
- * worker function to build and submit bios for previously compressed pages.
- * The corresponding pages in the inode should be marked for writeback
- * and the compressed pages should have a reference on them for dropping
- * when the IO is complete.
- *
- * This also checksums the file bytes and gets things ready for
- * the end io hooks.
- */
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
- unsigned long len, u64 disk_start,
- unsigned long compressed_len,
- struct page **compressed_pages,
- unsigned long nr_pages)
-{
- struct bio *bio = NULL;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct compressed_bio *cb;
- unsigned long bytes_left;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- int pg_index = 0;
- struct page *page;
- u64 first_byte = disk_start;
- struct block_device *bdev;
- int ret;
- int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
-
- WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
- cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
- if (!cb)
- return -ENOMEM;
- atomic_set(&cb->pending_bios, 0);
- cb->errors = 0;
- cb->inode = inode;
- cb->start = start;
- cb->len = len;
- cb->mirror_num = 0;
- cb->compressed_pages = compressed_pages;
- cb->compressed_len = compressed_len;
- cb->orig_bio = NULL;
- cb->nr_pages = nr_pages;
-
- bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
-
- bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
- if(!bio) {
- kfree(cb);
- return -ENOMEM;
- }
- bio->bi_private = cb;
- bio->bi_end_io = end_compressed_bio_write;
- atomic_inc(&cb->pending_bios);
-
- /* create and submit bios for the compressed pages */
- bytes_left = compressed_len;
- for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
- page = compressed_pages[pg_index];
- page->mapping = inode->i_mapping;
- if (bio->bi_size)
- ret = io_tree->ops->merge_bio_hook(page, 0,
- PAGE_CACHE_SIZE,
- bio, 0);
- else
- ret = 0;
-
- page->mapping = NULL;
- if (ret || bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) <
- PAGE_CACHE_SIZE) {
- bio_get(bio);
-
- /*
- * inc the count before we submit the bio so
- * we know the end IO handler won't happen before
- * we inc the count. Otherwise, the cb might get
- * freed before we're done setting it up
- */
- atomic_inc(&cb->pending_bios);
- ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
- BUG_ON(ret); /* -ENOMEM */
-
- if (!skip_sum) {
- ret = btrfs_csum_one_bio(root, inode, bio,
- start, 1);
- BUG_ON(ret); /* -ENOMEM */
- }
-
- ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
- BUG_ON(ret); /* -ENOMEM */
-
- bio_put(bio);
-
- bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
- BUG_ON(!bio);
- bio->bi_private = cb;
- bio->bi_end_io = end_compressed_bio_write;
- bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
- }
- if (bytes_left < PAGE_CACHE_SIZE) {
- printk("bytes left %lu compress len %lu nr %lu\n",
- bytes_left, cb->compressed_len, cb->nr_pages);
- }
- bytes_left -= PAGE_CACHE_SIZE;
- first_byte += PAGE_CACHE_SIZE;
- cond_resched();
- }
- bio_get(bio);
-
- ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
- BUG_ON(ret); /* -ENOMEM */
-
- if (!skip_sum) {
- ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
- BUG_ON(ret); /* -ENOMEM */
- }
-
- ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
- BUG_ON(ret); /* -ENOMEM */
-
- bio_put(bio);
- return 0;
-}
-
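
Editor's note: the comment inside btrfs_submit_compressed_write() above ("inc the count before we submit the bio ...") describes a general refcount-before-submit rule: take the reference for the in-flight bio before handing it to the block layer, so an immediate completion can never drop the count to zero while setup still uses the shared control structure. A standalone sketch of that ordering, with hypothetical names and the "I/O" completed inline:

    #include <stdatomic.h>
    #include <stdio.h>

    struct control {
        atomic_int pending;
        int freed;
    };

    static void completion(struct control *c)
    {
        if (atomic_fetch_sub(&c->pending, 1) == 1) {
            c->freed = 1;                   /* last completion releases the struct */
            printf("control struct released\n");
        }
    }

    static void submit(struct control *c)
    {
        completion(c);                      /* worst case: I/O finishes immediately */
    }

    int main(void)
    {
        struct control c = { .pending = 1, .freed = 0 };    /* one ref held by setup */

        atomic_fetch_add(&c.pending, 1);    /* take the bio's ref first... */
        submit(&c);                         /* ...then submit; early completion is now safe */

        completion(&c);                     /* setup done, drop the setup reference */
        return 0;
    }
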
-static noinline int add_ra_bio_pages(struct inode *inode,
- u64 compressed_end,
- struct compressed_bio *cb)
-{
- unsigned long end_index;
- unsigned long pg_index;
- u64 last_offset;
- u64 isize = i_size_read(inode);
- int ret;
- struct page *page;
- unsigned long nr_pages = 0;
- struct extent_map *em;
- struct address_space *mapping = inode->i_mapping;
- struct extent_map_tree *em_tree;
- struct extent_io_tree *tree;
- u64 end;
- int misses = 0;
-
- page = cb->orig_bio->bi_io_vec[cb->orig_bio->bi_vcnt - 1].bv_page;
- last_offset = (page_offset(page) + PAGE_CACHE_SIZE);
- em_tree = &BTRFS_I(inode)->extent_tree;
- tree = &BTRFS_I(inode)->io_tree;
-
- if (isize == 0)
- return 0;
-
- end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
-
- while (last_offset < compressed_end) {
- pg_index = last_offset >> PAGE_CACHE_SHIFT;
-
- if (pg_index > end_index)
- break;
-
- rcu_read_lock();
- page = radix_tree_lookup(&mapping->page_tree, pg_index);
- rcu_read_unlock();
- if (page) {
- misses++;
- if (misses > 4)
- break;
- goto next;
- }
-
- page = __page_cache_alloc(mapping_gfp_mask(mapping) &
- ~__GFP_FS);
- if (!page)
- break;
-
- if (add_to_page_cache_lru(page, mapping, pg_index,
- GFP_NOFS)) {
- page_cache_release(page);
- goto next;
- }
-
- end = last_offset + PAGE_CACHE_SIZE - 1;
- /*
- * at this point, we have a locked page in the page cache
- * for these bytes in the file. But, we have to make
- * sure they map to this compressed extent on disk.
- */
- set_page_extent_mapped(page);
- lock_extent(tree, last_offset, end);
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, last_offset,
- PAGE_CACHE_SIZE);
- read_unlock(&em_tree->lock);
-
- if (!em || last_offset < em->start ||
- (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
- (em->block_start >> 9) != cb->orig_bio->bi_sector) {
- free_extent_map(em);
- unlock_extent(tree, last_offset, end);
- unlock_page(page);
- page_cache_release(page);
- break;
- }
- free_extent_map(em);
-
- if (page->index == end_index) {
- char *userpage;
- size_t zero_offset = isize & (PAGE_CACHE_SIZE - 1);
-
- if (zero_offset) {
- int zeros;
- zeros = PAGE_CACHE_SIZE - zero_offset;
- userpage = kmap_atomic(page);
- memset(userpage + zero_offset, 0, zeros);
- flush_dcache_page(page);
- kunmap_atomic(userpage);
- }
- }
-
- ret = bio_add_page(cb->orig_bio, page,
- PAGE_CACHE_SIZE, 0);
-
- if (ret == PAGE_CACHE_SIZE) {
- nr_pages++;
- page_cache_release(page);
- } else {
- unlock_extent(tree, last_offset, end);
- unlock_page(page);
- page_cache_release(page);
- break;
- }
-next:
- last_offset += PAGE_CACHE_SIZE;
- }
- return 0;
-}
-
-/*
- * for a compressed read, the bio we get passed has all the inode pages
- * in it. We don't actually do IO on those pages but allocate new ones
- * to hold the compressed pages on disk.
- *
- * bio->bi_sector points to the compressed extent on disk
- * bio->bi_io_vec points to all of the inode pages
- * bio->bi_vcnt is a count of pages
- *
- * After the compressed pages are read, we copy the bytes into the
- * bio we were passed and then call the bio end_io calls
- */
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags)
-{
- struct extent_io_tree *tree;
- struct extent_map_tree *em_tree;
- struct compressed_bio *cb;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
- unsigned long compressed_len;
- unsigned long nr_pages;
- unsigned long pg_index;
- struct page *page;
- struct block_device *bdev;
- struct bio *comp_bio;
- u64 cur_disk_byte = (u64)bio->bi_sector << 9;
- u64 em_len;
- u64 em_start;
- struct extent_map *em;
- int ret = -ENOMEM;
- u32 *sums;
-
- tree = &BTRFS_I(inode)->io_tree;
- em_tree = &BTRFS_I(inode)->extent_tree;
-
- /* we need the actual starting offset of this extent in the file */
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree,
- page_offset(bio->bi_io_vec->bv_page),
- PAGE_CACHE_SIZE);
- read_unlock(&em_tree->lock);
- if (!em)
- return -EIO;
-
- compressed_len = em->block_len;
- cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
- if (!cb)
- goto out;
-
- atomic_set(&cb->pending_bios, 0);
- cb->errors = 0;
- cb->inode = inode;
- cb->mirror_num = mirror_num;
- sums = &cb->sums;
-
- cb->start = em->orig_start;
- em_len = em->len;
- em_start = em->start;
-
- free_extent_map(em);
- em = NULL;
-
- cb->len = uncompressed_len;
- cb->compressed_len = compressed_len;
- cb->compress_type = extent_compress_type(bio_flags);
- cb->orig_bio = bio;
-
- nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
- PAGE_CACHE_SIZE;
- cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
- GFP_NOFS);
- if (!cb->compressed_pages)
- goto fail1;
-
- bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
-
- for (pg_index = 0; pg_index < nr_pages; pg_index++) {
- cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS |
- __GFP_HIGHMEM);
- if (!cb->compressed_pages[pg_index])
- goto fail2;
- }
- cb->nr_pages = nr_pages;
-
- add_ra_bio_pages(inode, em_start + em_len, cb);
-
-	/* include any pages we added in add_ra_bio_pages */
- uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
- cb->len = uncompressed_len;
-
- comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
- if (!comp_bio)
- goto fail2;
- comp_bio->bi_private = cb;
- comp_bio->bi_end_io = end_compressed_bio_read;
- atomic_inc(&cb->pending_bios);
-
- for (pg_index = 0; pg_index < nr_pages; pg_index++) {
- page = cb->compressed_pages[pg_index];
- page->mapping = inode->i_mapping;
- page->index = em_start >> PAGE_CACHE_SHIFT;
-
- if (comp_bio->bi_size)
- ret = tree->ops->merge_bio_hook(page, 0,
- PAGE_CACHE_SIZE,
- comp_bio, 0);
- else
- ret = 0;
-
- page->mapping = NULL;
- if (ret || bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0) <
- PAGE_CACHE_SIZE) {
- bio_get(comp_bio);
-
- ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
- BUG_ON(ret); /* -ENOMEM */
-
- /*
- * inc the count before we submit the bio so
- * we know the end IO handler won't happen before
- * we inc the count. Otherwise, the cb might get
- * freed before we're done setting it up
- */
- atomic_inc(&cb->pending_bios);
-
- if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
- ret = btrfs_lookup_bio_sums(root, inode,
- comp_bio, sums);
- BUG_ON(ret); /* -ENOMEM */
- }
- sums += (comp_bio->bi_size + root->sectorsize - 1) /
- root->sectorsize;
-
- ret = btrfs_map_bio(root, READ, comp_bio,
- mirror_num, 0);
- BUG_ON(ret); /* -ENOMEM */
-
- bio_put(comp_bio);
-
- comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
- GFP_NOFS);
- BUG_ON(!comp_bio);
- comp_bio->bi_private = cb;
- comp_bio->bi_end_io = end_compressed_bio_read;
-
- bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0);
- }
- cur_disk_byte += PAGE_CACHE_SIZE;
- }
- bio_get(comp_bio);
-
- ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
- BUG_ON(ret); /* -ENOMEM */
-
- if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
- ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
- BUG_ON(ret); /* -ENOMEM */
- }
-
- ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
- BUG_ON(ret); /* -ENOMEM */
-
- bio_put(comp_bio);
- return 0;
-
-fail2:
- for (pg_index = 0; pg_index < nr_pages; pg_index++)
- free_page((unsigned long)cb->compressed_pages[pg_index]);
-
- kfree(cb->compressed_pages);
-fail1:
- kfree(cb);
-out:
- free_extent_map(em);
- return ret;
-}
-
-static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
-static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES];
-static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
-static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
-static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
-
-struct btrfs_compress_op *btrfs_compress_op[] = {
- &btrfs_zlib_compress,
- &btrfs_lzo_compress,
-};
-
-void __init btrfs_init_compress(void)
-{
- int i;
-
- for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
- INIT_LIST_HEAD(&comp_idle_workspace[i]);
- spin_lock_init(&comp_workspace_lock[i]);
- atomic_set(&comp_alloc_workspace[i], 0);
- init_waitqueue_head(&comp_workspace_wait[i]);
- }
-}
-
-/*
- * this finds an available workspace or allocates a new one
- * ERR_PTR is returned if things go bad.
- */
-static struct list_head *find_workspace(int type)
-{
- struct list_head *workspace;
- int cpus = num_online_cpus();
- int idx = type - 1;
-
- struct list_head *idle_workspace = &comp_idle_workspace[idx];
- spinlock_t *workspace_lock = &comp_workspace_lock[idx];
- atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
- wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
- int *num_workspace = &comp_num_workspace[idx];
-again:
- spin_lock(workspace_lock);
- if (!list_empty(idle_workspace)) {
- workspace = idle_workspace->next;
- list_del(workspace);
- (*num_workspace)--;
- spin_unlock(workspace_lock);
- return workspace;
-
- }
- if (atomic_read(alloc_workspace) > cpus) {
- DEFINE_WAIT(wait);
-
- spin_unlock(workspace_lock);
- prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
- if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
- schedule();
- finish_wait(workspace_wait, &wait);
- goto again;
- }
- atomic_inc(alloc_workspace);
- spin_unlock(workspace_lock);
-
- workspace = btrfs_compress_op[idx]->alloc_workspace();
- if (IS_ERR(workspace)) {
- atomic_dec(alloc_workspace);
- wake_up(workspace_wait);
- }
- return workspace;
-}
-
-/*
- * put a workspace struct back on the list or free it if we have enough
- * idle ones sitting around
- */
-static void free_workspace(int type, struct list_head *workspace)
-{
- int idx = type - 1;
- struct list_head *idle_workspace = &comp_idle_workspace[idx];
- spinlock_t *workspace_lock = &comp_workspace_lock[idx];
- atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
- wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
- int *num_workspace = &comp_num_workspace[idx];
-
- spin_lock(workspace_lock);
- if (*num_workspace < num_online_cpus()) {
- list_add_tail(workspace, idle_workspace);
- (*num_workspace)++;
- spin_unlock(workspace_lock);
- goto wake;
- }
- spin_unlock(workspace_lock);
-
- btrfs_compress_op[idx]->free_workspace(workspace);
- atomic_dec(alloc_workspace);
-wake:
- if (waitqueue_active(workspace_wait))
- wake_up(workspace_wait);
-}
-
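
Editor's note: find_workspace()/free_workspace() above implement a bounded per-algorithm pool: reuse an idle workspace when one exists, allocate a new one only while the live count stays near the number of online CPUs, and otherwise sleep until one is returned. A policy-only sketch with the locking and wait-queue machinery stripped out (all names and numbers illustrative):

    #include <stdio.h>

    #define NUM_CPUS 4

    static int idle_count;      /* workspaces parked on the idle list */
    static int alloc_count;     /* workspaces currently in existence  */

    static const char *get_workspace(void)
    {
        if (idle_count > 0) {
            idle_count--;
            return "reused idle workspace";
        }
        if (alloc_count < NUM_CPUS) {
            alloc_count++;
            return "allocated new workspace";
        }
        return "would sleep until one is freed";
    }

    static void put_workspace(void)
    {
        if (idle_count < NUM_CPUS)
            idle_count++;       /* park it for reuse */
        else
            alloc_count--;      /* enough idle ones already: free it */
    }

    int main(void)
    {
        for (int i = 0; i < 6; i++)
            printf("caller %d: %s\n", i, get_workspace());
        put_workspace();
        printf("after one release: %s\n", get_workspace());
        return 0;
    }
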
-/*
- * cleanup function for module exit
- */
-static void free_workspaces(void)
-{
- struct list_head *workspace;
- int i;
-
- for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
- while (!list_empty(&comp_idle_workspace[i])) {
- workspace = comp_idle_workspace[i].next;
- list_del(workspace);
- btrfs_compress_op[i]->free_workspace(workspace);
- atomic_dec(&comp_alloc_workspace[i]);
- }
- }
-}
-
-/*
- * given an address space and start/len, compress the bytes.
- *
- * pages are allocated to hold the compressed result and stored
- * in 'pages'
- *
- * out_pages is used to return the number of pages allocated. There
- * may be pages allocated even if we return an error
- *
- * total_in is used to return the number of bytes actually read. It
- * may be smaller than len if we had to exit early because we
- * ran out of room in the pages array or because we cross the
- * max_out threshold.
- *
- * total_out is used to return the total number of compressed bytes
- *
- * max_out tells us the max number of bytes that we're allowed to
- * stuff into pages
- */
-int btrfs_compress_pages(int type, struct address_space *mapping,
- u64 start, unsigned long len,
- struct page **pages,
- unsigned long nr_dest_pages,
- unsigned long *out_pages,
- unsigned long *total_in,
- unsigned long *total_out,
- unsigned long max_out)
-{
- struct list_head *workspace;
- int ret;
-
- workspace = find_workspace(type);
- if (IS_ERR(workspace))
- return -1;
-
- ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
- start, len, pages,
- nr_dest_pages, out_pages,
- total_in, total_out,
- max_out);
- free_workspace(type, workspace);
- return ret;
-}
-
-/*
- * pages_in is an array of pages with compressed data.
- *
- * disk_start is the starting logical offset of this array in the file
- *
- * bvec is a bio_vec of pages from the file that we want to decompress into
- *
- * vcnt is the count of pages in the biovec
- *
- * srclen is the number of bytes in pages_in
- *
- * The basic idea is that we have a bio that was created by readpages.
- * The pages in the bio are for the uncompressed data, and they may not
- * be contiguous. They all correspond to the range of bytes covered by
- * the compressed extent.
- */
-int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
- struct bio_vec *bvec, int vcnt, size_t srclen)
-{
- struct list_head *workspace;
- int ret;
-
- workspace = find_workspace(type);
- if (IS_ERR(workspace))
- return -ENOMEM;
-
- ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
- disk_start,
- bvec, vcnt, srclen);
- free_workspace(type, workspace);
- return ret;
-}
-
-/*
- * a less complex decompression routine. Our compressed data fits in a
- * single page, and we want to read a single page out of it.
- * start_byte tells us the offset into the compressed data we're interested in
- */
-int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
- unsigned long start_byte, size_t srclen, size_t destlen)
-{
- struct list_head *workspace;
- int ret;
-
- workspace = find_workspace(type);
- if (IS_ERR(workspace))
- return -ENOMEM;
-
- ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
- dest_page, start_byte,
- srclen, destlen);
-
- free_workspace(type, workspace);
- return ret;
-}
-
-void btrfs_exit_compress(void)
-{
- free_workspaces();
-}
-
-/*
- * Copy uncompressed data from working buffer to pages.
- *
- * buf_start is the byte offset, within the uncompressed data, at which
- * our workspace buffer begins.
- *
- * total_out is the number of uncompressed bytes produced so far, i.e. the
- * offset just past the end of the buffer
- */
-int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
- unsigned long total_out, u64 disk_start,
- struct bio_vec *bvec, int vcnt,
- unsigned long *pg_index,
- unsigned long *pg_offset)
-{
- unsigned long buf_offset;
- unsigned long current_buf_start;
- unsigned long start_byte;
- unsigned long working_bytes = total_out - buf_start;
- unsigned long bytes;
- char *kaddr;
- struct page *page_out = bvec[*pg_index].bv_page;
-
- /*
- * start byte is the first byte of the page we're currently
-	 * copying into, relative to the start of the uncompressed data.
- */
- start_byte = page_offset(page_out) - disk_start;
-
- /* we haven't yet hit data corresponding to this page */
- if (total_out <= start_byte)
- return 1;
-
- /*
- * the start of the data we care about is offset into
- * the middle of our working buffer
- */
- if (total_out > start_byte && buf_start < start_byte) {
- buf_offset = start_byte - buf_start;
- working_bytes -= buf_offset;
- } else {
- buf_offset = 0;
- }
- current_buf_start = buf_start;
-
- /* copy bytes from the working buffer into the pages */
- while (working_bytes > 0) {
- bytes = min(PAGE_CACHE_SIZE - *pg_offset,
- PAGE_CACHE_SIZE - buf_offset);
- bytes = min(bytes, working_bytes);
- kaddr = kmap_atomic(page_out);
- memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
- kunmap_atomic(kaddr);
- flush_dcache_page(page_out);
-
- *pg_offset += bytes;
- buf_offset += bytes;
- working_bytes -= bytes;
- current_buf_start += bytes;
-
- /* check if we need to pick another page */
- if (*pg_offset == PAGE_CACHE_SIZE) {
- (*pg_index)++;
- if (*pg_index >= vcnt)
- return 0;
-
- page_out = bvec[*pg_index].bv_page;
- *pg_offset = 0;
- start_byte = page_offset(page_out) - disk_start;
-
- /*
- * make sure our new page is covered by this
- * working buffer
- */
- if (total_out <= start_byte)
- return 1;
-
- /*
- * the next page in the biovec might not be adjacent
- * to the last page, but it might still be found
- * inside this working buffer. bump our offset pointer
- */
- if (total_out > start_byte &&
- current_buf_start < start_byte) {
- buf_offset = start_byte - buf_start;
- working_bytes = total_out - start_byte;
- current_buf_start = buf_start + buf_offset;
- }
- }
- }
-
- return 1;
-}
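
Editor's note: the early return at the top of btrfs_decompress_buf2page() above ("we haven't yet hit data corresponding to this page") is easiest to see with numbers. The bio's first page can sit in the middle of the extent, so the copy helper keeps answering "not yet" until the decompressor has produced enough output to reach it. A worked example with made-up offsets:

    #include <stdio.h>

    int main(void)
    {
        unsigned long page_size  = 4096;
        unsigned long disk_start = 65536;       /* file offset where the extent starts  */
        unsigned long page_off   = 73728;       /* file offset of the page being filled */
        unsigned long start_byte = page_off - disk_start;  /* 8192 bytes into the uncompressed data */

        unsigned long buf_start = 0;
        unsigned long total_out = 0;

        while (total_out <= start_byte) {       /* decompress_buf2page() would return 1 here */
            buf_start = total_out;
            total_out += page_size;             /* one more page of decompressed output */
            printf("produced uncompressed bytes %5lu..%5lu\n", buf_start, total_out - 1);
        }
        printf("page at uncompressed offset %lu is now covered; copy starts at buf_offset %lu\n",
               start_byte, start_byte - buf_start);
        return 0;
    }
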
diff --git a/ANDROID_3.4.5/fs/btrfs/compression.h b/ANDROID_3.4.5/fs/btrfs/compression.h
deleted file mode 100644
index 9afb0a62..00000000
--- a/ANDROID_3.4.5/fs/btrfs/compression.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __BTRFS_COMPRESSION_
-#define __BTRFS_COMPRESSION_
-
-void btrfs_init_compress(void);
-void btrfs_exit_compress(void);
-
-int btrfs_compress_pages(int type, struct address_space *mapping,
- u64 start, unsigned long len,
- struct page **pages,
- unsigned long nr_dest_pages,
- unsigned long *out_pages,
- unsigned long *total_in,
- unsigned long *total_out,
- unsigned long max_out);
-int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
- struct bio_vec *bvec, int vcnt, size_t srclen);
-int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
- unsigned long start_byte, size_t srclen, size_t destlen);
-int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
- unsigned long total_out, u64 disk_start,
- struct bio_vec *bvec, int vcnt,
- unsigned long *pg_index,
- unsigned long *pg_offset);
-
-int btrfs_submit_compressed_write(struct inode *inode, u64 start,
- unsigned long len, u64 disk_start,
- unsigned long compressed_len,
- struct page **compressed_pages,
- unsigned long nr_pages);
-int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
- int mirror_num, unsigned long bio_flags);
-
-struct btrfs_compress_op {
- struct list_head *(*alloc_workspace)(void);
-
- void (*free_workspace)(struct list_head *workspace);
-
- int (*compress_pages)(struct list_head *workspace,
- struct address_space *mapping,
- u64 start, unsigned long len,
- struct page **pages,
- unsigned long nr_dest_pages,
- unsigned long *out_pages,
- unsigned long *total_in,
- unsigned long *total_out,
- unsigned long max_out);
-
- int (*decompress_biovec)(struct list_head *workspace,
- struct page **pages_in,
- u64 disk_start,
- struct bio_vec *bvec,
- int vcnt,
- size_t srclen);
-
- int (*decompress)(struct list_head *workspace,
- unsigned char *data_in,
- struct page *dest_page,
- unsigned long start_byte,
- size_t srclen, size_t destlen);
-};
-
-extern struct btrfs_compress_op btrfs_zlib_compress;
-extern struct btrfs_compress_op btrfs_lzo_compress;
-
-#endif
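
Editor's note: compression backends plug in through the btrfs_compress_op vtable declared above, and compression.c dispatches on a 1-based type via btrfs_compress_op[type - 1]. A standalone sketch of that ops-table dispatch; the table entries and function bodies are stand-ins, not the real zlib/lzo implementations:

    #include <stdio.h>

    struct compress_op {
        const char *name;
        int (*compress)(const char *in, char *out);
    };

    static int fake_zlib(const char *in, char *out) { (void)in; (void)out; return 0; }
    static int fake_lzo(const char *in, char *out)  { (void)in; (void)out; return 0; }

    static const struct compress_op ops[] = {
        { "zlib", fake_zlib },  /* type 1 */
        { "lzo",  fake_lzo  },  /* type 2 */
    };

    int main(void)
    {
        int type = 2;                           /* as carried in the bio flags */
        const struct compress_op *op = &ops[type - 1];

        printf("dispatching to %s backend\n", op->name);
        return op->compress("input", NULL);
    }
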
diff --git a/ANDROID_3.4.5/fs/btrfs/ctree.c b/ANDROID_3.4.5/fs/btrfs/ctree.c
deleted file mode 100644
index 4106264f..00000000
--- a/ANDROID_3.4.5/fs/btrfs/ctree.c
+++ /dev/null
@@ -1,4382 +0,0 @@
-/*
- * Copyright (C) 2007,2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include "ctree.h"
-#include "disk-io.h"
-#include "transaction.h"
-#include "print-tree.h"
-#include "locking.h"
-
-static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, int level);
-static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *ins_key,
- struct btrfs_path *path, int data_size, int extend);
-static int push_node_left(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *dst,
- struct extent_buffer *src, int empty);
-static int balance_node_right(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *dst_buf,
- struct extent_buffer *src_buf);
-static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_path *path, int level, int slot);
-
-struct btrfs_path *btrfs_alloc_path(void)
-{
- struct btrfs_path *path;
- path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
- return path;
-}
-
-/*
- * set all locked nodes in the path to blocking locks. This should
- * be done before scheduling
- */
-noinline void btrfs_set_path_blocking(struct btrfs_path *p)
-{
- int i;
- for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
- if (!p->nodes[i] || !p->locks[i])
- continue;
- btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
- if (p->locks[i] == BTRFS_READ_LOCK)
- p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
- else if (p->locks[i] == BTRFS_WRITE_LOCK)
- p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
- }
-}
-
-/*
- * reset all the locked nodes in the path to spinning locks.
- *
- * held is used to keep lockdep happy, when lockdep is enabled
- * we set held to a blocking lock before we go around and
- * retake all the spinlocks in the path. You can safely use NULL
- * for held
- */
-noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
- struct extent_buffer *held, int held_rw)
-{
- int i;
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- /* lockdep really cares that we take all of these spinlocks
- * in the right order. If any of the locks in the path are not
- * currently blocking, it is going to complain. So, make really
- * really sure by forcing the path to blocking before we clear
- * the path blocking.
- */
- if (held) {
- btrfs_set_lock_blocking_rw(held, held_rw);
- if (held_rw == BTRFS_WRITE_LOCK)
- held_rw = BTRFS_WRITE_LOCK_BLOCKING;
- else if (held_rw == BTRFS_READ_LOCK)
- held_rw = BTRFS_READ_LOCK_BLOCKING;
- }
- btrfs_set_path_blocking(p);
-#endif
-
- for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
- if (p->nodes[i] && p->locks[i]) {
- btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
- if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
- p->locks[i] = BTRFS_WRITE_LOCK;
- else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
- p->locks[i] = BTRFS_READ_LOCK;
- }
- }
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- if (held)
- btrfs_clear_lock_blocking_rw(held, held_rw);
-#endif
-}
-
-/* this also releases the path */
-void btrfs_free_path(struct btrfs_path *p)
-{
- if (!p)
- return;
- btrfs_release_path(p);
- kmem_cache_free(btrfs_path_cachep, p);
-}
-
-/*
- * path release drops references on the extent buffers in the path
- * and it drops any locks held by this path
- *
- * It is safe to call this on paths that hold no locks or extent buffers.
- */
-noinline void btrfs_release_path(struct btrfs_path *p)
-{
- int i;
-
- for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
- p->slots[i] = 0;
- if (!p->nodes[i])
- continue;
- if (p->locks[i]) {
- btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
- p->locks[i] = 0;
- }
- free_extent_buffer(p->nodes[i]);
- p->nodes[i] = NULL;
- }
-}
-
-/*
- * safely gets a reference on the root node of a tree. A lock
- * is not taken, so a concurrent writer may put a different node
- * at the root of the tree. See btrfs_lock_root_node for the
- * looping required.
- *
- * The extent buffer returned by this has a reference taken, so
- * it won't disappear. It may stop being the root of the tree
- * at any time because there are no locks held.
- */
-struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
-{
- struct extent_buffer *eb;
-
- while (1) {
- rcu_read_lock();
- eb = rcu_dereference(root->node);
-
- /*
- * RCU really hurts here, we could free up the root node because
- * it was cow'ed but we may not get the new root node yet so do
- * the inc_not_zero dance and if it doesn't work then
- * synchronize_rcu and try again.
- */
- if (atomic_inc_not_zero(&eb->refs)) {
- rcu_read_unlock();
- break;
- }
- rcu_read_unlock();
- synchronize_rcu();
- }
- return eb;
-}
-
-/* loop around taking references on and locking the root node of the
- * tree until you end up with a lock on the root. A locked buffer
- * is returned, with a reference held.
- */
-struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
-{
- struct extent_buffer *eb;
-
- while (1) {
- eb = btrfs_root_node(root);
- btrfs_tree_lock(eb);
- if (eb == root->node)
- break;
- btrfs_tree_unlock(eb);
- free_extent_buffer(eb);
- }
- return eb;
-}
-
-/* loop around taking references on and locking the root node of the
- * tree until you end up with a lock on the root. A locked buffer
- * is returned, with a reference held.
- */
-struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
-{
- struct extent_buffer *eb;
-
- while (1) {
- eb = btrfs_root_node(root);
- btrfs_tree_read_lock(eb);
- if (eb == root->node)
- break;
- btrfs_tree_read_unlock(eb);
- free_extent_buffer(eb);
- }
- return eb;
-}
-
-/* cowonly roots (everything not a reference counted cow subvolume), just get
- * put onto a simple dirty list. transaction.c walks this to make sure they
- * get properly updated on disk.
- */
-static void add_root_to_dirty_list(struct btrfs_root *root)
-{
- spin_lock(&root->fs_info->trans_lock);
- if (root->track_dirty && list_empty(&root->dirty_list)) {
- list_add(&root->dirty_list,
- &root->fs_info->dirty_cowonly_roots);
- }
- spin_unlock(&root->fs_info->trans_lock);
-}
-
-/*
- * used by snapshot creation to make a copy of a root for a tree with
- * a given objectid. The buffer with the new root node is returned in
- * cow_ret, and this func returns zero on success or a negative error code.
- */
-int btrfs_copy_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *buf,
- struct extent_buffer **cow_ret, u64 new_root_objectid)
-{
- struct extent_buffer *cow;
- int ret = 0;
- int level;
- struct btrfs_disk_key disk_key;
-
- WARN_ON(root->ref_cows && trans->transid !=
- root->fs_info->running_transaction->transid);
- WARN_ON(root->ref_cows && trans->transid != root->last_trans);
-
- level = btrfs_header_level(buf);
- if (level == 0)
- btrfs_item_key(buf, &disk_key, 0);
- else
- btrfs_node_key(buf, &disk_key, 0);
-
- cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
- new_root_objectid, &disk_key, level,
- buf->start, 0, 1);
- if (IS_ERR(cow))
- return PTR_ERR(cow);
-
- copy_extent_buffer(cow, buf, 0, 0, cow->len);
- btrfs_set_header_bytenr(cow, cow->start);
- btrfs_set_header_generation(cow, trans->transid);
- btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
- btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
- BTRFS_HEADER_FLAG_RELOC);
- if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
- btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
- else
- btrfs_set_header_owner(cow, new_root_objectid);
-
- write_extent_buffer(cow, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(cow),
- BTRFS_FSID_SIZE);
-
- WARN_ON(btrfs_header_generation(buf) > trans->transid);
- if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
- ret = btrfs_inc_ref(trans, root, cow, 1, 1);
- else
- ret = btrfs_inc_ref(trans, root, cow, 0, 1);
-
- if (ret)
- return ret;
-
- btrfs_mark_buffer_dirty(cow);
- *cow_ret = cow;
- return 0;
-}
-
-/*
- * check if the tree block can be shared by multiple trees
- */
-int btrfs_block_can_be_shared(struct btrfs_root *root,
- struct extent_buffer *buf)
-{
- /*
-	 * Tree blocks not in reference counted trees and tree roots
- * are never shared. If a block was allocated after the last
- * snapshot and the block was not allocated by tree relocation,
- * we know the block is not shared.
- */
- if (root->ref_cows &&
- buf != root->node && buf != root->commit_root &&
- (btrfs_header_generation(buf) <=
- btrfs_root_last_snapshot(&root->root_item) ||
- btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
- return 1;
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (root->ref_cows &&
- btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
- return 1;
-#endif
- return 0;
-}
-
-static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *buf,
- struct extent_buffer *cow,
- int *last_ref)
-{
- u64 refs;
- u64 owner;
- u64 flags;
- u64 new_flags = 0;
- int ret;
-
- /*
- * Backrefs update rules:
- *
- * Always use full backrefs for extent pointers in tree block
- * allocated by tree relocation.
- *
- * If a shared tree block is no longer referenced by its owner
- * tree (btrfs_header_owner(buf) == root->root_key.objectid),
- * use full backrefs for extent pointers in tree block.
- *
-	 * If a tree block is being relocated
- * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
- * use full backrefs for extent pointers in tree block.
- * The reason for this is some operations (such as drop tree)
-	 * are only allowed for blocks that use full backrefs.
- */
-
- if (btrfs_block_can_be_shared(root, buf)) {
- ret = btrfs_lookup_extent_info(trans, root, buf->start,
- buf->len, &refs, &flags);
- if (ret)
- return ret;
- if (refs == 0) {
- ret = -EROFS;
- btrfs_std_error(root->fs_info, ret);
- return ret;
- }
- } else {
- refs = 1;
- if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
- btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
- flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
- else
- flags = 0;
- }
-
- owner = btrfs_header_owner(buf);
- BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
- !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
-
- if (refs > 1) {
- if ((owner == root->root_key.objectid ||
- root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
- !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
- ret = btrfs_inc_ref(trans, root, buf, 1, 1);
- BUG_ON(ret); /* -ENOMEM */
-
- if (root->root_key.objectid ==
- BTRFS_TREE_RELOC_OBJECTID) {
- ret = btrfs_dec_ref(trans, root, buf, 0, 1);
- BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_inc_ref(trans, root, cow, 1, 1);
- BUG_ON(ret); /* -ENOMEM */
- }
- new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
- } else {
-
- if (root->root_key.objectid ==
- BTRFS_TREE_RELOC_OBJECTID)
- ret = btrfs_inc_ref(trans, root, cow, 1, 1);
- else
- ret = btrfs_inc_ref(trans, root, cow, 0, 1);
- BUG_ON(ret); /* -ENOMEM */
- }
- if (new_flags != 0) {
- ret = btrfs_set_disk_extent_flags(trans, root,
- buf->start,
- buf->len,
- new_flags, 0);
- if (ret)
- return ret;
- }
- } else {
- if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
- if (root->root_key.objectid ==
- BTRFS_TREE_RELOC_OBJECTID)
- ret = btrfs_inc_ref(trans, root, cow, 1, 1);
- else
- ret = btrfs_inc_ref(trans, root, cow, 0, 1);
- BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_dec_ref(trans, root, buf, 1, 1);
- BUG_ON(ret); /* -ENOMEM */
- }
- clean_tree_block(trans, root, buf);
- *last_ref = 1;
- }
- return 0;
-}
-
-/*
- * does the dirty work in cow of a single block. The parent block (if
- * supplied) is updated to point to the new cow copy. The new buffer is marked
- * dirty and returned locked. If you modify the block it needs to be marked
- * dirty again.
- *
- * search_start -- an allocation hint for the new block
- *
- * empty_size -- a hint that you plan on doing more cow. This is the size in
- * bytes the allocator should try to find free next to the block it returns.
- * This is just a hint and may be ignored by the allocator.
- */
-static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *buf,
- struct extent_buffer *parent, int parent_slot,
- struct extent_buffer **cow_ret,
- u64 search_start, u64 empty_size)
-{
- struct btrfs_disk_key disk_key;
- struct extent_buffer *cow;
- int level, ret;
- int last_ref = 0;
- int unlock_orig = 0;
- u64 parent_start;
-
- if (*cow_ret == buf)
- unlock_orig = 1;
-
- btrfs_assert_tree_locked(buf);
-
- WARN_ON(root->ref_cows && trans->transid !=
- root->fs_info->running_transaction->transid);
- WARN_ON(root->ref_cows && trans->transid != root->last_trans);
-
- level = btrfs_header_level(buf);
-
- if (level == 0)
- btrfs_item_key(buf, &disk_key, 0);
- else
- btrfs_node_key(buf, &disk_key, 0);
-
- if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
- if (parent)
- parent_start = parent->start;
- else
- parent_start = 0;
- } else
- parent_start = 0;
-
- cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
- root->root_key.objectid, &disk_key,
- level, search_start, empty_size, 1);
- if (IS_ERR(cow))
- return PTR_ERR(cow);
-
- /* cow is set to blocking by btrfs_init_new_buffer */
-
- copy_extent_buffer(cow, buf, 0, 0, cow->len);
- btrfs_set_header_bytenr(cow, cow->start);
- btrfs_set_header_generation(cow, trans->transid);
- btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
- btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
- BTRFS_HEADER_FLAG_RELOC);
- if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
- btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
- else
- btrfs_set_header_owner(cow, root->root_key.objectid);
-
- write_extent_buffer(cow, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(cow),
- BTRFS_FSID_SIZE);
-
- ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- return ret;
- }
-
- if (root->ref_cows)
- btrfs_reloc_cow_block(trans, root, buf, cow);
-
- if (buf == root->node) {
- WARN_ON(parent && parent != buf);
- if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
- btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
- parent_start = buf->start;
- else
- parent_start = 0;
-
- extent_buffer_get(cow);
- rcu_assign_pointer(root->node, cow);
-
- btrfs_free_tree_block(trans, root, buf, parent_start,
- last_ref, 1);
- free_extent_buffer(buf);
- add_root_to_dirty_list(root);
- } else {
- if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
- parent_start = parent->start;
- else
- parent_start = 0;
-
- WARN_ON(trans->transid != btrfs_header_generation(parent));
- btrfs_set_node_blockptr(parent, parent_slot,
- cow->start);
- btrfs_set_node_ptr_generation(parent, parent_slot,
- trans->transid);
- btrfs_mark_buffer_dirty(parent);
- btrfs_free_tree_block(trans, root, buf, parent_start,
- last_ref, 1);
- }
- if (unlock_orig)
- btrfs_tree_unlock(buf);
- free_extent_buffer_stale(buf);
- btrfs_mark_buffer_dirty(cow);
- *cow_ret = cow;
- return 0;
-}
-
-static inline int should_cow_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *buf)
-{
- /* ensure we can see the force_cow */
- smp_rmb();
-
- /*
- * We do not need to cow a block if
- * 1) this block is not created or changed in this transaction;
- * 2) this block does not belong to TREE_RELOC tree;
- * 3) the root is not forced COW.
- *
- * What is forced COW:
-	 * when we create a snapshot during committing the transaction,
-	 * after we've finished copying the src root, we must COW the shared
- * block to ensure the metadata consistency.
- */
- if (btrfs_header_generation(buf) == trans->transid &&
- !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
- !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
- btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
- !root->force_cow)
- return 0;
- return 1;
-}
-
-/*
- * cows a single block, see __btrfs_cow_block for the real work.
- * This version of it has extra checks so that a block isn't cow'd more than
- * once per transaction, as long as it hasn't been written yet
- */
-noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *buf,
- struct extent_buffer *parent, int parent_slot,
- struct extent_buffer **cow_ret)
-{
- u64 search_start;
- int ret;
-
- if (trans->transaction != root->fs_info->running_transaction) {
- printk(KERN_CRIT "trans %llu running %llu\n",
- (unsigned long long)trans->transid,
- (unsigned long long)
- root->fs_info->running_transaction->transid);
- WARN_ON(1);
- }
- if (trans->transid != root->fs_info->generation) {
- printk(KERN_CRIT "trans %llu running %llu\n",
- (unsigned long long)trans->transid,
- (unsigned long long)root->fs_info->generation);
- WARN_ON(1);
- }
-
- if (!should_cow_block(trans, root, buf)) {
- *cow_ret = buf;
- return 0;
- }
-
- search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
-
- if (parent)
- btrfs_set_lock_blocking(parent);
- btrfs_set_lock_blocking(buf);
-
- ret = __btrfs_cow_block(trans, root, buf, parent,
- parent_slot, cow_ret, search_start, 0);
-
- trace_btrfs_cow_block(root, buf, *cow_ret);
-
- return ret;
-}
-
-/*
- * helper function for defrag to decide if two blocks pointed to by a
- * node are actually close by
- */
-static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
-{
- if (blocknr < other && other - (blocknr + blocksize) < 32768)
- return 1;
- if (blocknr > other && blocknr - (other + blocksize) < 32768)
- return 1;
- return 0;
-}
-
-/*
- * compare two keys in a memcmp fashion
- */
-static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
-{
- struct btrfs_key k1;
-
- btrfs_disk_key_to_cpu(&k1, disk);
-
- return btrfs_comp_cpu_keys(&k1, k2);
-}
-
-/*
- * same as comp_keys only with two btrfs_key's
- */
-int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
-{
- if (k1->objectid > k2->objectid)
- return 1;
- if (k1->objectid < k2->objectid)
- return -1;
- if (k1->type > k2->type)
- return 1;
- if (k1->type < k2->type)
- return -1;
- if (k1->offset > k2->offset)
- return 1;
- if (k1->offset < k2->offset)
- return -1;
- return 0;
-}
-
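
Editor's note: btrfs_comp_cpu_keys() above orders keys by objectid, then type, then offset, so a smaller type wins even when the offset is much larger. A small sketch with made-up numeric values (the real type constants live in ctree.h):

    #include <stdio.h>

    struct key { unsigned long long objectid; unsigned char type; unsigned long long offset; };

    static int comp_keys(const struct key *k1, const struct key *k2)
    {
        if (k1->objectid != k2->objectid)
            return k1->objectid > k2->objectid ? 1 : -1;
        if (k1->type != k2->type)
            return k1->type > k2->type ? 1 : -1;
        if (k1->offset != k2->offset)
            return k1->offset > k2->offset ? 1 : -1;
        return 0;
    }

    int main(void)
    {
        struct key a = { 5, 84, 4096 };
        struct key b = { 5, 96, 0 };

        printf("comp_keys(a, b) = %d\n", comp_keys(&a, &b));    /* -1: a sorts first */
        return 0;
    }
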
-/*
- * this is used by the defrag code to go through all the
- * leaves pointed to by a node and reallocate them so that
- * disk order is close to key order
- */
-int btrfs_realloc_node(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *parent,
- int start_slot, int cache_only, u64 *last_ret,
- struct btrfs_key *progress)
-{
- struct extent_buffer *cur;
- u64 blocknr;
- u64 gen;
- u64 search_start = *last_ret;
- u64 last_block = 0;
- u64 other;
- u32 parent_nritems;
- int end_slot;
- int i;
- int err = 0;
- int parent_level;
- int uptodate;
- u32 blocksize;
- int progress_passed = 0;
- struct btrfs_disk_key disk_key;
-
- parent_level = btrfs_header_level(parent);
- if (cache_only && parent_level != 1)
- return 0;
-
- if (trans->transaction != root->fs_info->running_transaction)
- WARN_ON(1);
- if (trans->transid != root->fs_info->generation)
- WARN_ON(1);
-
- parent_nritems = btrfs_header_nritems(parent);
- blocksize = btrfs_level_size(root, parent_level - 1);
- end_slot = parent_nritems;
-
- if (parent_nritems == 1)
- return 0;
-
- btrfs_set_lock_blocking(parent);
-
- for (i = start_slot; i < end_slot; i++) {
- int close = 1;
-
- btrfs_node_key(parent, &disk_key, i);
- if (!progress_passed && comp_keys(&disk_key, progress) < 0)
- continue;
-
- progress_passed = 1;
- blocknr = btrfs_node_blockptr(parent, i);
- gen = btrfs_node_ptr_generation(parent, i);
- if (last_block == 0)
- last_block = blocknr;
-
- if (i > 0) {
- other = btrfs_node_blockptr(parent, i - 1);
- close = close_blocks(blocknr, other, blocksize);
- }
- if (!close && i < end_slot - 2) {
- other = btrfs_node_blockptr(parent, i + 1);
- close = close_blocks(blocknr, other, blocksize);
- }
- if (close) {
- last_block = blocknr;
- continue;
- }
-
- cur = btrfs_find_tree_block(root, blocknr, blocksize);
- if (cur)
- uptodate = btrfs_buffer_uptodate(cur, gen, 0);
- else
- uptodate = 0;
- if (!cur || !uptodate) {
- if (cache_only) {
- free_extent_buffer(cur);
- continue;
- }
- if (!cur) {
- cur = read_tree_block(root, blocknr,
- blocksize, gen);
- if (!cur)
- return -EIO;
- } else if (!uptodate) {
- btrfs_read_buffer(cur, gen);
- }
- }
- if (search_start == 0)
- search_start = last_block;
-
- btrfs_tree_lock(cur);
- btrfs_set_lock_blocking(cur);
- err = __btrfs_cow_block(trans, root, cur, parent, i,
- &cur, search_start,
- min(16 * blocksize,
- (end_slot - i) * blocksize));
- if (err) {
- btrfs_tree_unlock(cur);
- free_extent_buffer(cur);
- break;
- }
- search_start = cur->start;
- last_block = cur->start;
- *last_ret = search_start;
- btrfs_tree_unlock(cur);
- free_extent_buffer(cur);
- }
- return err;
-}
-
-/*
- * The leaf data grows from end-to-front in the node.
- * this returns the address of the start of the last item,
- * which is the stop of the leaf data stack
- */
-static inline unsigned int leaf_data_end(struct btrfs_root *root,
- struct extent_buffer *leaf)
-{
- u32 nr = btrfs_header_nritems(leaf);
- if (nr == 0)
- return BTRFS_LEAF_DATA_SIZE(root);
- return btrfs_item_offset_nr(leaf, nr - 1);
-}
-
-
-/*
- * search for key in the extent_buffer. The items start at offset p,
- * and they are item_size apart. There are 'max' items in p.
- *
- * the slot in the array is returned via slot, and it points to
- * the place where you would insert key if it is not found in
- * the array.
- *
- * slot may point to max if the key is bigger than all of the keys
- */
-static noinline int generic_bin_search(struct extent_buffer *eb,
- unsigned long p,
- int item_size, struct btrfs_key *key,
- int max, int *slot)
-{
- int low = 0;
- int high = max;
- int mid;
- int ret;
- struct btrfs_disk_key *tmp = NULL;
- struct btrfs_disk_key unaligned;
- unsigned long offset;
- char *kaddr = NULL;
- unsigned long map_start = 0;
- unsigned long map_len = 0;
- int err;
-
- while (low < high) {
- mid = (low + high) / 2;
- offset = p + mid * item_size;
-
- if (!kaddr || offset < map_start ||
- (offset + sizeof(struct btrfs_disk_key)) >
- map_start + map_len) {
-
- err = map_private_extent_buffer(eb, offset,
- sizeof(struct btrfs_disk_key),
- &kaddr, &map_start, &map_len);
-
- if (!err) {
- tmp = (struct btrfs_disk_key *)(kaddr + offset -
- map_start);
- } else {
- read_extent_buffer(eb, &unaligned,
- offset, sizeof(unaligned));
- tmp = &unaligned;
- }
-
- } else {
- tmp = (struct btrfs_disk_key *)(kaddr + offset -
- map_start);
- }
- ret = comp_keys(tmp, key);
-
- if (ret < 0)
- low = mid + 1;
- else if (ret > 0)
- high = mid;
- else {
- *slot = mid;
- return 0;
- }
- }
- *slot = low;
- return 1;
-}
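
generic_bin_search() above is a standard binary search over fixed-size records that also reports where a missing key would be inserted. The same return contract on a plain sorted int array, as a self-contained sketch (the array stands in for the on-disk keys):

#include <stdio.h>

/* returns 0 and sets *slot when found, 1 and the insertion slot otherwise */
static int toy_bin_search(const int *keys, int max, int key, int *slot)
{
        int low = 0, high = max;

        while (low < high) {
                int mid = (low + high) / 2;

                if (keys[mid] < key)
                        low = mid + 1;
                else if (keys[mid] > key)
                        high = mid;
                else {
                        *slot = mid;
                        return 0;
                }
        }
        *slot = low;            /* may equal max if key is bigger than all keys */
        return 1;
}

int main(void)
{
        int keys[] = { 10, 20, 30, 40 };
        int slot;

        printf("%d at slot %d\n", toy_bin_search(keys, 4, 30, &slot), slot); /* 0, 2 */
        printf("%d at slot %d\n", toy_bin_search(keys, 4, 35, &slot), slot); /* 1, 3 */
        return 0;
}
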
-
-/*
- * simple bin_search frontend that does the right thing for
- * leaves vs nodes
- */
-static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
- int level, int *slot)
-{
- if (level == 0) {
- return generic_bin_search(eb,
- offsetof(struct btrfs_leaf, items),
- sizeof(struct btrfs_item),
- key, btrfs_header_nritems(eb),
- slot);
- } else {
- return generic_bin_search(eb,
- offsetof(struct btrfs_node, ptrs),
- sizeof(struct btrfs_key_ptr),
- key, btrfs_header_nritems(eb),
- slot);
- }
- return -1;
-}
-
-int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
- int level, int *slot)
-{
- return bin_search(eb, key, level, slot);
-}
-
-static void root_add_used(struct btrfs_root *root, u32 size)
-{
- spin_lock(&root->accounting_lock);
- btrfs_set_root_used(&root->root_item,
- btrfs_root_used(&root->root_item) + size);
- spin_unlock(&root->accounting_lock);
-}
-
-static void root_sub_used(struct btrfs_root *root, u32 size)
-{
- spin_lock(&root->accounting_lock);
- btrfs_set_root_used(&root->root_item,
- btrfs_root_used(&root->root_item) - size);
- spin_unlock(&root->accounting_lock);
-}
-
-/* given a node and slot number, this reads the blocks it points to. The
- * extent buffer is returned with a reference taken (but unlocked).
- * NULL is returned on error.
- */
-static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
- struct extent_buffer *parent, int slot)
-{
- int level = btrfs_header_level(parent);
- if (slot < 0)
- return NULL;
- if (slot >= btrfs_header_nritems(parent))
- return NULL;
-
- BUG_ON(level == 0);
-
- return read_tree_block(root, btrfs_node_blockptr(parent, slot),
- btrfs_level_size(root, level - 1),
- btrfs_node_ptr_generation(parent, slot));
-}
-
-/*
- * node level balancing, used to make sure nodes are in proper order for
- * item deletion. We balance from the top down, so we have to make sure
- * that a deletion won't leave a node completely empty later on.
- */
-static noinline int balance_level(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, int level)
-{
- struct extent_buffer *right = NULL;
- struct extent_buffer *mid;
- struct extent_buffer *left = NULL;
- struct extent_buffer *parent = NULL;
- int ret = 0;
- int wret;
- int pslot;
- int orig_slot = path->slots[level];
- u64 orig_ptr;
-
- if (level == 0)
- return 0;
-
- mid = path->nodes[level];
-
- WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
- path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
- WARN_ON(btrfs_header_generation(mid) != trans->transid);
-
- orig_ptr = btrfs_node_blockptr(mid, orig_slot);
-
- if (level < BTRFS_MAX_LEVEL - 1) {
- parent = path->nodes[level + 1];
- pslot = path->slots[level + 1];
- }
-
- /*
- * deal with the case where there is only one pointer in the root
- * by promoting the node below to a root
- */
- if (!parent) {
- struct extent_buffer *child;
-
- if (btrfs_header_nritems(mid) != 1)
- return 0;
-
- /* promote the child to a root */
- child = read_node_slot(root, mid, 0);
- if (!child) {
- ret = -EROFS;
- btrfs_std_error(root->fs_info, ret);
- goto enospc;
- }
-
- btrfs_tree_lock(child);
- btrfs_set_lock_blocking(child);
- ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
- if (ret) {
- btrfs_tree_unlock(child);
- free_extent_buffer(child);
- goto enospc;
- }
-
- rcu_assign_pointer(root->node, child);
-
- add_root_to_dirty_list(root);
- btrfs_tree_unlock(child);
-
- path->locks[level] = 0;
- path->nodes[level] = NULL;
- clean_tree_block(trans, root, mid);
- btrfs_tree_unlock(mid);
- /* once for the path */
- free_extent_buffer(mid);
-
- root_sub_used(root, mid->len);
- btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
- /* once for the root ptr */
- free_extent_buffer_stale(mid);
- return 0;
- }
- if (btrfs_header_nritems(mid) >
- BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
- return 0;
-
- btrfs_header_nritems(mid);
-
- left = read_node_slot(root, parent, pslot - 1);
- if (left) {
- btrfs_tree_lock(left);
- btrfs_set_lock_blocking(left);
- wret = btrfs_cow_block(trans, root, left,
- parent, pslot - 1, &left);
- if (wret) {
- ret = wret;
- goto enospc;
- }
- }
- right = read_node_slot(root, parent, pslot + 1);
- if (right) {
- btrfs_tree_lock(right);
- btrfs_set_lock_blocking(right);
- wret = btrfs_cow_block(trans, root, right,
- parent, pslot + 1, &right);
- if (wret) {
- ret = wret;
- goto enospc;
- }
- }
-
- /* first, try to make some room in the middle buffer */
- if (left) {
- orig_slot += btrfs_header_nritems(left);
- wret = push_node_left(trans, root, left, mid, 1);
- if (wret < 0)
- ret = wret;
- btrfs_header_nritems(mid);
- }
-
- /*
- * then try to empty the right most buffer into the middle
- */
- if (right) {
- wret = push_node_left(trans, root, mid, right, 1);
- if (wret < 0 && wret != -ENOSPC)
- ret = wret;
- if (btrfs_header_nritems(right) == 0) {
- clean_tree_block(trans, root, right);
- btrfs_tree_unlock(right);
- del_ptr(trans, root, path, level + 1, pslot + 1);
- root_sub_used(root, right->len);
- btrfs_free_tree_block(trans, root, right, 0, 1, 0);
- free_extent_buffer_stale(right);
- right = NULL;
- } else {
- struct btrfs_disk_key right_key;
- btrfs_node_key(right, &right_key, 0);
- btrfs_set_node_key(parent, &right_key, pslot + 1);
- btrfs_mark_buffer_dirty(parent);
- }
- }
- if (btrfs_header_nritems(mid) == 1) {
- /*
- * we're not allowed to leave a node with one item in the
- * tree during a delete. A deletion from lower in the tree
- * could try to delete the only pointer in this node.
- * So, pull some keys from the left.
- * There has to be a left pointer at this point because
- * otherwise we would have pulled some pointers from the
- * right
- */
- if (!left) {
- ret = -EROFS;
- btrfs_std_error(root->fs_info, ret);
- goto enospc;
- }
- wret = balance_node_right(trans, root, mid, left);
- if (wret < 0) {
- ret = wret;
- goto enospc;
- }
- if (wret == 1) {
- wret = push_node_left(trans, root, left, mid, 1);
- if (wret < 0)
- ret = wret;
- }
- BUG_ON(wret == 1);
- }
- if (btrfs_header_nritems(mid) == 0) {
- clean_tree_block(trans, root, mid);
- btrfs_tree_unlock(mid);
- del_ptr(trans, root, path, level + 1, pslot);
- root_sub_used(root, mid->len);
- btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
- free_extent_buffer_stale(mid);
- mid = NULL;
- } else {
- /* update the parent key to reflect our changes */
- struct btrfs_disk_key mid_key;
- btrfs_node_key(mid, &mid_key, 0);
- btrfs_set_node_key(parent, &mid_key, pslot);
- btrfs_mark_buffer_dirty(parent);
- }
-
- /* update the path */
- if (left) {
- if (btrfs_header_nritems(left) > orig_slot) {
- extent_buffer_get(left);
- /* left was locked after cow */
- path->nodes[level] = left;
- path->slots[level + 1] -= 1;
- path->slots[level] = orig_slot;
- if (mid) {
- btrfs_tree_unlock(mid);
- free_extent_buffer(mid);
- }
- } else {
- orig_slot -= btrfs_header_nritems(left);
- path->slots[level] = orig_slot;
- }
- }
- /* double check we haven't messed things up */
- if (orig_ptr !=
- btrfs_node_blockptr(path->nodes[level], path->slots[level]))
- BUG();
-enospc:
- if (right) {
- btrfs_tree_unlock(right);
- free_extent_buffer(right);
- }
- if (left) {
- if (path->nodes[level] != left)
- btrfs_tree_unlock(left);
- free_extent_buffer(left);
- }
- return ret;
-}
-
-/* Node balancing for insertion. Here we only split or push nodes around
- * when they are completely full. This is also done top down, so we
- * have to be pessimistic.
- */
-static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, int level)
-{
- struct extent_buffer *right = NULL;
- struct extent_buffer *mid;
- struct extent_buffer *left = NULL;
- struct extent_buffer *parent = NULL;
- int ret = 0;
- int wret;
- int pslot;
- int orig_slot = path->slots[level];
-
- if (level == 0)
- return 1;
-
- mid = path->nodes[level];
- WARN_ON(btrfs_header_generation(mid) != trans->transid);
-
- if (level < BTRFS_MAX_LEVEL - 1) {
- parent = path->nodes[level + 1];
- pslot = path->slots[level + 1];
- }
-
- if (!parent)
- return 1;
-
- left = read_node_slot(root, parent, pslot - 1);
-
- /* first, try to make some room in the middle buffer */
- if (left) {
- u32 left_nr;
-
- btrfs_tree_lock(left);
- btrfs_set_lock_blocking(left);
-
- left_nr = btrfs_header_nritems(left);
- if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
- wret = 1;
- } else {
- ret = btrfs_cow_block(trans, root, left, parent,
- pslot - 1, &left);
- if (ret)
- wret = 1;
- else {
- wret = push_node_left(trans, root,
- left, mid, 0);
- }
- }
- if (wret < 0)
- ret = wret;
- if (wret == 0) {
- struct btrfs_disk_key disk_key;
- orig_slot += left_nr;
- btrfs_node_key(mid, &disk_key, 0);
- btrfs_set_node_key(parent, &disk_key, pslot);
- btrfs_mark_buffer_dirty(parent);
- if (btrfs_header_nritems(left) > orig_slot) {
- path->nodes[level] = left;
- path->slots[level + 1] -= 1;
- path->slots[level] = orig_slot;
- btrfs_tree_unlock(mid);
- free_extent_buffer(mid);
- } else {
- orig_slot -=
- btrfs_header_nritems(left);
- path->slots[level] = orig_slot;
- btrfs_tree_unlock(left);
- free_extent_buffer(left);
- }
- return 0;
- }
- btrfs_tree_unlock(left);
- free_extent_buffer(left);
- }
- right = read_node_slot(root, parent, pslot + 1);
-
- /*
- * then try to empty the right most buffer into the middle
- */
- if (right) {
- u32 right_nr;
-
- btrfs_tree_lock(right);
- btrfs_set_lock_blocking(right);
-
- right_nr = btrfs_header_nritems(right);
- if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
- wret = 1;
- } else {
- ret = btrfs_cow_block(trans, root, right,
- parent, pslot + 1,
- &right);
- if (ret)
- wret = 1;
- else {
- wret = balance_node_right(trans, root,
- right, mid);
- }
- }
- if (wret < 0)
- ret = wret;
- if (wret == 0) {
- struct btrfs_disk_key disk_key;
-
- btrfs_node_key(right, &disk_key, 0);
- btrfs_set_node_key(parent, &disk_key, pslot + 1);
- btrfs_mark_buffer_dirty(parent);
-
- if (btrfs_header_nritems(mid) <= orig_slot) {
- path->nodes[level] = right;
- path->slots[level + 1] += 1;
- path->slots[level] = orig_slot -
- btrfs_header_nritems(mid);
- btrfs_tree_unlock(mid);
- free_extent_buffer(mid);
- } else {
- btrfs_tree_unlock(right);
- free_extent_buffer(right);
- }
- return 0;
- }
- btrfs_tree_unlock(right);
- free_extent_buffer(right);
- }
- return 1;
-}
-
-/*
- * readahead one full node of leaves, finding things that are close
- * to the block in 'slot', and triggering ra on them.
- */
-static void reada_for_search(struct btrfs_root *root,
- struct btrfs_path *path,
- int level, int slot, u64 objectid)
-{
- struct extent_buffer *node;
- struct btrfs_disk_key disk_key;
- u32 nritems;
- u64 search;
- u64 target;
- u64 nread = 0;
- u64 gen;
- int direction = path->reada;
- struct extent_buffer *eb;
- u32 nr;
- u32 blocksize;
- u32 nscan = 0;
-
- if (level != 1)
- return;
-
- if (!path->nodes[level])
- return;
-
- node = path->nodes[level];
-
- search = btrfs_node_blockptr(node, slot);
- blocksize = btrfs_level_size(root, level - 1);
- eb = btrfs_find_tree_block(root, search, blocksize);
- if (eb) {
- free_extent_buffer(eb);
- return;
- }
-
- target = search;
-
- nritems = btrfs_header_nritems(node);
- nr = slot;
-
- while (1) {
- if (direction < 0) {
- if (nr == 0)
- break;
- nr--;
- } else if (direction > 0) {
- nr++;
- if (nr >= nritems)
- break;
- }
- if (path->reada < 0 && objectid) {
- btrfs_node_key(node, &disk_key, nr);
- if (btrfs_disk_key_objectid(&disk_key) != objectid)
- break;
- }
- search = btrfs_node_blockptr(node, nr);
- if ((search <= target && target - search <= 65536) ||
- (search > target && search - target <= 65536)) {
- gen = btrfs_node_ptr_generation(node, nr);
- readahead_tree_block(root, search, blocksize, gen);
- nread += blocksize;
- }
- nscan++;
- if ((nread > 65536 || nscan > 32))
- break;
- }
-}
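
The readahead loop above walks outward from the target slot and only issues reads for blocks that land within a 64K window of the target, giving up after 64K of readahead or 32 scanned slots. A stripped-down sketch of that scan on a plain array of block numbers (the array contents, pretend blocksize, and limits are illustrative):

#include <stdio.h>

static void toy_reada(const unsigned long long *blocks, unsigned int nritems,
                      unsigned int slot, int direction)
{
        unsigned long long target = blocks[slot];
        unsigned long long nread = 0;
        unsigned int nr = slot, nscan = 0;

        while (1) {
                unsigned long long dist;

                if (direction < 0) {
                        if (nr == 0)
                                break;
                        nr--;
                } else {
                        nr++;
                        if (nr >= nritems)
                                break;
                }
                dist = blocks[nr] > target ? blocks[nr] - target
                                           : target - blocks[nr];
                if (dist <= 65536) {
                        printf("readahead block %llu\n", blocks[nr]);
                        nread += 4096;          /* pretend blocksize */
                }
                if (++nscan > 32 || nread > 65536)
                        break;
        }
}

int main(void)
{
        unsigned long long blocks[] = { 4096, 8192, 12288, 1048576, 16384 };

        toy_reada(blocks, 5, 1, 1);     /* reads 12288 and 16384, skips the far one */
        return 0;
}
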
-
-/*
- * returns -EAGAIN if it had to drop the path, or zero if everything was in
- * cache
- */
-static noinline int reada_for_balance(struct btrfs_root *root,
- struct btrfs_path *path, int level)
-{
- int slot;
- int nritems;
- struct extent_buffer *parent;
- struct extent_buffer *eb;
- u64 gen;
- u64 block1 = 0;
- u64 block2 = 0;
- int ret = 0;
- int blocksize;
-
- parent = path->nodes[level + 1];
- if (!parent)
- return 0;
-
- nritems = btrfs_header_nritems(parent);
- slot = path->slots[level + 1];
- blocksize = btrfs_level_size(root, level);
-
- if (slot > 0) {
- block1 = btrfs_node_blockptr(parent, slot - 1);
- gen = btrfs_node_ptr_generation(parent, slot - 1);
- eb = btrfs_find_tree_block(root, block1, blocksize);
- /*
- * if we get -EAGAIN from btrfs_buffer_uptodate, we
- * don't want to return -EAGAIN here. That will loop
- * forever
- */
- if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
- block1 = 0;
- free_extent_buffer(eb);
- }
- if (slot + 1 < nritems) {
- block2 = btrfs_node_blockptr(parent, slot + 1);
- gen = btrfs_node_ptr_generation(parent, slot + 1);
- eb = btrfs_find_tree_block(root, block2, blocksize);
- if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
- block2 = 0;
- free_extent_buffer(eb);
- }
- if (block1 || block2) {
- ret = -EAGAIN;
-
- /* release the whole path */
- btrfs_release_path(path);
-
- /* read the blocks */
- if (block1)
- readahead_tree_block(root, block1, blocksize, 0);
- if (block2)
- readahead_tree_block(root, block2, blocksize, 0);
-
- if (block1) {
- eb = read_tree_block(root, block1, blocksize, 0);
- free_extent_buffer(eb);
- }
- if (block2) {
- eb = read_tree_block(root, block2, blocksize, 0);
- free_extent_buffer(eb);
- }
- }
- return ret;
-}
-
-
-/*
- * when we walk down the tree, it is usually safe to unlock the higher layers
- * in the tree. The exceptions are when our path goes through slot 0, because
- * operations on the tree might require changing key pointers higher up in the
- * tree.
- *
- * callers might also have set path->keep_locks, which tells this code to keep
- * the lock if the path points to the last slot in the block. This is part of
- * walking through the tree, and selecting the next slot in the higher block.
- *
- * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so
- * if lowest_unlock is 1, level 0 won't be unlocked
- */
-static noinline void unlock_up(struct btrfs_path *path, int level,
- int lowest_unlock, int min_write_lock_level,
- int *write_lock_level)
-{
- int i;
- int skip_level = level;
- int no_skips = 0;
- struct extent_buffer *t;
-
- for (i = level; i < BTRFS_MAX_LEVEL; i++) {
- if (!path->nodes[i])
- break;
- if (!path->locks[i])
- break;
- if (!no_skips && path->slots[i] == 0) {
- skip_level = i + 1;
- continue;
- }
- if (!no_skips && path->keep_locks) {
- u32 nritems;
- t = path->nodes[i];
- nritems = btrfs_header_nritems(t);
- if (nritems < 1 || path->slots[i] >= nritems - 1) {
- skip_level = i + 1;
- continue;
- }
- }
- if (skip_level < i && i >= lowest_unlock)
- no_skips = 1;
-
- t = path->nodes[i];
- if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
- btrfs_tree_unlock_rw(t, path->locks[i]);
- path->locks[i] = 0;
- if (write_lock_level &&
- i > min_write_lock_level &&
- i <= *write_lock_level) {
- *write_lock_level = i - 1;
- }
- }
- }
-}
-
-/*
- * This releases any locks held in the path starting at level and
- * going all the way up to the root.
- *
- * btrfs_search_slot will keep the lock held on higher nodes in a few
- * corner cases, such as COW of the block at slot zero in the node. This
- * ignores those rules, and it should only be called when there are no
- * more updates to be done higher up in the tree.
- */
-noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
-{
- int i;
-
- if (path->keep_locks)
- return;
-
- for (i = level; i < BTRFS_MAX_LEVEL; i++) {
- if (!path->nodes[i])
- continue;
- if (!path->locks[i])
- continue;
- btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
- path->locks[i] = 0;
- }
-}
-
-/*
- * helper function for btrfs_search_slot. The goal is to find a block
- * in cache without setting the path to blocking. If we find the block
- * we return zero and the path is unchanged.
- *
- * If we can't find the block, we set the path blocking and do some
- * reada. -EAGAIN is returned and the search must be repeated.
- */
-static int
-read_block_for_search(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *p,
- struct extent_buffer **eb_ret, int level, int slot,
- struct btrfs_key *key)
-{
- u64 blocknr;
- u64 gen;
- u32 blocksize;
- struct extent_buffer *b = *eb_ret;
- struct extent_buffer *tmp;
- int ret;
-
- blocknr = btrfs_node_blockptr(b, slot);
- gen = btrfs_node_ptr_generation(b, slot);
- blocksize = btrfs_level_size(root, level - 1);
-
- tmp = btrfs_find_tree_block(root, blocknr, blocksize);
- if (tmp) {
- /* first we do an atomic uptodate check */
- if (btrfs_buffer_uptodate(tmp, 0, 1) > 0) {
- if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
- /*
- * we found an up to date block without
- * sleeping, return
- * right away
- */
- *eb_ret = tmp;
- return 0;
- }
- /* the pages were up to date, but we failed
- * the generation number check. Do a full
- * read for the generation number that is correct.
- * We must do this without dropping locks so
- * we can trust our generation number
- */
- free_extent_buffer(tmp);
- btrfs_set_path_blocking(p);
-
- /* now we're allowed to do a blocking uptodate check */
- tmp = read_tree_block(root, blocknr, blocksize, gen);
- if (tmp && btrfs_buffer_uptodate(tmp, gen, 0) > 0) {
- *eb_ret = tmp;
- return 0;
- }
- free_extent_buffer(tmp);
- btrfs_release_path(p);
- return -EIO;
- }
- }
-
- /*
- * reduce lock contention at high levels
- * of the btree by dropping locks before
- * we read. Don't release the lock on the current
- * level because we need to walk this node to figure
- * out which blocks to read.
- */
- btrfs_unlock_up_safe(p, level + 1);
- btrfs_set_path_blocking(p);
-
- free_extent_buffer(tmp);
- if (p->reada)
- reada_for_search(root, p, level, slot, key->objectid);
-
- btrfs_release_path(p);
-
- ret = -EAGAIN;
- tmp = read_tree_block(root, blocknr, blocksize, 0);
- if (tmp) {
- /*
- * If the read above didn't mark this buffer up to date,
- * it will never end up being up to date. Set ret to EIO now
- * and give up so that our caller doesn't loop forever
- * on our EAGAINs.
- */
- if (!btrfs_buffer_uptodate(tmp, 0, 0))
- ret = -EIO;
- free_extent_buffer(tmp);
- }
- return ret;
-}
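
read_block_for_search() above follows a common pattern: try a cheap, non-blocking cache lookup first; on a miss, drop the path, kick off the read, and return -EAGAIN so the caller restarts the search with fresh locks. A stripped-down sketch of that contract follows; the toy cache, block numbers, and helper name are invented for illustration.

#include <stdio.h>
#include <errno.h>

#define TOY_CACHE_SLOTS 4

static int cache[TOY_CACHE_SLOTS];      /* 0 = empty, nonzero = cached block nr */

static int toy_read_block(int blocknr, int *out)
{
        int i;

        /* first, a cheap lookup that never blocks */
        for (i = 0; i < TOY_CACHE_SLOTS; i++) {
                if (cache[i] == blocknr) {
                        *out = blocknr;
                        return 0;
                }
        }

        /* miss: start the (pretend) slow read, then ask the caller to
         * drop its locks and retry from the top, mirroring -EAGAIN above
         */
        cache[blocknr % TOY_CACHE_SLOTS] = blocknr;
        return -EAGAIN;
}

int main(void)
{
        int block;
        int ret = toy_read_block(42, &block);

        while (ret == -EAGAIN)
                ret = toy_read_block(42, &block);
        printf("got block %d (ret %d)\n", block, ret);
        return 0;
}
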
-
-/*
- * helper function for btrfs_search_slot. This does all of the checks
- * for node-level blocks and does any balancing required based on
- * the ins_len.
- *
- * If no extra work was required, zero is returned. If we had to
- * drop the path, -EAGAIN is returned and btrfs_search_slot must
- * start over
- */
-static int
-setup_nodes_for_search(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *p,
- struct extent_buffer *b, int level, int ins_len,
- int *write_lock_level)
-{
- int ret;
- if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
- BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
- int sret;
-
- if (*write_lock_level < level + 1) {
- *write_lock_level = level + 1;
- btrfs_release_path(p);
- goto again;
- }
-
- sret = reada_for_balance(root, p, level);
- if (sret)
- goto again;
-
- btrfs_set_path_blocking(p);
- sret = split_node(trans, root, p, level);
- btrfs_clear_path_blocking(p, NULL, 0);
-
- BUG_ON(sret > 0);
- if (sret) {
- ret = sret;
- goto done;
- }
- b = p->nodes[level];
- } else if (ins_len < 0 && btrfs_header_nritems(b) <
- BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
- int sret;
-
- if (*write_lock_level < level + 1) {
- *write_lock_level = level + 1;
- btrfs_release_path(p);
- goto again;
- }
-
- sret = reada_for_balance(root, p, level);
- if (sret)
- goto again;
-
- btrfs_set_path_blocking(p);
- sret = balance_level(trans, root, p, level);
- btrfs_clear_path_blocking(p, NULL, 0);
-
- if (sret) {
- ret = sret;
- goto done;
- }
- b = p->nodes[level];
- if (!b) {
- btrfs_release_path(p);
- goto again;
- }
- BUG_ON(btrfs_header_nritems(b) == 1);
- }
- return 0;
-
-again:
- ret = -EAGAIN;
-done:
- return ret;
-}
-
-/*
- * look for key in the tree. path is filled in with nodes along the way.
- * if key is found, we return zero and you can find the item in the leaf
- * level of the path (level 0)
- *
- * If the key isn't found, the path points to the slot where it should
- * be inserted, and 1 is returned. If there are other errors during the
- * search a negative error number is returned.
- *
- * if ins_len > 0, nodes and leaves will be split as we walk down the
- * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
- * possible)
- */
-int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *key, struct btrfs_path *p, int
- ins_len, int cow)
-{
- struct extent_buffer *b;
- int slot;
- int ret;
- int err;
- int level;
- int lowest_unlock = 1;
- int root_lock;
- /* everything at write_lock_level or lower must be write locked */
- int write_lock_level = 0;
- u8 lowest_level = 0;
- int min_write_lock_level;
-
- lowest_level = p->lowest_level;
- WARN_ON(lowest_level && ins_len > 0);
- WARN_ON(p->nodes[0] != NULL);
-
- if (ins_len < 0) {
- lowest_unlock = 2;
-
- /* when we are removing items, we might have to go up to level
- * two as we update tree pointers. Make sure we keep write
- * locks for those levels as well
- */
- write_lock_level = 2;
- } else if (ins_len > 0) {
- /*
- * for inserting items, make sure we have a write lock on
- * level 1 so we can update keys
- */
- write_lock_level = 1;
- }
-
- if (!cow)
- write_lock_level = -1;
-
- if (cow && (p->keep_locks || p->lowest_level))
- write_lock_level = BTRFS_MAX_LEVEL;
-
- min_write_lock_level = write_lock_level;
-
-again:
- /*
- * we try very hard to do read locks on the root
- */
- root_lock = BTRFS_READ_LOCK;
- level = 0;
- if (p->search_commit_root) {
- /*
- * the commit roots are read only
- * so we always do read locks
- */
- b = root->commit_root;
- extent_buffer_get(b);
- level = btrfs_header_level(b);
- if (!p->skip_locking)
- btrfs_tree_read_lock(b);
- } else {
- if (p->skip_locking) {
- b = btrfs_root_node(root);
- level = btrfs_header_level(b);
- } else {
- /* we don't know the level of the root node
- * until we actually have it read locked
- */
- b = btrfs_read_lock_root_node(root);
- level = btrfs_header_level(b);
- if (level <= write_lock_level) {
- /* whoops, must trade for write lock */
- btrfs_tree_read_unlock(b);
- free_extent_buffer(b);
- b = btrfs_lock_root_node(root);
- root_lock = BTRFS_WRITE_LOCK;
-
- /* the level might have changed, check again */
- level = btrfs_header_level(b);
- }
- }
- }
- p->nodes[level] = b;
- if (!p->skip_locking)
- p->locks[level] = root_lock;
-
- while (b) {
- level = btrfs_header_level(b);
-
- /*
- * setup the path here so we can release it under lock
- * contention with the cow code
- */
- if (cow) {
- /*
- * if we don't really need to cow this block
- * then we don't want to set the path blocking,
- * so we test it here
- */
- if (!should_cow_block(trans, root, b))
- goto cow_done;
-
- btrfs_set_path_blocking(p);
-
- /*
- * must have write locks on this node and the
- * parent
- */
- if (level + 1 > write_lock_level) {
- write_lock_level = level + 1;
- btrfs_release_path(p);
- goto again;
- }
-
- err = btrfs_cow_block(trans, root, b,
- p->nodes[level + 1],
- p->slots[level + 1], &b);
- if (err) {
- ret = err;
- goto done;
- }
- }
-cow_done:
- BUG_ON(!cow && ins_len);
-
- p->nodes[level] = b;
- btrfs_clear_path_blocking(p, NULL, 0);
-
- /*
- * we have a lock on b and as long as we aren't changing
- * the tree, there is no way for the items in b to change.
- * It is safe to drop the lock on our parent before we
- * go through the expensive btree search on b.
- *
- * If cow is true, then we might be changing slot zero,
- * which may require changing the parent. So, we can't
- * drop the lock until after we know which slot we're
- * operating on.
- */
- if (!cow)
- btrfs_unlock_up_safe(p, level + 1);
-
- ret = bin_search(b, key, level, &slot);
-
- if (level != 0) {
- int dec = 0;
- if (ret && slot > 0) {
- dec = 1;
- slot -= 1;
- }
- p->slots[level] = slot;
- err = setup_nodes_for_search(trans, root, p, b, level,
- ins_len, &write_lock_level);
- if (err == -EAGAIN)
- goto again;
- if (err) {
- ret = err;
- goto done;
- }
- b = p->nodes[level];
- slot = p->slots[level];
-
- /*
- * slot 0 is special, if we change the key
- * we have to update the parent pointer
- * which means we must have a write lock
- * on the parent
- */
- if (slot == 0 && cow &&
- write_lock_level < level + 1) {
- write_lock_level = level + 1;
- btrfs_release_path(p);
- goto again;
- }
-
- unlock_up(p, level, lowest_unlock,
- min_write_lock_level, &write_lock_level);
-
- if (level == lowest_level) {
- if (dec)
- p->slots[level]++;
- goto done;
- }
-
- err = read_block_for_search(trans, root, p,
- &b, level, slot, key);
- if (err == -EAGAIN)
- goto again;
- if (err) {
- ret = err;
- goto done;
- }
-
- if (!p->skip_locking) {
- level = btrfs_header_level(b);
- if (level <= write_lock_level) {
- err = btrfs_try_tree_write_lock(b);
- if (!err) {
- btrfs_set_path_blocking(p);
- btrfs_tree_lock(b);
- btrfs_clear_path_blocking(p, b,
- BTRFS_WRITE_LOCK);
- }
- p->locks[level] = BTRFS_WRITE_LOCK;
- } else {
- err = btrfs_try_tree_read_lock(b);
- if (!err) {
- btrfs_set_path_blocking(p);
- btrfs_tree_read_lock(b);
- btrfs_clear_path_blocking(p, b,
- BTRFS_READ_LOCK);
- }
- p->locks[level] = BTRFS_READ_LOCK;
- }
- p->nodes[level] = b;
- }
- } else {
- p->slots[level] = slot;
- if (ins_len > 0 &&
- btrfs_leaf_free_space(root, b) < ins_len) {
- if (write_lock_level < 1) {
- write_lock_level = 1;
- btrfs_release_path(p);
- goto again;
- }
-
- btrfs_set_path_blocking(p);
- err = split_leaf(trans, root, key,
- p, ins_len, ret == 0);
- btrfs_clear_path_blocking(p, NULL, 0);
-
- BUG_ON(err > 0);
- if (err) {
- ret = err;
- goto done;
- }
- }
- if (!p->search_for_split)
- unlock_up(p, level, lowest_unlock,
- min_write_lock_level, &write_lock_level);
- goto done;
- }
- }
- ret = 1;
-done:
- /*
- * we don't really know what they plan on doing with the path
- * from here on, so for now just mark it as blocking
- */
- if (!p->leave_spinning)
- btrfs_set_path_blocking(p);
- if (ret < 0)
- btrfs_release_path(p);
- return ret;
-}
-
-/*
- * adjust the pointers going up the tree, starting at level
- * making sure the right key of each node points to 'key'.
- * This is used after shifting pointers to the left, so it stops
- * fixing up pointers when a given leaf/node is not in slot 0 of the
- * higher levels
- *
- */
-static void fixup_low_keys(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_disk_key *key, int level)
-{
- int i;
- struct extent_buffer *t;
-
- for (i = level; i < BTRFS_MAX_LEVEL; i++) {
- int tslot = path->slots[i];
- if (!path->nodes[i])
- break;
- t = path->nodes[i];
- btrfs_set_node_key(t, key, tslot);
- btrfs_mark_buffer_dirty(path->nodes[i]);
- if (tslot != 0)
- break;
- }
-}
-
-/*
- * update item key.
- *
- * This function isn't completely safe. It's the caller's responsibility
- * that the new key won't break the order
- */
-void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *new_key)
-{
- struct btrfs_disk_key disk_key;
- struct extent_buffer *eb;
- int slot;
-
- eb = path->nodes[0];
- slot = path->slots[0];
- if (slot > 0) {
- btrfs_item_key(eb, &disk_key, slot - 1);
- BUG_ON(comp_keys(&disk_key, new_key) >= 0);
- }
- if (slot < btrfs_header_nritems(eb) - 1) {
- btrfs_item_key(eb, &disk_key, slot + 1);
- BUG_ON(comp_keys(&disk_key, new_key) <= 0);
- }
-
- btrfs_cpu_key_to_disk(&disk_key, new_key);
- btrfs_set_item_key(eb, &disk_key, slot);
- btrfs_mark_buffer_dirty(eb);
- if (slot == 0)
- fixup_low_keys(trans, root, path, &disk_key, 1);
-}
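
btrfs_set_item_key_safe() above relies on the caller to pick a key that keeps the leaf sorted; the BUG_ON()s only verify that the new key still falls strictly between its neighbours before the slot is overwritten. The same invariant on a plain int array, as a hedged sketch rather than the kernel code:

#include <assert.h>
#include <stdio.h>

/* overwrite keys[slot] only if the result stays strictly ordered */
static void toy_set_key_safe(int *keys, int nritems, int slot, int new_key)
{
        if (slot > 0)
                assert(keys[slot - 1] < new_key);
        if (slot < nritems - 1)
                assert(keys[slot + 1] > new_key);
        keys[slot] = new_key;
}

int main(void)
{
        int keys[] = { 10, 20, 30 };

        toy_set_key_safe(keys, 3, 1, 25);       /* fine: 10 < 25 < 30 */
        printf("keys: %d %d %d\n", keys[0], keys[1], keys[2]);
        return 0;
}
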
-
-/*
- * try to push data from one node into the next node left in the
- * tree.
- *
- * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
- * error, and > 0 if there was no room in the left hand block.
- */
-static int push_node_left(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *dst,
- struct extent_buffer *src, int empty)
-{
- int push_items = 0;
- int src_nritems;
- int dst_nritems;
- int ret = 0;
-
- src_nritems = btrfs_header_nritems(src);
- dst_nritems = btrfs_header_nritems(dst);
- push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
- WARN_ON(btrfs_header_generation(src) != trans->transid);
- WARN_ON(btrfs_header_generation(dst) != trans->transid);
-
- if (!empty && src_nritems <= 8)
- return 1;
-
- if (push_items <= 0)
- return 1;
-
- if (empty) {
- push_items = min(src_nritems, push_items);
- if (push_items < src_nritems) {
- /* leave at least 8 pointers in the node if
- * we aren't going to empty it
- */
- if (src_nritems - push_items < 8) {
- if (push_items <= 8)
- return 1;
- push_items -= 8;
- }
- }
- } else
- push_items = min(src_nritems - 8, push_items);
-
- copy_extent_buffer(dst, src,
- btrfs_node_key_ptr_offset(dst_nritems),
- btrfs_node_key_ptr_offset(0),
- push_items * sizeof(struct btrfs_key_ptr));
-
- if (push_items < src_nritems) {
- memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
- btrfs_node_key_ptr_offset(push_items),
- (src_nritems - push_items) *
- sizeof(struct btrfs_key_ptr));
- }
- btrfs_set_header_nritems(src, src_nritems - push_items);
- btrfs_set_header_nritems(dst, dst_nritems + push_items);
- btrfs_mark_buffer_dirty(src);
- btrfs_mark_buffer_dirty(dst);
-
- return ret;
-}
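
push_node_left() above copies the leading key pointers of src onto the tail of dst and then compacts src; only the item counts change, the pointers themselves move verbatim. A minimal array version of the same shuffle, mirroring the copy_extent_buffer()/memmove_extent_buffer() pair (names and sizes are illustrative):

#include <stdio.h>
#include <string.h>

static void toy_push_left(int *dst, int *dst_n, int *src, int *src_n, int push)
{
        /* append the first 'push' entries of src onto dst */
        memcpy(&dst[*dst_n], src, push * sizeof(int));
        /* slide what is left in src down to slot 0 */
        if (push < *src_n)
                memmove(src, &src[push], (*src_n - push) * sizeof(int));
        *dst_n += push;
        *src_n -= push;
}

int main(void)
{
        int dst[8] = { 1, 2 }, src[8] = { 3, 4, 5 };
        int dst_n = 2, src_n = 3, i;

        toy_push_left(dst, &dst_n, src, &src_n, 2);
        printf("dst:");
        for (i = 0; i < dst_n; i++)
                printf(" %d", dst[i]);
        printf("  src:");
        for (i = 0; i < src_n; i++)
                printf(" %d", src[i]);
        printf("\n");           /* dst: 1 2 3 4  src: 5 */
        return 0;
}
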
-
-/*
- * try to push data from one node into the next node right in the
- * tree.
- *
- * returns 0 if some ptrs were pushed, < 0 if there was some horrible
- * error, and > 0 if there was no room in the right hand block.
- *
- * this will only push up to 1/2 the contents of the left node over
- */
-static int balance_node_right(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *dst,
- struct extent_buffer *src)
-{
- int push_items = 0;
- int max_push;
- int src_nritems;
- int dst_nritems;
- int ret = 0;
-
- WARN_ON(btrfs_header_generation(src) != trans->transid);
- WARN_ON(btrfs_header_generation(dst) != trans->transid);
-
- src_nritems = btrfs_header_nritems(src);
- dst_nritems = btrfs_header_nritems(dst);
- push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
- if (push_items <= 0)
- return 1;
-
- if (src_nritems < 4)
- return 1;
-
- max_push = src_nritems / 2 + 1;
- /* don't try to empty the node */
- if (max_push >= src_nritems)
- return 1;
-
- if (max_push < push_items)
- push_items = max_push;
-
- memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
- btrfs_node_key_ptr_offset(0),
- (dst_nritems) *
- sizeof(struct btrfs_key_ptr));
-
- copy_extent_buffer(dst, src,
- btrfs_node_key_ptr_offset(0),
- btrfs_node_key_ptr_offset(src_nritems - push_items),
- push_items * sizeof(struct btrfs_key_ptr));
-
- btrfs_set_header_nritems(src, src_nritems - push_items);
- btrfs_set_header_nritems(dst, dst_nritems + push_items);
-
- btrfs_mark_buffer_dirty(src);
- btrfs_mark_buffer_dirty(dst);
-
- return ret;
-}
-
-/*
- * helper function to insert a new root level in the tree.
- * A new node is allocated, and a single item is inserted to
- * point to the existing root
- *
- * returns zero on success or < 0 on failure.
- */
-static noinline int insert_new_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, int level)
-{
- u64 lower_gen;
- struct extent_buffer *lower;
- struct extent_buffer *c;
- struct extent_buffer *old;
- struct btrfs_disk_key lower_key;
-
- BUG_ON(path->nodes[level]);
- BUG_ON(path->nodes[level-1] != root->node);
-
- lower = path->nodes[level-1];
- if (level == 1)
- btrfs_item_key(lower, &lower_key, 0);
- else
- btrfs_node_key(lower, &lower_key, 0);
-
- c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
- root->root_key.objectid, &lower_key,
- level, root->node->start, 0, 0);
- if (IS_ERR(c))
- return PTR_ERR(c);
-
- root_add_used(root, root->nodesize);
-
- memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
- btrfs_set_header_nritems(c, 1);
- btrfs_set_header_level(c, level);
- btrfs_set_header_bytenr(c, c->start);
- btrfs_set_header_generation(c, trans->transid);
- btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(c, root->root_key.objectid);
-
- write_extent_buffer(c, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(c),
- BTRFS_FSID_SIZE);
-
- write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_header_chunk_tree_uuid(c),
- BTRFS_UUID_SIZE);
-
- btrfs_set_node_key(c, &lower_key, 0);
- btrfs_set_node_blockptr(c, 0, lower->start);
- lower_gen = btrfs_header_generation(lower);
- WARN_ON(lower_gen != trans->transid);
-
- btrfs_set_node_ptr_generation(c, 0, lower_gen);
-
- btrfs_mark_buffer_dirty(c);
-
- old = root->node;
- rcu_assign_pointer(root->node, c);
-
- /* the super has an extra ref to root->node */
- free_extent_buffer(old);
-
- add_root_to_dirty_list(root);
- extent_buffer_get(c);
- path->nodes[level] = c;
- path->locks[level] = BTRFS_WRITE_LOCK;
- path->slots[level] = 0;
- return 0;
-}
-
-/*
- * worker function to insert a single pointer in a node.
- * the node should have enough room for the pointer already
- *
- * slot and level indicate where you want the key to go, and
- * blocknr is the block the key points to.
- */
-static void insert_ptr(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_disk_key *key, u64 bytenr,
- int slot, int level)
-{
- struct extent_buffer *lower;
- int nritems;
-
- BUG_ON(!path->nodes[level]);
- btrfs_assert_tree_locked(path->nodes[level]);
- lower = path->nodes[level];
- nritems = btrfs_header_nritems(lower);
- BUG_ON(slot > nritems);
- BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
- if (slot != nritems) {
- memmove_extent_buffer(lower,
- btrfs_node_key_ptr_offset(slot + 1),
- btrfs_node_key_ptr_offset(slot),
- (nritems - slot) * sizeof(struct btrfs_key_ptr));
- }
- btrfs_set_node_key(lower, key, slot);
- btrfs_set_node_blockptr(lower, slot, bytenr);
- WARN_ON(trans->transid == 0);
- btrfs_set_node_ptr_generation(lower, slot, trans->transid);
- btrfs_set_header_nritems(lower, nritems + 1);
- btrfs_mark_buffer_dirty(lower);
-}
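
insert_ptr() above is an ordinary sorted-array insertion: slide everything from 'slot' onward one place to the right, then write the new key/blockptr pair into the gap and bump nritems. The same move on a plain int array, as an illustrative sketch:

#include <stdio.h>
#include <string.h>

#define TOY_MAX 8

static void toy_insert_ptr(int *ptrs, int *nritems, int slot, int value)
{
        if (slot != *nritems)
                memmove(&ptrs[slot + 1], &ptrs[slot],
                        (*nritems - slot) * sizeof(ptrs[0]));
        ptrs[slot] = value;
        (*nritems)++;
}

int main(void)
{
        int ptrs[TOY_MAX] = { 100, 200, 400 };
        int n = 3, i;

        toy_insert_ptr(ptrs, &n, 2, 300);
        for (i = 0; i < n; i++)
                printf("%d ", ptrs[i]);
        printf("\n");           /* 100 200 300 400 */
        return 0;
}
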
-
-/*
- * split the node at the specified level in path in two.
- * The path is corrected to point to the appropriate node after the split
- *
- * Before splitting this tries to make some room in the node by pushing
- * left and right, if either one works, it returns right away.
- *
- * returns 0 on success and < 0 on failure
- */
-static noinline int split_node(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, int level)
-{
- struct extent_buffer *c;
- struct extent_buffer *split;
- struct btrfs_disk_key disk_key;
- int mid;
- int ret;
- u32 c_nritems;
-
- c = path->nodes[level];
- WARN_ON(btrfs_header_generation(c) != trans->transid);
- if (c == root->node) {
- /* trying to split the root, let's make a new one */
- ret = insert_new_root(trans, root, path, level + 1);
- if (ret)
- return ret;
- } else {
- ret = push_nodes_for_insert(trans, root, path, level);
- c = path->nodes[level];
- if (!ret && btrfs_header_nritems(c) <
- BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
- return 0;
- if (ret < 0)
- return ret;
- }
-
- c_nritems = btrfs_header_nritems(c);
- mid = (c_nritems + 1) / 2;
- btrfs_node_key(c, &disk_key, mid);
-
- split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
- root->root_key.objectid,
- &disk_key, level, c->start, 0, 0);
- if (IS_ERR(split))
- return PTR_ERR(split);
-
- root_add_used(root, root->nodesize);
-
- memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
- btrfs_set_header_level(split, btrfs_header_level(c));
- btrfs_set_header_bytenr(split, split->start);
- btrfs_set_header_generation(split, trans->transid);
- btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(split, root->root_key.objectid);
- write_extent_buffer(split, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(split),
- BTRFS_FSID_SIZE);
- write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_header_chunk_tree_uuid(split),
- BTRFS_UUID_SIZE);
-
-
- copy_extent_buffer(split, c,
- btrfs_node_key_ptr_offset(0),
- btrfs_node_key_ptr_offset(mid),
- (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
- btrfs_set_header_nritems(split, c_nritems - mid);
- btrfs_set_header_nritems(c, mid);
- ret = 0;
-
- btrfs_mark_buffer_dirty(c);
- btrfs_mark_buffer_dirty(split);
-
- insert_ptr(trans, root, path, &disk_key, split->start,
- path->slots[level + 1] + 1, level + 1);
-
- if (path->slots[level] >= mid) {
- path->slots[level] -= mid;
- btrfs_tree_unlock(c);
- free_extent_buffer(c);
- path->nodes[level] = split;
- path->slots[level + 1] += 1;
- } else {
- btrfs_tree_unlock(split);
- free_extent_buffer(split);
- }
- return ret;
-}
-
-/*
- * how many bytes are required to store the items in a leaf. start
- * and nr indicate which items in the leaf to check. This totals up the
- * space used both by the item structs and the item data
- */
-static int leaf_space_used(struct extent_buffer *l, int start, int nr)
-{
- int data_len;
- int nritems = btrfs_header_nritems(l);
- int end = min(nritems, start + nr) - 1;
-
- if (!nr)
- return 0;
- data_len = btrfs_item_end_nr(l, start);
- data_len = data_len - btrfs_item_offset_nr(l, end);
- data_len += sizeof(struct btrfs_item) * nr;
- WARN_ON(data_len < 0);
- return data_len;
-}
-
-/*
- * The space between the end of the leaf items and
- * the start of the leaf data. IOW, how much room
- * the leaf has left for both items and data
- */
-noinline int btrfs_leaf_free_space(struct btrfs_root *root,
- struct extent_buffer *leaf)
-{
- int nritems = btrfs_header_nritems(leaf);
- int ret;
- ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
- if (ret < 0) {
- printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, "
- "used %d nritems %d\n",
- ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
- leaf_space_used(leaf, 0, nritems), nritems);
- }
- return ret;
-}
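
leaf_space_used() and btrfs_leaf_free_space() above account for both the fixed-size item headers at the front of the leaf and the variable-size item data packed at the back; whatever is left between the two is the free space. A toy version of that arithmetic (the header and leaf sizes here are assumptions, not the on-disk values):

#include <stdio.h>

#define TOY_LEAF_DATA_SIZE 4096u
#define TOY_ITEM_HEADER    16u          /* assumed per-item header size */

static unsigned int toy_leaf_free_space(unsigned int nritems,
                                        unsigned int total_data_bytes)
{
        unsigned int used = nritems * TOY_ITEM_HEADER + total_data_bytes;

        return TOY_LEAF_DATA_SIZE - used;
}

int main(void)
{
        /* 10 items holding 1000 bytes of data leave 2936 bytes free */
        printf("free: %u\n", toy_leaf_free_space(10, 1000));
        return 0;
}
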
-
-/*
- * min slot controls the lowest index we're willing to push to the
- * right. We'll push up to and including min_slot, but no lower
- */
-static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- int data_size, int empty,
- struct extent_buffer *right,
- int free_space, u32 left_nritems,
- u32 min_slot)
-{
- struct extent_buffer *left = path->nodes[0];
- struct extent_buffer *upper = path->nodes[1];
- struct btrfs_map_token token;
- struct btrfs_disk_key disk_key;
- int slot;
- u32 i;
- int push_space = 0;
- int push_items = 0;
- struct btrfs_item *item;
- u32 nr;
- u32 right_nritems;
- u32 data_end;
- u32 this_item_size;
-
- btrfs_init_map_token(&token);
-
- if (empty)
- nr = 0;
- else
- nr = max_t(u32, 1, min_slot);
-
- if (path->slots[0] >= left_nritems)
- push_space += data_size;
-
- slot = path->slots[1];
- i = left_nritems - 1;
- while (i >= nr) {
- item = btrfs_item_nr(left, i);
-
- if (!empty && push_items > 0) {
- if (path->slots[0] > i)
- break;
- if (path->slots[0] == i) {
- int space = btrfs_leaf_free_space(root, left);
- if (space + push_space * 2 > free_space)
- break;
- }
- }
-
- if (path->slots[0] == i)
- push_space += data_size;
-
- this_item_size = btrfs_item_size(left, item);
- if (this_item_size + sizeof(*item) + push_space > free_space)
- break;
-
- push_items++;
- push_space += this_item_size + sizeof(*item);
- if (i == 0)
- break;
- i--;
- }
-
- if (push_items == 0)
- goto out_unlock;
-
- if (!empty && push_items == left_nritems)
- WARN_ON(1);
-
- /* push left to right */
- right_nritems = btrfs_header_nritems(right);
-
- push_space = btrfs_item_end_nr(left, left_nritems - push_items);
- push_space -= leaf_data_end(root, left);
-
- /* make room in the right data area */
- data_end = leaf_data_end(root, right);
- memmove_extent_buffer(right,
- btrfs_leaf_data(right) + data_end - push_space,
- btrfs_leaf_data(right) + data_end,
- BTRFS_LEAF_DATA_SIZE(root) - data_end);
-
- /* copy from the left data area */
- copy_extent_buffer(right, left, btrfs_leaf_data(right) +
- BTRFS_LEAF_DATA_SIZE(root) - push_space,
- btrfs_leaf_data(left) + leaf_data_end(root, left),
- push_space);
-
- memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
- btrfs_item_nr_offset(0),
- right_nritems * sizeof(struct btrfs_item));
-
- /* copy the items from left to right */
- copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
- btrfs_item_nr_offset(left_nritems - push_items),
- push_items * sizeof(struct btrfs_item));
-
- /* update the item pointers */
- right_nritems += push_items;
- btrfs_set_header_nritems(right, right_nritems);
- push_space = BTRFS_LEAF_DATA_SIZE(root);
- for (i = 0; i < right_nritems; i++) {
- item = btrfs_item_nr(right, i);
- push_space -= btrfs_token_item_size(right, item, &token);
- btrfs_set_token_item_offset(right, item, push_space, &token);
- }
-
- left_nritems -= push_items;
- btrfs_set_header_nritems(left, left_nritems);
-
- if (left_nritems)
- btrfs_mark_buffer_dirty(left);
- else
- clean_tree_block(trans, root, left);
-
- btrfs_mark_buffer_dirty(right);
-
- btrfs_item_key(right, &disk_key, 0);
- btrfs_set_node_key(upper, &disk_key, slot + 1);
- btrfs_mark_buffer_dirty(upper);
-
- /* then fixup the leaf pointer in the path */
- if (path->slots[0] >= left_nritems) {
- path->slots[0] -= left_nritems;
- if (btrfs_header_nritems(path->nodes[0]) == 0)
- clean_tree_block(trans, root, path->nodes[0]);
- btrfs_tree_unlock(path->nodes[0]);
- free_extent_buffer(path->nodes[0]);
- path->nodes[0] = right;
- path->slots[1] += 1;
- } else {
- btrfs_tree_unlock(right);
- free_extent_buffer(right);
- }
- return 0;
-
-out_unlock:
- btrfs_tree_unlock(right);
- free_extent_buffer(right);
- return 1;
-}
-
-/*
- * push some data in the path leaf to the right, trying to free up at
- * least data_size bytes. returns zero if the push worked, nonzero otherwise
- *
- * returns 1 if the push failed because the other node didn't have enough
- * room, 0 if everything worked out and < 0 if there were major errors.
- *
- * this will push starting from min_slot to the end of the leaf. It won't
- * push any slot lower than min_slot
- */
-static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path,
- int min_data_size, int data_size,
- int empty, u32 min_slot)
-{
- struct extent_buffer *left = path->nodes[0];
- struct extent_buffer *right;
- struct extent_buffer *upper;
- int slot;
- int free_space;
- u32 left_nritems;
- int ret;
-
- if (!path->nodes[1])
- return 1;
-
- slot = path->slots[1];
- upper = path->nodes[1];
- if (slot >= btrfs_header_nritems(upper) - 1)
- return 1;
-
- btrfs_assert_tree_locked(path->nodes[1]);
-
- right = read_node_slot(root, upper, slot + 1);
- if (right == NULL)
- return 1;
-
- btrfs_tree_lock(right);
- btrfs_set_lock_blocking(right);
-
- free_space = btrfs_leaf_free_space(root, right);
- if (free_space < data_size)
- goto out_unlock;
-
- /* cow and double check */
- ret = btrfs_cow_block(trans, root, right, upper,
- slot + 1, &right);
- if (ret)
- goto out_unlock;
-
- free_space = btrfs_leaf_free_space(root, right);
- if (free_space < data_size)
- goto out_unlock;
-
- left_nritems = btrfs_header_nritems(left);
- if (left_nritems == 0)
- goto out_unlock;
-
- return __push_leaf_right(trans, root, path, min_data_size, empty,
- right, free_space, left_nritems, min_slot);
-out_unlock:
- btrfs_tree_unlock(right);
- free_extent_buffer(right);
- return 1;
-}
-
-/*
- * push some data in the path leaf to the left, trying to free up at
- * least data_size bytes. returns zero if the push worked, nonzero otherwise
- *
- * max_slot can put a limit on how far into the leaf we'll push items. The
- * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
- * items
- */
-static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, int data_size,
- int empty, struct extent_buffer *left,
- int free_space, u32 right_nritems,
- u32 max_slot)
-{
- struct btrfs_disk_key disk_key;
- struct extent_buffer *right = path->nodes[0];
- int i;
- int push_space = 0;
- int push_items = 0;
- struct btrfs_item *item;
- u32 old_left_nritems;
- u32 nr;
- int ret = 0;
- u32 this_item_size;
- u32 old_left_item_size;
- struct btrfs_map_token token;
-
- btrfs_init_map_token(&token);
-
- if (empty)
- nr = min(right_nritems, max_slot);
- else
- nr = min(right_nritems - 1, max_slot);
-
- for (i = 0; i < nr; i++) {
- item = btrfs_item_nr(right, i);
-
- if (!empty && push_items > 0) {
- if (path->slots[0] < i)
- break;
- if (path->slots[0] == i) {
- int space = btrfs_leaf_free_space(root, right);
- if (space + push_space * 2 > free_space)
- break;
- }
- }
-
- if (path->slots[0] == i)
- push_space += data_size;
-
- this_item_size = btrfs_item_size(right, item);
- if (this_item_size + sizeof(*item) + push_space > free_space)
- break;
-
- push_items++;
- push_space += this_item_size + sizeof(*item);
- }
-
- if (push_items == 0) {
- ret = 1;
- goto out;
- }
- if (!empty && push_items == btrfs_header_nritems(right))
- WARN_ON(1);
-
- /* push data from right to left */
- copy_extent_buffer(left, right,
- btrfs_item_nr_offset(btrfs_header_nritems(left)),
- btrfs_item_nr_offset(0),
- push_items * sizeof(struct btrfs_item));
-
- push_space = BTRFS_LEAF_DATA_SIZE(root) -
- btrfs_item_offset_nr(right, push_items - 1);
-
- copy_extent_buffer(left, right, btrfs_leaf_data(left) +
- leaf_data_end(root, left) - push_space,
- btrfs_leaf_data(right) +
- btrfs_item_offset_nr(right, push_items - 1),
- push_space);
- old_left_nritems = btrfs_header_nritems(left);
- BUG_ON(old_left_nritems <= 0);
-
- old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
- for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
- u32 ioff;
-
- item = btrfs_item_nr(left, i);
-
- ioff = btrfs_token_item_offset(left, item, &token);
- btrfs_set_token_item_offset(left, item,
- ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size),
- &token);
- }
- btrfs_set_header_nritems(left, old_left_nritems + push_items);
-
- /* fixup right node */
- if (push_items > right_nritems) {
- printk(KERN_CRIT "push items %d nr %u\n", push_items,
- right_nritems);
- WARN_ON(1);
- }
-
- if (push_items < right_nritems) {
- push_space = btrfs_item_offset_nr(right, push_items - 1) -
- leaf_data_end(root, right);
- memmove_extent_buffer(right, btrfs_leaf_data(right) +
- BTRFS_LEAF_DATA_SIZE(root) - push_space,
- btrfs_leaf_data(right) +
- leaf_data_end(root, right), push_space);
-
- memmove_extent_buffer(right, btrfs_item_nr_offset(0),
- btrfs_item_nr_offset(push_items),
- (btrfs_header_nritems(right) - push_items) *
- sizeof(struct btrfs_item));
- }
- right_nritems -= push_items;
- btrfs_set_header_nritems(right, right_nritems);
- push_space = BTRFS_LEAF_DATA_SIZE(root);
- for (i = 0; i < right_nritems; i++) {
- item = btrfs_item_nr(right, i);
-
- push_space = push_space - btrfs_token_item_size(right,
- item, &token);
- btrfs_set_token_item_offset(right, item, push_space, &token);
- }
-
- btrfs_mark_buffer_dirty(left);
- if (right_nritems)
- btrfs_mark_buffer_dirty(right);
- else
- clean_tree_block(trans, root, right);
-
- btrfs_item_key(right, &disk_key, 0);
- fixup_low_keys(trans, root, path, &disk_key, 1);
-
- /* then fixup the leaf pointer in the path */
- if (path->slots[0] < push_items) {
- path->slots[0] += old_left_nritems;
- btrfs_tree_unlock(path->nodes[0]);
- free_extent_buffer(path->nodes[0]);
- path->nodes[0] = left;
- path->slots[1] -= 1;
- } else {
- btrfs_tree_unlock(left);
- free_extent_buffer(left);
- path->slots[0] -= push_items;
- }
- BUG_ON(path->slots[0] < 0);
- return ret;
-out:
- btrfs_tree_unlock(left);
- free_extent_buffer(left);
- return ret;
-}
-
-/*
- * push some data in the path leaf to the left, trying to free up at
- * least data_size bytes. returns zero if the push worked, nonzero otherwise
- *
- * max_slot can put a limit on how far into the leaf we'll push items. The
- * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
- * items
- */
-static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path, int min_data_size,
- int data_size, int empty, u32 max_slot)
-{
- struct extent_buffer *right = path->nodes[0];
- struct extent_buffer *left;
- int slot;
- int free_space;
- u32 right_nritems;
- int ret = 0;
-
- slot = path->slots[1];
- if (slot == 0)
- return 1;
- if (!path->nodes[1])
- return 1;
-
- right_nritems = btrfs_header_nritems(right);
- if (right_nritems == 0)
- return 1;
-
- btrfs_assert_tree_locked(path->nodes[1]);
-
- left = read_node_slot(root, path->nodes[1], slot - 1);
- if (left == NULL)
- return 1;
-
- btrfs_tree_lock(left);
- btrfs_set_lock_blocking(left);
-
- free_space = btrfs_leaf_free_space(root, left);
- if (free_space < data_size) {
- ret = 1;
- goto out;
- }
-
- /* cow and double check */
- ret = btrfs_cow_block(trans, root, left,
- path->nodes[1], slot - 1, &left);
- if (ret) {
- /* we hit -ENOSPC, but it isn't fatal here */
- if (ret == -ENOSPC)
- ret = 1;
- goto out;
- }
-
- free_space = btrfs_leaf_free_space(root, left);
- if (free_space < data_size) {
- ret = 1;
- goto out;
- }
-
- return __push_leaf_left(trans, root, path, min_data_size,
- empty, left, free_space, right_nritems,
- max_slot);
-out:
- btrfs_tree_unlock(left);
- free_extent_buffer(left);
- return ret;
-}
-
-/*
- * split the path's leaf in two, making sure there is at least data_size
- * available for the resulting leaf level of the path.
- */
-static noinline void copy_for_split(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *l,
- struct extent_buffer *right,
- int slot, int mid, int nritems)
-{
- int data_copy_size;
- int rt_data_off;
- int i;
- struct btrfs_disk_key disk_key;
- struct btrfs_map_token token;
-
- btrfs_init_map_token(&token);
-
- nritems = nritems - mid;
- btrfs_set_header_nritems(right, nritems);
- data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
-
- copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
- btrfs_item_nr_offset(mid),
- nritems * sizeof(struct btrfs_item));
-
- copy_extent_buffer(right, l,
- btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
- data_copy_size, btrfs_leaf_data(l) +
- leaf_data_end(root, l), data_copy_size);
-
- rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
- btrfs_item_end_nr(l, mid);
-
- for (i = 0; i < nritems; i++) {
- struct btrfs_item *item = btrfs_item_nr(right, i);
- u32 ioff;
-
- ioff = btrfs_token_item_offset(right, item, &token);
- btrfs_set_token_item_offset(right, item,
- ioff + rt_data_off, &token);
- }
-
- btrfs_set_header_nritems(l, mid);
- btrfs_item_key(right, &disk_key, 0);
- insert_ptr(trans, root, path, &disk_key, right->start,
- path->slots[1] + 1, 1);
-
- btrfs_mark_buffer_dirty(right);
- btrfs_mark_buffer_dirty(l);
- BUG_ON(path->slots[0] != slot);
-
- if (mid <= slot) {
- btrfs_tree_unlock(path->nodes[0]);
- free_extent_buffer(path->nodes[0]);
- path->nodes[0] = right;
- path->slots[0] -= mid;
- path->slots[1] += 1;
- } else {
- btrfs_tree_unlock(right);
- free_extent_buffer(right);
- }
-
- BUG_ON(path->slots[0] < 0);
-}
-
-/*
- * double splits happen when we need to insert a big item in the middle
- * of a leaf. A double split can leave us with 3 mostly empty leaves:
- * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
- * A B C
- *
- * We avoid this by trying to push the items on either side of our target
- * into the adjacent leaves. If all goes well we can avoid the double split
- * completely.
- */
-static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- int data_size)
-{
- int ret;
- int progress = 0;
- int slot;
- u32 nritems;
-
- slot = path->slots[0];
-
- /*
- * try to push all the items after our slot into the
- * right leaf
- */
- ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
- if (ret < 0)
- return ret;
-
- if (ret == 0)
- progress++;
-
- nritems = btrfs_header_nritems(path->nodes[0]);
- /*
- * our goal is to get our slot at the start or end of a leaf. If
- * we've done so we're done
- */
- if (path->slots[0] == 0 || path->slots[0] == nritems)
- return 0;
-
- if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
- return 0;
-
- /* try to push all the items before our slot into the next leaf */
- slot = path->slots[0];
- ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
- if (ret < 0)
- return ret;
-
- if (ret == 0)
- progress++;
-
- if (progress)
- return 0;
- return 1;
-}
-
-/*
- * split the path's leaf in two, making sure there is at least data_size
- * available for the resulting leaf level of the path.
- *
- * returns 0 if all went well and < 0 on failure.
- */
-static noinline int split_leaf(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_key *ins_key,
- struct btrfs_path *path, int data_size,
- int extend)
-{
- struct btrfs_disk_key disk_key;
- struct extent_buffer *l;
- u32 nritems;
- int mid;
- int slot;
- struct extent_buffer *right;
- int ret = 0;
- int wret;
- int split;
- int num_doubles = 0;
- int tried_avoid_double = 0;
-
- l = path->nodes[0];
- slot = path->slots[0];
- if (extend && data_size + btrfs_item_size_nr(l, slot) +
- sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root))
- return -EOVERFLOW;
-
- /* first try to make some room by pushing left and right */
- if (data_size) {
- wret = push_leaf_right(trans, root, path, data_size,
- data_size, 0, 0);
- if (wret < 0)
- return wret;
- if (wret) {
- wret = push_leaf_left(trans, root, path, data_size,
- data_size, 0, (u32)-1);
- if (wret < 0)
- return wret;
- }
- l = path->nodes[0];
-
- /* did the pushes work? */
- if (btrfs_leaf_free_space(root, l) >= data_size)
- return 0;
- }
-
- if (!path->nodes[1]) {
- ret = insert_new_root(trans, root, path, 1);
- if (ret)
- return ret;
- }
-again:
- split = 1;
- l = path->nodes[0];
- slot = path->slots[0];
- nritems = btrfs_header_nritems(l);
- mid = (nritems + 1) / 2;
-
- if (mid <= slot) {
- if (nritems == 1 ||
- leaf_space_used(l, mid, nritems - mid) + data_size >
- BTRFS_LEAF_DATA_SIZE(root)) {
- if (slot >= nritems) {
- split = 0;
- } else {
- mid = slot;
- if (mid != nritems &&
- leaf_space_used(l, mid, nritems - mid) +
- data_size > BTRFS_LEAF_DATA_SIZE(root)) {
- if (data_size && !tried_avoid_double)
- goto push_for_double;
- split = 2;
- }
- }
- }
- } else {
- if (leaf_space_used(l, 0, mid) + data_size >
- BTRFS_LEAF_DATA_SIZE(root)) {
- if (!extend && data_size && slot == 0) {
- split = 0;
- } else if ((extend || !data_size) && slot == 0) {
- mid = 1;
- } else {
- mid = slot;
- if (mid != nritems &&
- leaf_space_used(l, mid, nritems - mid) +
- data_size > BTRFS_LEAF_DATA_SIZE(root)) {
- if (data_size && !tried_avoid_double)
- goto push_for_double;
- split = 2;
- }
- }
- }
- }
-
- if (split == 0)
- btrfs_cpu_key_to_disk(&disk_key, ins_key);
- else
- btrfs_item_key(l, &disk_key, mid);
-
- right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
- root->root_key.objectid,
- &disk_key, 0, l->start, 0, 0);
- if (IS_ERR(right))
- return PTR_ERR(right);
-
- root_add_used(root, root->leafsize);
-
- memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
- btrfs_set_header_bytenr(right, right->start);
- btrfs_set_header_generation(right, trans->transid);
- btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(right, root->root_key.objectid);
- btrfs_set_header_level(right, 0);
- write_extent_buffer(right, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(right),
- BTRFS_FSID_SIZE);
-
- write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_header_chunk_tree_uuid(right),
- BTRFS_UUID_SIZE);
-
- if (split == 0) {
- if (mid <= slot) {
- btrfs_set_header_nritems(right, 0);
- insert_ptr(trans, root, path, &disk_key, right->start,
- path->slots[1] + 1, 1);
- btrfs_tree_unlock(path->nodes[0]);
- free_extent_buffer(path->nodes[0]);
- path->nodes[0] = right;
- path->slots[0] = 0;
- path->slots[1] += 1;
- } else {
- btrfs_set_header_nritems(right, 0);
- insert_ptr(trans, root, path, &disk_key, right->start,
- path->slots[1], 1);
- btrfs_tree_unlock(path->nodes[0]);
- free_extent_buffer(path->nodes[0]);
- path->nodes[0] = right;
- path->slots[0] = 0;
- if (path->slots[1] == 0)
- fixup_low_keys(trans, root, path,
- &disk_key, 1);
- }
- btrfs_mark_buffer_dirty(right);
- return ret;
- }
-
- copy_for_split(trans, root, path, l, right, slot, mid, nritems);
-
- if (split == 2) {
- BUG_ON(num_doubles != 0);
- num_doubles++;
- goto again;
- }
-
- return 0;
-
-push_for_double:
- push_for_double_split(trans, root, path, data_size);
- tried_avoid_double = 1;
- if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
- return 0;
- goto again;
-}
-
-static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, int ins_len)
-{
- struct btrfs_key key;
- struct extent_buffer *leaf;
- struct btrfs_file_extent_item *fi;
- u64 extent_len = 0;
- u32 item_size;
- int ret;
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-
- BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
- key.type != BTRFS_EXTENT_CSUM_KEY);
-
- if (btrfs_leaf_free_space(root, leaf) >= ins_len)
- return 0;
-
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
- if (key.type == BTRFS_EXTENT_DATA_KEY) {
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- extent_len = btrfs_file_extent_num_bytes(leaf, fi);
- }
- btrfs_release_path(path);
-
- path->keep_locks = 1;
- path->search_for_split = 1;
- ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
- path->search_for_split = 0;
- if (ret < 0)
- goto err;
-
- ret = -EAGAIN;
- leaf = path->nodes[0];
- /* if our item isn't there or got smaller, return now */
- if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
- goto err;
-
- /* the leaf has changed, it now has room. return now */
- if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len)
- goto err;
-
- if (key.type == BTRFS_EXTENT_DATA_KEY) {
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
- goto err;
- }
-
- btrfs_set_path_blocking(path);
- ret = split_leaf(trans, root, &key, path, ins_len, 1);
- if (ret)
- goto err;
-
- path->keep_locks = 0;
- btrfs_unlock_up_safe(path, 1);
- return 0;
-err:
- path->keep_locks = 0;
- return ret;
-}
-
-static noinline int split_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *new_key,
- unsigned long split_offset)
-{
- struct extent_buffer *leaf;
- struct btrfs_item *item;
- struct btrfs_item *new_item;
- int slot;
- char *buf;
- u32 nritems;
- u32 item_size;
- u32 orig_offset;
- struct btrfs_disk_key disk_key;
-
- leaf = path->nodes[0];
- BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
-
- btrfs_set_path_blocking(path);
-
- item = btrfs_item_nr(leaf, path->slots[0]);
- orig_offset = btrfs_item_offset(leaf, item);
- item_size = btrfs_item_size(leaf, item);
-
- buf = kmalloc(item_size, GFP_NOFS);
- if (!buf)
- return -ENOMEM;
-
- read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
- path->slots[0]), item_size);
-
- slot = path->slots[0] + 1;
- nritems = btrfs_header_nritems(leaf);
- if (slot != nritems) {
- /* shift the items */
- memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
- btrfs_item_nr_offset(slot),
- (nritems - slot) * sizeof(struct btrfs_item));
- }
-
- btrfs_cpu_key_to_disk(&disk_key, new_key);
- btrfs_set_item_key(leaf, &disk_key, slot);
-
- new_item = btrfs_item_nr(leaf, slot);
-
- btrfs_set_item_offset(leaf, new_item, orig_offset);
- btrfs_set_item_size(leaf, new_item, item_size - split_offset);
-
- btrfs_set_item_offset(leaf, item,
- orig_offset + item_size - split_offset);
- btrfs_set_item_size(leaf, item, split_offset);
-
- btrfs_set_header_nritems(leaf, nritems + 1);
-
- /* write the data for the start of the original item */
- write_extent_buffer(leaf, buf,
- btrfs_item_ptr_offset(leaf, path->slots[0]),
- split_offset);
-
- /* write the data for the new item */
- write_extent_buffer(leaf, buf + split_offset,
- btrfs_item_ptr_offset(leaf, slot),
- item_size - split_offset);
- btrfs_mark_buffer_dirty(leaf);
-
- BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
- kfree(buf);
- return 0;
-}
-
-/*
- * This function splits a single item into two items,
- * giving 'new_key' to the new item and splitting the
- * old one at split_offset (from the start of the item).
- *
- * The path may be released by this operation. After
- * the split, the path is pointing to the old item. The
- * new item is going to be in the same node as the old one.
- *
- * Note, the item being split must be small enough to live alone on
- * a tree block with room for one extra struct btrfs_item
- *
- * This allows us to split the item in place, keeping a lock on the
- * leaf the entire time.
- */
-int btrfs_split_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *new_key,
- unsigned long split_offset)
-{
- int ret;
- ret = setup_leaf_for_split(trans, root, path,
- sizeof(struct btrfs_item));
- if (ret)
- return ret;
-
- ret = split_item(trans, root, path, new_key, split_offset);
- return ret;
-}
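
As a side note, the bookkeeping that split_item() performs is easier to see in isolation: the combined payload keeps occupying the same window of the leaf's data area, and only the offset/size records are rewritten so that the first split_offset bytes belong to the original key and the remainder to the new one. The stand-alone sketch below models just that arithmetic; demo_item is a simplified illustrative type, not a kernel structure, and the byte copy from the scratch buffer is omitted.

#include <assert.h>
#include <stdint.h>

struct demo_item {
	uint32_t offset;	/* payload start inside the data area */
	uint32_t size;		/* payload length */
};

/*
 * Split one item's bookkeeping at split_offset bytes. The combined payload
 * still occupies the same [offset, offset + size) window; the first part
 * ends up in the upper half of that window, the remainder in the lower half.
 */
static void demo_split_item(struct demo_item *orig, struct demo_item *second,
			    uint32_t split_offset)
{
	uint32_t orig_offset = orig->offset;
	uint32_t item_size = orig->size;

	second->offset = orig_offset;
	second->size = item_size - split_offset;

	orig->offset = orig_offset + item_size - split_offset;
	orig->size = split_offset;
}

int main(void)
{
	struct demo_item a = { .offset = 100, .size = 60 }, b;

	demo_split_item(&a, &b, 24);
	assert(a.offset == 136 && a.size == 24);	/* first 24 bytes of the payload */
	assert(b.offset == 100 && b.size == 36);	/* the remaining 36 bytes */
	return 0;
}

The two windows still tile the original [100, 160) range, which is why only one extra struct btrfs_item worth of free space is needed.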
-
-/*
- * This function duplicates an item, giving 'new_key' to the new item.
- * It guarantees both items live in the same tree leaf and the new item
- * is contiguous with the original item.
- *
- * This allows us to split a file extent in place, keeping a lock on the
- * leaf the entire time.
- */
-int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *new_key)
-{
- struct extent_buffer *leaf;
- int ret;
- u32 item_size;
-
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
- ret = setup_leaf_for_split(trans, root, path,
- item_size + sizeof(struct btrfs_item));
- if (ret)
- return ret;
-
- path->slots[0]++;
- setup_items_for_insert(trans, root, path, new_key, &item_size,
- item_size, item_size +
- sizeof(struct btrfs_item), 1);
- leaf = path->nodes[0];
- memcpy_extent_buffer(leaf,
- btrfs_item_ptr_offset(leaf, path->slots[0]),
- btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
- item_size);
- return 0;
-}
-
-/*
- * make the item pointed to by the path smaller. new_size indicates
- * how small to make it, and from_end tells us if we just chop bytes
- * off the end of the item or if we shift the item to chop bytes off
- * the front.
- */
-void btrfs_truncate_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u32 new_size, int from_end)
-{
- int slot;
- struct extent_buffer *leaf;
- struct btrfs_item *item;
- u32 nritems;
- unsigned int data_end;
- unsigned int old_data_start;
- unsigned int old_size;
- unsigned int size_diff;
- int i;
- struct btrfs_map_token token;
-
- btrfs_init_map_token(&token);
-
- leaf = path->nodes[0];
- slot = path->slots[0];
-
- old_size = btrfs_item_size_nr(leaf, slot);
- if (old_size == new_size)
- return;
-
- nritems = btrfs_header_nritems(leaf);
- data_end = leaf_data_end(root, leaf);
-
- old_data_start = btrfs_item_offset_nr(leaf, slot);
-
- size_diff = old_size - new_size;
-
- BUG_ON(slot < 0);
- BUG_ON(slot >= nritems);
-
- /*
- * item0..itemN ... dataN.offset..dataN.size .. data0.size
- */
- /* first correct the data pointers */
- for (i = slot; i < nritems; i++) {
- u32 ioff;
- item = btrfs_item_nr(leaf, i);
-
- ioff = btrfs_token_item_offset(leaf, item, &token);
- btrfs_set_token_item_offset(leaf, item,
- ioff + size_diff, &token);
- }
-
- /* shift the data */
- if (from_end) {
- memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
- data_end + size_diff, btrfs_leaf_data(leaf) +
- data_end, old_data_start + new_size - data_end);
- } else {
- struct btrfs_disk_key disk_key;
- u64 offset;
-
- btrfs_item_key(leaf, &disk_key, slot);
-
- if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
- unsigned long ptr;
- struct btrfs_file_extent_item *fi;
-
- fi = btrfs_item_ptr(leaf, slot,
- struct btrfs_file_extent_item);
- fi = (struct btrfs_file_extent_item *)(
- (unsigned long)fi - size_diff);
-
- if (btrfs_file_extent_type(leaf, fi) ==
- BTRFS_FILE_EXTENT_INLINE) {
- ptr = btrfs_item_ptr_offset(leaf, slot);
- memmove_extent_buffer(leaf, ptr,
- (unsigned long)fi,
- offsetof(struct btrfs_file_extent_item,
- disk_bytenr));
- }
- }
-
- memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
- data_end + size_diff, btrfs_leaf_data(leaf) +
- data_end, old_data_start - data_end);
-
- offset = btrfs_disk_key_offset(&disk_key);
- btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
- btrfs_set_item_key(leaf, &disk_key, slot);
- if (slot == 0)
- fixup_low_keys(trans, root, path, &disk_key, 1);
- }
-
- item = btrfs_item_nr(leaf, slot);
- btrfs_set_item_size(leaf, item, new_size);
- btrfs_mark_buffer_dirty(leaf);
-
- if (btrfs_leaf_free_space(root, leaf) < 0) {
- btrfs_print_leaf(root, leaf);
- BUG();
- }
-}
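
To make the from_end case above concrete: shrinking an item's payload by size_diff bytes slides every payload between data_end and the truncated item toward the high end of the data area and adds size_diff to the affected offsets. Below is a minimal user-space model of that shift; demo_item and demo_truncate_from_end are illustrative stand-ins, not the extent-buffer helpers.

#include <stdint.h>
#include <string.h>

struct demo_item {
	uint32_t offset;	/* payload start inside the data area */
	uint32_t size;		/* payload length */
};

/*
 * Shrink item 'slot' to new_size by chopping bytes off the end of its payload.
 * 'data' is the leaf data area and 'data_end' the lowest used payload offset.
 */
static void demo_truncate_from_end(uint8_t *data, struct demo_item *items,
				   int nritems, int slot, uint32_t new_size,
				   uint32_t data_end)
{
	uint32_t old_size = items[slot].size;
	uint32_t size_diff = old_size - new_size;
	uint32_t old_start = items[slot].offset;

	/* every payload from this slot onward moves up by size_diff */
	for (int i = slot; i < nritems; i++)
		items[i].offset += size_diff;

	/* slide [data_end, old_start + new_size) toward the end of the buffer */
	memmove(data + data_end + size_diff, data + data_end,
		old_start + new_size - data_end);

	items[slot].size = new_size;
}

int main(void)
{
	uint8_t data[64] = { 0 };
	/* one item whose 16-byte payload starts at offset 40; data_end is 40 */
	struct demo_item items[1] = { { 40, 16 } };

	demo_truncate_from_end(data, items, 1, 0, 10, 40);
	return (items[0].offset == 46 && items[0].size == 10) ? 0 : 1;
}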
-
-/*
- * make the item pointed to by the path bigger, data_size is the new size.
- */
-void btrfs_extend_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- u32 data_size)
-{
- int slot;
- struct extent_buffer *leaf;
- struct btrfs_item *item;
- u32 nritems;
- unsigned int data_end;
- unsigned int old_data;
- unsigned int old_size;
- int i;
- struct btrfs_map_token token;
-
- btrfs_init_map_token(&token);
-
- leaf = path->nodes[0];
-
- nritems = btrfs_header_nritems(leaf);
- data_end = leaf_data_end(root, leaf);
-
- if (btrfs_leaf_free_space(root, leaf) < data_size) {
- btrfs_print_leaf(root, leaf);
- BUG();
- }
- slot = path->slots[0];
- old_data = btrfs_item_end_nr(leaf, slot);
-
- BUG_ON(slot < 0);
- if (slot >= nritems) {
- btrfs_print_leaf(root, leaf);
- printk(KERN_CRIT "slot %d too large, nritems %d\n",
- slot, nritems);
- BUG_ON(1);
- }
-
- /*
- * item0..itemN ... dataN.offset..dataN.size .. data0.size
- */
- /* first correct the data pointers */
- for (i = slot; i < nritems; i++) {
- u32 ioff;
- item = btrfs_item_nr(leaf, i);
-
- ioff = btrfs_token_item_offset(leaf, item, &token);
- btrfs_set_token_item_offset(leaf, item,
- ioff - data_size, &token);
- }
-
- /* shift the data */
- memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
- data_end - data_size, btrfs_leaf_data(leaf) +
- data_end, old_data - data_end);
-
- data_end = old_data;
- old_size = btrfs_item_size_nr(leaf, slot);
- item = btrfs_item_nr(leaf, slot);
- btrfs_set_item_size(leaf, item, old_size + data_size);
- btrfs_mark_buffer_dirty(leaf);
-
- if (btrfs_leaf_free_space(root, leaf) < 0) {
- btrfs_print_leaf(root, leaf);
- BUG();
- }
-}
-
-/*
- * Given a key and some data, insert items into the tree.
- * This does all the path init required, making room in the tree if needed.
- * Returns the number of keys that were inserted.
- */
-int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *cpu_key, u32 *data_size,
- int nr)
-{
- struct extent_buffer *leaf;
- struct btrfs_item *item;
- int ret = 0;
- int slot;
- int i;
- u32 nritems;
- u32 total_data = 0;
- u32 total_size = 0;
- unsigned int data_end;
- struct btrfs_disk_key disk_key;
- struct btrfs_key found_key;
- struct btrfs_map_token token;
-
- btrfs_init_map_token(&token);
-
- for (i = 0; i < nr; i++) {
- if (total_size + data_size[i] + sizeof(struct btrfs_item) >
- BTRFS_LEAF_DATA_SIZE(root)) {
-			nr = i;
-			break;
- }
- total_data += data_size[i];
- total_size += data_size[i] + sizeof(struct btrfs_item);
- }
- BUG_ON(nr == 0);
-
- ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
- if (ret == 0)
- return -EEXIST;
- if (ret < 0)
- goto out;
-
- leaf = path->nodes[0];
-
- nritems = btrfs_header_nritems(leaf);
- data_end = leaf_data_end(root, leaf);
-
- if (btrfs_leaf_free_space(root, leaf) < total_size) {
- for (i = nr; i >= 0; i--) {
- total_data -= data_size[i];
- total_size -= data_size[i] + sizeof(struct btrfs_item);
- if (total_size < btrfs_leaf_free_space(root, leaf))
- break;
- }
- nr = i;
- }
-
- slot = path->slots[0];
- BUG_ON(slot < 0);
-
- if (slot != nritems) {
- unsigned int old_data = btrfs_item_end_nr(leaf, slot);
-
- item = btrfs_item_nr(leaf, slot);
- btrfs_item_key_to_cpu(leaf, &found_key, slot);
-
- /* figure out how many keys we can insert in here */
- total_data = data_size[0];
- for (i = 1; i < nr; i++) {
- if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0)
- break;
- total_data += data_size[i];
- }
- nr = i;
-
- if (old_data < data_end) {
- btrfs_print_leaf(root, leaf);
- printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
- slot, old_data, data_end);
- BUG_ON(1);
- }
- /*
- * item0..itemN ... dataN.offset..dataN.size .. data0.size
- */
- /* first correct the data pointers */
- for (i = slot; i < nritems; i++) {
- u32 ioff;
-
- item = btrfs_item_nr(leaf, i);
- ioff = btrfs_token_item_offset(leaf, item, &token);
- btrfs_set_token_item_offset(leaf, item,
- ioff - total_data, &token);
- }
- /* shift the items */
- memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
- btrfs_item_nr_offset(slot),
- (nritems - slot) * sizeof(struct btrfs_item));
-
- /* shift the data */
- memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
- data_end - total_data, btrfs_leaf_data(leaf) +
- data_end, old_data - data_end);
- data_end = old_data;
- } else {
- /*
-		 * this sucks but it has to be done: if we are inserting at
-		 * the end of the leaf, only insert 1 of the items, since we
-		 * have no way of knowing what's on the next leaf and we'd have
-		 * to drop our current locks to figure it out
- */
- nr = 1;
- }
-
- /* setup the item for the new data */
- for (i = 0; i < nr; i++) {
- btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
- btrfs_set_item_key(leaf, &disk_key, slot + i);
- item = btrfs_item_nr(leaf, slot + i);
- btrfs_set_token_item_offset(leaf, item,
- data_end - data_size[i], &token);
- data_end -= data_size[i];
- btrfs_set_token_item_size(leaf, item, data_size[i], &token);
- }
- btrfs_set_header_nritems(leaf, nritems + nr);
- btrfs_mark_buffer_dirty(leaf);
-
- ret = 0;
- if (slot == 0) {
- btrfs_cpu_key_to_disk(&disk_key, cpu_key);
- fixup_low_keys(trans, root, path, &disk_key, 1);
- }
-
- if (btrfs_leaf_free_space(root, leaf) < 0) {
- btrfs_print_leaf(root, leaf);
- BUG();
- }
-out:
- if (!ret)
- ret = nr;
- return ret;
-}
-
-/*
- * this is a helper for btrfs_insert_empty_items, the main goal here is
- * to save stack depth by doing the bulk of the work in a function
- * that doesn't call btrfs_search_slot
- */
-void setup_items_for_insert(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *cpu_key, u32 *data_size,
- u32 total_data, u32 total_size, int nr)
-{
- struct btrfs_item *item;
- int i;
- u32 nritems;
- unsigned int data_end;
- struct btrfs_disk_key disk_key;
- struct extent_buffer *leaf;
- int slot;
- struct btrfs_map_token token;
-
- btrfs_init_map_token(&token);
-
- leaf = path->nodes[0];
- slot = path->slots[0];
-
- nritems = btrfs_header_nritems(leaf);
- data_end = leaf_data_end(root, leaf);
-
- if (btrfs_leaf_free_space(root, leaf) < total_size) {
- btrfs_print_leaf(root, leaf);
- printk(KERN_CRIT "not enough freespace need %u have %d\n",
- total_size, btrfs_leaf_free_space(root, leaf));
- BUG();
- }
-
- if (slot != nritems) {
- unsigned int old_data = btrfs_item_end_nr(leaf, slot);
-
- if (old_data < data_end) {
- btrfs_print_leaf(root, leaf);
- printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
- slot, old_data, data_end);
- BUG_ON(1);
- }
- /*
- * item0..itemN ... dataN.offset..dataN.size .. data0.size
- */
- /* first correct the data pointers */
- for (i = slot; i < nritems; i++) {
- u32 ioff;
-
- item = btrfs_item_nr(leaf, i);
- ioff = btrfs_token_item_offset(leaf, item, &token);
- btrfs_set_token_item_offset(leaf, item,
- ioff - total_data, &token);
- }
- /* shift the items */
- memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
- btrfs_item_nr_offset(slot),
- (nritems - slot) * sizeof(struct btrfs_item));
-
- /* shift the data */
- memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
- data_end - total_data, btrfs_leaf_data(leaf) +
- data_end, old_data - data_end);
- data_end = old_data;
- }
-
- /* setup the item for the new data */
- for (i = 0; i < nr; i++) {
- btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
- btrfs_set_item_key(leaf, &disk_key, slot + i);
- item = btrfs_item_nr(leaf, slot + i);
- btrfs_set_token_item_offset(leaf, item,
- data_end - data_size[i], &token);
- data_end -= data_size[i];
- btrfs_set_token_item_size(leaf, item, data_size[i], &token);
- }
-
- btrfs_set_header_nritems(leaf, nritems + nr);
-
- if (slot == 0) {
- btrfs_cpu_key_to_disk(&disk_key, cpu_key);
- fixup_low_keys(trans, root, path, &disk_key, 1);
- }
- btrfs_unlock_up_safe(path, 1);
- btrfs_mark_buffer_dirty(leaf);
-
- if (btrfs_leaf_free_space(root, leaf) < 0) {
- btrfs_print_leaf(root, leaf);
- BUG();
- }
-}
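
The same slide-and-renumber pattern shows up whenever slot is not at the end of the leaf: payloads between data_end and the insertion point move down by total_data, the item headers from slot onward move right by nr entries, and the new items receive descending offsets. A compact model over flat arrays follows; the types and the demo_make_room helper are simplified stand-ins, and writing the new payload bytes is left to the caller, much as the kernel function leaves it to its callers.

#include <stdint.h>
#include <string.h>

struct demo_item {
	uint32_t offset;	/* payload start inside the data area */
	uint32_t size;		/* payload length */
};

/* make room for nr new payloads (total_data bytes in all) at item position 'slot' */
static void demo_make_room(uint8_t *data, struct demo_item *items,
			   uint32_t *nritems, uint32_t slot,
			   const uint32_t *data_size, uint32_t nr,
			   uint32_t total_data, uint32_t data_end)
{
	if (slot != *nritems) {
		uint32_t old_data = items[slot].offset + items[slot].size;

		/* existing payloads between data_end and old_data slide down */
		for (uint32_t i = slot; i < *nritems; i++)
			items[i].offset -= total_data;
		memmove(data + data_end - total_data, data + data_end,
			old_data - data_end);

		/* item headers from 'slot' onward move right by nr entries */
		memmove(&items[slot + nr], &items[slot],
			(*nritems - slot) * sizeof(struct demo_item));
		data_end = old_data;
	}

	/* hand out descending offsets to the new items */
	for (uint32_t i = 0; i < nr; i++) {
		items[slot + i].offset = data_end - data_size[i];
		items[slot + i].size = data_size[i];
		data_end -= data_size[i];
	}
	*nritems += nr;
}

int main(void)
{
	uint8_t data[128] = { 0 };
	/* two items: payloads at [100,128) and [80,100); data_end is 80 */
	struct demo_item items[8] = { { 100, 28 }, { 80, 20 } };
	uint32_t nritems = 2;
	const uint32_t new_sizes[1] = { 12 };

	/* insert one 12-byte item between the two existing ones */
	demo_make_room(data, items, &nritems, 1, new_sizes, 1, 12, 80);
	return (nritems == 3 && items[1].offset == 88 &&
		items[2].offset == 68) ? 0 : 1;
}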
-
-/*
- * Given a key and some data, insert items into the tree.
- * This does all the path init required, making room in the tree if needed.
- */
-int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *cpu_key, u32 *data_size,
- int nr)
-{
- int ret = 0;
- int slot;
- int i;
- u32 total_size = 0;
- u32 total_data = 0;
-
- for (i = 0; i < nr; i++)
- total_data += data_size[i];
-
- total_size = total_data + (nr * sizeof(struct btrfs_item));
- ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
- if (ret == 0)
- return -EEXIST;
- if (ret < 0)
- return ret;
-
- slot = path->slots[0];
- BUG_ON(slot < 0);
-
- setup_items_for_insert(trans, root, path, cpu_key, data_size,
- total_data, total_size, nr);
- return 0;
-}
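
The size handed to btrfs_search_slot() is simply the payload bytes plus one item header per key. A tiny illustration, where DEMO_ITEM_SIZE is only a stand-in for sizeof(struct btrfs_item):

#include <stdio.h>

#define DEMO_ITEM_SIZE 25u	/* illustrative; roughly sizeof(struct btrfs_item) */

int main(void)
{
	/* three items with 100, 20 and 8 bytes of payload */
	unsigned data_size[] = { 100, 20, 8 };
	unsigned nr = 3, total_data = 0;

	for (unsigned i = 0; i < nr; i++)
		total_data += data_size[i];

	/* the leaf must have room for the payloads plus nr item headers */
	unsigned total_size = total_data + nr * DEMO_ITEM_SIZE;
	printf("need %u bytes of leaf free space\n", total_size);	/* 203 */
	return 0;
}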
-
-/*
- * Given a key and some data, insert an item into the tree.
- * This does all the path init required, making room in the tree if needed.
- */
-int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *cpu_key, void *data, u32
- data_size)
-{
- int ret = 0;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- unsigned long ptr;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
- if (!ret) {
- leaf = path->nodes[0];
- ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
- write_extent_buffer(leaf, data, ptr, data_size);
- btrfs_mark_buffer_dirty(leaf);
- }
- btrfs_free_path(path);
- return ret;
-}
-
-/*
- * delete the pointer from a given node.
- *
- * the tree should have been previously balanced so the deletion does not
- * empty a node.
- */
-static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_path *path, int level, int slot)
-{
- struct extent_buffer *parent = path->nodes[level];
- u32 nritems;
-
- nritems = btrfs_header_nritems(parent);
- if (slot != nritems - 1) {
- memmove_extent_buffer(parent,
- btrfs_node_key_ptr_offset(slot),
- btrfs_node_key_ptr_offset(slot + 1),
- sizeof(struct btrfs_key_ptr) *
- (nritems - slot - 1));
- }
- nritems--;
- btrfs_set_header_nritems(parent, nritems);
- if (nritems == 0 && parent == root->node) {
- BUG_ON(btrfs_header_level(root->node) != 1);
- /* just turn the root into a leaf and break */
- btrfs_set_header_level(root->node, 0);
- } else if (slot == 0) {
- struct btrfs_disk_key disk_key;
-
- btrfs_node_key(parent, &disk_key, 0);
- fixup_low_keys(trans, root, path, &disk_key, level + 1);
- }
- btrfs_mark_buffer_dirty(parent);
-}
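
del_ptr() is, at heart, an array deletion over the node's key pointers. A few lines of plain C capture it; demo_key_ptr is a simplified stand-in for the on-disk key pointer, not the kernel type.

#include <stdint.h>
#include <string.h>

struct demo_key_ptr {
	uint64_t objectid;	/* stand-in for the full disk key */
	uint64_t blockptr;
	uint64_t generation;
};

/* delete entry 'slot' from a node holding *nritems pointers */
static void demo_del_ptr(struct demo_key_ptr *ptrs, uint32_t *nritems, uint32_t slot)
{
	if (slot != *nritems - 1)
		memmove(&ptrs[slot], &ptrs[slot + 1],
			sizeof(struct demo_key_ptr) * (*nritems - slot - 1));
	(*nritems)--;
}

int main(void)
{
	struct demo_key_ptr ptrs[3] = { { 1, 10, 0 }, { 2, 20, 0 }, { 3, 30, 0 } };
	uint32_t nritems = 3;

	demo_del_ptr(ptrs, &nritems, 1);	/* drop the middle pointer */
	return (nritems == 2 && ptrs[1].objectid == 3) ? 0 : 1;
}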
-
-/*
- * a helper function to delete the leaf pointed to by path->slots[1] and
- * path->nodes[1].
- *
- * This deletes the pointer in path->nodes[1] and frees the leaf
- * block extent.
- *
- * The path must have already been setup for deleting the leaf, including
- * all the proper balancing. path->nodes[1] must be locked.
- */
-static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *leaf)
-{
- WARN_ON(btrfs_header_generation(leaf) != trans->transid);
- del_ptr(trans, root, path, 1, path->slots[1]);
-
- /*
- * btrfs_free_extent is expensive, we want to make sure we
- * aren't holding any locks when we call it
- */
- btrfs_unlock_up_safe(path, 0);
-
- root_sub_used(root, leaf->len);
-
- extent_buffer_get(leaf);
- btrfs_free_tree_block(trans, root, leaf, 0, 1, 0);
- free_extent_buffer_stale(leaf);
-}
-/*
- * delete the item at the leaf level in path. If that empties
- * the leaf, remove it from the tree
- */
-int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_path *path, int slot, int nr)
-{
- struct extent_buffer *leaf;
- struct btrfs_item *item;
- int last_off;
- int dsize = 0;
- int ret = 0;
- int wret;
- int i;
- u32 nritems;
- struct btrfs_map_token token;
-
- btrfs_init_map_token(&token);
-
- leaf = path->nodes[0];
- last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
-
- for (i = 0; i < nr; i++)
- dsize += btrfs_item_size_nr(leaf, slot + i);
-
- nritems = btrfs_header_nritems(leaf);
-
- if (slot + nr != nritems) {
- int data_end = leaf_data_end(root, leaf);
-
- memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
- data_end + dsize,
- btrfs_leaf_data(leaf) + data_end,
- last_off - data_end);
-
- for (i = slot + nr; i < nritems; i++) {
- u32 ioff;
-
- item = btrfs_item_nr(leaf, i);
- ioff = btrfs_token_item_offset(leaf, item, &token);
- btrfs_set_token_item_offset(leaf, item,
- ioff + dsize, &token);
- }
-
- memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
- btrfs_item_nr_offset(slot + nr),
- sizeof(struct btrfs_item) *
- (nritems - slot - nr));
- }
- btrfs_set_header_nritems(leaf, nritems - nr);
- nritems -= nr;
-
- /* delete the leaf if we've emptied it */
- if (nritems == 0) {
- if (leaf == root->node) {
- btrfs_set_header_level(leaf, 0);
- } else {
- btrfs_set_path_blocking(path);
- clean_tree_block(trans, root, leaf);
- btrfs_del_leaf(trans, root, path, leaf);
- }
- } else {
- int used = leaf_space_used(leaf, 0, nritems);
- if (slot == 0) {
- struct btrfs_disk_key disk_key;
-
- btrfs_item_key(leaf, &disk_key, 0);
- fixup_low_keys(trans, root, path, &disk_key, 1);
- }
-
- /* delete the leaf if it is mostly empty */
- if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
- /* push_leaf_left fixes the path.
- * make sure the path still points to our leaf
- * for possible call to del_ptr below
- */
- slot = path->slots[1];
- extent_buffer_get(leaf);
-
- btrfs_set_path_blocking(path);
- wret = push_leaf_left(trans, root, path, 1, 1,
- 1, (u32)-1);
- if (wret < 0 && wret != -ENOSPC)
- ret = wret;
-
- if (path->nodes[0] == leaf &&
- btrfs_header_nritems(leaf)) {
- wret = push_leaf_right(trans, root, path, 1,
- 1, 1, 0);
- if (wret < 0 && wret != -ENOSPC)
- ret = wret;
- }
-
- if (btrfs_header_nritems(leaf) == 0) {
- path->slots[1] = slot;
- btrfs_del_leaf(trans, root, path, leaf);
- free_extent_buffer(leaf);
- ret = 0;
- } else {
- /* if we're still in the path, make sure
- * we're dirty. Otherwise, one of the
- * push_leaf functions must have already
- * dirtied this buffer
- */
- if (path->nodes[0] == leaf)
- btrfs_mark_buffer_dirty(leaf);
- free_extent_buffer(leaf);
- }
- } else {
- btrfs_mark_buffer_dirty(leaf);
- }
- }
- return ret;
-}
-
-/*
- * search the tree again to find a leaf with lesser keys
- * returns 0 if it found something or 1 if there are no lesser leaves.
- * returns < 0 on io errors.
- *
- * This may release the path, and so you may lose any locks held at the
- * time you call it.
- */
-int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
-{
- struct btrfs_key key;
- struct btrfs_disk_key found_key;
- int ret;
-
- btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
-
- if (key.offset > 0)
- key.offset--;
- else if (key.type > 0)
- key.type--;
- else if (key.objectid > 0)
- key.objectid--;
- else
- return 1;
-
- btrfs_release_path(path);
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- return ret;
- btrfs_item_key(path->nodes[0], &found_key, 0);
- ret = comp_keys(&found_key, &key);
- if (ret < 0)
- return 0;
- return 1;
-}
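
The key manipulation at the top of btrfs_prev_leaf() only has to produce some key strictly smaller than the first key of the current leaf; the search that follows then lands in the previous leaf. A sketch of that decrement over a simplified, non-kernel key type:

#include <stdint.h>

struct demo_key {
	uint64_t objectid;
	uint8_t  type;
	uint64_t offset;
};

/* build some key strictly smaller than *k; return 1 if *k is already the minimum */
static int demo_key_before(struct demo_key *k)
{
	if (k->offset > 0)
		k->offset--;
	else if (k->type > 0)
		k->type--;
	else if (k->objectid > 0)
		k->objectid--;
	else
		return 1;
	return 0;
}

int main(void)
{
	struct demo_key k = { .objectid = 5, .type = 1, .offset = 0 };

	/* offset is already 0, so type gets decremented instead */
	return (demo_key_before(&k) == 0 && k.type == 0) ? 0 : 1;
}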
-
-/*
- * A helper function to walk down the tree starting at min_key, and looking
- * for nodes or leaves that are either in cache or have a minimum
- * transaction id. This is used by the btree defrag code, and tree logging
- *
- * This does not cow, but it does stuff the starting key it finds back
- * into min_key, so you can call btrfs_search_slot with cow=1 on the
- * key and get a writable path.
- *
- * This does lock as it descends, and path->keep_locks should be set
- * to 1 by the caller.
- *
- * This honors path->lowest_level to prevent descent past a given level
- * of the tree.
- *
- * min_trans indicates the oldest transaction that you are interested
- * in walking through. Any nodes or leaves older than min_trans are
- * skipped over (without reading them).
- *
- * returns zero if something useful was found, < 0 on error and 1 if there
- * was nothing in the tree that matched the search criteria.
- */
-int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
- struct btrfs_key *max_key,
- struct btrfs_path *path, int cache_only,
- u64 min_trans)
-{
- struct extent_buffer *cur;
- struct btrfs_key found_key;
- int slot;
- int sret;
- u32 nritems;
- int level;
- int ret = 1;
-
- WARN_ON(!path->keep_locks);
-again:
- cur = btrfs_read_lock_root_node(root);
- level = btrfs_header_level(cur);
- WARN_ON(path->nodes[level]);
- path->nodes[level] = cur;
- path->locks[level] = BTRFS_READ_LOCK;
-
- if (btrfs_header_generation(cur) < min_trans) {
- ret = 1;
- goto out;
- }
- while (1) {
- nritems = btrfs_header_nritems(cur);
- level = btrfs_header_level(cur);
- sret = bin_search(cur, min_key, level, &slot);
-
- /* at the lowest level, we're done, setup the path and exit */
- if (level == path->lowest_level) {
- if (slot >= nritems)
- goto find_next_key;
- ret = 0;
- path->slots[level] = slot;
- btrfs_item_key_to_cpu(cur, &found_key, slot);
- goto out;
- }
- if (sret && slot > 0)
- slot--;
- /*
- * check this node pointer against the cache_only and
- * min_trans parameters. If it isn't in cache or is too
- * old, skip to the next one.
- */
- while (slot < nritems) {
- u64 blockptr;
- u64 gen;
- struct extent_buffer *tmp;
- struct btrfs_disk_key disk_key;
-
- blockptr = btrfs_node_blockptr(cur, slot);
- gen = btrfs_node_ptr_generation(cur, slot);
- if (gen < min_trans) {
- slot++;
- continue;
- }
- if (!cache_only)
- break;
-
- if (max_key) {
- btrfs_node_key(cur, &disk_key, slot);
- if (comp_keys(&disk_key, max_key) >= 0) {
- ret = 1;
- goto out;
- }
- }
-
- tmp = btrfs_find_tree_block(root, blockptr,
- btrfs_level_size(root, level - 1));
-
- if (tmp && btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
- free_extent_buffer(tmp);
- break;
- }
- if (tmp)
- free_extent_buffer(tmp);
- slot++;
- }
-find_next_key:
- /*
- * we didn't find a candidate key in this node, walk forward
- * and find another one
- */
- if (slot >= nritems) {
- path->slots[level] = slot;
- btrfs_set_path_blocking(path);
- sret = btrfs_find_next_key(root, path, min_key, level,
- cache_only, min_trans);
- if (sret == 0) {
- btrfs_release_path(path);
- goto again;
- } else {
- goto out;
- }
- }
- /* save our key for returning back */
- btrfs_node_key_to_cpu(cur, &found_key, slot);
- path->slots[level] = slot;
- if (level == path->lowest_level) {
- ret = 0;
- unlock_up(path, level, 1, 0, NULL);
- goto out;
- }
- btrfs_set_path_blocking(path);
- cur = read_node_slot(root, cur, slot);
- BUG_ON(!cur); /* -ENOMEM */
-
- btrfs_tree_read_lock(cur);
-
- path->locks[level - 1] = BTRFS_READ_LOCK;
- path->nodes[level - 1] = cur;
- unlock_up(path, level, 1, 0, NULL);
- btrfs_clear_path_blocking(path, NULL, 0);
- }
-out:
- if (ret == 0)
- memcpy(min_key, &found_key, sizeof(found_key));
- btrfs_set_path_blocking(path);
- return ret;
-}
-
-/*
- * this is similar to btrfs_next_leaf, but does not try to preserve
- * and fixup the path. It looks for and returns the next key in the
- * tree based on the current path and the cache_only and min_trans
- * parameters.
- *
- * 0 is returned if another key is found, < 0 if there are any errors
- * and 1 is returned if there are no higher keys in the tree
- *
- * path->keep_locks should be set to 1 on the search made before
- * calling this function.
- */
-int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *key, int level,
- int cache_only, u64 min_trans)
-{
- int slot;
- struct extent_buffer *c;
-
- WARN_ON(!path->keep_locks);
- while (level < BTRFS_MAX_LEVEL) {
- if (!path->nodes[level])
- return 1;
-
- slot = path->slots[level] + 1;
- c = path->nodes[level];
-next:
- if (slot >= btrfs_header_nritems(c)) {
- int ret;
- int orig_lowest;
- struct btrfs_key cur_key;
- if (level + 1 >= BTRFS_MAX_LEVEL ||
- !path->nodes[level + 1])
- return 1;
-
- if (path->locks[level + 1]) {
- level++;
- continue;
- }
-
- slot = btrfs_header_nritems(c) - 1;
- if (level == 0)
- btrfs_item_key_to_cpu(c, &cur_key, slot);
- else
- btrfs_node_key_to_cpu(c, &cur_key, slot);
-
- orig_lowest = path->lowest_level;
- btrfs_release_path(path);
- path->lowest_level = level;
- ret = btrfs_search_slot(NULL, root, &cur_key, path,
- 0, 0);
- path->lowest_level = orig_lowest;
- if (ret < 0)
- return ret;
-
- c = path->nodes[level];
- slot = path->slots[level];
- if (ret == 0)
- slot++;
- goto next;
- }
-
- if (level == 0)
- btrfs_item_key_to_cpu(c, key, slot);
- else {
- u64 blockptr = btrfs_node_blockptr(c, slot);
- u64 gen = btrfs_node_ptr_generation(c, slot);
-
- if (cache_only) {
- struct extent_buffer *cur;
- cur = btrfs_find_tree_block(root, blockptr,
- btrfs_level_size(root, level - 1));
- if (!cur ||
- btrfs_buffer_uptodate(cur, gen, 1) <= 0) {
- slot++;
- if (cur)
- free_extent_buffer(cur);
- goto next;
- }
- free_extent_buffer(cur);
- }
- if (gen < min_trans) {
- slot++;
- goto next;
- }
- btrfs_node_key_to_cpu(c, key, slot);
- }
- return 0;
- }
- return 1;
-}
-
-/*
- * search the tree again to find a leaf with greater keys
- * returns 0 if it found something or 1 if there are no greater leaves.
- * returns < 0 on io errors.
- */
-int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
-{
- int slot;
- int level;
- struct extent_buffer *c;
- struct extent_buffer *next;
- struct btrfs_key key;
- u32 nritems;
- int ret;
- int old_spinning = path->leave_spinning;
- int next_rw_lock = 0;
-
- nritems = btrfs_header_nritems(path->nodes[0]);
- if (nritems == 0)
- return 1;
-
- btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
-again:
- level = 1;
- next = NULL;
- next_rw_lock = 0;
- btrfs_release_path(path);
-
- path->keep_locks = 1;
- path->leave_spinning = 1;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- path->keep_locks = 0;
-
- if (ret < 0)
- return ret;
-
- nritems = btrfs_header_nritems(path->nodes[0]);
- /*
- * by releasing the path above we dropped all our locks. A balance
- * could have added more items next to the key that used to be
- * at the very end of the block. So, check again here and
- * advance the path if there are now more items available.
- */
- if (nritems > 0 && path->slots[0] < nritems - 1) {
- if (ret == 0)
- path->slots[0]++;
- ret = 0;
- goto done;
- }
-
- while (level < BTRFS_MAX_LEVEL) {
- if (!path->nodes[level]) {
- ret = 1;
- goto done;
- }
-
- slot = path->slots[level] + 1;
- c = path->nodes[level];
- if (slot >= btrfs_header_nritems(c)) {
- level++;
- if (level == BTRFS_MAX_LEVEL) {
- ret = 1;
- goto done;
- }
- continue;
- }
-
- if (next) {
- btrfs_tree_unlock_rw(next, next_rw_lock);
- free_extent_buffer(next);
- }
-
- next = c;
- next_rw_lock = path->locks[level];
- ret = read_block_for_search(NULL, root, path, &next, level,
- slot, &key);
- if (ret == -EAGAIN)
- goto again;
-
- if (ret < 0) {
- btrfs_release_path(path);
- goto done;
- }
-
- if (!path->skip_locking) {
- ret = btrfs_try_tree_read_lock(next);
- if (!ret) {
- btrfs_set_path_blocking(path);
- btrfs_tree_read_lock(next);
- btrfs_clear_path_blocking(path, next,
- BTRFS_READ_LOCK);
- }
- next_rw_lock = BTRFS_READ_LOCK;
- }
- break;
- }
- path->slots[level] = slot;
- while (1) {
- level--;
- c = path->nodes[level];
- if (path->locks[level])
- btrfs_tree_unlock_rw(c, path->locks[level]);
-
- free_extent_buffer(c);
- path->nodes[level] = next;
- path->slots[level] = 0;
- if (!path->skip_locking)
- path->locks[level] = next_rw_lock;
- if (!level)
- break;
-
- ret = read_block_for_search(NULL, root, path, &next, level,
- 0, &key);
- if (ret == -EAGAIN)
- goto again;
-
- if (ret < 0) {
- btrfs_release_path(path);
- goto done;
- }
-
- if (!path->skip_locking) {
- ret = btrfs_try_tree_read_lock(next);
- if (!ret) {
- btrfs_set_path_blocking(path);
- btrfs_tree_read_lock(next);
- btrfs_clear_path_blocking(path, next,
- BTRFS_READ_LOCK);
- }
- next_rw_lock = BTRFS_READ_LOCK;
- }
- }
- ret = 0;
-done:
- unlock_up(path, 0, 1, 0, NULL);
- path->leave_spinning = old_spinning;
- if (!old_spinning)
- btrfs_set_path_blocking(path);
-
- return ret;
-}
-
-/*
- * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
- * searching until it gets past min_objectid or finds an item of 'type'
- *
- * returns 0 if something is found, 1 if nothing was found and < 0 on error
- */
-int btrfs_previous_item(struct btrfs_root *root,
- struct btrfs_path *path, u64 min_objectid,
- int type)
-{
- struct btrfs_key found_key;
- struct extent_buffer *leaf;
- u32 nritems;
- int ret;
-
- while (1) {
- if (path->slots[0] == 0) {
- btrfs_set_path_blocking(path);
- ret = btrfs_prev_leaf(root, path);
- if (ret != 0)
- return ret;
- } else {
- path->slots[0]--;
- }
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- if (nritems == 0)
- return 1;
- if (path->slots[0] == nritems)
- path->slots[0]--;
-
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- if (found_key.objectid < min_objectid)
- break;
- if (found_key.type == type)
- return 0;
- if (found_key.objectid == min_objectid &&
- found_key.type < type)
- break;
- }
- return 1;
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/ctree.h b/ANDROID_3.4.5/fs/btrfs/ctree.h
deleted file mode 100644
index 8fd72331..00000000
--- a/ANDROID_3.4.5/fs/btrfs/ctree.h
+++ /dev/null
@@ -1,3101 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __BTRFS_CTREE__
-#define __BTRFS_CTREE__
-
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/fs.h>
-#include <linux/rwsem.h>
-#include <linux/completion.h>
-#include <linux/backing-dev.h>
-#include <linux/wait.h>
-#include <linux/slab.h>
-#include <linux/kobject.h>
-#include <trace/events/btrfs.h>
-#include <asm/kmap_types.h>
-#include <linux/pagemap.h>
-#include "extent_io.h"
-#include "extent_map.h"
-#include "async-thread.h"
-#include "ioctl.h"
-
-struct btrfs_trans_handle;
-struct btrfs_transaction;
-struct btrfs_pending_snapshot;
-extern struct kmem_cache *btrfs_trans_handle_cachep;
-extern struct kmem_cache *btrfs_transaction_cachep;
-extern struct kmem_cache *btrfs_bit_radix_cachep;
-extern struct kmem_cache *btrfs_path_cachep;
-extern struct kmem_cache *btrfs_free_space_cachep;
-struct btrfs_ordered_sum;
-
-#define BTRFS_MAGIC "_BHRfS_M"
-
-#define BTRFS_MAX_MIRRORS 2
-
-#define BTRFS_MAX_LEVEL 8
-
-#define BTRFS_COMPAT_EXTENT_TREE_V0
-
-/*
- * files bigger than this get some pre-flushing when they are added
- * to the ordered operations list. That way we limit the total
- * work done by the commit
- */
-#define BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT (8 * 1024 * 1024)
-
-/* holds pointers to all of the tree roots */
-#define BTRFS_ROOT_TREE_OBJECTID 1ULL
-
-/* stores information about which extents are in use, and reference counts */
-#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
-
-/*
- * chunk tree stores translations from logical -> physical block numbering
- * the super block points to the chunk tree
- */
-#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
-
-/*
- * stores information about which areas of a given device are in use.
- * one per device. The tree of tree roots points to the device tree
- */
-#define BTRFS_DEV_TREE_OBJECTID 4ULL
-
-/* one per subvolume, storing files and directories */
-#define BTRFS_FS_TREE_OBJECTID 5ULL
-
-/* directory objectid inside the root tree */
-#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
-
-/* holds checksums of all the data extents */
-#define BTRFS_CSUM_TREE_OBJECTID 7ULL
-
-/* for storing balance parameters in the root tree */
-#define BTRFS_BALANCE_OBJECTID -4ULL
-
-/* orphan objectid for tracking unlinked/truncated files */
-#define BTRFS_ORPHAN_OBJECTID -5ULL
-
-/* does write ahead logging to speed up fsyncs */
-#define BTRFS_TREE_LOG_OBJECTID -6ULL
-#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
-
-/* for space balancing */
-#define BTRFS_TREE_RELOC_OBJECTID -8ULL
-#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
-
-/*
- * extent checksums all have this objectid
- * this allows them to share the logging tree
- * for fsyncs
- */
-#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
-
-/* For storing free space cache */
-#define BTRFS_FREE_SPACE_OBJECTID -11ULL
-
-/*
- * The inode number assigned to the special inode for storing the
- * free ino cache
- */
-#define BTRFS_FREE_INO_OBJECTID -12ULL
-
-/* dummy objectid represents multiple objectids */
-#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
-
-/*
- * All files have objectids in this range.
- */
-#define BTRFS_FIRST_FREE_OBJECTID 256ULL
-#define BTRFS_LAST_FREE_OBJECTID -256ULL
-#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
-
-
-/*
- * the device items go into the chunk tree. The key is in the form
- * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
- */
-#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
-
-#define BTRFS_BTREE_INODE_OBJECTID 1
-
-#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
-
-/*
- * the max metadata block size. This limit is somewhat artificial,
- * but the memmove costs go through the roof for larger blocks.
- */
-#define BTRFS_MAX_METADATA_BLOCKSIZE 65536
-
-/*
- * we can actually store much bigger names, but let's not confuse the rest
- * of linux
- */
-#define BTRFS_NAME_LEN 255
-
-/* 32 bytes in various csum fields */
-#define BTRFS_CSUM_SIZE 32
-
-/* csum types */
-#define BTRFS_CSUM_TYPE_CRC32 0
-
-static int btrfs_csum_sizes[] = { 4, 0 };
-
-/* four bytes for CRC32 */
-#define BTRFS_EMPTY_DIR_SIZE 0
-
-#define BTRFS_FT_UNKNOWN 0
-#define BTRFS_FT_REG_FILE 1
-#define BTRFS_FT_DIR 2
-#define BTRFS_FT_CHRDEV 3
-#define BTRFS_FT_BLKDEV 4
-#define BTRFS_FT_FIFO 5
-#define BTRFS_FT_SOCK 6
-#define BTRFS_FT_SYMLINK 7
-#define BTRFS_FT_XATTR 8
-#define BTRFS_FT_MAX 9
-
-/*
- * The key defines the order in the tree, and so it also defines (optimal)
- * block layout.
- *
- * objectid corresponds to the inode number.
- *
- * type tells us things about the object, and is a kind of stream selector.
- * so for a given inode, keys with type of 1 might refer to the inode data,
- * type of 2 may point to file data in the btree and type == 3 may point to
- * extents.
- *
- * offset is the starting byte offset for this key in the stream.
- *
- * btrfs_disk_key is in disk byte order. struct btrfs_key is always
- * in cpu native order. Otherwise they are identical and their sizes
- * should be the same (ie both packed)
- */
-struct btrfs_disk_key {
- __le64 objectid;
- u8 type;
- __le64 offset;
-} __attribute__ ((__packed__));
-
-struct btrfs_key {
- u64 objectid;
- u8 type;
- u64 offset;
-} __attribute__ ((__packed__));
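
The ordering described in the comment above these two structs, objectid first, then type, then offset, is easy to model in user space. demo_key and demo_comp_keys below are illustrative stand-ins, not the kernel's comp_keys helper:

#include <stdint.h>

/* simplified CPU-order key mirroring struct btrfs_key above */
struct demo_key {
	uint64_t objectid;
	uint8_t  type;
	uint64_t offset;
};

/* order keys by objectid, then type, then offset */
static int demo_comp_keys(const struct demo_key *a, const struct demo_key *b)
{
	if (a->objectid != b->objectid)
		return a->objectid < b->objectid ? -1 : 1;
	if (a->type != b->type)
		return a->type < b->type ? -1 : 1;
	if (a->offset != b->offset)
		return a->offset < b->offset ? -1 : 1;
	return 0;
}

int main(void)
{
	struct demo_key a = { 256, 1, 0 }, b = { 256, 2, 0 };

	return demo_comp_keys(&a, &b) < 0 ? 0 : 1;	/* a sorts before b */
}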
-
-struct btrfs_mapping_tree {
- struct extent_map_tree map_tree;
-};
-
-struct btrfs_dev_item {
- /* the internal btrfs device id */
- __le64 devid;
-
- /* size of the device */
- __le64 total_bytes;
-
- /* bytes used */
- __le64 bytes_used;
-
- /* optimal io alignment for this device */
- __le32 io_align;
-
- /* optimal io width for this device */
- __le32 io_width;
-
- /* minimal io size for this device */
- __le32 sector_size;
-
- /* type and info about this device */
- __le64 type;
-
- /* expected generation for this device */
- __le64 generation;
-
- /*
- * starting byte of this partition on the device,
- * to allow for stripe alignment in the future
- */
- __le64 start_offset;
-
- /* grouping information for allocation decisions */
- __le32 dev_group;
-
- /* seek speed 0-100 where 100 is fastest */
- u8 seek_speed;
-
- /* bandwidth 0-100 where 100 is fastest */
- u8 bandwidth;
-
- /* btrfs generated uuid for this device */
- u8 uuid[BTRFS_UUID_SIZE];
-
- /* uuid of FS who owns this device */
- u8 fsid[BTRFS_UUID_SIZE];
-} __attribute__ ((__packed__));
-
-struct btrfs_stripe {
- __le64 devid;
- __le64 offset;
- u8 dev_uuid[BTRFS_UUID_SIZE];
-} __attribute__ ((__packed__));
-
-struct btrfs_chunk {
- /* size of this chunk in bytes */
- __le64 length;
-
- /* objectid of the root referencing this chunk */
- __le64 owner;
-
- __le64 stripe_len;
- __le64 type;
-
- /* optimal io alignment for this chunk */
- __le32 io_align;
-
- /* optimal io width for this chunk */
- __le32 io_width;
-
- /* minimal io size for this chunk */
- __le32 sector_size;
-
-	/* 2^16 stripes is quite a lot; a second limit is the size of a single
- * item in the btree
- */
- __le16 num_stripes;
-
- /* sub stripes only matter for raid10 */
- __le16 sub_stripes;
- struct btrfs_stripe stripe;
- /* additional stripes go here */
-} __attribute__ ((__packed__));
-
-#define BTRFS_FREE_SPACE_EXTENT 1
-#define BTRFS_FREE_SPACE_BITMAP 2
-
-struct btrfs_free_space_entry {
- __le64 offset;
- __le64 bytes;
- u8 type;
-} __attribute__ ((__packed__));
-
-struct btrfs_free_space_header {
- struct btrfs_disk_key location;
- __le64 generation;
- __le64 num_entries;
- __le64 num_bitmaps;
-} __attribute__ ((__packed__));
-
-static inline unsigned long btrfs_chunk_item_size(int num_stripes)
-{
- BUG_ON(num_stripes == 0);
- return sizeof(struct btrfs_chunk) +
- sizeof(struct btrfs_stripe) * (num_stripes - 1);
-}
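
The '- 1' in this helper accounts for the one struct btrfs_stripe already embedded in struct btrfs_chunk. A worked example with stand-in sizes (80 bytes for the chunk including its embedded stripe, 32 bytes per stripe; the real values come from the packed structs above):

#include <assert.h>

#define DEMO_CHUNK_SIZE		80u	/* stand-in: struct btrfs_chunk, first stripe included */
#define DEMO_STRIPE_SIZE	32u	/* stand-in: struct btrfs_stripe */

static unsigned long demo_chunk_item_size(int num_stripes)
{
	/* one stripe is already part of the chunk struct, so add num_stripes - 1 more */
	return DEMO_CHUNK_SIZE + DEMO_STRIPE_SIZE * (num_stripes - 1);
}

int main(void)
{
	assert(demo_chunk_item_size(1) == 80);	/* just the embedded stripe */
	assert(demo_chunk_item_size(3) == 144);	/* two additional stripes appended */
	return 0;
}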
-
-#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
-#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
-
-/*
- * File system states
- */
-
-/* Errors detected */
-#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2)
-
-#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
-#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
-
-#define BTRFS_BACKREF_REV_MAX 256
-#define BTRFS_BACKREF_REV_SHIFT 56
-#define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \
- BTRFS_BACKREF_REV_SHIFT)
-
-#define BTRFS_OLD_BACKREF_REV 0
-#define BTRFS_MIXED_BACKREF_REV 1
-
-/*
- * every tree block (leaf or node) starts with this header.
- */
-struct btrfs_header {
- /* these first four must match the super block */
- u8 csum[BTRFS_CSUM_SIZE];
- u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
- __le64 bytenr; /* which block this node is supposed to live in */
- __le64 flags;
-
- /* allowed to be different from the super from here on down */
- u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
- __le64 generation;
- __le64 owner;
- __le32 nritems;
- u8 level;
-} __attribute__ ((__packed__));
-
-#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \
- sizeof(struct btrfs_header)) / \
- sizeof(struct btrfs_key_ptr))
-#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header))
-#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize))
-#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
- sizeof(struct btrfs_item) - \
- sizeof(struct btrfs_file_extent_item))
-#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
- sizeof(struct btrfs_item) -\
- sizeof(struct btrfs_dir_item))
-
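
Roughly, these macros say that a leaf's usable data area is the block size minus the header, and that every item costs its payload plus one struct btrfs_item of metadata. A small user-space illustration, where 101 and 25 are approximate stand-ins for the packed header and item sizes:

#include <stdio.h>

#define DEMO_HEADER_SIZE	101u	/* roughly sizeof(struct btrfs_header) */
#define DEMO_ITEM_SIZE		25u	/* roughly sizeof(struct btrfs_item) */

/* usable bytes in a leaf of the given block size */
static unsigned leaf_data_size(unsigned leafsize)
{
	return leafsize - DEMO_HEADER_SIZE;
}

/* free bytes once nritems items carrying 'used' bytes of payload are present */
static unsigned leaf_free_space(unsigned leafsize, unsigned nritems, unsigned used)
{
	return leaf_data_size(leafsize) - nritems * DEMO_ITEM_SIZE - used;
}

int main(void)
{
	/* a 4K leaf holding 10 items with 1000 bytes of payload in total */
	printf("free: %u\n", leaf_free_space(4096, 10, 1000));	/* 2745 */
	return 0;
}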
-
-/*
- * this is a very generous portion of the super block, giving us
- * room to translate 14 chunks with 3 stripes each.
- */
-#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
-#define BTRFS_LABEL_SIZE 256
-
-/*
- * just in case we somehow lose the roots and are not able to mount,
- * we store an array of the roots from previous transactions
- * in the super.
- */
-#define BTRFS_NUM_BACKUP_ROOTS 4
-struct btrfs_root_backup {
- __le64 tree_root;
- __le64 tree_root_gen;
-
- __le64 chunk_root;
- __le64 chunk_root_gen;
-
- __le64 extent_root;
- __le64 extent_root_gen;
-
- __le64 fs_root;
- __le64 fs_root_gen;
-
- __le64 dev_root;
- __le64 dev_root_gen;
-
- __le64 csum_root;
- __le64 csum_root_gen;
-
- __le64 total_bytes;
- __le64 bytes_used;
- __le64 num_devices;
- /* future */
- __le64 unsed_64[4];
-
- u8 tree_root_level;
- u8 chunk_root_level;
- u8 extent_root_level;
- u8 fs_root_level;
- u8 dev_root_level;
- u8 csum_root_level;
- /* future and to align */
- u8 unused_8[10];
-} __attribute__ ((__packed__));
-
-/*
- * the super block basically lists the main trees of the FS
- * it currently lacks any block count etc etc
- */
-struct btrfs_super_block {
- u8 csum[BTRFS_CSUM_SIZE];
- /* the first 4 fields must match struct btrfs_header */
- u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
- __le64 bytenr; /* this block number */
- __le64 flags;
-
-	/* allowed to be different from the btrfs_header from here on down */
- __le64 magic;
- __le64 generation;
- __le64 root;
- __le64 chunk_root;
- __le64 log_root;
-
- /* this will help find the new super based on the log root */
- __le64 log_root_transid;
- __le64 total_bytes;
- __le64 bytes_used;
- __le64 root_dir_objectid;
- __le64 num_devices;
- __le32 sectorsize;
- __le32 nodesize;
- __le32 leafsize;
- __le32 stripesize;
- __le32 sys_chunk_array_size;
- __le64 chunk_root_generation;
- __le64 compat_flags;
- __le64 compat_ro_flags;
- __le64 incompat_flags;
- __le16 csum_type;
- u8 root_level;
- u8 chunk_root_level;
- u8 log_root_level;
- struct btrfs_dev_item dev_item;
-
- char label[BTRFS_LABEL_SIZE];
-
- __le64 cache_generation;
-
- /* future expansion */
- __le64 reserved[31];
- u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
- struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
-} __attribute__ ((__packed__));
-
-/*
- * Compat flags that we support. If any incompat flags are set other than the
- * ones specified below then we will fail to mount
- */
-#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
-#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
-#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
-#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
-/*
- * some patches floated around with a second compression method
- * let's save that incompat here for when they do get in
- * Note we don't actually support it, we're just reserving the
- * number
- */
-#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 (1ULL << 4)
-
-/*
- * older kernels tried to do bigger metadata blocks, but the
- * code was pretty buggy. Let's not let them try anymore.
- */
-#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5)
-
-#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
-#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
-#define BTRFS_FEATURE_INCOMPAT_SUPP \
- (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
- BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
- BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
- BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
- BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO)
-
-/*
- * A leaf is full of items. offset and size tell us where to find
- * the item in the leaf (relative to the start of the data area)
- */
-struct btrfs_item {
- struct btrfs_disk_key key;
- __le32 offset;
- __le32 size;
-} __attribute__ ((__packed__));
-
-/*
- * leaves have an item area and a data area:
- * [item0, item1....itemN] [free space] [dataN...data1, data0]
- *
- * The data is separate from the items to get the keys closer together
- * during searches.
- */
-struct btrfs_leaf {
- struct btrfs_header header;
- struct btrfs_item items[];
-} __attribute__ ((__packed__));
-
-/*
- * all non-leaf blocks are nodes, they hold only keys and pointers to
- * other blocks
- */
-struct btrfs_key_ptr {
- struct btrfs_disk_key key;
- __le64 blockptr;
- __le64 generation;
-} __attribute__ ((__packed__));
-
-struct btrfs_node {
- struct btrfs_header header;
- struct btrfs_key_ptr ptrs[];
-} __attribute__ ((__packed__));
-
-/*
- * btrfs_paths remember the path taken from the root down to the leaf.
- * level 0 is always the leaf, and nodes[1...BTRFS_MAX_LEVEL] will point
- * to any other levels that are present.
- *
- * The slots array records the index of the item or block pointer
- * used while walking the tree.
- */
-struct btrfs_path {
- struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
- int slots[BTRFS_MAX_LEVEL];
- /* if there is real range locking, this locks field will change */
- int locks[BTRFS_MAX_LEVEL];
- int reada;
- /* keep some upper locks as we walk down */
- int lowest_level;
-
- /*
- * set by btrfs_split_item, tells search_slot to keep all locks
- * and to force calls to keep space in the nodes
- */
- unsigned int search_for_split:1;
- unsigned int keep_locks:1;
- unsigned int skip_locking:1;
- unsigned int leave_spinning:1;
- unsigned int search_commit_root:1;
-};
-
-/*
- * items in the extent btree are used to record the objectid of the
- * owner of the block and the number of references
- */
-
-struct btrfs_extent_item {
- __le64 refs;
- __le64 generation;
- __le64 flags;
-} __attribute__ ((__packed__));
-
-struct btrfs_extent_item_v0 {
- __le32 refs;
-} __attribute__ ((__packed__));
-
-#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r) >> 4) - \
- sizeof(struct btrfs_item))
-
-#define BTRFS_EXTENT_FLAG_DATA (1ULL << 0)
-#define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1)
-
-/* following flags only apply to tree blocks */
-
-/* use full backrefs for extent pointers in the block */
-#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8)
-
-/*
- * this flag is only used internally by scrub and may be changed at any time
- * it is only declared here to avoid collisions
- */
-#define BTRFS_EXTENT_FLAG_SUPER (1ULL << 48)
-
-struct btrfs_tree_block_info {
- struct btrfs_disk_key key;
- u8 level;
-} __attribute__ ((__packed__));
-
-struct btrfs_extent_data_ref {
- __le64 root;
- __le64 objectid;
- __le64 offset;
- __le32 count;
-} __attribute__ ((__packed__));
-
-struct btrfs_shared_data_ref {
- __le32 count;
-} __attribute__ ((__packed__));
-
-struct btrfs_extent_inline_ref {
- u8 type;
- __le64 offset;
-} __attribute__ ((__packed__));
-
-/* old style backrefs item */
-struct btrfs_extent_ref_v0 {
- __le64 root;
- __le64 generation;
- __le64 objectid;
- __le32 count;
-} __attribute__ ((__packed__));
-
-
-/* dev extents record free space on individual devices. The owner
- * field points back to the chunk allocation mapping tree that allocated
- * the extent. The chunk tree uuid field is a way to double check the owner
- */
-struct btrfs_dev_extent {
- __le64 chunk_tree;
- __le64 chunk_objectid;
- __le64 chunk_offset;
- __le64 length;
- u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
-} __attribute__ ((__packed__));
-
-struct btrfs_inode_ref {
- __le64 index;
- __le16 name_len;
- /* name goes here */
-} __attribute__ ((__packed__));
-
-struct btrfs_timespec {
- __le64 sec;
- __le32 nsec;
-} __attribute__ ((__packed__));
-
-enum btrfs_compression_type {
- BTRFS_COMPRESS_NONE = 0,
- BTRFS_COMPRESS_ZLIB = 1,
- BTRFS_COMPRESS_LZO = 2,
- BTRFS_COMPRESS_TYPES = 2,
- BTRFS_COMPRESS_LAST = 3,
-};
-
-struct btrfs_inode_item {
- /* nfs style generation number */
- __le64 generation;
- /* transid that last touched this inode */
- __le64 transid;
- __le64 size;
- __le64 nbytes;
- __le64 block_group;
- __le32 nlink;
- __le32 uid;
- __le32 gid;
- __le32 mode;
- __le64 rdev;
- __le64 flags;
-
- /* modification sequence number for NFS */
- __le64 sequence;
-
- /*
- * a little future expansion, for more than this we can
- * just grow the inode item and version it
- */
- __le64 reserved[4];
- struct btrfs_timespec atime;
- struct btrfs_timespec ctime;
- struct btrfs_timespec mtime;
- struct btrfs_timespec otime;
-} __attribute__ ((__packed__));
-
-struct btrfs_dir_log_item {
- __le64 end;
-} __attribute__ ((__packed__));
-
-struct btrfs_dir_item {
- struct btrfs_disk_key location;
- __le64 transid;
- __le16 data_len;
- __le16 name_len;
- u8 type;
-} __attribute__ ((__packed__));
-
-#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)
-
-struct btrfs_root_item {
- struct btrfs_inode_item inode;
- __le64 generation;
- __le64 root_dirid;
- __le64 bytenr;
- __le64 byte_limit;
- __le64 bytes_used;
- __le64 last_snapshot;
- __le64 flags;
- __le32 refs;
- struct btrfs_disk_key drop_progress;
- u8 drop_level;
- u8 level;
-} __attribute__ ((__packed__));
-
-/*
- * this is used for both forward and backward root refs
- */
-struct btrfs_root_ref {
- __le64 dirid;
- __le64 sequence;
- __le16 name_len;
-} __attribute__ ((__packed__));
-
-struct btrfs_disk_balance_args {
- /*
- * profiles to operate on, single is denoted by
- * BTRFS_AVAIL_ALLOC_BIT_SINGLE
- */
- __le64 profiles;
-
- /* usage filter */
- __le64 usage;
-
- /* devid filter */
- __le64 devid;
-
- /* devid subset filter [pstart..pend) */
- __le64 pstart;
- __le64 pend;
-
- /* btrfs virtual address space subset filter [vstart..vend) */
- __le64 vstart;
- __le64 vend;
-
- /*
- * profile to convert to, single is denoted by
- * BTRFS_AVAIL_ALLOC_BIT_SINGLE
- */
- __le64 target;
-
- /* BTRFS_BALANCE_ARGS_* */
- __le64 flags;
-
- __le64 unused[8];
-} __attribute__ ((__packed__));
-
-/*
- * store balance parameters to disk so that balance can be properly
- * resumed after crash or unmount
- */
-struct btrfs_balance_item {
- /* BTRFS_BALANCE_* */
- __le64 flags;
-
- struct btrfs_disk_balance_args data;
- struct btrfs_disk_balance_args meta;
- struct btrfs_disk_balance_args sys;
-
- __le64 unused[4];
-} __attribute__ ((__packed__));
-
-#define BTRFS_FILE_EXTENT_INLINE 0
-#define BTRFS_FILE_EXTENT_REG 1
-#define BTRFS_FILE_EXTENT_PREALLOC 2
-
-struct btrfs_file_extent_item {
- /*
- * transaction id that created this extent
- */
- __le64 generation;
- /*
- * max number of bytes to hold this extent in ram
- * when we split a compressed extent we can't know how big
- * each of the resulting pieces will be. So, this is
- * an upper limit on the size of the extent in ram instead of
- * an exact limit.
- */
- __le64 ram_bytes;
-
- /*
- * 32 bits for the various ways we might encode the data,
- * including compression and encryption. If any of these
- * are set to something a given disk format doesn't understand
- * it is treated like an incompat flag for reading and writing,
- * but not for stat.
- */
- u8 compression;
- u8 encryption;
- __le16 other_encoding; /* spare for later use */
-
- /* are we inline data or a real extent? */
- u8 type;
-
- /*
- * disk space consumed by the extent, checksum blocks are included
- * in these numbers
- */
- __le64 disk_bytenr;
- __le64 disk_num_bytes;
- /*
- * the logical offset in file blocks (no csums)
- * this extent record is for. This allows a file extent to point
- * into the middle of an existing extent on disk, sharing it
- * between two snapshots (useful if some bytes in the middle of the
-	 * extent have changed)
- */
- __le64 offset;
- /*
- * the logical number of file blocks (no csums included). This
- * always reflects the size uncompressed and without encoding.
- */
- __le64 num_bytes;
-
-} __attribute__ ((__packed__));
-
-struct btrfs_csum_item {
- u8 csum;
-} __attribute__ ((__packed__));
-
-/* different types of block groups (and chunks) */
-#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0)
-#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)
-#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2)
-#define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3)
-#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
-#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
-#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
-#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE
-#define BTRFS_NR_RAID_TYPES 5
-
-#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \
- BTRFS_BLOCK_GROUP_SYSTEM | \
- BTRFS_BLOCK_GROUP_METADATA)
-
-#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
- BTRFS_BLOCK_GROUP_RAID1 | \
- BTRFS_BLOCK_GROUP_DUP | \
- BTRFS_BLOCK_GROUP_RAID10)
-/*
- * We need a bit for restriper to be able to tell when chunks of type
- * SINGLE are available. This "extended" profile format is used in
- * fs_info->avail_*_alloc_bits (in-memory) and balance item fields
- * (on-disk). The corresponding on-disk bit in chunk.type is reserved
- * to avoid remappings between two formats in future.
- */
-#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)
-
-#define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \
- BTRFS_AVAIL_ALLOC_BIT_SINGLE)
-
-static inline u64 chunk_to_extended(u64 flags)
-{
- if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)
- flags |= BTRFS_AVAIL_ALLOC_BIT_SINGLE;
-
- return flags;
-}
-static inline u64 extended_to_chunk(u64 flags)
-{
- return flags & ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
-}
-
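-/*
- * For example, following directly from the two helpers above: a plain
- * data chunk with no RAID profile bits set maps to
- * BTRFS_BLOCK_GROUP_DATA | BTRFS_AVAIL_ALLOC_BIT_SINGLE in the extended
- * format, and extended_to_chunk() simply masks the SINGLE bit back off,
- * so extended_to_chunk(chunk_to_extended(f)) == f for any on-disk flags.
- */
-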
-struct btrfs_block_group_item {
- __le64 used;
- __le64 chunk_objectid;
- __le64 flags;
-} __attribute__ ((__packed__));
-
-struct btrfs_space_info {
- u64 flags;
-
- u64 total_bytes; /* total bytes in the space,
- this doesn't take mirrors into account */
- u64 bytes_used; /* total bytes used,
- this doesn't take mirrors into account */
- u64 bytes_pinned; /* total bytes pinned, will be freed when the
- transaction finishes */
- u64 bytes_reserved; /* total bytes the allocator has reserved for
- current allocations */
- u64 bytes_readonly; /* total bytes that are read only */
-
- u64 bytes_may_use; /* number of bytes that may be used for
- delalloc/allocations */
- u64 disk_used; /* total bytes used on disk */
- u64 disk_total; /* total bytes on disk, takes mirrors into
- account */
-
- /*
- * we bump reservation progress every time we decrement
- * bytes_reserved. This way people waiting for reservations
- * know something good has happened and they can check
- * for progress. The number here isn't to be trusted, it
- * just shows reclaim activity
- */
- unsigned long reservation_progress;
-
- unsigned int full:1; /* indicates that we cannot allocate any more
- chunks for this space */
- unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
-
- unsigned int flush:1; /* set if we are trying to make space */
-
- unsigned int force_alloc; /* set if we need to force a chunk
- alloc for this space */
-
- struct list_head list;
-
- /* for block groups in our same type */
- struct list_head block_groups[BTRFS_NR_RAID_TYPES];
- spinlock_t lock;
- struct rw_semaphore groups_sem;
- wait_queue_head_t wait;
-};
-
-struct btrfs_block_rsv {
- u64 size;
- u64 reserved;
- struct btrfs_space_info *space_info;
- spinlock_t lock;
- unsigned int full;
-};
-
-/*
- * free clusters are used to claim free space in relatively large chunks,
- * allowing us to do less seeky writes. They are used for all metadata
- * allocations and data allocations in ssd mode.
- */
-struct btrfs_free_cluster {
- spinlock_t lock;
- spinlock_t refill_lock;
- struct rb_root root;
-
- /* largest extent in this cluster */
- u64 max_size;
-
- /* first extent starting offset */
- u64 window_start;
-
- struct btrfs_block_group_cache *block_group;
- /*
- * when a cluster is allocated from a block group, we put the
- * cluster onto a list in the block group so that it can
- * be freed before the block group is freed.
- */
- struct list_head block_group_list;
-};
-
-enum btrfs_caching_type {
- BTRFS_CACHE_NO = 0,
- BTRFS_CACHE_STARTED = 1,
- BTRFS_CACHE_FAST = 2,
- BTRFS_CACHE_FINISHED = 3,
-};
-
-enum btrfs_disk_cache_state {
- BTRFS_DC_WRITTEN = 0,
- BTRFS_DC_ERROR = 1,
- BTRFS_DC_CLEAR = 2,
- BTRFS_DC_SETUP = 3,
- BTRFS_DC_NEED_WRITE = 4,
-};
-
-struct btrfs_caching_control {
- struct list_head list;
- struct mutex mutex;
- wait_queue_head_t wait;
- struct btrfs_work work;
- struct btrfs_block_group_cache *block_group;
- u64 progress;
- atomic_t count;
-};
-
-struct btrfs_block_group_cache {
- struct btrfs_key key;
- struct btrfs_block_group_item item;
- struct btrfs_fs_info *fs_info;
- struct inode *inode;
- spinlock_t lock;
- u64 pinned;
- u64 reserved;
- u64 bytes_super;
- u64 flags;
- u64 sectorsize;
- u64 cache_generation;
- unsigned int ro:1;
- unsigned int dirty:1;
- unsigned int iref:1;
-
- int disk_cache_state;
-
- /* cache tracking stuff */
- int cached;
- struct btrfs_caching_control *caching_ctl;
- u64 last_byte_to_unpin;
-
- struct btrfs_space_info *space_info;
-
- /* free space cache stuff */
- struct btrfs_free_space_ctl *free_space_ctl;
-
- /* block group cache stuff */
- struct rb_node cache_node;
-
- /* for block groups in the same raid type */
- struct list_head list;
-
- /* usage count */
- atomic_t count;
-
- /* List of struct btrfs_free_clusters for this block group.
- * Today it will only have one thing on it, but that may change
- */
- struct list_head cluster_list;
-};
-
-struct reloc_control;
-struct btrfs_device;
-struct btrfs_fs_devices;
-struct btrfs_balance_control;
-struct btrfs_delayed_root;
-struct btrfs_fs_info {
- u8 fsid[BTRFS_FSID_SIZE];
- u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
- struct btrfs_root *extent_root;
- struct btrfs_root *tree_root;
- struct btrfs_root *chunk_root;
- struct btrfs_root *dev_root;
- struct btrfs_root *fs_root;
- struct btrfs_root *csum_root;
-
- /* the log root tree is a directory of all the other log roots */
- struct btrfs_root *log_root_tree;
-
- spinlock_t fs_roots_radix_lock;
- struct radix_tree_root fs_roots_radix;
-
- /* block group cache stuff */
- spinlock_t block_group_cache_lock;
- struct rb_root block_group_cache_tree;
-
- /* keep track of unallocated space */
- spinlock_t free_chunk_lock;
- u64 free_chunk_space;
-
- struct extent_io_tree freed_extents[2];
- struct extent_io_tree *pinned_extents;
-
- /* logical->physical extent mapping */
- struct btrfs_mapping_tree mapping_tree;
-
- /*
- * block reservation for extent, checksum, root tree and
- * delayed dir index item
- */
- struct btrfs_block_rsv global_block_rsv;
- /* block reservation for delay allocation */
- struct btrfs_block_rsv delalloc_block_rsv;
- /* block reservation for metadata operations */
- struct btrfs_block_rsv trans_block_rsv;
- /* block reservation for chunk tree */
- struct btrfs_block_rsv chunk_block_rsv;
- /* block reservation for delayed operations */
- struct btrfs_block_rsv delayed_block_rsv;
-
- struct btrfs_block_rsv empty_block_rsv;
-
- u64 generation;
- u64 last_trans_committed;
-
- /*
- * this is updated to the current trans every time a full commit
- * is required instead of the faster short fsync log commits
- */
- u64 last_trans_log_full_commit;
- unsigned long mount_opt;
- unsigned long compress_type:4;
- u64 max_inline;
- u64 alloc_start;
- struct btrfs_transaction *running_transaction;
- wait_queue_head_t transaction_throttle;
- wait_queue_head_t transaction_wait;
- wait_queue_head_t transaction_blocked_wait;
- wait_queue_head_t async_submit_wait;
-
- struct btrfs_super_block *super_copy;
- struct btrfs_super_block *super_for_commit;
- struct block_device *__bdev;
- struct super_block *sb;
- struct inode *btree_inode;
- struct backing_dev_info bdi;
- struct mutex tree_log_mutex;
- struct mutex transaction_kthread_mutex;
- struct mutex cleaner_mutex;
- struct mutex chunk_mutex;
- struct mutex volume_mutex;
- /*
- * this protects the ordered operations list only while we are
- * processing all of the entries on it. This way we make
- * sure the commit code doesn't find the list temporarily empty
- * because another function happens to be doing non-waiting preflush
- * before jumping into the main commit.
- */
- struct mutex ordered_operations_mutex;
- struct rw_semaphore extent_commit_sem;
-
- struct rw_semaphore cleanup_work_sem;
-
- struct rw_semaphore subvol_sem;
- struct srcu_struct subvol_srcu;
-
- spinlock_t trans_lock;
- /*
- * the reloc mutex goes with the trans lock, it is taken
- * during commit to protect us from the relocation code
- */
- struct mutex reloc_mutex;
-
- struct list_head trans_list;
- struct list_head hashers;
- struct list_head dead_roots;
- struct list_head caching_block_groups;
-
- spinlock_t delayed_iput_lock;
- struct list_head delayed_iputs;
-
- atomic_t nr_async_submits;
- atomic_t async_submit_draining;
- atomic_t nr_async_bios;
- atomic_t async_delalloc_pages;
- atomic_t open_ioctl_trans;
-
- /*
- * this is used by the balancing code to wait for all the pending
- * ordered extents
- */
- spinlock_t ordered_extent_lock;
-
- /*
- * all of the data=ordered extents pending writeback
- * these can span multiple transactions and basically include
- * every dirty data page that isn't from nodatacow
- */
- struct list_head ordered_extents;
-
- /*
- * all of the inodes that have delalloc bytes. It is possible for
- * this list to be empty even when there is still dirty data=ordered
- * extents waiting to finish IO.
- */
- struct list_head delalloc_inodes;
-
- /*
- * special rename and truncate targets that must be on disk before
- * we're allowed to commit. This is basically the ext3 style
- * data=ordered list.
- */
- struct list_head ordered_operations;
-
- /*
- * there is a pool of worker threads for checksumming during writes
- * and a pool for checksumming after reads. This is because readers
- * can run with FS locks held, and the writers may be waiting for
- * those locks. We don't want ordering in the pending list to cause
- * deadlocks, and so the two are serviced separately.
- *
- * A third pool does submit_bio to avoid deadlocking with the other
- * two
- */
- struct btrfs_workers generic_worker;
- struct btrfs_workers workers;
- struct btrfs_workers delalloc_workers;
- struct btrfs_workers endio_workers;
- struct btrfs_workers endio_meta_workers;
- struct btrfs_workers endio_meta_write_workers;
- struct btrfs_workers endio_write_workers;
- struct btrfs_workers endio_freespace_worker;
- struct btrfs_workers submit_workers;
- struct btrfs_workers caching_workers;
- struct btrfs_workers readahead_workers;
-
- /*
- * fixup workers take dirty pages that didn't properly go through
- * the cow mechanism and make them safe to write. It happens
- * for the sys_munmap function call path
- */
- struct btrfs_workers fixup_workers;
- struct btrfs_workers delayed_workers;
- struct task_struct *transaction_kthread;
- struct task_struct *cleaner_kthread;
- int thread_pool_size;
-
- struct kobject super_kobj;
- struct completion kobj_unregister;
- int do_barriers;
- int closing;
- int log_root_recovering;
- int enospc_unlink;
- int trans_no_join;
-
- u64 total_pinned;
-
- /* protected by the delalloc lock, used to keep from writing
- * metadata until there is a nice batch
- */
- u64 dirty_metadata_bytes;
- struct list_head dirty_cowonly_roots;
-
- struct btrfs_fs_devices *fs_devices;
-
- /*
- * the space_info list is almost entirely read only. It only changes
- * when we add a new raid type to the FS, and that happens
- * very rarely. RCU is used to protect it.
- */
- struct list_head space_info;
-
- struct reloc_control *reloc_ctl;
-
- spinlock_t delalloc_lock;
- u64 delalloc_bytes;
-
- /* data_alloc_cluster is only used in ssd mode */
- struct btrfs_free_cluster data_alloc_cluster;
-
- /* all metadata allocations go through this cluster */
- struct btrfs_free_cluster meta_alloc_cluster;
-
- /* auto defrag inodes go here */
- spinlock_t defrag_inodes_lock;
- struct rb_root defrag_inodes;
- atomic_t defrag_running;
-
- spinlock_t ref_cache_lock;
- u64 total_ref_cache_size;
-
- /*
- * these three are in extended format (availability of single
- * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other
- * types are denoted by corresponding BTRFS_BLOCK_GROUP_* bits)
- */
- u64 avail_data_alloc_bits;
- u64 avail_metadata_alloc_bits;
- u64 avail_system_alloc_bits;
-
- /* restriper state */
- spinlock_t balance_lock;
- struct mutex balance_mutex;
- atomic_t balance_running;
- atomic_t balance_pause_req;
- atomic_t balance_cancel_req;
- struct btrfs_balance_control *balance_ctl;
- wait_queue_head_t balance_wait_q;
-
- unsigned data_chunk_allocations;
- unsigned metadata_ratio;
-
- void *bdev_holder;
-
- /* private scrub information */
- struct mutex scrub_lock;
- atomic_t scrubs_running;
- atomic_t scrub_pause_req;
- atomic_t scrubs_paused;
- atomic_t scrub_cancel_req;
- wait_queue_head_t scrub_pause_wait;
- struct rw_semaphore scrub_super_lock;
- int scrub_workers_refcnt;
- struct btrfs_workers scrub_workers;
-
-#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- u32 check_integrity_print_mask;
-#endif
-
- /* filesystem state */
- u64 fs_state;
-
- struct btrfs_delayed_root *delayed_root;
-
- /* readahead tree */
- spinlock_t reada_lock;
- struct radix_tree_root reada_tree;
-
- /* next backup root to be overwritten */
- int backup_root_index;
-};
-
-/*
- * in-ram representation of a tree. extent_root is used for all allocations
- * and for the extent tree itself.
- */
-struct btrfs_root {
- struct extent_buffer *node;
-
- struct extent_buffer *commit_root;
- struct btrfs_root *log_root;
- struct btrfs_root *reloc_root;
-
- struct btrfs_root_item root_item;
- struct btrfs_key root_key;
- struct btrfs_fs_info *fs_info;
- struct extent_io_tree dirty_log_pages;
-
- struct kobject root_kobj;
- struct completion kobj_unregister;
- struct mutex objectid_mutex;
-
- spinlock_t accounting_lock;
- struct btrfs_block_rsv *block_rsv;
-
- /* free ino cache stuff */
- struct mutex fs_commit_mutex;
- struct btrfs_free_space_ctl *free_ino_ctl;
- enum btrfs_caching_type cached;
- spinlock_t cache_lock;
- wait_queue_head_t cache_wait;
- struct btrfs_free_space_ctl *free_ino_pinned;
- u64 cache_progress;
- struct inode *cache_inode;
-
- struct mutex log_mutex;
- wait_queue_head_t log_writer_wait;
- wait_queue_head_t log_commit_wait[2];
- atomic_t log_writers;
- atomic_t log_commit[2];
- unsigned long log_transid;
- unsigned long last_log_commit;
- unsigned long log_batch;
- pid_t log_start_pid;
- bool log_multiple_pids;
-
- u64 objectid;
- u64 last_trans;
-
- /* data allocations are done in sectorsize units */
- u32 sectorsize;
-
- /* node allocations are done in nodesize units */
- u32 nodesize;
-
- /* leaf allocations are done in leafsize units */
- u32 leafsize;
-
- u32 stripesize;
-
- u32 type;
-
- u64 highest_objectid;
-
- /* btrfs_record_root_in_trans is a multi-step process,
- * and it can race with the balancing code. But the
- * race window is very small and is only hit the first time the root
- * is added to each transaction. So in_trans_setup
- * is used to tell us when more checks are required
- */
- unsigned long in_trans_setup;
- int ref_cows;
- int track_dirty;
- int in_radix;
-
- u64 defrag_trans_start;
- struct btrfs_key defrag_progress;
- struct btrfs_key defrag_max;
- int defrag_running;
- char *name;
-
- /* the dirty list is only used by non-reference counted roots */
- struct list_head dirty_list;
-
- struct list_head root_list;
-
- spinlock_t orphan_lock;
- struct list_head orphan_list;
- struct btrfs_block_rsv *orphan_block_rsv;
- int orphan_item_inserted;
- int orphan_cleanup_state;
-
- spinlock_t inode_lock;
- /* red-black tree that keeps track of in-memory inodes */
- struct rb_root inode_tree;
-
- /*
- * radix tree that keeps track of delayed nodes of every inode,
- * protected by inode_lock
- */
- struct radix_tree_root delayed_nodes_tree;
- /*
- * right now this just gets used so that a root has its own devid
- * for stat. It may be used for more later
- */
- dev_t anon_dev;
-
- int force_cow;
-};
-
-struct btrfs_ioctl_defrag_range_args {
- /* start of the defrag operation */
- __u64 start;
-
- /* number of bytes to defrag, use (u64)-1 to say all */
- __u64 len;
-
- /*
- * flags for the operation, which can include turning
- * on compression for this one defrag
- */
- __u64 flags;
-
- /*
- * any extent bigger than this will be considered
- * already defragged. Use 0 to take the kernel default.
- * Use 1 to say every single extent must be rewritten
- */
- __u32 extent_thresh;
-
- /*
- * which compression method to use if turning on compression
- * for this defrag operation. If unspecified, zlib will
- * be used
- */
- __u32 compress_type;
-
- /* spare for later */
- __u32 unused[4];
-};
-
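-/*
- * A minimal way to fill this in, purely as a sketch (the values below
- * are made up, not kernel defaults): defragment only the first megabyte
- * of a file, treat any extent of 256K or more as already good enough,
- * and leave compression turned off:
- *
- *	struct btrfs_ioctl_defrag_range_args args = {
- *		.start = 0,
- *		.len = 1024 * 1024,
- *		.flags = 0,
- *		.extent_thresh = 256 * 1024,
- *	};
- */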
-
-/*
- * inode items have the data typically returned from stat and store other
- * info about object characteristics. There is one for every file and dir in
- * the FS
- */
-#define BTRFS_INODE_ITEM_KEY 1
-#define BTRFS_INODE_REF_KEY 12
-#define BTRFS_XATTR_ITEM_KEY 24
-#define BTRFS_ORPHAN_ITEM_KEY 48
-/* reserve 2-15 close to the inode for later flexibility */
-
-/*
- * dir items are the name -> inode pointers in a directory. There is one
- * for every name in a directory.
- */
-#define BTRFS_DIR_LOG_ITEM_KEY 60
-#define BTRFS_DIR_LOG_INDEX_KEY 72
-#define BTRFS_DIR_ITEM_KEY 84
-#define BTRFS_DIR_INDEX_KEY 96
-/*
- * extent data is for file data
- */
-#define BTRFS_EXTENT_DATA_KEY 108
-
-/*
- * extent csums are stored in a separate tree and hold csums for
- * an entire extent on disk.
- */
-#define BTRFS_EXTENT_CSUM_KEY 128
-
-/*
- * root items point to tree roots. They are typically in the root
- * tree used by the super block to find all the other trees
- */
-#define BTRFS_ROOT_ITEM_KEY 132
-
-/*
- * root backrefs tie subvols and snapshots to the directory entries that
- * reference them
- */
-#define BTRFS_ROOT_BACKREF_KEY 144
-
-/*
- * root refs make a fast index for listing all of the snapshots and
- * subvolumes referenced by a given root. They point directly to the
- * directory item in the root that references the subvol
- */
-#define BTRFS_ROOT_REF_KEY 156
-
-/*
- * extent items are in the extent map tree. These record which blocks
- * are used, and how many references there are to each block
- */
-#define BTRFS_EXTENT_ITEM_KEY 168
-
-#define BTRFS_TREE_BLOCK_REF_KEY 176
-
-#define BTRFS_EXTENT_DATA_REF_KEY 178
-
-#define BTRFS_EXTENT_REF_V0_KEY 180
-
-#define BTRFS_SHARED_BLOCK_REF_KEY 182
-
-#define BTRFS_SHARED_DATA_REF_KEY 184
-
-/*
- * block groups give us hints into the extent allocation trees: which
- * blocks are free, and so on
- */
-#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
-
-#define BTRFS_DEV_EXTENT_KEY 204
-#define BTRFS_DEV_ITEM_KEY 216
-#define BTRFS_CHUNK_ITEM_KEY 228
-
-#define BTRFS_BALANCE_ITEM_KEY 248
-
-/*
- * string items are for debugging. They just store a short string of
- * data in the FS
- */
-#define BTRFS_STRING_ITEM_KEY 253
-
-/*
- * Flags for mount options.
- *
- * Note: don't forget to add new options to btrfs_show_options()
- */
-#define BTRFS_MOUNT_NODATASUM (1 << 0)
-#define BTRFS_MOUNT_NODATACOW (1 << 1)
-#define BTRFS_MOUNT_NOBARRIER (1 << 2)
-#define BTRFS_MOUNT_SSD (1 << 3)
-#define BTRFS_MOUNT_DEGRADED (1 << 4)
-#define BTRFS_MOUNT_COMPRESS (1 << 5)
-#define BTRFS_MOUNT_NOTREELOG (1 << 6)
-#define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7)
-#define BTRFS_MOUNT_SSD_SPREAD (1 << 8)
-#define BTRFS_MOUNT_NOSSD (1 << 9)
-#define BTRFS_MOUNT_DISCARD (1 << 10)
-#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11)
-#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
-#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
-#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
-#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
-#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
-#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
-#define BTRFS_MOUNT_RECOVERY (1 << 18)
-#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19)
-#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20)
-#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
-#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22)
-
-#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
-#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
-#define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \
- BTRFS_MOUNT_##opt)
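-
-/*
- * e.g. btrfs_set_opt(info->mount_opt, SSD) turns into
- * info->mount_opt |= BTRFS_MOUNT_SSD, and btrfs_test_opt(root, SSD)
- * reads root->fs_info->mount_opt & BTRFS_MOUNT_SSD, so callers never
- * spell out the BTRFS_MOUNT_ prefix themselves.
- */
-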
-/*
- * Inode flags
- */
-#define BTRFS_INODE_NODATASUM (1 << 0)
-#define BTRFS_INODE_NODATACOW (1 << 1)
-#define BTRFS_INODE_READONLY (1 << 2)
-#define BTRFS_INODE_NOCOMPRESS (1 << 3)
-#define BTRFS_INODE_PREALLOC (1 << 4)
-#define BTRFS_INODE_SYNC (1 << 5)
-#define BTRFS_INODE_IMMUTABLE (1 << 6)
-#define BTRFS_INODE_APPEND (1 << 7)
-#define BTRFS_INODE_NODUMP (1 << 8)
-#define BTRFS_INODE_NOATIME (1 << 9)
-#define BTRFS_INODE_DIRSYNC (1 << 10)
-#define BTRFS_INODE_COMPRESS (1 << 11)
-
-#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31)
-
-struct btrfs_map_token {
- struct extent_buffer *eb;
- char *kaddr;
- unsigned long offset;
-};
-
-static inline void btrfs_init_map_token(struct btrfs_map_token *token)
-{
- memset(token, 0, sizeof(*token));
-}
-
-/* some macros to generate set/get funcs for the struct fields. This
- * assumes there is a lefoo_to_cpu for every type, so let's make a simple
- * one for u8:
- */
-#define le8_to_cpu(v) (v)
-#define cpu_to_le8(v) (v)
-#define __le8 u8
-
-#define read_eb_member(eb, ptr, type, member, result) ( \
- read_extent_buffer(eb, (char *)(result), \
- ((unsigned long)(ptr)) + \
- offsetof(type, member), \
- sizeof(((type *)0)->member)))
-
-#define write_eb_member(eb, ptr, type, member, result) ( \
- write_extent_buffer(eb, (char *)(result), \
- ((unsigned long)(ptr)) + \
- offsetof(type, member), \
- sizeof(((type *)0)->member)))
-
-#ifndef BTRFS_SETGET_FUNCS
-#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
-u##bits btrfs_##name(struct extent_buffer *eb, type *s); \
-u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, struct btrfs_map_token *token); \
-void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token);\
-void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
-#endif
-
-#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
-static inline u##bits btrfs_##name(struct extent_buffer *eb) \
-{ \
- type *p = page_address(eb->pages[0]); \
- u##bits res = le##bits##_to_cpu(p->member); \
- return res; \
-} \
-static inline void btrfs_set_##name(struct extent_buffer *eb, \
- u##bits val) \
-{ \
- type *p = page_address(eb->pages[0]); \
- p->member = cpu_to_le##bits(val); \
-}
-
-#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \
-static inline u##bits btrfs_##name(type *s) \
-{ \
- return le##bits##_to_cpu(s->member); \
-} \
-static inline void btrfs_set_##name(type *s, u##bits val) \
-{ \
- s->member = cpu_to_le##bits(val); \
-}
-
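-/*
- * As an example of what these templates produce: the invocation
- * BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
- * objectid, 64) further down expands to a btrfs_disk_key_objectid()
- * getter that returns le64_to_cpu(s->objectid) and a matching
- * btrfs_set_disk_key_objectid() setter that stores cpu_to_le64(val),
- * so every on-disk field gets endian-safe accessors from one line.
- */
-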
-BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64);
-BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64);
-BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64);
-BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32);
-BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32);
-BTRFS_SETGET_FUNCS(device_start_offset, struct btrfs_dev_item,
- start_offset, 64);
-BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32);
-BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64);
-BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32);
-BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8);
-BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8);
-BTRFS_SETGET_FUNCS(device_generation, struct btrfs_dev_item, generation, 64);
-
-BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item,
- total_bytes, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item,
- bytes_used, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item,
- io_align, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item,
- io_width, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item,
- sector_size, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item,
- dev_group, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item,
- seek_speed, 8);
-BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item,
- bandwidth, 8);
-BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item,
- generation, 64);
-
-static inline char *btrfs_device_uuid(struct btrfs_dev_item *d)
-{
- return (char *)d + offsetof(struct btrfs_dev_item, uuid);
-}
-
-static inline char *btrfs_device_fsid(struct btrfs_dev_item *d)
-{
- return (char *)d + offsetof(struct btrfs_dev_item, fsid);
-}
-
-BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64);
-BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64);
-BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64);
-BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32);
-BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32);
-BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32);
-BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64);
-BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16);
-BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16);
-BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64);
-BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64);
-
-static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s)
-{
- return (char *)s + offsetof(struct btrfs_stripe, dev_uuid);
-}
-
-BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk,
- stripe_len, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk,
- io_align, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk,
- io_width, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk,
- sector_size, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk,
- num_stripes, 16);
-BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk,
- sub_stripes, 16);
-BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64);
-
-static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c,
- int nr)
-{
- unsigned long offset = (unsigned long)c;
- offset += offsetof(struct btrfs_chunk, stripe);
- offset += nr * sizeof(struct btrfs_stripe);
- return (struct btrfs_stripe *)offset;
-}
-
-static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr)
-{
- return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr));
-}
-
-static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
- struct btrfs_chunk *c, int nr)
-{
- return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr));
-}
-
-static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
- struct btrfs_chunk *c, int nr)
-{
- return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr));
-}
-
-/* struct btrfs_block_group_item */
-BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
- used, 64);
-BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item,
- used, 64);
-BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid,
- struct btrfs_block_group_item, chunk_objectid, 64);
-
-BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid,
- struct btrfs_block_group_item, chunk_objectid, 64);
-BTRFS_SETGET_FUNCS(disk_block_group_flags,
- struct btrfs_block_group_item, flags, 64);
-BTRFS_SETGET_STACK_FUNCS(block_group_flags,
- struct btrfs_block_group_item, flags, 64);
-
-/* struct btrfs_inode_ref */
-BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
-BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64);
-
-/* struct btrfs_inode_item */
-BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64);
-BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64);
-BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64);
-BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64);
-BTRFS_SETGET_FUNCS(inode_nbytes, struct btrfs_inode_item, nbytes, 64);
-BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64);
-BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
-BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
-BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
-BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
-BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
-BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
-
-static inline struct btrfs_timespec *
-btrfs_inode_atime(struct btrfs_inode_item *inode_item)
-{
- unsigned long ptr = (unsigned long)inode_item;
- ptr += offsetof(struct btrfs_inode_item, atime);
- return (struct btrfs_timespec *)ptr;
-}
-
-static inline struct btrfs_timespec *
-btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
-{
- unsigned long ptr = (unsigned long)inode_item;
- ptr += offsetof(struct btrfs_inode_item, mtime);
- return (struct btrfs_timespec *)ptr;
-}
-
-static inline struct btrfs_timespec *
-btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
-{
- unsigned long ptr = (unsigned long)inode_item;
- ptr += offsetof(struct btrfs_inode_item, ctime);
- return (struct btrfs_timespec *)ptr;
-}
-
-BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
-BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
-
-/* struct btrfs_dev_extent */
-BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent,
- chunk_tree, 64);
-BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent,
- chunk_objectid, 64);
-BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent,
- chunk_offset, 64);
-BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
-
-static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
-{
- unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid);
- return (u8 *)((unsigned long)dev + ptr);
-}
-
-BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64);
-BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item,
- generation, 64);
-BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64);
-
-BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
-
-
-BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
-
-static inline void btrfs_tree_block_key(struct extent_buffer *eb,
- struct btrfs_tree_block_info *item,
- struct btrfs_disk_key *key)
-{
- read_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
-}
-
-static inline void btrfs_set_tree_block_key(struct extent_buffer *eb,
- struct btrfs_tree_block_info *item,
- struct btrfs_disk_key *key)
-{
- write_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
-}
-
-BTRFS_SETGET_FUNCS(extent_data_ref_root, struct btrfs_extent_data_ref,
- root, 64);
-BTRFS_SETGET_FUNCS(extent_data_ref_objectid, struct btrfs_extent_data_ref,
- objectid, 64);
-BTRFS_SETGET_FUNCS(extent_data_ref_offset, struct btrfs_extent_data_ref,
- offset, 64);
-BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref,
- count, 32);
-
-BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref,
- count, 32);
-
-BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref,
- type, 8);
-BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref,
- offset, 64);
-
-static inline u32 btrfs_extent_inline_ref_size(int type)
-{
- if (type == BTRFS_TREE_BLOCK_REF_KEY ||
- type == BTRFS_SHARED_BLOCK_REF_KEY)
- return sizeof(struct btrfs_extent_inline_ref);
- if (type == BTRFS_SHARED_DATA_REF_KEY)
- return sizeof(struct btrfs_shared_data_ref) +
- sizeof(struct btrfs_extent_inline_ref);
- if (type == BTRFS_EXTENT_DATA_REF_KEY)
- return sizeof(struct btrfs_extent_data_ref) +
- offsetof(struct btrfs_extent_inline_ref, offset);
- BUG();
- return 0;
-}
-
-BTRFS_SETGET_FUNCS(ref_root_v0, struct btrfs_extent_ref_v0, root, 64);
-BTRFS_SETGET_FUNCS(ref_generation_v0, struct btrfs_extent_ref_v0,
- generation, 64);
-BTRFS_SETGET_FUNCS(ref_objectid_v0, struct btrfs_extent_ref_v0, objectid, 64);
-BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32);
-
-/* struct btrfs_node */
-BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
-BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64);
-
-static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
-{
- unsigned long ptr;
- ptr = offsetof(struct btrfs_node, ptrs) +
- sizeof(struct btrfs_key_ptr) * nr;
- return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr);
-}
-
-static inline void btrfs_set_node_blockptr(struct extent_buffer *eb,
- int nr, u64 val)
-{
- unsigned long ptr;
- ptr = offsetof(struct btrfs_node, ptrs) +
- sizeof(struct btrfs_key_ptr) * nr;
- btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val);
-}
-
-static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr)
-{
- unsigned long ptr;
- ptr = offsetof(struct btrfs_node, ptrs) +
- sizeof(struct btrfs_key_ptr) * nr;
- return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr);
-}
-
-static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb,
- int nr, u64 val)
-{
- unsigned long ptr;
- ptr = offsetof(struct btrfs_node, ptrs) +
- sizeof(struct btrfs_key_ptr) * nr;
- btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)ptr, val);
-}
-
-static inline unsigned long btrfs_node_key_ptr_offset(int nr)
-{
- return offsetof(struct btrfs_node, ptrs) +
- sizeof(struct btrfs_key_ptr) * nr;
-}
-
-void btrfs_node_key(struct extent_buffer *eb,
- struct btrfs_disk_key *disk_key, int nr);
-
-static inline void btrfs_set_node_key(struct extent_buffer *eb,
- struct btrfs_disk_key *disk_key, int nr)
-{
- unsigned long ptr;
- ptr = btrfs_node_key_ptr_offset(nr);
- write_eb_member(eb, (struct btrfs_key_ptr *)ptr,
- struct btrfs_key_ptr, key, disk_key);
-}
-
-/* struct btrfs_item */
-BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32);
-BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32);
-
-static inline unsigned long btrfs_item_nr_offset(int nr)
-{
- return offsetof(struct btrfs_leaf, items) +
- sizeof(struct btrfs_item) * nr;
-}
-
-static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb,
- int nr)
-{
- return (struct btrfs_item *)btrfs_item_nr_offset(nr);
-}
-
-static inline u32 btrfs_item_end(struct extent_buffer *eb,
- struct btrfs_item *item)
-{
- return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item);
-}
-
-static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
-{
- return btrfs_item_end(eb, btrfs_item_nr(eb, nr));
-}
-
-static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr)
-{
- return btrfs_item_offset(eb, btrfs_item_nr(eb, nr));
-}
-
-static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr)
-{
- return btrfs_item_size(eb, btrfs_item_nr(eb, nr));
-}
-
-static inline void btrfs_item_key(struct extent_buffer *eb,
- struct btrfs_disk_key *disk_key, int nr)
-{
- struct btrfs_item *item = btrfs_item_nr(eb, nr);
- read_eb_member(eb, item, struct btrfs_item, key, disk_key);
-}
-
-static inline void btrfs_set_item_key(struct extent_buffer *eb,
- struct btrfs_disk_key *disk_key, int nr)
-{
- struct btrfs_item *item = btrfs_item_nr(eb, nr);
- write_eb_member(eb, item, struct btrfs_item, key, disk_key);
-}
-
-BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64);
-
-/*
- * struct btrfs_root_ref
- */
-BTRFS_SETGET_FUNCS(root_ref_dirid, struct btrfs_root_ref, dirid, 64);
-BTRFS_SETGET_FUNCS(root_ref_sequence, struct btrfs_root_ref, sequence, 64);
-BTRFS_SETGET_FUNCS(root_ref_name_len, struct btrfs_root_ref, name_len, 16);
-
-/* struct btrfs_dir_item */
-BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16);
-BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8);
-BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16);
-BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64);
-
-static inline void btrfs_dir_item_key(struct extent_buffer *eb,
- struct btrfs_dir_item *item,
- struct btrfs_disk_key *key)
-{
- read_eb_member(eb, item, struct btrfs_dir_item, location, key);
-}
-
-static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
- struct btrfs_dir_item *item,
- struct btrfs_disk_key *key)
-{
- write_eb_member(eb, item, struct btrfs_dir_item, location, key);
-}
-
-BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
- num_entries, 64);
-BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
- num_bitmaps, 64);
-BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
- generation, 64);
-
-static inline void btrfs_free_space_key(struct extent_buffer *eb,
- struct btrfs_free_space_header *h,
- struct btrfs_disk_key *key)
-{
- read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
-}
-
-static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
- struct btrfs_free_space_header *h,
- struct btrfs_disk_key *key)
-{
- write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
-}
-
-/* struct btrfs_disk_key */
-BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
- objectid, 64);
-BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64);
-BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8);
-
-static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
- struct btrfs_disk_key *disk)
-{
- cpu->offset = le64_to_cpu(disk->offset);
- cpu->type = disk->type;
- cpu->objectid = le64_to_cpu(disk->objectid);
-}
-
-static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk,
- struct btrfs_key *cpu)
-{
- disk->offset = cpu_to_le64(cpu->offset);
- disk->type = cpu->type;
- disk->objectid = cpu_to_le64(cpu->objectid);
-}
-
-static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb,
- struct btrfs_key *key, int nr)
-{
- struct btrfs_disk_key disk_key;
- btrfs_node_key(eb, &disk_key, nr);
- btrfs_disk_key_to_cpu(key, &disk_key);
-}
-
-static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb,
- struct btrfs_key *key, int nr)
-{
- struct btrfs_disk_key disk_key;
- btrfs_item_key(eb, &disk_key, nr);
- btrfs_disk_key_to_cpu(key, &disk_key);
-}
-
-static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb,
- struct btrfs_dir_item *item,
- struct btrfs_key *key)
-{
- struct btrfs_disk_key disk_key;
- btrfs_dir_item_key(eb, item, &disk_key);
- btrfs_disk_key_to_cpu(key, &disk_key);
-}
-
-
-static inline u8 btrfs_key_type(struct btrfs_key *key)
-{
- return key->type;
-}
-
-static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val)
-{
- key->type = val;
-}
-
-/* struct btrfs_header */
-BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64);
-BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header,
- generation, 64);
-BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64);
-BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32);
-BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64);
-BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8);
-
-static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
-{
- return (btrfs_header_flags(eb) & flag) == flag;
-}
-
-static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
-{
- u64 flags = btrfs_header_flags(eb);
- btrfs_set_header_flags(eb, flags | flag);
- return (flags & flag) == flag;
-}
-
-static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
-{
- u64 flags = btrfs_header_flags(eb);
- btrfs_set_header_flags(eb, flags & ~flag);
- return (flags & flag) == flag;
-}
-
-static inline int btrfs_header_backref_rev(struct extent_buffer *eb)
-{
- u64 flags = btrfs_header_flags(eb);
- return flags >> BTRFS_BACKREF_REV_SHIFT;
-}
-
-static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
- int rev)
-{
- u64 flags = btrfs_header_flags(eb);
- flags &= ~BTRFS_BACKREF_REV_MASK;
- flags |= (u64)rev << BTRFS_BACKREF_REV_SHIFT;
- btrfs_set_header_flags(eb, flags);
-}
-
-static inline u8 *btrfs_header_fsid(struct extent_buffer *eb)
-{
- unsigned long ptr = offsetof(struct btrfs_header, fsid);
- return (u8 *)ptr;
-}
-
-static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
-{
- unsigned long ptr = offsetof(struct btrfs_header, chunk_tree_uuid);
- return (u8 *)ptr;
-}
-
-static inline int btrfs_is_leaf(struct extent_buffer *eb)
-{
- return btrfs_header_level(eb) == 0;
-}
-
-/* struct btrfs_root_item */
-BTRFS_SETGET_FUNCS(disk_root_generation, struct btrfs_root_item,
- generation, 64);
-BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32);
-BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64);
-BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8);
-
-BTRFS_SETGET_STACK_FUNCS(root_generation, struct btrfs_root_item,
- generation, 64);
-BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64);
-BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8);
-BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64);
-BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32);
-BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 64);
-BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64);
-BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
-BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
- last_snapshot, 64);
-
-static inline bool btrfs_root_readonly(struct btrfs_root *root)
-{
- return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
-}
-
-/* struct btrfs_root_backup */
-BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup,
- tree_root, 64);
-BTRFS_SETGET_STACK_FUNCS(backup_tree_root_gen, struct btrfs_root_backup,
- tree_root_gen, 64);
-BTRFS_SETGET_STACK_FUNCS(backup_tree_root_level, struct btrfs_root_backup,
- tree_root_level, 8);
-
-BTRFS_SETGET_STACK_FUNCS(backup_chunk_root, struct btrfs_root_backup,
- chunk_root, 64);
-BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_gen, struct btrfs_root_backup,
- chunk_root_gen, 64);
-BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_level, struct btrfs_root_backup,
- chunk_root_level, 8);
-
-BTRFS_SETGET_STACK_FUNCS(backup_extent_root, struct btrfs_root_backup,
- extent_root, 64);
-BTRFS_SETGET_STACK_FUNCS(backup_extent_root_gen, struct btrfs_root_backup,
- extent_root_gen, 64);
-BTRFS_SETGET_STACK_FUNCS(backup_extent_root_level, struct btrfs_root_backup,
- extent_root_level, 8);
-
-BTRFS_SETGET_STACK_FUNCS(backup_fs_root, struct btrfs_root_backup,
- fs_root, 64);
-BTRFS_SETGET_STACK_FUNCS(backup_fs_root_gen, struct btrfs_root_backup,
- fs_root_gen, 64);
-BTRFS_SETGET_STACK_FUNCS(backup_fs_root_level, struct btrfs_root_backup,
- fs_root_level, 8);
-
-BTRFS_SETGET_STACK_FUNCS(backup_dev_root, struct btrfs_root_backup,
- dev_root, 64);
-BTRFS_SETGET_STACK_FUNCS(backup_dev_root_gen, struct btrfs_root_backup,
- dev_root_gen, 64);
-BTRFS_SETGET_STACK_FUNCS(backup_dev_root_level, struct btrfs_root_backup,
- dev_root_level, 8);
-
-BTRFS_SETGET_STACK_FUNCS(backup_csum_root, struct btrfs_root_backup,
- csum_root, 64);
-BTRFS_SETGET_STACK_FUNCS(backup_csum_root_gen, struct btrfs_root_backup,
- csum_root_gen, 64);
-BTRFS_SETGET_STACK_FUNCS(backup_csum_root_level, struct btrfs_root_backup,
- csum_root_level, 8);
-BTRFS_SETGET_STACK_FUNCS(backup_total_bytes, struct btrfs_root_backup,
- total_bytes, 64);
-BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
- bytes_used, 64);
-BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
- num_devices, 64);
-
-/* struct btrfs_balance_item */
-BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
-
-static inline void btrfs_balance_data(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
- struct btrfs_disk_balance_args *ba)
-{
- read_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
-}
-
-static inline void btrfs_set_balance_data(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
- struct btrfs_disk_balance_args *ba)
-{
- write_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
-}
-
-static inline void btrfs_balance_meta(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
- struct btrfs_disk_balance_args *ba)
-{
- read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
-}
-
-static inline void btrfs_set_balance_meta(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
- struct btrfs_disk_balance_args *ba)
-{
- write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
-}
-
-static inline void btrfs_balance_sys(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
- struct btrfs_disk_balance_args *ba)
-{
- read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
-}
-
-static inline void btrfs_set_balance_sys(struct extent_buffer *eb,
- struct btrfs_balance_item *bi,
- struct btrfs_disk_balance_args *ba)
-{
- write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
-}
-
-static inline void
-btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
- struct btrfs_disk_balance_args *disk)
-{
- memset(cpu, 0, sizeof(*cpu));
-
- cpu->profiles = le64_to_cpu(disk->profiles);
- cpu->usage = le64_to_cpu(disk->usage);
- cpu->devid = le64_to_cpu(disk->devid);
- cpu->pstart = le64_to_cpu(disk->pstart);
- cpu->pend = le64_to_cpu(disk->pend);
- cpu->vstart = le64_to_cpu(disk->vstart);
- cpu->vend = le64_to_cpu(disk->vend);
- cpu->target = le64_to_cpu(disk->target);
- cpu->flags = le64_to_cpu(disk->flags);
-}
-
-static inline void
-btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
- struct btrfs_balance_args *cpu)
-{
- memset(disk, 0, sizeof(*disk));
-
- disk->profiles = cpu_to_le64(cpu->profiles);
- disk->usage = cpu_to_le64(cpu->usage);
- disk->devid = cpu_to_le64(cpu->devid);
- disk->pstart = cpu_to_le64(cpu->pstart);
- disk->pend = cpu_to_le64(cpu->pend);
- disk->vstart = cpu_to_le64(cpu->vstart);
- disk->vend = cpu_to_le64(cpu->vend);
- disk->target = cpu_to_le64(cpu->target);
- disk->flags = cpu_to_le64(cpu->flags);
-}
-
-/* struct btrfs_super_block */
-BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
-BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
-BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
- generation, 64);
-BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64);
-BTRFS_SETGET_STACK_FUNCS(super_sys_array_size,
- struct btrfs_super_block, sys_chunk_array_size, 32);
-BTRFS_SETGET_STACK_FUNCS(super_chunk_root_generation,
- struct btrfs_super_block, chunk_root_generation, 64);
-BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block,
- root_level, 8);
-BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block,
- chunk_root, 64);
-BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block,
- chunk_root_level, 8);
-BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block,
- log_root, 64);
-BTRFS_SETGET_STACK_FUNCS(super_log_root_transid, struct btrfs_super_block,
- log_root_transid, 64);
-BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block,
- log_root_level, 8);
-BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block,
- total_bytes, 64);
-BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block,
- bytes_used, 64);
-BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block,
- sectorsize, 32);
-BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block,
- nodesize, 32);
-BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block,
- leafsize, 32);
-BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block,
- stripesize, 32);
-BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block,
- root_dir_objectid, 64);
-BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block,
- num_devices, 64);
-BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block,
- compat_flags, 64);
-BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block,
- compat_ro_flags, 64);
-BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
- incompat_flags, 64);
-BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
- csum_type, 16);
-BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
- cache_generation, 64);
-
-static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
-{
- int t = btrfs_super_csum_type(s);
- BUG_ON(t >= ARRAY_SIZE(btrfs_csum_sizes));
- return btrfs_csum_sizes[t];
-}
-
-static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
-{
- return offsetof(struct btrfs_leaf, items);
-}
-
-/* struct btrfs_file_extent_item */
-BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
-
-static inline unsigned long
-btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e)
-{
- unsigned long offset = (unsigned long)e;
- offset += offsetof(struct btrfs_file_extent_item, disk_bytenr);
- return offset;
-}
-
-static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
-{
- return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize;
-}
-
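-/*
- * A quick sanity check of the math, with the sizes read off the packed
- * struct above: the header in front of inline data is generation (8) +
- * ram_bytes (8) + compression (1) + encryption (1) + other_encoding (2)
- * + type (1) = 21 bytes, so an inline extent holding 100 bytes of file
- * data needs a 121 byte item.
- */
-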
-BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item,
- disk_bytenr, 64);
-BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item,
- generation, 64);
-BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item,
- disk_num_bytes, 64);
-BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item,
- offset, 64);
-BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item,
- num_bytes, 64);
-BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item,
- ram_bytes, 64);
-BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item,
- compression, 8);
-BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item,
- encryption, 8);
-BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
- other_encoding, 16);
-
-/* this returns the number of file bytes represented by the inline item.
- * If an item is compressed, this is the uncompressed size
- */
-static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
- struct btrfs_file_extent_item *e)
-{
- return btrfs_file_extent_ram_bytes(eb, e);
-}
-
-/*
- * this returns the number of bytes used by the item on disk, minus the
- * size of any extent headers. If a file is compressed on disk, this is
- * the compressed size
- */
-static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
- struct btrfs_item *e)
-{
- unsigned long offset;
- offset = offsetof(struct btrfs_file_extent_item, disk_bytenr);
- return btrfs_item_size(eb, e) - offset;
-}
-
-static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
-{
- return sb->s_fs_info;
-}
-
-static inline u32 btrfs_level_size(struct btrfs_root *root, int level)
-{
- if (level == 0)
- return root->leafsize;
- return root->nodesize;
-}
-
-/* helper function to cast into the data area of the leaf. */
-#define btrfs_item_ptr(leaf, slot, type) \
- ((type *)(btrfs_leaf_data(leaf) + \
- btrfs_item_offset_nr(leaf, slot)))
-
-#define btrfs_item_ptr_offset(leaf, slot) \
- ((unsigned long)(btrfs_leaf_data(leaf) + \
- btrfs_item_offset_nr(leaf, slot)))
-
-static inline struct dentry *fdentry(struct file *file)
-{
- return file->f_path.dentry;
-}
-
-static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
-{
- return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
- (space_info->flags & BTRFS_BLOCK_GROUP_DATA));
-}
-
-static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
-{
- return mapping_gfp_mask(mapping) & ~__GFP_FS;
-}
-
-/* extent-tree.c */
-static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
- unsigned num_items)
-{
- return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
- 3 * num_items;
-}
-
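-/*
- * Rough numbers to show the scale, assuming 4K leaves and nodes and the
- * usual BTRFS_MAX_LEVEL of 8: reserving for a single item costs
- * (4096 + 4096 * 7) * 3 = 96K, roughly enough to COW one full tree path
- * three times over, while the truncate variant below drops the factor
- * of three.
- */
-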
-/*
- * Doing a truncate won't result in new nodes or leaves, just what we need for
- * COW.
- */
-static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root,
- unsigned num_items)
-{
- return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
- num_items;
-}
-
-void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
-int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, unsigned long count);
-int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
-int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u64 *refs, u64 *flags);
-int btrfs_pin_extent(struct btrfs_root *root,
- u64 bytenr, u64 num, int reserved);
-int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes);
-int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 objectid, u64 offset, u64 bytenr);
-struct btrfs_block_group_cache *btrfs_lookup_block_group(
- struct btrfs_fs_info *info,
- u64 bytenr);
-u64 btrfs_find_block_group(struct btrfs_root *root,
- u64 search_start, u64 search_hint, int owner);
-struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u32 blocksize,
- u64 parent, u64 root_objectid,
- struct btrfs_disk_key *key, int level,
- u64 hint, u64 empty_size, int for_cow);
-void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *buf,
- u64 parent, int last_ref, int for_cow);
-struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u32 blocksize,
- int level);
-int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 root_objectid, u64 owner,
- u64 offset, struct btrfs_key *ins);
-int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 root_objectid, u64 owner, u64 offset,
- struct btrfs_key *ins);
-int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 num_bytes, u64 min_alloc_size,
- u64 empty_size, u64 hint_byte,
- struct btrfs_key *ins, u64 data);
-int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int for_cow);
-int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int for_cow);
-int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 flags,
- int is_data);
-int btrfs_free_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
- u64 owner, u64 offset, int for_cow);
-
-int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
-int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
- u64 start, u64 len);
-void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset, int for_cow);
-
-int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
-int btrfs_free_block_groups(struct btrfs_fs_info *info);
-int btrfs_read_block_groups(struct btrfs_root *root);
-int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr);
-int btrfs_make_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytes_used,
- u64 type, u64 chunk_objectid, u64 chunk_offset,
- u64 size);
-int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 group_start);
-u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
-u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
-void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode);
-void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
-int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
-void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
-void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
- struct inode *inode);
-void btrfs_orphan_release_metadata(struct inode *inode);
-int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_pending_snapshot *pending);
-int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes);
-void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes);
-int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes);
-void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes);
-void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
-struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
-void btrfs_free_block_rsv(struct btrfs_root *root,
- struct btrfs_block_rsv *rsv);
-int btrfs_block_rsv_add(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes);
-int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes);
-int btrfs_block_rsv_check(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv, int min_factor);
-int btrfs_block_rsv_refill(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 min_reserved);
-int btrfs_block_rsv_refill_noflush(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 min_reserved);
-int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
- struct btrfs_block_rsv *dst_rsv,
- u64 num_bytes);
-void btrfs_block_rsv_release(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes);
-int btrfs_set_block_group_ro(struct btrfs_root *root,
- struct btrfs_block_group_cache *cache);
-void btrfs_set_block_group_rw(struct btrfs_root *root,
- struct btrfs_block_group_cache *cache);
-void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
-u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
-int btrfs_error_unpin_extent_range(struct btrfs_root *root,
- u64 start, u64 end);
-int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u64 *actual_bytes);
-int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 type);
-int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
-
-int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
-/* ctree.c */
-int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
- int level, int *slot);
-int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
-int btrfs_previous_item(struct btrfs_root *root,
- struct btrfs_path *path, u64 min_objectid,
- int type);
-void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *new_key);
-struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
-struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
-int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *key, int lowest_level,
- int cache_only, u64 min_trans);
-int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
- struct btrfs_key *max_key,
- struct btrfs_path *path, int cache_only,
- u64 min_trans);
-int btrfs_cow_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *buf,
- struct extent_buffer *parent, int parent_slot,
- struct extent_buffer **cow_ret);
-int btrfs_copy_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *buf,
- struct extent_buffer **cow_ret, u64 new_root_objectid);
-int btrfs_block_can_be_shared(struct btrfs_root *root,
- struct extent_buffer *buf);
-void btrfs_extend_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- u32 data_size);
-void btrfs_truncate_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u32 new_size, int from_end);
-int btrfs_split_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *new_key,
- unsigned long split_offset);
-int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *new_key);
-int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *key, struct btrfs_path *p, int
- ins_len, int cow);
-int btrfs_realloc_node(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *parent,
- int start_slot, int cache_only, u64 *last_ret,
- struct btrfs_key *progress);
-void btrfs_release_path(struct btrfs_path *p);
-struct btrfs_path *btrfs_alloc_path(void);
-void btrfs_free_path(struct btrfs_path *p);
-void btrfs_set_path_blocking(struct btrfs_path *p);
-void btrfs_clear_path_blocking(struct btrfs_path *p,
- struct extent_buffer *held, int held_rw);
-void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
-
-int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_path *path, int slot, int nr);
-static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path)
-{
- return btrfs_del_items(trans, root, path, path->slots[0], 1);
-}
-
-void setup_items_for_insert(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *cpu_key, u32 *data_size,
- u32 total_data, u32 total_size, int nr);
-int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *key, void *data, u32 data_size);
-int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *cpu_key, u32 *data_size, int nr);
-
-static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *key,
- u32 data_size)
-{
- return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1);
-}
-
-int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
-static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
-{
- ++p->slots[0];
- if (p->slots[0] >= btrfs_header_nritems(p->nodes[0]))
- return btrfs_next_leaf(root, p);
- return 0;
-}
-int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
-int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
-int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- int update_ref, int for_reloc);
-int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *node,
- struct extent_buffer *parent);
-static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
-{
- /*
- * Get synced with close_ctree()
- */
- smp_mb();
- return fs_info->closing;
-}
-static inline void free_fs_info(struct btrfs_fs_info *fs_info)
-{
- kfree(fs_info->balance_ctl);
- kfree(fs_info->delayed_root);
- kfree(fs_info->extent_root);
- kfree(fs_info->tree_root);
- kfree(fs_info->chunk_root);
- kfree(fs_info->dev_root);
- kfree(fs_info->csum_root);
- kfree(fs_info->super_copy);
- kfree(fs_info->super_for_commit);
- kfree(fs_info);
-}
-
-/* root-item.c */
-int btrfs_find_root_ref(struct btrfs_root *tree_root,
- struct btrfs_path *path,
- u64 root_id, u64 ref_id);
-int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *tree_root,
- u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
- const char *name, int name_len);
-int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *tree_root,
- u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
- const char *name, int name_len);
-int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_key *key);
-int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *key, struct btrfs_root_item
- *item);
-int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_key *key,
- struct btrfs_root_item *item);
-int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
- btrfs_root_item *item, struct btrfs_key *key);
-int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
-int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
-void btrfs_set_root_node(struct btrfs_root_item *item,
- struct extent_buffer *node);
-void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
-
-/* dir-item.c */
-int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, const char *name,
- int name_len, struct inode *dir,
- struct btrfs_key *location, u8 type, u64 index);
-struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, u64 dir,
- const char *name, int name_len,
- int mod);
-struct btrfs_dir_item *
-btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, u64 dir,
- u64 objectid, const char *name, int name_len,
- int mod);
-struct btrfs_dir_item *
-btrfs_search_dir_index_item(struct btrfs_root *root,
- struct btrfs_path *path, u64 dirid,
- const char *name, int name_len);
-struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
- struct btrfs_path *path,
- const char *name, int name_len);
-int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_dir_item *di);
-int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, u64 objectid,
- const char *name, u16 name_len,
- const void *data, u16 data_len);
-struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, u64 dir,
- const char *name, u16 name_len,
- int mod);
-int verify_dir_item(struct btrfs_root *root,
- struct extent_buffer *leaf,
- struct btrfs_dir_item *dir_item);
-
-/* orphan.c */
-int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 offset);
-int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 offset);
-int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset);
-
-/* inode-item.c */
-int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- const char *name, int name_len,
- u64 inode_objectid, u64 ref_objectid, u64 index);
-int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- const char *name, int name_len,
- u64 inode_objectid, u64 ref_objectid, u64 *index);
-struct btrfs_inode_ref *
-btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- const char *name, int name_len,
- u64 inode_objectid, u64 ref_objectid, int mod);
-int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, u64 objectid);
-int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path,
- struct btrfs_key *location, int mod);
-
-/* file-item.c */
-int btrfs_del_csums(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytenr, u64 len);
-int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
- struct bio *bio, u32 *dst);
-int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
- struct bio *bio, u64 logical_offset, u32 *dst);
-int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 objectid, u64 pos,
- u64 disk_offset, u64 disk_num_bytes,
- u64 num_bytes, u64 offset, u64 ram_bytes,
- u8 compression, u8 encryption, u16 other_encoding);
-int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, u64 objectid,
- u64 bytenr, int mod);
-int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_ordered_sum *sums);
-int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
- struct bio *bio, u64 file_start, int contig);
-struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 bytenr, int cow);
-int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- u64 isize);
-int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
- struct list_head *list, int search_commit);
-/* inode.c */
-struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
- size_t pg_offset, u64 start, u64 len,
- int create);
-
-/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
-#if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
-#define ClearPageChecked ClearPageFsMisc
-#define SetPageChecked SetPageFsMisc
-#define PageChecked PageFsMisc
-#endif
-
-/* This forces readahead on a given range of bytes in an inode */
-static inline void btrfs_force_ra(struct address_space *mapping,
- struct file_ra_state *ra, struct file *file,
- pgoff_t offset, unsigned long req_size)
-{
- page_cache_sync_readahead(mapping, ra, file, offset, req_size);
-}
-
-struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
-int btrfs_set_inode_index(struct inode *dir, u64 *index);
-int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *dir, struct inode *inode,
- const char *name, int name_len);
-int btrfs_add_link(struct btrfs_trans_handle *trans,
- struct inode *parent_inode, struct inode *inode,
- const char *name, int name_len, int add_backref, u64 index);
-int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *dir, u64 objectid,
- const char *name, int name_len);
-int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode, u64 new_size,
- u32 min_type);
-
-int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
-int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
- struct extent_state **cached_state);
-int btrfs_writepages(struct address_space *mapping,
- struct writeback_control *wbc);
-int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *new_root, u64 new_dirid);
-int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
- size_t size, struct bio *bio, unsigned long bio_flags);
-
-int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
-int btrfs_readpage(struct file *file, struct page *page);
-void btrfs_evict_inode(struct inode *inode);
-int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
-int btrfs_dirty_inode(struct inode *inode);
-int btrfs_update_time(struct file *file);
-struct inode *btrfs_alloc_inode(struct super_block *sb);
-void btrfs_destroy_inode(struct inode *inode);
-int btrfs_drop_inode(struct inode *inode);
-int btrfs_init_cachep(void);
-void btrfs_destroy_cachep(void);
-long btrfs_ioctl_trans_end(struct file *file);
-struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
- struct btrfs_root *root, int *was_new);
-struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
- size_t pg_offset, u64 start, u64 end,
- int create);
-int btrfs_update_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode);
-int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
-int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
-int btrfs_orphan_cleanup(struct btrfs_root *root);
-void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
-void btrfs_invalidate_inodes(struct btrfs_root *root);
-void btrfs_add_delayed_iput(struct inode *inode);
-void btrfs_run_delayed_iputs(struct btrfs_root *root);
-int btrfs_prealloc_file_range(struct inode *inode, int mode,
- u64 start, u64 num_bytes, u64 min_size,
- loff_t actual_len, u64 *alloc_hint);
-int btrfs_prealloc_file_range_trans(struct inode *inode,
- struct btrfs_trans_handle *trans, int mode,
- u64 start, u64 num_bytes, u64 min_size,
- loff_t actual_len, u64 *alloc_hint);
-extern const struct dentry_operations btrfs_dentry_operations;
-
-/* ioctl.c */
-long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-void btrfs_update_iflags(struct inode *inode);
-void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
-int btrfs_defrag_file(struct inode *inode, struct file *file,
- struct btrfs_ioctl_defrag_range_args *range,
- u64 newer_than, unsigned long max_pages);
-/* file.c */
-int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
- struct inode *inode);
-int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
-int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
-int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
- int skip_pinned);
-extern const struct file_operations btrfs_file_operations;
-int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
- u64 start, u64 end, u64 *hint_byte, int drop_cache);
-int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
- struct inode *inode, u64 start, u64 end);
-int btrfs_release_file(struct inode *inode, struct file *file);
-void btrfs_drop_pages(struct page **pages, size_t num_pages);
-int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
- struct page **pages, size_t num_pages,
- loff_t pos, size_t write_bytes,
- struct extent_state **cached);
-
-/* tree-defrag.c */
-int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int cache_only);
-
-/* sysfs.c */
-int btrfs_init_sysfs(void);
-void btrfs_exit_sysfs(void);
-
-/* xattr.c */
-ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
-
-/* super.c */
-int btrfs_parse_options(struct btrfs_root *root, char *options);
-int btrfs_sync_fs(struct super_block *sb, int wait);
-void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...);
-void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
- unsigned int line, int errno, const char *fmt, ...);
-
-void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, const char *function,
- unsigned int line, int errno);
-
-#define btrfs_abort_transaction(trans, root, errno) \
-do { \
- __btrfs_abort_transaction(trans, root, __func__, \
- __LINE__, errno); \
-} while (0)
-
-#define btrfs_std_error(fs_info, errno) \
-do { \
- if ((errno)) \
- __btrfs_std_error((fs_info), __func__, \
- __LINE__, (errno), NULL); \
-} while (0)
-
-#define btrfs_error(fs_info, errno, fmt, args...) \
-do { \
- __btrfs_std_error((fs_info), __func__, __LINE__, \
- (errno), fmt, ##args); \
-} while (0)
-
-void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
- unsigned int line, int errno, const char *fmt, ...);
-
-#define btrfs_panic(fs_info, errno, fmt, args...) \
-do { \
- struct btrfs_fs_info *_i = (fs_info); \
- __btrfs_panic(_i, __func__, __LINE__, errno, fmt, ##args); \
- BUG_ON(!(_i->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)); \
-} while (0)
-
-/* acl.c */
-#ifdef CONFIG_BTRFS_FS_POSIX_ACL
-struct posix_acl *btrfs_get_acl(struct inode *inode, int type);
-int btrfs_init_acl(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir);
-int btrfs_acl_chmod(struct inode *inode);
-#else
-#define btrfs_get_acl NULL
-static inline int btrfs_init_acl(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir)
-{
- return 0;
-}
-static inline int btrfs_acl_chmod(struct inode *inode)
-{
- return 0;
-}
-#endif
-
-/* relocation.c */
-int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
-int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-int btrfs_recover_relocation(struct btrfs_root *root);
-int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
-void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *buf,
- struct extent_buffer *cow);
-void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
- struct btrfs_pending_snapshot *pending,
- u64 *bytes_to_reserve);
-int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
- struct btrfs_pending_snapshot *pending);
-
-/* scrub.c */
-int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
- struct btrfs_scrub_progress *progress, int readonly);
-void btrfs_scrub_pause(struct btrfs_root *root);
-void btrfs_scrub_pause_super(struct btrfs_root *root);
-void btrfs_scrub_continue(struct btrfs_root *root);
-void btrfs_scrub_continue_super(struct btrfs_root *root);
-int __btrfs_scrub_cancel(struct btrfs_fs_info *info);
-int btrfs_scrub_cancel(struct btrfs_root *root);
-int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev);
-int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid);
-int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
- struct btrfs_scrub_progress *progress);
-
-/* reada.c */
-struct reada_control {
- struct btrfs_root *root; /* tree to prefetch */
- struct btrfs_key key_start;
- struct btrfs_key key_end; /* exclusive */
- atomic_t elems;
- struct kref refcnt;
- wait_queue_head_t wait;
-};
-struct reada_control *btrfs_reada_add(struct btrfs_root *root,
- struct btrfs_key *start, struct btrfs_key *end);
-int btrfs_reada_wait(void *handle);
-void btrfs_reada_detach(void *handle);
-int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
- u64 start, int err);
-
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/delayed-inode.c b/ANDROID_3.4.5/fs/btrfs/delayed-inode.c
deleted file mode 100644
index 03e3748d..00000000
--- a/ANDROID_3.4.5/fs/btrfs/delayed-inode.c
+++ /dev/null
@@ -1,1881 +0,0 @@
-/*
- * Copyright (C) 2011 Fujitsu. All rights reserved.
- * Written by Miao Xie <miaox@cn.fujitsu.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/slab.h>
-#include "delayed-inode.h"
-#include "disk-io.h"
-#include "transaction.h"
-
-#define BTRFS_DELAYED_WRITEBACK 400
-#define BTRFS_DELAYED_BACKGROUND 100
-
-static struct kmem_cache *delayed_node_cache;
-
-int __init btrfs_delayed_inode_init(void)
-{
- delayed_node_cache = kmem_cache_create("delayed_node",
- sizeof(struct btrfs_delayed_node),
- 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
- NULL);
- if (!delayed_node_cache)
- return -ENOMEM;
- return 0;
-}
-
-void btrfs_delayed_inode_exit(void)
-{
- if (delayed_node_cache)
- kmem_cache_destroy(delayed_node_cache);
-}
-
-static inline void btrfs_init_delayed_node(
- struct btrfs_delayed_node *delayed_node,
- struct btrfs_root *root, u64 inode_id)
-{
- delayed_node->root = root;
- delayed_node->inode_id = inode_id;
- atomic_set(&delayed_node->refs, 0);
- delayed_node->count = 0;
- delayed_node->in_list = 0;
- delayed_node->inode_dirty = 0;
- delayed_node->ins_root = RB_ROOT;
- delayed_node->del_root = RB_ROOT;
- mutex_init(&delayed_node->mutex);
- delayed_node->index_cnt = 0;
- INIT_LIST_HEAD(&delayed_node->n_list);
- INIT_LIST_HEAD(&delayed_node->p_list);
- delayed_node->bytes_reserved = 0;
-}
-
-static inline int btrfs_is_continuous_delayed_item(
- struct btrfs_delayed_item *item1,
- struct btrfs_delayed_item *item2)
-{
- if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
- item1->key.objectid == item2->key.objectid &&
- item1->key.type == item2->key.type &&
- item1->key.offset + 1 == item2->key.offset)
- return 1;
- return 0;
-}
-
-static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
- struct btrfs_root *root)
-{
- return root->fs_info->delayed_root;
-}
-
-static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
-{
- struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
- struct btrfs_root *root = btrfs_inode->root;
- u64 ino = btrfs_ino(inode);
- struct btrfs_delayed_node *node;
-
- node = ACCESS_ONCE(btrfs_inode->delayed_node);
- if (node) {
- atomic_inc(&node->refs);
- return node;
- }
-
- spin_lock(&root->inode_lock);
- node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
- if (node) {
- if (btrfs_inode->delayed_node) {
- atomic_inc(&node->refs); /* can be accessed */
- BUG_ON(btrfs_inode->delayed_node != node);
- spin_unlock(&root->inode_lock);
- return node;
- }
- btrfs_inode->delayed_node = node;
- atomic_inc(&node->refs); /* can be accessed */
- atomic_inc(&node->refs); /* cached in the inode */
- spin_unlock(&root->inode_lock);
- return node;
- }
- spin_unlock(&root->inode_lock);
-
- return NULL;
-}
-
-/* Will return either the node or PTR_ERR(-ENOMEM) */
-static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
- struct inode *inode)
-{
- struct btrfs_delayed_node *node;
- struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
- struct btrfs_root *root = btrfs_inode->root;
- u64 ino = btrfs_ino(inode);
- int ret;
-
-again:
- node = btrfs_get_delayed_node(inode);
- if (node)
- return node;
-
- node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
- if (!node)
- return ERR_PTR(-ENOMEM);
- btrfs_init_delayed_node(node, root, ino);
-
- atomic_inc(&node->refs); /* cached in the btrfs inode */
- atomic_inc(&node->refs); /* can be accessed */
-
- ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
- if (ret) {
- kmem_cache_free(delayed_node_cache, node);
- return ERR_PTR(ret);
- }
-
- spin_lock(&root->inode_lock);
- ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
- if (ret == -EEXIST) {
- kmem_cache_free(delayed_node_cache, node);
- spin_unlock(&root->inode_lock);
- radix_tree_preload_end();
- goto again;
- }
- btrfs_inode->delayed_node = node;
- spin_unlock(&root->inode_lock);
- radix_tree_preload_end();
-
- return node;
-}
-
-/*
- * Call it when holding delayed_node->mutex
- *
- * If mod = 1, add this node into the prepared list.
- */
-static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
- struct btrfs_delayed_node *node,
- int mod)
-{
- spin_lock(&root->lock);
- if (node->in_list) {
- if (!list_empty(&node->p_list))
- list_move_tail(&node->p_list, &root->prepare_list);
- else if (mod)
- list_add_tail(&node->p_list, &root->prepare_list);
- } else {
- list_add_tail(&node->n_list, &root->node_list);
- list_add_tail(&node->p_list, &root->prepare_list);
- atomic_inc(&node->refs); /* inserted into list */
- root->nodes++;
- node->in_list = 1;
- }
- spin_unlock(&root->lock);
-}
-
-/* Call it when holding delayed_node->mutex */
-static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
- struct btrfs_delayed_node *node)
-{
- spin_lock(&root->lock);
- if (node->in_list) {
- root->nodes--;
- atomic_dec(&node->refs); /* not in the list */
- list_del_init(&node->n_list);
- if (!list_empty(&node->p_list))
- list_del_init(&node->p_list);
- node->in_list = 0;
- }
- spin_unlock(&root->lock);
-}
-
-struct btrfs_delayed_node *btrfs_first_delayed_node(
- struct btrfs_delayed_root *delayed_root)
-{
- struct list_head *p;
- struct btrfs_delayed_node *node = NULL;
-
- spin_lock(&delayed_root->lock);
- if (list_empty(&delayed_root->node_list))
- goto out;
-
- p = delayed_root->node_list.next;
- node = list_entry(p, struct btrfs_delayed_node, n_list);
- atomic_inc(&node->refs);
-out:
- spin_unlock(&delayed_root->lock);
-
- return node;
-}
-
-struct btrfs_delayed_node *btrfs_next_delayed_node(
- struct btrfs_delayed_node *node)
-{
- struct btrfs_delayed_root *delayed_root;
- struct list_head *p;
- struct btrfs_delayed_node *next = NULL;
-
- delayed_root = node->root->fs_info->delayed_root;
- spin_lock(&delayed_root->lock);
- if (!node->in_list) { /* not in the list */
- if (list_empty(&delayed_root->node_list))
- goto out;
- p = delayed_root->node_list.next;
- } else if (list_is_last(&node->n_list, &delayed_root->node_list))
- goto out;
- else
- p = node->n_list.next;
-
- next = list_entry(p, struct btrfs_delayed_node, n_list);
- atomic_inc(&next->refs);
-out:
- spin_unlock(&delayed_root->lock);
-
- return next;
-}
-
-static void __btrfs_release_delayed_node(
- struct btrfs_delayed_node *delayed_node,
- int mod)
-{
- struct btrfs_delayed_root *delayed_root;
-
- if (!delayed_node)
- return;
-
- delayed_root = delayed_node->root->fs_info->delayed_root;
-
- mutex_lock(&delayed_node->mutex);
- if (delayed_node->count)
- btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
- else
- btrfs_dequeue_delayed_node(delayed_root, delayed_node);
- mutex_unlock(&delayed_node->mutex);
-
- if (atomic_dec_and_test(&delayed_node->refs)) {
- struct btrfs_root *root = delayed_node->root;
- spin_lock(&root->inode_lock);
- if (atomic_read(&delayed_node->refs) == 0) {
- radix_tree_delete(&root->delayed_nodes_tree,
- delayed_node->inode_id);
- kmem_cache_free(delayed_node_cache, delayed_node);
- }
- spin_unlock(&root->inode_lock);
- }
-}
-
-static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
-{
- __btrfs_release_delayed_node(node, 0);
-}
-
-struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
- struct btrfs_delayed_root *delayed_root)
-{
- struct list_head *p;
- struct btrfs_delayed_node *node = NULL;
-
- spin_lock(&delayed_root->lock);
- if (list_empty(&delayed_root->prepare_list))
- goto out;
-
- p = delayed_root->prepare_list.next;
- list_del_init(p);
- node = list_entry(p, struct btrfs_delayed_node, p_list);
- atomic_inc(&node->refs);
-out:
- spin_unlock(&delayed_root->lock);
-
- return node;
-}
-
-static inline void btrfs_release_prepared_delayed_node(
- struct btrfs_delayed_node *node)
-{
- __btrfs_release_delayed_node(node, 1);
-}
-
-struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
-{
- struct btrfs_delayed_item *item;
- item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
- if (item) {
- item->data_len = data_len;
- item->ins_or_del = 0;
- item->bytes_reserved = 0;
- item->delayed_node = NULL;
- atomic_set(&item->refs, 1);
- }
- return item;
-}
-
-/*
- * __btrfs_lookup_delayed_item - look up the delayed item by key
- * @delayed_node: pointer to the delayed node
- * @key: the key to look up
- * @prev: used to store the prev item if the right item isn't found
- * @next: used to store the next item if the right item isn't found
- *
- * Note: if we don't find the right item, we will return the prev item and
- * the next item.
- */
-static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
- struct rb_root *root,
- struct btrfs_key *key,
- struct btrfs_delayed_item **prev,
- struct btrfs_delayed_item **next)
-{
- struct rb_node *node, *prev_node = NULL;
- struct btrfs_delayed_item *delayed_item = NULL;
- int ret = 0;
-
- node = root->rb_node;
-
- while (node) {
- delayed_item = rb_entry(node, struct btrfs_delayed_item,
- rb_node);
- prev_node = node;
- ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
- if (ret < 0)
- node = node->rb_right;
- else if (ret > 0)
- node = node->rb_left;
- else
- return delayed_item;
- }
-
- if (prev) {
- if (!prev_node)
- *prev = NULL;
- else if (ret < 0)
- *prev = delayed_item;
- else if ((node = rb_prev(prev_node)) != NULL) {
- *prev = rb_entry(node, struct btrfs_delayed_item,
- rb_node);
- } else
- *prev = NULL;
- }
-
- if (next) {
- if (!prev_node)
- *next = NULL;
- else if (ret > 0)
- *next = delayed_item;
- else if ((node = rb_next(prev_node)) != NULL) {
- *next = rb_entry(node, struct btrfs_delayed_item,
- rb_node);
- } else
- *next = NULL;
- }
- return NULL;
-}
-
-struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
- struct btrfs_delayed_node *delayed_node,
- struct btrfs_key *key)
-{
- struct btrfs_delayed_item *item;
-
- item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
- NULL, NULL);
- return item;
-}
-
-struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item(
- struct btrfs_delayed_node *delayed_node,
- struct btrfs_key *key)
-{
- struct btrfs_delayed_item *item;
-
- item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
- NULL, NULL);
- return item;
-}
-
-struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item(
- struct btrfs_delayed_node *delayed_node,
- struct btrfs_key *key)
-{
- struct btrfs_delayed_item *item, *next;
-
- item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
- NULL, &next);
- if (!item)
- item = next;
-
- return item;
-}
-
-struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item(
- struct btrfs_delayed_node *delayed_node,
- struct btrfs_key *key)
-{
- struct btrfs_delayed_item *item, *next;
-
- item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
- NULL, &next);
- if (!item)
- item = next;
-
- return item;
-}
-
-static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
- struct btrfs_delayed_item *ins,
- int action)
-{
- struct rb_node **p, *node;
- struct rb_node *parent_node = NULL;
- struct rb_root *root;
- struct btrfs_delayed_item *item;
- int cmp;
-
- if (action == BTRFS_DELAYED_INSERTION_ITEM)
- root = &delayed_node->ins_root;
- else if (action == BTRFS_DELAYED_DELETION_ITEM)
- root = &delayed_node->del_root;
- else
- BUG();
- p = &root->rb_node;
- node = &ins->rb_node;
-
- while (*p) {
- parent_node = *p;
- item = rb_entry(parent_node, struct btrfs_delayed_item,
- rb_node);
-
- cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
- if (cmp < 0)
- p = &(*p)->rb_right;
- else if (cmp > 0)
- p = &(*p)->rb_left;
- else
- return -EEXIST;
- }
-
- rb_link_node(node, parent_node, p);
- rb_insert_color(node, root);
- ins->delayed_node = delayed_node;
- ins->ins_or_del = action;
-
- if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
- action == BTRFS_DELAYED_INSERTION_ITEM &&
- ins->key.offset >= delayed_node->index_cnt)
- delayed_node->index_cnt = ins->key.offset + 1;
-
- delayed_node->count++;
- atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
- return 0;
-}
-
-static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
- struct btrfs_delayed_item *item)
-{
- return __btrfs_add_delayed_item(node, item,
- BTRFS_DELAYED_INSERTION_ITEM);
-}
-
-static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
- struct btrfs_delayed_item *item)
-{
- return __btrfs_add_delayed_item(node, item,
- BTRFS_DELAYED_DELETION_ITEM);
-}
-
-static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
-{
- struct rb_root *root;
- struct btrfs_delayed_root *delayed_root;
-
- delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
-
- BUG_ON(!delayed_root);
- BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
- delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
-
- if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
- root = &delayed_item->delayed_node->ins_root;
- else
- root = &delayed_item->delayed_node->del_root;
-
- rb_erase(&delayed_item->rb_node, root);
- delayed_item->delayed_node->count--;
- atomic_dec(&delayed_root->items);
- if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
- waitqueue_active(&delayed_root->wait))
- wake_up(&delayed_root->wait);
-}
-
-static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
-{
- if (item) {
- __btrfs_remove_delayed_item(item);
- if (atomic_dec_and_test(&item->refs))
- kfree(item);
- }
-}
-
-struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
- struct btrfs_delayed_node *delayed_node)
-{
- struct rb_node *p;
- struct btrfs_delayed_item *item = NULL;
-
- p = rb_first(&delayed_node->ins_root);
- if (p)
- item = rb_entry(p, struct btrfs_delayed_item, rb_node);
-
- return item;
-}
-
-struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
- struct btrfs_delayed_node *delayed_node)
-{
- struct rb_node *p;
- struct btrfs_delayed_item *item = NULL;
-
- p = rb_first(&delayed_node->del_root);
- if (p)
- item = rb_entry(p, struct btrfs_delayed_item, rb_node);
-
- return item;
-}
-
-struct btrfs_delayed_item *__btrfs_next_delayed_item(
- struct btrfs_delayed_item *item)
-{
- struct rb_node *p;
- struct btrfs_delayed_item *next = NULL;
-
- p = rb_next(&item->rb_node);
- if (p)
- next = rb_entry(p, struct btrfs_delayed_item, rb_node);
-
- return next;
-}
-
-static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
- u64 root_id)
-{
- struct btrfs_key root_key;
-
- if (root->objectid == root_id)
- return root;
-
- root_key.objectid = root_id;
- root_key.type = BTRFS_ROOT_ITEM_KEY;
- root_key.offset = (u64)-1;
- return btrfs_read_fs_root_no_name(root->fs_info, &root_key);
-}
-
-static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_delayed_item *item)
-{
- struct btrfs_block_rsv *src_rsv;
- struct btrfs_block_rsv *dst_rsv;
- u64 num_bytes;
- int ret;
-
- if (!trans->bytes_reserved)
- return 0;
-
- src_rsv = trans->block_rsv;
- dst_rsv = &root->fs_info->delayed_block_rsv;
-
- num_bytes = btrfs_calc_trans_metadata_size(root, 1);
- ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
- if (!ret) {
- trace_btrfs_space_reservation(root->fs_info, "delayed_item",
- item->key.objectid,
- num_bytes, 1);
- item->bytes_reserved = num_bytes;
- }
-
- return ret;
-}
-
-static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
- struct btrfs_delayed_item *item)
-{
- struct btrfs_block_rsv *rsv;
-
- if (!item->bytes_reserved)
- return;
-
- rsv = &root->fs_info->delayed_block_rsv;
- trace_btrfs_space_reservation(root->fs_info, "delayed_item",
- item->key.objectid, item->bytes_reserved,
- 0);
- btrfs_block_rsv_release(root, rsv,
- item->bytes_reserved);
-}
-
-static int btrfs_delayed_inode_reserve_metadata(
- struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode,
- struct btrfs_delayed_node *node)
-{
- struct btrfs_block_rsv *src_rsv;
- struct btrfs_block_rsv *dst_rsv;
- u64 num_bytes;
- int ret;
- bool release = false;
-
- src_rsv = trans->block_rsv;
- dst_rsv = &root->fs_info->delayed_block_rsv;
-
- num_bytes = btrfs_calc_trans_metadata_size(root, 1);
-
- /*
- * btrfs_dirty_inode will update the inode under btrfs_join_transaction
- * which doesn't reserve space for speed. This is a problem since we
- * still need to reserve space for this update, so try to reserve the
- * space.
- *
- * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
- * we're accounted for.
- */
- if (!src_rsv || (!trans->bytes_reserved &&
- src_rsv != &root->fs_info->delalloc_block_rsv)) {
- ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
- /*
- * Since we're under a transaction reserve_metadata_bytes could
- * try to commit the transaction which will make it return
- * EAGAIN to make us stop the transaction we have, so return
- * ENOSPC instead so that btrfs_dirty_inode knows what to do.
- */
- if (ret == -EAGAIN)
- ret = -ENOSPC;
- if (!ret) {
- node->bytes_reserved = num_bytes;
- trace_btrfs_space_reservation(root->fs_info,
- "delayed_inode",
- btrfs_ino(inode),
- num_bytes, 1);
- }
- return ret;
- } else if (src_rsv == &root->fs_info->delalloc_block_rsv) {
- spin_lock(&BTRFS_I(inode)->lock);
- if (BTRFS_I(inode)->delalloc_meta_reserved) {
- BTRFS_I(inode)->delalloc_meta_reserved = 0;
- spin_unlock(&BTRFS_I(inode)->lock);
- release = true;
- goto migrate;
- }
- spin_unlock(&BTRFS_I(inode)->lock);
-
- /* Ok we didn't have space pre-reserved. This shouldn't happen
- * too often but it can happen if we do delalloc to an existing
- * inode which gets dirtied because of the time update, and then
- * isn't touched again until after the transaction commits and
- * then we try to write out the data. First try to be nice and
- * reserve something strictly for us. If not, be a pain and try
- * to steal from the delalloc block rsv.
- */
- ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
- if (!ret)
- goto out;
-
- ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
- if (!ret)
- goto out;
-
- /*
- * Ok this is a problem, let's just steal from the global rsv
- * since this really shouldn't happen that often.
- */
- WARN_ON(1);
- ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
- dst_rsv, num_bytes);
- goto out;
- }
-
-migrate:
- ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
-
-out:
- /*
- * Migrate only takes a reservation, it doesn't touch the size of the
- * block_rsv. This is to simplify things for people who don't normally
- * have things migrated from their block rsv. If they go to release
- * their reservation, that will decrease the size as well, so if migrate
- * reduced the size we'd end up with a negative size. But for the
- * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
- * but we could in fact do this reserve/migrate dance several times
- * between the time we did the original reservation and the time we'd
- * clean it up. So to take care of this, release the space for the meta
- * reservation here. I think it may be time for a documentation page on
- * how block rsvs work.
- */
- if (!ret) {
- trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
- btrfs_ino(inode), num_bytes, 1);
- node->bytes_reserved = num_bytes;
- }
-
- if (release) {
- trace_btrfs_space_reservation(root->fs_info, "delalloc",
- btrfs_ino(inode), num_bytes, 0);
- btrfs_block_rsv_release(root, src_rsv, num_bytes);
- }
-
- return ret;
-}
-
-static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
- struct btrfs_delayed_node *node)
-{
- struct btrfs_block_rsv *rsv;
-
- if (!node->bytes_reserved)
- return;
-
- rsv = &root->fs_info->delayed_block_rsv;
- trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
- node->inode_id, node->bytes_reserved, 0);
- btrfs_block_rsv_release(root, rsv,
- node->bytes_reserved);
- node->bytes_reserved = 0;
-}
-
-/*
- * This helper will insert some continuous items into the same leaf according
- * to the free space of the leaf.
- */
-static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_delayed_item *item)
-{
- struct btrfs_delayed_item *curr, *next;
- int free_space;
- int total_data_size = 0, total_size = 0;
- struct extent_buffer *leaf;
- char *data_ptr;
- struct btrfs_key *keys;
- u32 *data_size;
- struct list_head head;
- int slot;
- int nitems;
- int i;
- int ret = 0;
-
- BUG_ON(!path->nodes[0]);
-
- leaf = path->nodes[0];
- free_space = btrfs_leaf_free_space(root, leaf);
- INIT_LIST_HEAD(&head);
-
- next = item;
- nitems = 0;
-
- /*
- * count the number of continuous items that we can insert in a batch
- */
- while (total_size + next->data_len + sizeof(struct btrfs_item) <=
- free_space) {
- total_data_size += next->data_len;
- total_size += next->data_len + sizeof(struct btrfs_item);
- list_add_tail(&next->tree_list, &head);
- nitems++;
-
- curr = next;
- next = __btrfs_next_delayed_item(curr);
- if (!next)
- break;
-
- if (!btrfs_is_continuous_delayed_item(curr, next))
- break;
- }
-
- if (!nitems) {
- ret = 0;
- goto out;
- }
-
- /*
- * we need to allocate some memory space, but it might cause the task
- * to sleep, so we set all locked nodes in the path to blocking locks
- * first.
- */
- btrfs_set_path_blocking(path);
-
- keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS);
- if (!keys) {
- ret = -ENOMEM;
- goto out;
- }
-
- data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS);
- if (!data_size) {
- ret = -ENOMEM;
- goto error;
- }
-
- /* get keys of all the delayed items */
- i = 0;
- list_for_each_entry(next, &head, tree_list) {
- keys[i] = next->key;
- data_size[i] = next->data_len;
- i++;
- }
-
- /* reset all the locked nodes in the path to spinning locks. */
- btrfs_clear_path_blocking(path, NULL, 0);
-
- /* insert the keys of the items */
- setup_items_for_insert(trans, root, path, keys, data_size,
- total_data_size, total_size, nitems);
-
- /* insert the dir index items */
- slot = path->slots[0];
- list_for_each_entry_safe(curr, next, &head, tree_list) {
- data_ptr = btrfs_item_ptr(leaf, slot, char);
- write_extent_buffer(leaf, &curr->data,
- (unsigned long)data_ptr,
- curr->data_len);
- slot++;
-
- btrfs_delayed_item_release_metadata(root, curr);
-
- list_del(&curr->tree_list);
- btrfs_release_delayed_item(curr);
- }
-
-error:
- kfree(data_size);
- kfree(keys);
-out:
- return ret;
-}
-
-/*
- * This helper can just do a simple insertion that needn't extend the item
- * for new data, such as directory name index insertion or inode insertion.
- */
-static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_delayed_item *delayed_item)
-{
- struct extent_buffer *leaf;
- struct btrfs_item *item;
- char *ptr;
- int ret;
-
- ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
- delayed_item->data_len);
- if (ret < 0 && ret != -EEXIST)
- return ret;
-
- leaf = path->nodes[0];
-
- item = btrfs_item_nr(leaf, path->slots[0]);
- ptr = btrfs_item_ptr(leaf, path->slots[0], char);
-
- write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
- delayed_item->data_len);
- btrfs_mark_buffer_dirty(leaf);
-
- btrfs_delayed_item_release_metadata(root, delayed_item);
- return 0;
-}
-
-/*
- * we insert an item first, then if there are some continuous items, we try
- * to insert those items into the same leaf.
- */
-static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct btrfs_root *root,
- struct btrfs_delayed_node *node)
-{
- struct btrfs_delayed_item *curr, *prev;
- int ret = 0;
-
-do_again:
- mutex_lock(&node->mutex);
- curr = __btrfs_first_delayed_insertion_item(node);
- if (!curr)
- goto insert_end;
-
- ret = btrfs_insert_delayed_item(trans, root, path, curr);
- if (ret < 0) {
- btrfs_release_path(path);
- goto insert_end;
- }
-
- prev = curr;
- curr = __btrfs_next_delayed_item(prev);
- if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
- /* insert the continuous items into the same leaf */
- path->slots[0]++;
- btrfs_batch_insert_items(trans, root, path, curr);
- }
- btrfs_release_delayed_item(prev);
- btrfs_mark_buffer_dirty(path->nodes[0]);
-
- btrfs_release_path(path);
- mutex_unlock(&node->mutex);
- goto do_again;
-
-insert_end:
- mutex_unlock(&node->mutex);
- return ret;
-}
-
-static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_delayed_item *item)
-{
- struct btrfs_delayed_item *curr, *next;
- struct extent_buffer *leaf;
- struct btrfs_key key;
- struct list_head head;
- int nitems, i, last_item;
- int ret = 0;
-
- BUG_ON(!path->nodes[0]);
-
- leaf = path->nodes[0];
-
- i = path->slots[0];
- last_item = btrfs_header_nritems(leaf) - 1;
- if (i > last_item)
- return -ENOENT; /* FIXME: Is errno suitable? */
-
- next = item;
- INIT_LIST_HEAD(&head);
- btrfs_item_key_to_cpu(leaf, &key, i);
- nitems = 0;
- /*
- * count the number of dir index items that we can delete in a batch
- */
- while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
- list_add_tail(&next->tree_list, &head);
- nitems++;
-
- curr = next;
- next = __btrfs_next_delayed_item(curr);
- if (!next)
- break;
-
- if (!btrfs_is_continuous_delayed_item(curr, next))
- break;
-
- i++;
- if (i > last_item)
- break;
- btrfs_item_key_to_cpu(leaf, &key, i);
- }
-
- if (!nitems)
- return 0;
-
- ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
- if (ret)
- goto out;
-
- list_for_each_entry_safe(curr, next, &head, tree_list) {
- btrfs_delayed_item_release_metadata(root, curr);
- list_del(&curr->tree_list);
- btrfs_release_delayed_item(curr);
- }
-
-out:
- return ret;
-}
-
-static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct btrfs_root *root,
- struct btrfs_delayed_node *node)
-{
- struct btrfs_delayed_item *curr, *prev;
- int ret = 0;
-
-do_again:
- mutex_lock(&node->mutex);
- curr = __btrfs_first_delayed_deletion_item(node);
- if (!curr)
- goto delete_fail;
-
- ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
- if (ret < 0)
- goto delete_fail;
- else if (ret > 0) {
- /*
- * can't find the item which the node points to, so this node
- * is invalid, just drop it.
- */
- prev = curr;
- curr = __btrfs_next_delayed_item(prev);
- btrfs_release_delayed_item(prev);
- ret = 0;
- btrfs_release_path(path);
- if (curr)
- goto do_again;
- else
- goto delete_fail;
- }
-
- btrfs_batch_delete_items(trans, root, path, curr);
- btrfs_release_path(path);
- mutex_unlock(&node->mutex);
- goto do_again;
-
-delete_fail:
- btrfs_release_path(path);
- mutex_unlock(&node->mutex);
- return ret;
-}
-
-static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
-{
- struct btrfs_delayed_root *delayed_root;
-
- if (delayed_node && delayed_node->inode_dirty) {
- BUG_ON(!delayed_node->root);
- delayed_node->inode_dirty = 0;
- delayed_node->count--;
-
- delayed_root = delayed_node->root->fs_info->delayed_root;
- atomic_dec(&delayed_root->items);
- if (atomic_read(&delayed_root->items) <
- BTRFS_DELAYED_BACKGROUND &&
- waitqueue_active(&delayed_root->wait))
- wake_up(&delayed_root->wait);
- }
-}
-
-static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_delayed_node *node)
-{
- struct btrfs_key key;
- struct btrfs_inode_item *inode_item;
- struct extent_buffer *leaf;
- int ret;
-
- mutex_lock(&node->mutex);
- if (!node->inode_dirty) {
- mutex_unlock(&node->mutex);
- return 0;
- }
-
- key.objectid = node->inode_id;
- btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
- key.offset = 0;
- ret = btrfs_lookup_inode(trans, root, path, &key, 1);
- if (ret > 0) {
- btrfs_release_path(path);
- mutex_unlock(&node->mutex);
- return -ENOENT;
- } else if (ret < 0) {
- mutex_unlock(&node->mutex);
- return ret;
- }
-
- btrfs_unlock_up_safe(path, 1);
- leaf = path->nodes[0];
- inode_item = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_inode_item);
- write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
- sizeof(struct btrfs_inode_item));
- btrfs_mark_buffer_dirty(leaf);
- btrfs_release_path(path);
-
- btrfs_delayed_inode_release_metadata(root, node);
- btrfs_release_delayed_inode(node);
- mutex_unlock(&node->mutex);
-
- return 0;
-}
-
-/*
- * Called when committing the transaction.
- * Returns 0 on success.
- * Returns < 0 on error and returns with an aborted transaction with any
- * outstanding delayed items cleaned up.
- */
-int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_root *curr_root = root;
- struct btrfs_delayed_root *delayed_root;
- struct btrfs_delayed_node *curr_node, *prev_node;
- struct btrfs_path *path;
- struct btrfs_block_rsv *block_rsv;
- int ret = 0;
-
- if (trans->aborted)
- return -EIO;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->leave_spinning = 1;
-
- block_rsv = trans->block_rsv;
- trans->block_rsv = &root->fs_info->delayed_block_rsv;
-
- delayed_root = btrfs_get_delayed_root(root);
-
- curr_node = btrfs_first_delayed_node(delayed_root);
- while (curr_node) {
- curr_root = curr_node->root;
- ret = btrfs_insert_delayed_items(trans, path, curr_root,
- curr_node);
- if (!ret)
- ret = btrfs_delete_delayed_items(trans, path,
- curr_root, curr_node);
- if (!ret)
- ret = btrfs_update_delayed_inode(trans, curr_root,
- path, curr_node);
- if (ret) {
- btrfs_release_delayed_node(curr_node);
- btrfs_abort_transaction(trans, root, ret);
- break;
- }
-
- prev_node = curr_node;
- curr_node = btrfs_next_delayed_node(curr_node);
- btrfs_release_delayed_node(prev_node);
- }
-
- btrfs_free_path(path);
- trans->block_rsv = block_rsv;
-
- return ret;
-}
-
-static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_node *node)
-{
- struct btrfs_path *path;
- struct btrfs_block_rsv *block_rsv;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->leave_spinning = 1;
-
- block_rsv = trans->block_rsv;
- trans->block_rsv = &node->root->fs_info->delayed_block_rsv;
-
- ret = btrfs_insert_delayed_items(trans, path, node->root, node);
- if (!ret)
- ret = btrfs_delete_delayed_items(trans, path, node->root, node);
- if (!ret)
- ret = btrfs_update_delayed_inode(trans, node->root, path, node);
- btrfs_free_path(path);
-
- trans->block_rsv = block_rsv;
- return ret;
-}
-
-int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
- struct inode *inode)
-{
- struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
- int ret;
-
- if (!delayed_node)
- return 0;
-
- mutex_lock(&delayed_node->mutex);
- if (!delayed_node->count) {
- mutex_unlock(&delayed_node->mutex);
- btrfs_release_delayed_node(delayed_node);
- return 0;
- }
- mutex_unlock(&delayed_node->mutex);
-
- ret = __btrfs_commit_inode_delayed_items(trans, delayed_node);
- btrfs_release_delayed_node(delayed_node);
- return ret;
-}
-
-void btrfs_remove_delayed_node(struct inode *inode)
-{
- struct btrfs_delayed_node *delayed_node;
-
- delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
- if (!delayed_node)
- return;
-
- BTRFS_I(inode)->delayed_node = NULL;
- btrfs_release_delayed_node(delayed_node);
-}
-
-struct btrfs_async_delayed_node {
- struct btrfs_root *root;
- struct btrfs_delayed_node *delayed_node;
- struct btrfs_work work;
-};
-
-static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
-{
- struct btrfs_async_delayed_node *async_node;
- struct btrfs_trans_handle *trans;
- struct btrfs_path *path;
- struct btrfs_delayed_node *delayed_node = NULL;
- struct btrfs_root *root;
- struct btrfs_block_rsv *block_rsv;
- unsigned long nr = 0;
- int need_requeue = 0;
- int ret;
-
- async_node = container_of(work, struct btrfs_async_delayed_node, work);
-
- path = btrfs_alloc_path();
- if (!path)
- goto out;
- path->leave_spinning = 1;
-
- delayed_node = async_node->delayed_node;
- root = delayed_node->root;
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- goto free_path;
-
- block_rsv = trans->block_rsv;
- trans->block_rsv = &root->fs_info->delayed_block_rsv;
-
- ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
- if (!ret)
- ret = btrfs_delete_delayed_items(trans, path, root,
- delayed_node);
-
- if (!ret)
- btrfs_update_delayed_inode(trans, root, path, delayed_node);
-
- /*
- * Maybe new delayed items have been inserted, so we need to requeue
- * the work. Besides that, we must dequeue the empty delayed nodes
- * to avoid the race between delayed items balance and the worker.
- * The race is like this:
- * Task1 Worker thread
- * count == 0, needn't requeue
- * also needn't insert the
- * delayed node into prepare
- * list again.
- * add lots of delayed items
- * queue the delayed node
- * already in the list,
- * and not in the prepare
- * list, it means the delayed
- * node is being dealt with
- * by the worker.
- * do delayed items balance
- * the delayed node is being
- * dealt with by the worker
- * now, just wait.
- * the worker goto idle.
- * Task1 will sleep until the transaction is committed.
- */
- mutex_lock(&delayed_node->mutex);
- if (delayed_node->count)
- need_requeue = 1;
- else
- btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
- delayed_node);
- mutex_unlock(&delayed_node->mutex);
-
- nr = trans->blocks_used;
-
- trans->block_rsv = block_rsv;
- btrfs_end_transaction_dmeta(trans, root);
- __btrfs_btree_balance_dirty(root, nr);
-free_path:
- btrfs_free_path(path);
-out:
- if (need_requeue)
- btrfs_requeue_work(&async_node->work);
- else {
- btrfs_release_prepared_delayed_node(delayed_node);
- kfree(async_node);
- }
-}
-
-static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
- struct btrfs_root *root, int all)
-{
- struct btrfs_async_delayed_node *async_node;
- struct btrfs_delayed_node *curr;
- int count = 0;
-
-again:
- curr = btrfs_first_prepared_delayed_node(delayed_root);
- if (!curr)
- return 0;
-
- async_node = kmalloc(sizeof(*async_node), GFP_NOFS);
- if (!async_node) {
- btrfs_release_prepared_delayed_node(curr);
- return -ENOMEM;
- }
-
- async_node->root = root;
- async_node->delayed_node = curr;
-
- async_node->work.func = btrfs_async_run_delayed_node_done;
- async_node->work.flags = 0;
-
- btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work);
- count++;
-
- if (all || count < 4)
- goto again;
-
- return 0;
-}
-
-void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
-{
- struct btrfs_delayed_root *delayed_root;
- delayed_root = btrfs_get_delayed_root(root);
- WARN_ON(btrfs_first_delayed_node(delayed_root));
-}
-
-void btrfs_balance_delayed_items(struct btrfs_root *root)
-{
- struct btrfs_delayed_root *delayed_root;
-
- delayed_root = btrfs_get_delayed_root(root);
-
- if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
- return;
-
- if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
- int ret;
- ret = btrfs_wq_run_delayed_node(delayed_root, root, 1);
- if (ret)
- return;
-
- wait_event_interruptible_timeout(
- delayed_root->wait,
- (atomic_read(&delayed_root->items) <
- BTRFS_DELAYED_BACKGROUND),
- HZ);
- return;
- }
-
- btrfs_wq_run_delayed_node(delayed_root, root, 0);
-}
-
-/* Will return 0 or -ENOMEM */
-int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, const char *name,
- int name_len, struct inode *dir,
- struct btrfs_disk_key *disk_key, u8 type,
- u64 index)
-{
- struct btrfs_delayed_node *delayed_node;
- struct btrfs_delayed_item *delayed_item;
- struct btrfs_dir_item *dir_item;
- int ret;
-
- delayed_node = btrfs_get_or_create_delayed_node(dir);
- if (IS_ERR(delayed_node))
- return PTR_ERR(delayed_node);
-
- delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
- if (!delayed_item) {
- ret = -ENOMEM;
- goto release_node;
- }
-
- delayed_item->key.objectid = btrfs_ino(dir);
- btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY);
- delayed_item->key.offset = index;
-
- dir_item = (struct btrfs_dir_item *)delayed_item->data;
- dir_item->location = *disk_key;
- dir_item->transid = cpu_to_le64(trans->transid);
- dir_item->data_len = 0;
- dir_item->name_len = cpu_to_le16(name_len);
- dir_item->type = type;
- memcpy((char *)(dir_item + 1), name, name_len);
-
- ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
- /*
- * We have reserved enough space when starting a new transaction,
- * so a metadata reservation failure here is impossible.
- */
- BUG_ON(ret);
-
- mutex_lock(&delayed_node->mutex);
- ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
- if (unlikely(ret)) {
- printk(KERN_ERR "err add delayed dir index item(name: %s) into "
- "the insertion tree of the delayed node"
- "(root id: %llu, inode id: %llu, errno: %d)\n",
- name,
- (unsigned long long)delayed_node->root->objectid,
- (unsigned long long)delayed_node->inode_id,
- ret);
- BUG();
- }
- mutex_unlock(&delayed_node->mutex);
-
-release_node:
- btrfs_release_delayed_node(delayed_node);
- return ret;
-}
-
-static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root,
- struct btrfs_delayed_node *node,
- struct btrfs_key *key)
-{
- struct btrfs_delayed_item *item;
-
- mutex_lock(&node->mutex);
- item = __btrfs_lookup_delayed_insertion_item(node, key);
- if (!item) {
- mutex_unlock(&node->mutex);
- return 1;
- }
-
- btrfs_delayed_item_release_metadata(root, item);
- btrfs_release_delayed_item(item);
- mutex_unlock(&node->mutex);
- return 0;
-}
-
-int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *dir,
- u64 index)
-{
- struct btrfs_delayed_node *node;
- struct btrfs_delayed_item *item;
- struct btrfs_key item_key;
- int ret;
-
- node = btrfs_get_or_create_delayed_node(dir);
- if (IS_ERR(node))
- return PTR_ERR(node);
-
- item_key.objectid = btrfs_ino(dir);
- btrfs_set_key_type(&item_key, BTRFS_DIR_INDEX_KEY);
- item_key.offset = index;
-
- ret = btrfs_delete_delayed_insertion_item(root, node, &item_key);
- if (!ret)
- goto end;
-
- item = btrfs_alloc_delayed_item(0);
- if (!item) {
- ret = -ENOMEM;
- goto end;
- }
-
- item->key = item_key;
-
- ret = btrfs_delayed_item_reserve_metadata(trans, root, item);
- /*
- * We have reserved enough space when starting a new transaction,
- * so a metadata reservation failure here is impossible.
- */
- BUG_ON(ret);
-
- mutex_lock(&node->mutex);
- ret = __btrfs_add_delayed_deletion_item(node, item);
- if (unlikely(ret)) {
- printk(KERN_ERR "err add delayed dir index item(index: %llu) "
- "into the deletion tree of the delayed node"
- "(root id: %llu, inode id: %llu, errno: %d)\n",
- (unsigned long long)index,
- (unsigned long long)node->root->objectid,
- (unsigned long long)node->inode_id,
- ret);
- BUG();
- }
- mutex_unlock(&node->mutex);
-end:
- btrfs_release_delayed_node(node);
- return ret;
-}
-
-int btrfs_inode_delayed_dir_index_count(struct inode *inode)
-{
- struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
-
- if (!delayed_node)
- return -ENOENT;
-
- /*
- * Since we hold the i_mutex of this directory, no new directory
- * index can be added to the delayed node and index_cnt cannot be
- * updated now, so we needn't lock the delayed node.
- */
- if (!delayed_node->index_cnt) {
- btrfs_release_delayed_node(delayed_node);
- return -EINVAL;
- }
-
- BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
- btrfs_release_delayed_node(delayed_node);
- return 0;
-}
-
-void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
- struct list_head *del_list)
-{
- struct btrfs_delayed_node *delayed_node;
- struct btrfs_delayed_item *item;
-
- delayed_node = btrfs_get_delayed_node(inode);
- if (!delayed_node)
- return;
-
- mutex_lock(&delayed_node->mutex);
- item = __btrfs_first_delayed_insertion_item(delayed_node);
- while (item) {
- atomic_inc(&item->refs);
- list_add_tail(&item->readdir_list, ins_list);
- item = __btrfs_next_delayed_item(item);
- }
-
- item = __btrfs_first_delayed_deletion_item(delayed_node);
- while (item) {
- atomic_inc(&item->refs);
- list_add_tail(&item->readdir_list, del_list);
- item = __btrfs_next_delayed_item(item);
- }
- mutex_unlock(&delayed_node->mutex);
- /*
- * This delayed node is still cached in the btrfs inode, so refs
- * must be > 1 now, and we needn't check whether it is going to be
- * freed or not.
- *
- * Besides that, this function is used for readdir; we do not
- * insert/delete delayed items during this period. So we also needn't
- * requeue or dequeue this delayed node.
- */
- atomic_dec(&delayed_node->refs);
-}
-
-void btrfs_put_delayed_items(struct list_head *ins_list,
- struct list_head *del_list)
-{
- struct btrfs_delayed_item *curr, *next;
-
- list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
- list_del(&curr->readdir_list);
- if (atomic_dec_and_test(&curr->refs))
- kfree(curr);
- }
-
- list_for_each_entry_safe(curr, next, del_list, readdir_list) {
- list_del(&curr->readdir_list);
- if (atomic_dec_and_test(&curr->refs))
- kfree(curr);
- }
-}
-
-int btrfs_should_delete_dir_index(struct list_head *del_list,
- u64 index)
-{
- struct btrfs_delayed_item *curr, *next;
- int ret;
-
- if (list_empty(del_list))
- return 0;
-
- list_for_each_entry_safe(curr, next, del_list, readdir_list) {
- if (curr->key.offset > index)
- break;
-
- list_del(&curr->readdir_list);
- ret = (curr->key.offset == index);
-
- if (atomic_dec_and_test(&curr->refs))
- kfree(curr);
-
- if (ret)
- return 1;
- else
- continue;
- }
- return 0;
-}
-
-/*
- * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
- */
-int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
- filldir_t filldir,
- struct list_head *ins_list)
-{
- struct btrfs_dir_item *di;
- struct btrfs_delayed_item *curr, *next;
- struct btrfs_key location;
- char *name;
- int name_len;
- int over = 0;
- unsigned char d_type;
-
- if (list_empty(ins_list))
- return 0;
-
- /*
- * The data of the delayed items cannot change, so we needn't
- * lock them. And since we hold the i_mutex of the directory,
- * nobody can delete any directory index now.
- */
- list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
- list_del(&curr->readdir_list);
-
- if (curr->key.offset < filp->f_pos) {
- if (atomic_dec_and_test(&curr->refs))
- kfree(curr);
- continue;
- }
-
- filp->f_pos = curr->key.offset;
-
- di = (struct btrfs_dir_item *)curr->data;
- name = (char *)(di + 1);
- name_len = le16_to_cpu(di->name_len);
-
- d_type = btrfs_filetype_table[di->type];
- btrfs_disk_key_to_cpu(&location, &di->location);
-
- over = filldir(dirent, name, name_len, curr->key.offset,
- location.objectid, d_type);
-
- if (atomic_dec_and_test(&curr->refs))
- kfree(curr);
-
- if (over)
- return 1;
- }
- return 0;
-}
-
-BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
- generation, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
- sequence, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
- transid, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
- nbytes, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
- block_group, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
-
-BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
-
-static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
- struct btrfs_inode_item *inode_item,
- struct inode *inode)
-{
- btrfs_set_stack_inode_uid(inode_item, inode->i_uid);
- btrfs_set_stack_inode_gid(inode_item, inode->i_gid);
- btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
- btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
- btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
- btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
- btrfs_set_stack_inode_generation(inode_item,
- BTRFS_I(inode)->generation);
- btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence);
- btrfs_set_stack_inode_transid(inode_item, trans->transid);
- btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
- btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
- btrfs_set_stack_inode_block_group(inode_item, 0);
-
- btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
- inode->i_atime.tv_sec);
- btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
- inode->i_atime.tv_nsec);
-
- btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
- inode->i_mtime.tv_sec);
- btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
- inode->i_mtime.tv_nsec);
-
- btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
- inode->i_ctime.tv_sec);
- btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
- inode->i_ctime.tv_nsec);
-}
-
-int btrfs_fill_inode(struct inode *inode, u32 *rdev)
-{
- struct btrfs_delayed_node *delayed_node;
- struct btrfs_inode_item *inode_item;
- struct btrfs_timespec *tspec;
-
- delayed_node = btrfs_get_delayed_node(inode);
- if (!delayed_node)
- return -ENOENT;
-
- mutex_lock(&delayed_node->mutex);
- if (!delayed_node->inode_dirty) {
- mutex_unlock(&delayed_node->mutex);
- btrfs_release_delayed_node(delayed_node);
- return -ENOENT;
- }
-
- inode_item = &delayed_node->inode_item;
-
- inode->i_uid = btrfs_stack_inode_uid(inode_item);
- inode->i_gid = btrfs_stack_inode_gid(inode_item);
- btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
- inode->i_mode = btrfs_stack_inode_mode(inode_item);
- set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
- inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
- BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
- BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item);
- inode->i_rdev = 0;
- *rdev = btrfs_stack_inode_rdev(inode_item);
- BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
-
- tspec = btrfs_inode_atime(inode_item);
- inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
- inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
-
- tspec = btrfs_inode_mtime(inode_item);
- inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
- inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
-
- tspec = btrfs_inode_ctime(inode_item);
- inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
- inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
-
- inode->i_generation = BTRFS_I(inode)->generation;
- BTRFS_I(inode)->index_cnt = (u64)-1;
-
- mutex_unlock(&delayed_node->mutex);
- btrfs_release_delayed_node(delayed_node);
- return 0;
-}
-
-int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode)
-{
- struct btrfs_delayed_node *delayed_node;
- int ret = 0;
-
- delayed_node = btrfs_get_or_create_delayed_node(inode);
- if (IS_ERR(delayed_node))
- return PTR_ERR(delayed_node);
-
- mutex_lock(&delayed_node->mutex);
- if (delayed_node->inode_dirty) {
- fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
- goto release_node;
- }
-
- ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
- delayed_node);
- if (ret)
- goto release_node;
-
- fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
- delayed_node->inode_dirty = 1;
- delayed_node->count++;
- atomic_inc(&root->fs_info->delayed_root->items);
-release_node:
- mutex_unlock(&delayed_node->mutex);
- btrfs_release_delayed_node(delayed_node);
- return ret;
-}
-
-static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
-{
- struct btrfs_root *root = delayed_node->root;
- struct btrfs_delayed_item *curr_item, *prev_item;
-
- mutex_lock(&delayed_node->mutex);
- curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
- while (curr_item) {
- btrfs_delayed_item_release_metadata(root, curr_item);
- prev_item = curr_item;
- curr_item = __btrfs_next_delayed_item(prev_item);
- btrfs_release_delayed_item(prev_item);
- }
-
- curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
- while (curr_item) {
- btrfs_delayed_item_release_metadata(root, curr_item);
- prev_item = curr_item;
- curr_item = __btrfs_next_delayed_item(prev_item);
- btrfs_release_delayed_item(prev_item);
- }
-
- if (delayed_node->inode_dirty) {
- btrfs_delayed_inode_release_metadata(root, delayed_node);
- btrfs_release_delayed_inode(delayed_node);
- }
- mutex_unlock(&delayed_node->mutex);
-}
-
-void btrfs_kill_delayed_inode_items(struct inode *inode)
-{
- struct btrfs_delayed_node *delayed_node;
-
- delayed_node = btrfs_get_delayed_node(inode);
- if (!delayed_node)
- return;
-
- __btrfs_kill_delayed_node(delayed_node);
- btrfs_release_delayed_node(delayed_node);
-}
-
-void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
-{
- u64 inode_id = 0;
- struct btrfs_delayed_node *delayed_nodes[8];
- int i, n;
-
- while (1) {
- spin_lock(&root->inode_lock);
- n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
- (void **)delayed_nodes, inode_id,
- ARRAY_SIZE(delayed_nodes));
- if (!n) {
- spin_unlock(&root->inode_lock);
- break;
- }
-
- inode_id = delayed_nodes[n - 1]->inode_id + 1;
-
- for (i = 0; i < n; i++)
- atomic_inc(&delayed_nodes[i]->refs);
- spin_unlock(&root->inode_lock);
-
- for (i = 0; i < n; i++) {
- __btrfs_kill_delayed_node(delayed_nodes[i]);
- btrfs_release_delayed_node(delayed_nodes[i]);
- }
- }
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/delayed-inode.h b/ANDROID_3.4.5/fs/btrfs/delayed-inode.h
deleted file mode 100644
index 7083d08b..00000000
--- a/ANDROID_3.4.5/fs/btrfs/delayed-inode.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (C) 2011 Fujitsu. All rights reserved.
- * Written by Miao Xie <miaox@cn.fujitsu.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __DELAYED_TREE_OPERATION_H
-#define __DELAYED_TREE_OPERATION_H
-
-#include <linux/rbtree.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
-#include <linux/list.h>
-#include <linux/wait.h>
-#include <linux/atomic.h>
-
-#include "ctree.h"
-
-/* types of the delayed item */
-#define BTRFS_DELAYED_INSERTION_ITEM 1
-#define BTRFS_DELAYED_DELETION_ITEM 2
-
-struct btrfs_delayed_root {
- spinlock_t lock;
- struct list_head node_list;
- /*
- * Used for delayed nodes which are waiting to be dealt with by the
- * worker. If the delayed node is inserted into the work queue, we
- * drop it from this list.
- */
- struct list_head prepare_list;
- atomic_t items; /* for delayed items */
- int nodes; /* for delayed nodes */
- wait_queue_head_t wait;
-};
-
-struct btrfs_delayed_node {
- u64 inode_id;
- u64 bytes_reserved;
- struct btrfs_root *root;
- /* Used to add the node into the delayed root's node list. */
- struct list_head n_list;
- /*
- * Used to add the node into the prepare list; the nodes in this list
- * are waiting to be dealt with by the async worker.
- */
- struct list_head p_list;
- struct rb_root ins_root;
- struct rb_root del_root;
- struct mutex mutex;
- struct btrfs_inode_item inode_item;
- atomic_t refs;
- u64 index_cnt;
- bool in_list;
- bool inode_dirty;
- int count;
-};
-
-struct btrfs_delayed_item {
- struct rb_node rb_node;
- struct btrfs_key key;
- struct list_head tree_list; /* used for batch insert/delete items */
- struct list_head readdir_list; /* used for readdir items */
- u64 bytes_reserved;
- struct btrfs_delayed_node *delayed_node;
- atomic_t refs;
- int ins_or_del;
- u32 data_len;
- char data[0];
-};
-
-static inline void btrfs_init_delayed_root(
- struct btrfs_delayed_root *delayed_root)
-{
- atomic_set(&delayed_root->items, 0);
- delayed_root->nodes = 0;
- spin_lock_init(&delayed_root->lock);
- init_waitqueue_head(&delayed_root->wait);
- INIT_LIST_HEAD(&delayed_root->node_list);
- INIT_LIST_HEAD(&delayed_root->prepare_list);
-}
-
-int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, const char *name,
- int name_len, struct inode *dir,
- struct btrfs_disk_key *disk_key, u8 type,
- u64 index);
-
-int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *dir,
- u64 index);
-
-int btrfs_inode_delayed_dir_index_count(struct inode *inode);
-
-int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-
-void btrfs_balance_delayed_items(struct btrfs_root *root);
-
-int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
- struct inode *inode);
-/* Used for evicting the inode. */
-void btrfs_remove_delayed_node(struct inode *inode);
-void btrfs_kill_delayed_inode_items(struct inode *inode);
-
-
-int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode);
-int btrfs_fill_inode(struct inode *inode, u32 *rdev);
-
-/* Used when dropping a dead root */
-void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
-
-/* Used for readdir() */
-void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
- struct list_head *del_list);
-void btrfs_put_delayed_items(struct list_head *ins_list,
- struct list_head *del_list);
-int btrfs_should_delete_dir_index(struct list_head *del_list,
- u64 index);
-int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
- filldir_t filldir,
- struct list_head *ins_list);
-
-/* for init */
-int __init btrfs_delayed_inode_init(void);
-void btrfs_delayed_inode_exit(void);
-
-/* for debugging */
-void btrfs_assert_delayed_root_empty(struct btrfs_root *root);
-
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/delayed-ref.c b/ANDROID_3.4.5/fs/btrfs/delayed-ref.c
deleted file mode 100644
index 69f22e3a..00000000
--- a/ANDROID_3.4.5/fs/btrfs/delayed-ref.c
+++ /dev/null
@@ -1,759 +0,0 @@
-/*
- * Copyright (C) 2009 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/sort.h>
-#include "ctree.h"
-#include "delayed-ref.h"
-#include "transaction.h"
-
-/*
- * delayed back reference update tracking. For subvolume trees
- * we queue up extent allocations and backref maintenance for
- * delayed processing. This avoids deep call chains where we
- * add extents in the middle of btrfs_search_slot, and it allows
- * us to buffer up frequently modified backrefs in an rb tree instead
- * of hammering updates on the extent allocation tree.
- */
-
-/*
- * compare two delayed tree backrefs with same bytenr and type
- */
-static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
- struct btrfs_delayed_tree_ref *ref1)
-{
- if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
- if (ref1->root < ref2->root)
- return -1;
- if (ref1->root > ref2->root)
- return 1;
- } else {
- if (ref1->parent < ref2->parent)
- return -1;
- if (ref1->parent > ref2->parent)
- return 1;
- }
- return 0;
-}
-
-/*
- * compare two delayed data backrefs with same bytenr and type
- */
-static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
- struct btrfs_delayed_data_ref *ref1)
-{
- if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
- if (ref1->root < ref2->root)
- return -1;
- if (ref1->root > ref2->root)
- return 1;
- if (ref1->objectid < ref2->objectid)
- return -1;
- if (ref1->objectid > ref2->objectid)
- return 1;
- if (ref1->offset < ref2->offset)
- return -1;
- if (ref1->offset > ref2->offset)
- return 1;
- } else {
- if (ref1->parent < ref2->parent)
- return -1;
- if (ref1->parent > ref2->parent)
- return 1;
- }
- return 0;
-}
-
-/*
- * entries in the rb tree are ordered by the byte number of the extent,
- * type of the delayed backrefs and content of delayed backrefs.
- */
-static int comp_entry(struct btrfs_delayed_ref_node *ref2,
- struct btrfs_delayed_ref_node *ref1)
-{
- if (ref1->bytenr < ref2->bytenr)
- return -1;
- if (ref1->bytenr > ref2->bytenr)
- return 1;
- if (ref1->is_head && ref2->is_head)
- return 0;
- if (ref2->is_head)
- return -1;
- if (ref1->is_head)
- return 1;
- if (ref1->type < ref2->type)
- return -1;
- if (ref1->type > ref2->type)
- return 1;
- /* merging of sequenced refs is not allowed */
- if (ref1->seq < ref2->seq)
- return -1;
- if (ref1->seq > ref2->seq)
- return 1;
- if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
- ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
- return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
- btrfs_delayed_node_to_tree_ref(ref1));
- } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
- ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
- return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
- btrfs_delayed_node_to_data_ref(ref1));
- }
- BUG();
- return 0;
-}
-
-/*
- * insert a new ref into the rbtree. This returns any existing refs
- * for the same (bytenr,parent) tuple, or NULL if the new node was properly
- * inserted.
- */
-static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
- struct rb_node *node)
-{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent_node = NULL;
- struct btrfs_delayed_ref_node *entry;
- struct btrfs_delayed_ref_node *ins;
- int cmp;
-
- ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- while (*p) {
- parent_node = *p;
- entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
- rb_node);
-
- cmp = comp_entry(entry, ins);
- if (cmp < 0)
- p = &(*p)->rb_left;
- else if (cmp > 0)
- p = &(*p)->rb_right;
- else
- return entry;
- }
-
- rb_link_node(node, parent_node, p);
- rb_insert_color(node, root);
- return NULL;
-}
-
-/*
- * find a head entry based on bytenr. This returns the delayed ref
- * head if it was able to find one, or NULL if nothing was in that spot.
- * If return_bigger is given, the next bigger entry is returned if no exact
- * match is found.
- */
-static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
- u64 bytenr,
- struct btrfs_delayed_ref_node **last,
- int return_bigger)
-{
- struct rb_node *n;
- struct btrfs_delayed_ref_node *entry;
- int cmp = 0;
-
-again:
- n = root->rb_node;
- entry = NULL;
- while (n) {
- entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
- WARN_ON(!entry->in_tree);
- if (last)
- *last = entry;
-
- if (bytenr < entry->bytenr)
- cmp = -1;
- else if (bytenr > entry->bytenr)
- cmp = 1;
- else if (!btrfs_delayed_ref_is_head(entry))
- cmp = 1;
- else
- cmp = 0;
-
- if (cmp < 0)
- n = n->rb_left;
- else if (cmp > 0)
- n = n->rb_right;
- else
- return entry;
- }
- if (entry && return_bigger) {
- if (cmp > 0) {
- n = rb_next(&entry->rb_node);
- if (!n)
- n = rb_first(root);
- entry = rb_entry(n, struct btrfs_delayed_ref_node,
- rb_node);
- bytenr = entry->bytenr;
- return_bigger = 0;
- goto again;
- }
- return entry;
- }
- return NULL;
-}
-
-int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_head *head)
-{
- struct btrfs_delayed_ref_root *delayed_refs;
-
- delayed_refs = &trans->transaction->delayed_refs;
- assert_spin_locked(&delayed_refs->lock);
- if (mutex_trylock(&head->mutex))
- return 0;
-
- atomic_inc(&head->node.refs);
- spin_unlock(&delayed_refs->lock);
-
- mutex_lock(&head->mutex);
- spin_lock(&delayed_refs->lock);
- if (!head->node.in_tree) {
- mutex_unlock(&head->mutex);
- btrfs_put_delayed_ref(&head->node);
- return -EAGAIN;
- }
- btrfs_put_delayed_ref(&head->node);
- return 0;
-}
-
-int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
- u64 seq)
-{
- struct seq_list *elem;
-
- assert_spin_locked(&delayed_refs->lock);
- if (list_empty(&delayed_refs->seq_head))
- return 0;
-
- elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list);
- if (seq >= elem->seq) {
- pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n",
- seq, elem->seq, delayed_refs);
- return 1;
- }
- return 0;
-}
-
-int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
- struct list_head *cluster, u64 start)
-{
- int count = 0;
- struct btrfs_delayed_ref_root *delayed_refs;
- struct rb_node *node;
- struct btrfs_delayed_ref_node *ref;
- struct btrfs_delayed_ref_head *head;
-
- delayed_refs = &trans->transaction->delayed_refs;
- if (start == 0) {
- node = rb_first(&delayed_refs->root);
- } else {
- ref = NULL;
- find_ref_head(&delayed_refs->root, start + 1, &ref, 1);
- if (ref) {
- node = &ref->rb_node;
- } else
- node = rb_first(&delayed_refs->root);
- }
-again:
- while (node && count < 32) {
- ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- if (btrfs_delayed_ref_is_head(ref)) {
- head = btrfs_delayed_node_to_head(ref);
- if (list_empty(&head->cluster)) {
- list_add_tail(&head->cluster, cluster);
- delayed_refs->run_delayed_start =
- head->node.bytenr;
- count++;
-
- WARN_ON(delayed_refs->num_heads_ready == 0);
- delayed_refs->num_heads_ready--;
- } else if (count) {
- /* the goal of the clustering is to find extents
- * that are likely to end up in the same extent
- * leaf on disk. So, we don't want them spread
- * all over the tree. Stop now if we've hit
- * a head that was already in use
- */
- break;
- }
- }
- node = rb_next(node);
- }
- if (count) {
- return 0;
- } else if (start) {
- /*
- * we've gone to the end of the rbtree without finding any
- * clusters. start from the beginning and try again
- */
- start = 0;
- node = rb_first(&delayed_refs->root);
- goto again;
- }
- return 1;
-}
-
-/*
- * helper function to update an extent delayed ref in the
- * rbtree. existing and update must both have the same
- * bytenr and parent
- *
- * This may free existing if the update cancels out whatever
- * operation it was doing.
- */
-static noinline void
-update_existing_ref(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_root *delayed_refs,
- struct btrfs_delayed_ref_node *existing,
- struct btrfs_delayed_ref_node *update)
-{
- if (update->action != existing->action) {
- /*
- * this is effectively undoing either an add or a
- * drop. We decrement the ref_mod, and if it goes
- * down to zero we just delete the entry without
- * ever changing the extent allocation tree.
- */
- existing->ref_mod--;
- if (existing->ref_mod == 0) {
- rb_erase(&existing->rb_node,
- &delayed_refs->root);
- existing->in_tree = 0;
- btrfs_put_delayed_ref(existing);
- delayed_refs->num_entries--;
- if (trans->delayed_ref_updates)
- trans->delayed_ref_updates--;
- } else {
- WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
- existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
- }
- } else {
- WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
- existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
- /*
- * the action on the existing ref matches
- * the action on the ref we're trying to add.
- * Bump the ref_mod by one so the backref that
- * is eventually added/removed has the correct
- * reference count
- */
- existing->ref_mod += update->ref_mod;
- }
-}
-
-/*
- * helper function to update the accounting in the head ref
- * existing and update must have the same bytenr
- */
-static noinline void
-update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
- struct btrfs_delayed_ref_node *update)
-{
- struct btrfs_delayed_ref_head *existing_ref;
- struct btrfs_delayed_ref_head *ref;
-
- existing_ref = btrfs_delayed_node_to_head(existing);
- ref = btrfs_delayed_node_to_head(update);
- BUG_ON(existing_ref->is_data != ref->is_data);
-
- if (ref->must_insert_reserved) {
- /* if the extent was freed and then
- * reallocated before the delayed ref
- * entries were processed, we can end up
- * with an existing head ref without
- * the must_insert_reserved flag set.
- * Set it again here
- */
- existing_ref->must_insert_reserved = ref->must_insert_reserved;
-
- /*
- * update the num_bytes so we make sure the accounting
- * is done correctly
- */
- existing->num_bytes = update->num_bytes;
-
- }
-
- if (ref->extent_op) {
- if (!existing_ref->extent_op) {
- existing_ref->extent_op = ref->extent_op;
- } else {
- if (ref->extent_op->update_key) {
- memcpy(&existing_ref->extent_op->key,
- &ref->extent_op->key,
- sizeof(ref->extent_op->key));
- existing_ref->extent_op->update_key = 1;
- }
- if (ref->extent_op->update_flags) {
- existing_ref->extent_op->flags_to_set |=
- ref->extent_op->flags_to_set;
- existing_ref->extent_op->update_flags = 1;
- }
- kfree(ref->extent_op);
- }
- }
- /*
- * update the reference mod on the head to reflect this new operation
- */
- existing->ref_mod += update->ref_mod;
-}
-
-/*
- * helper function to actually insert a head node into the rbtree.
- * this does all the dirty work in terms of maintaining the correct
- * overall modification count.
- */
-static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_node *ref,
- u64 bytenr, u64 num_bytes,
- int action, int is_data)
-{
- struct btrfs_delayed_ref_node *existing;
- struct btrfs_delayed_ref_head *head_ref = NULL;
- struct btrfs_delayed_ref_root *delayed_refs;
- int count_mod = 1;
- int must_insert_reserved = 0;
-
- /*
- * the head node stores the sum of all the mods, so dropping a ref
- * should drop the sum in the head node by one.
- */
- if (action == BTRFS_UPDATE_DELAYED_HEAD)
- count_mod = 0;
- else if (action == BTRFS_DROP_DELAYED_REF)
- count_mod = -1;
-
- /*
- * BTRFS_ADD_DELAYED_EXTENT means that we need to update
- * the reserved accounting when the extent is finally added, or
- * if a later modification deletes the delayed ref without ever
- * inserting the extent into the extent allocation tree.
- * ref->must_insert_reserved is the flag used to record
- * that accounting mods are required.
- *
- * Once we record must_insert_reserved, switch the action to
- * BTRFS_ADD_DELAYED_REF because other special casing is not required.
- */
- if (action == BTRFS_ADD_DELAYED_EXTENT)
- must_insert_reserved = 1;
- else
- must_insert_reserved = 0;
-
- delayed_refs = &trans->transaction->delayed_refs;
-
- /* first set the basic ref node struct up */
- atomic_set(&ref->refs, 1);
- ref->bytenr = bytenr;
- ref->num_bytes = num_bytes;
- ref->ref_mod = count_mod;
- ref->type = 0;
- ref->action = 0;
- ref->is_head = 1;
- ref->in_tree = 1;
- ref->seq = 0;
-
- head_ref = btrfs_delayed_node_to_head(ref);
- head_ref->must_insert_reserved = must_insert_reserved;
- head_ref->is_data = is_data;
-
- INIT_LIST_HEAD(&head_ref->cluster);
- mutex_init(&head_ref->mutex);
-
- trace_btrfs_delayed_ref_head(ref, head_ref, action);
-
- existing = tree_insert(&delayed_refs->root, &ref->rb_node);
-
- if (existing) {
- update_existing_head_ref(existing, ref);
- /*
- * we've updated the existing ref, free the newly
- * allocated ref
- */
- kfree(head_ref);
- } else {
- delayed_refs->num_heads++;
- delayed_refs->num_heads_ready++;
- delayed_refs->num_entries++;
- trans->delayed_ref_updates++;
- }
-}
-
-/*
- * helper to insert a delayed tree ref into the rbtree.
- */
-static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_node *ref,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 ref_root, int level, int action,
- int for_cow)
-{
- struct btrfs_delayed_ref_node *existing;
- struct btrfs_delayed_tree_ref *full_ref;
- struct btrfs_delayed_ref_root *delayed_refs;
- u64 seq = 0;
-
- if (action == BTRFS_ADD_DELAYED_EXTENT)
- action = BTRFS_ADD_DELAYED_REF;
-
- delayed_refs = &trans->transaction->delayed_refs;
-
- /* first set the basic ref node struct up */
- atomic_set(&ref->refs, 1);
- ref->bytenr = bytenr;
- ref->num_bytes = num_bytes;
- ref->ref_mod = 1;
- ref->action = action;
- ref->is_head = 0;
- ref->in_tree = 1;
-
- if (need_ref_seq(for_cow, ref_root))
- seq = inc_delayed_seq(delayed_refs);
- ref->seq = seq;
-
- full_ref = btrfs_delayed_node_to_tree_ref(ref);
- full_ref->parent = parent;
- full_ref->root = ref_root;
- if (parent)
- ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
- else
- ref->type = BTRFS_TREE_BLOCK_REF_KEY;
- full_ref->level = level;
-
- trace_btrfs_delayed_tree_ref(ref, full_ref, action);
-
- existing = tree_insert(&delayed_refs->root, &ref->rb_node);
-
- if (existing) {
- update_existing_ref(trans, delayed_refs, existing, ref);
- /*
- * we've updated the existing ref, free the newly
- * allocated ref
- */
- kfree(full_ref);
- } else {
- delayed_refs->num_entries++;
- trans->delayed_ref_updates++;
- }
-}
-
-/*
- * helper to insert a delayed data ref into the rbtree.
- */
-static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_node *ref,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 ref_root, u64 owner, u64 offset,
- int action, int for_cow)
-{
- struct btrfs_delayed_ref_node *existing;
- struct btrfs_delayed_data_ref *full_ref;
- struct btrfs_delayed_ref_root *delayed_refs;
- u64 seq = 0;
-
- if (action == BTRFS_ADD_DELAYED_EXTENT)
- action = BTRFS_ADD_DELAYED_REF;
-
- delayed_refs = &trans->transaction->delayed_refs;
-
- /* first set the basic ref node struct up */
- atomic_set(&ref->refs, 1);
- ref->bytenr = bytenr;
- ref->num_bytes = num_bytes;
- ref->ref_mod = 1;
- ref->action = action;
- ref->is_head = 0;
- ref->in_tree = 1;
-
- if (need_ref_seq(for_cow, ref_root))
- seq = inc_delayed_seq(delayed_refs);
- ref->seq = seq;
-
- full_ref = btrfs_delayed_node_to_data_ref(ref);
- full_ref->parent = parent;
- full_ref->root = ref_root;
- if (parent)
- ref->type = BTRFS_SHARED_DATA_REF_KEY;
- else
- ref->type = BTRFS_EXTENT_DATA_REF_KEY;
-
- full_ref->objectid = owner;
- full_ref->offset = offset;
-
- trace_btrfs_delayed_data_ref(ref, full_ref, action);
-
- existing = tree_insert(&delayed_refs->root, &ref->rb_node);
-
- if (existing) {
- update_existing_ref(trans, delayed_refs, existing, ref);
- /*
- * we've updated the existing ref, free the newly
- * allocated ref
- */
- kfree(full_ref);
- } else {
- delayed_refs->num_entries++;
- trans->delayed_ref_updates++;
- }
-}
-
-/*
- * add a delayed tree ref. This does all of the accounting required
- * to make sure the delayed ref is eventually processed before this
- * transaction commits.
- */
-int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 ref_root, int level, int action,
- struct btrfs_delayed_extent_op *extent_op,
- int for_cow)
-{
- struct btrfs_delayed_tree_ref *ref;
- struct btrfs_delayed_ref_head *head_ref;
- struct btrfs_delayed_ref_root *delayed_refs;
-
- BUG_ON(extent_op && extent_op->is_data);
- ref = kmalloc(sizeof(*ref), GFP_NOFS);
- if (!ref)
- return -ENOMEM;
-
- head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
- if (!head_ref) {
- kfree(ref);
- return -ENOMEM;
- }
-
- head_ref->extent_op = extent_op;
-
- delayed_refs = &trans->transaction->delayed_refs;
- spin_lock(&delayed_refs->lock);
-
- /*
- * insert both the head node and the new ref without dropping
- * the spin lock
- */
- add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
- num_bytes, action, 0);
-
- add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
- num_bytes, parent, ref_root, level, action,
- for_cow);
- if (!need_ref_seq(for_cow, ref_root) &&
- waitqueue_active(&delayed_refs->seq_wait))
- wake_up(&delayed_refs->seq_wait);
- spin_unlock(&delayed_refs->lock);
- return 0;
-}
-
-/*
- * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
- */
-int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes,
- u64 parent, u64 ref_root,
- u64 owner, u64 offset, int action,
- struct btrfs_delayed_extent_op *extent_op,
- int for_cow)
-{
- struct btrfs_delayed_data_ref *ref;
- struct btrfs_delayed_ref_head *head_ref;
- struct btrfs_delayed_ref_root *delayed_refs;
-
- BUG_ON(extent_op && !extent_op->is_data);
- ref = kmalloc(sizeof(*ref), GFP_NOFS);
- if (!ref)
- return -ENOMEM;
-
- head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
- if (!head_ref) {
- kfree(ref);
- return -ENOMEM;
- }
-
- head_ref->extent_op = extent_op;
-
- delayed_refs = &trans->transaction->delayed_refs;
- spin_lock(&delayed_refs->lock);
-
- /*
- * insert both the head node and the new ref without dropping
- * the spin lock
- */
- add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
- num_bytes, action, 1);
-
- add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
- num_bytes, parent, ref_root, owner, offset,
- action, for_cow);
- if (!need_ref_seq(for_cow, ref_root) &&
- waitqueue_active(&delayed_refs->seq_wait))
- wake_up(&delayed_refs->seq_wait);
- spin_unlock(&delayed_refs->lock);
- return 0;
-}
-
-int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes,
- struct btrfs_delayed_extent_op *extent_op)
-{
- struct btrfs_delayed_ref_head *head_ref;
- struct btrfs_delayed_ref_root *delayed_refs;
-
- head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
- if (!head_ref)
- return -ENOMEM;
-
- head_ref->extent_op = extent_op;
-
- delayed_refs = &trans->transaction->delayed_refs;
- spin_lock(&delayed_refs->lock);
-
- add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
- num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
- extent_op->is_data);
-
- if (waitqueue_active(&delayed_refs->seq_wait))
- wake_up(&delayed_refs->seq_wait);
- spin_unlock(&delayed_refs->lock);
- return 0;
-}
-
-/*
- * this does a simple search for the head node for a given extent.
- * It must be called with the delayed ref spinlock held, and it returns
- * the head node if one was found, or NULL if not.
- */
-struct btrfs_delayed_ref_head *
-btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
-{
- struct btrfs_delayed_ref_node *ref;
- struct btrfs_delayed_ref_root *delayed_refs;
-
- delayed_refs = &trans->transaction->delayed_refs;
- ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0);
- if (ref)
- return btrfs_delayed_node_to_head(ref);
- return NULL;
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/delayed-ref.h b/ANDROID_3.4.5/fs/btrfs/delayed-ref.h
deleted file mode 100644
index d8f244d9..00000000
--- a/ANDROID_3.4.5/fs/btrfs/delayed-ref.h
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-#ifndef __DELAYED_REF__
-#define __DELAYED_REF__
-
-/* these are the possible values of struct btrfs_delayed_ref->action */
-#define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */
-#define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */
-#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
-#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
-
-struct btrfs_delayed_ref_node {
- struct rb_node rb_node;
-
- /* the starting bytenr of the extent */
- u64 bytenr;
-
- /* the size of the extent */
- u64 num_bytes;
-
- /* seq number to keep track of insertion order */
- u64 seq;
-
- /* ref count on this data structure */
- atomic_t refs;
-
- /*
- * how many refs this entry is adding or deleting. For
- * head refs, this may be a negative number because it is keeping
- * track of the total mods done to the reference count.
- * For individual refs, this will always be a positive number
- *
- * It may be more than one, since it is possible for a single
- * parent to have more than one ref on an extent
- */
- int ref_mod;
-
- unsigned int action:8;
- unsigned int type:8;
- /* is this node still in the rbtree? */
- unsigned int is_head:1;
- unsigned int in_tree:1;
-};
-
-struct btrfs_delayed_extent_op {
- struct btrfs_disk_key key;
- u64 flags_to_set;
- unsigned int update_key:1;
- unsigned int update_flags:1;
- unsigned int is_data:1;
-};
-
-/*
- * the head refs are used to hold a lock on a given extent, which allows us
- * to make sure that only one process is running the delayed refs
- * at a time for a single extent. They also store the sum of all the
- * reference count modifications we've queued up.
- */
-struct btrfs_delayed_ref_head {
- struct btrfs_delayed_ref_node node;
-
- /*
- * the mutex is held while running the refs, and it is also
- * held when checking the sum of reference modifications.
- */
- struct mutex mutex;
-
- struct list_head cluster;
-
- struct btrfs_delayed_extent_op *extent_op;
- /*
- * when a new extent is allocated, it is just reserved in memory.
- * The actual extent isn't inserted into the extent allocation tree
- * until the delayed ref is processed. must_insert_reserved is
- * used to flag a delayed ref so the accounting can be updated
- * when a full insert is done.
- *
- * It is possible the extent will be freed before it is ever
- * inserted into the extent allocation tree. In this case
- * we need to update the in ram accounting to properly reflect
- * the free has happened.
- */
- unsigned int must_insert_reserved:1;
- unsigned int is_data:1;
-};
-
-struct btrfs_delayed_tree_ref {
- struct btrfs_delayed_ref_node node;
- u64 root;
- u64 parent;
- int level;
-};
-
-struct btrfs_delayed_data_ref {
- struct btrfs_delayed_ref_node node;
- u64 root;
- u64 parent;
- u64 objectid;
- u64 offset;
-};
-
-struct btrfs_delayed_ref_root {
- struct rb_root root;
-
- /* this spin lock protects the rbtree and the entries inside */
- spinlock_t lock;
-
- /* how many delayed ref updates we've queued, used by the
- * throttling code
- */
- unsigned long num_entries;
-
- /* total number of head nodes in tree */
- unsigned long num_heads;
-
- /* total number of head nodes ready for processing */
- unsigned long num_heads_ready;
-
- /*
- * set when the tree is flushing before a transaction commit,
- * used by the throttling code to decide if new updates need
- * to be run right away
- */
- int flushing;
-
- u64 run_delayed_start;
-
- /*
- * seq number of delayed refs. We need to know if a backref was being
- * added before the currently processed ref or afterwards.
- */
- u64 seq;
-
- /*
- * seq_list holds a list of all seq numbers that are currently being
- * added to the list. While walking backrefs (btrfs_find_all_roots,
- * qgroups), which might take some time, no newer ref must be processed,
- * as it might influence the outcome of the walk.
- */
- struct list_head seq_head;
-
- /*
- * when the only refs we have in the list must not be processed, we want
- * to wait for more refs to show up or for the end of backref walking.
- */
- wait_queue_head_t seq_wait;
-};
-
-static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
-{
- WARN_ON(atomic_read(&ref->refs) == 0);
- if (atomic_dec_and_test(&ref->refs)) {
- WARN_ON(ref->in_tree);
- kfree(ref);
- }
-}
-
-int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 ref_root, int level, int action,
- struct btrfs_delayed_extent_op *extent_op,
- int for_cow);
-int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes,
- u64 parent, u64 ref_root,
- u64 owner, u64 offset, int action,
- struct btrfs_delayed_extent_op *extent_op,
- int for_cow);
-int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
- struct btrfs_trans_handle *trans,
- u64 bytenr, u64 num_bytes,
- struct btrfs_delayed_extent_op *extent_op);
-
-struct btrfs_delayed_ref_head *
-btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
-int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
- struct btrfs_delayed_ref_head *head);
-int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
- struct list_head *cluster, u64 search_start);
-
-struct seq_list {
- struct list_head list;
- u64 seq;
-};
-
-static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs)
-{
- assert_spin_locked(&delayed_refs->lock);
- ++delayed_refs->seq;
- return delayed_refs->seq;
-}
-
-static inline void
-btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
- struct seq_list *elem)
-{
- assert_spin_locked(&delayed_refs->lock);
- elem->seq = delayed_refs->seq;
- list_add_tail(&elem->list, &delayed_refs->seq_head);
-}
-
-static inline void
-btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
- struct seq_list *elem)
-{
- spin_lock(&delayed_refs->lock);
- list_del(&elem->list);
- wake_up(&delayed_refs->seq_wait);
- spin_unlock(&delayed_refs->lock);
-}
-
-int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
- u64 seq);
-
-/*
- * delayed refs with a ref_seq > 0 must be held back during backref walking.
- * this only applies to items in one of the fs-trees. for_cow items never need
- * to be held back, so they won't get a ref_seq number.
- */
-static inline int need_ref_seq(int for_cow, u64 rootid)
-{
- if (for_cow)
- return 0;
-
- if (rootid == BTRFS_FS_TREE_OBJECTID)
- return 1;
-
- if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
- return 1;
-
- return 0;
-}
-
-/*
- * a node might live in a head or a regular ref, this lets you
- * test for the proper type to use.
- */
-static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node)
-{
- return node->is_head;
-}
-
-/*
- * helper functions to cast a node into its container
- */
-static inline struct btrfs_delayed_tree_ref *
-btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
-{
- WARN_ON(btrfs_delayed_ref_is_head(node));
- return container_of(node, struct btrfs_delayed_tree_ref, node);
-}
-
-static inline struct btrfs_delayed_data_ref *
-btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node)
-{
- WARN_ON(btrfs_delayed_ref_is_head(node));
- return container_of(node, struct btrfs_delayed_data_ref, node);
-}
-
-static inline struct btrfs_delayed_ref_head *
-btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node)
-{
- WARN_ON(!btrfs_delayed_ref_is_head(node));
- return container_of(node, struct btrfs_delayed_ref_head, node);
-}
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/dir-item.c b/ANDROID_3.4.5/fs/btrfs/dir-item.c
deleted file mode 100644
index c1a074d0..00000000
--- a/ANDROID_3.4.5/fs/btrfs/dir-item.c
+++ /dev/null
@@ -1,422 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include "ctree.h"
-#include "disk-io.h"
-#include "hash.h"
-#include "transaction.h"
-
-/*
- * insert a name into a directory, doing overflow properly if there is a hash
- * collision. data_size indicates how big the item inserted should be. On
- * success a struct btrfs_dir_item pointer is returned, otherwise it is
- * an ERR_PTR.
- *
- * The name is not copied into the dir item; you have to do that yourself.
- */
-static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
- *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *cpu_key,
- u32 data_size,
- const char *name,
- int name_len)
-{
- int ret;
- char *ptr;
- struct btrfs_item *item;
- struct extent_buffer *leaf;
-
- ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
- if (ret == -EEXIST) {
- struct btrfs_dir_item *di;
- di = btrfs_match_dir_item_name(root, path, name, name_len);
- if (di)
- return ERR_PTR(-EEXIST);
- btrfs_extend_item(trans, root, path, data_size);
- } else if (ret < 0)
- return ERR_PTR(ret);
- WARN_ON(ret > 0);
- leaf = path->nodes[0];
- item = btrfs_item_nr(leaf, path->slots[0]);
- ptr = btrfs_item_ptr(leaf, path->slots[0], char);
- BUG_ON(data_size > btrfs_item_size(leaf, item));
- ptr += btrfs_item_size(leaf, item) - data_size;
- return (struct btrfs_dir_item *)ptr;
-}
-
-/*
- * xattrs work a lot like directories, this inserts an xattr item
- * into the tree
- */
-int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, u64 objectid,
- const char *name, u16 name_len,
- const void *data, u16 data_len)
-{
- int ret = 0;
- struct btrfs_dir_item *dir_item;
- unsigned long name_ptr, data_ptr;
- struct btrfs_key key, location;
- struct btrfs_disk_key disk_key;
- struct extent_buffer *leaf;
- u32 data_size;
-
- BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root));
-
- key.objectid = objectid;
- btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
- key.offset = btrfs_name_hash(name, name_len);
-
- data_size = sizeof(*dir_item) + name_len + data_len;
- dir_item = insert_with_overflow(trans, root, path, &key, data_size,
- name, name_len);
- if (IS_ERR(dir_item))
- return PTR_ERR(dir_item);
- memset(&location, 0, sizeof(location));
-
- leaf = path->nodes[0];
- btrfs_cpu_key_to_disk(&disk_key, &location);
- btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
- btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR);
- btrfs_set_dir_name_len(leaf, dir_item, name_len);
- btrfs_set_dir_transid(leaf, dir_item, trans->transid);
- btrfs_set_dir_data_len(leaf, dir_item, data_len);
- name_ptr = (unsigned long)(dir_item + 1);
- data_ptr = (unsigned long)((char *)name_ptr + name_len);
-
- write_extent_buffer(leaf, name, name_ptr, name_len);
- write_extent_buffer(leaf, data, data_ptr, data_len);
- btrfs_mark_buffer_dirty(path->nodes[0]);
-
- return ret;
-}
-
-/*
- * insert a directory item in the tree, doing all the magic for
- * both indexes. 'dir' indicates which objectid to insert it into,
- * 'location' is the key to stuff into the directory item, 'type' is the
- * type of the inode we're pointing to, and 'index' is the sequence number
- * to use for the second index (if one is created).
- * Will return 0 or -ENOMEM
- */
-int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, const char *name, int name_len,
- struct inode *dir, struct btrfs_key *location,
- u8 type, u64 index)
-{
- int ret = 0;
- int ret2 = 0;
- struct btrfs_path *path;
- struct btrfs_dir_item *dir_item;
- struct extent_buffer *leaf;
- unsigned long name_ptr;
- struct btrfs_key key;
- struct btrfs_disk_key disk_key;
- u32 data_size;
-
- key.objectid = btrfs_ino(dir);
- btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
- key.offset = btrfs_name_hash(name, name_len);
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->leave_spinning = 1;
-
- btrfs_cpu_key_to_disk(&disk_key, location);
-
- data_size = sizeof(*dir_item) + name_len;
- dir_item = insert_with_overflow(trans, root, path, &key, data_size,
- name, name_len);
- if (IS_ERR(dir_item)) {
- ret = PTR_ERR(dir_item);
- if (ret == -EEXIST)
- goto second_insert;
- goto out_free;
- }
-
- leaf = path->nodes[0];
- btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
- btrfs_set_dir_type(leaf, dir_item, type);
- btrfs_set_dir_data_len(leaf, dir_item, 0);
- btrfs_set_dir_name_len(leaf, dir_item, name_len);
- btrfs_set_dir_transid(leaf, dir_item, trans->transid);
- name_ptr = (unsigned long)(dir_item + 1);
-
- write_extent_buffer(leaf, name, name_ptr, name_len);
- btrfs_mark_buffer_dirty(leaf);
-
-second_insert:
- /* FIXME, use some real flag for selecting the extra index */
- if (root == root->fs_info->tree_root) {
- ret = 0;
- goto out_free;
- }
- btrfs_release_path(path);
-
- ret2 = btrfs_insert_delayed_dir_index(trans, root, name, name_len, dir,
- &disk_key, type, index);
-out_free:
- btrfs_free_path(path);
- if (ret)
- return ret;
- if (ret2)
- return ret2;
- return 0;
-}
-
-/*
- * lookup a directory item based on name. 'dir' is the objectid
- * we're searching in, and 'mod' tells us if you plan on deleting the
- * item (use mod < 0) or changing the options (use mod > 0)
- */
-struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, u64 dir,
- const char *name, int name_len,
- int mod)
-{
- int ret;
- struct btrfs_key key;
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
-
- key.objectid = dir;
- btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
-
- key.offset = btrfs_name_hash(name, name_len);
-
- ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
- return NULL;
-
- return btrfs_match_dir_item_name(root, path, name, name_len);
-}
-
-/*
- * lookup a directory item based on index. 'dir' is the objectid
- * we're searching in, and 'mod' tells us if you plan on deleting the
- * item (use mod < 0) or changing the options (use mod > 0)
- *
- * The name is used to make sure the index really points to the name you were
- * looking for.
- */
-struct btrfs_dir_item *
-btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, u64 dir,
- u64 objectid, const char *name, int name_len,
- int mod)
-{
- int ret;
- struct btrfs_key key;
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
-
- key.objectid = dir;
- btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
- key.offset = objectid;
-
- ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
- return ERR_PTR(-ENOENT);
- return btrfs_match_dir_item_name(root, path, name, name_len);
-}
-
-struct btrfs_dir_item *
-btrfs_search_dir_index_item(struct btrfs_root *root,
- struct btrfs_path *path, u64 dirid,
- const char *name, int name_len)
-{
- struct extent_buffer *leaf;
- struct btrfs_dir_item *di;
- struct btrfs_key key;
- u32 nritems;
- int ret;
-
- key.objectid = dirid;
- key.type = BTRFS_DIR_INDEX_KEY;
- key.offset = 0;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- return ERR_PTR(ret);
-
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
-
- while (1) {
- if (path->slots[0] >= nritems) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
- break;
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- continue;
- }
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.objectid != dirid || key.type != BTRFS_DIR_INDEX_KEY)
- break;
-
- di = btrfs_match_dir_item_name(root, path, name, name_len);
- if (di)
- return di;
-
- path->slots[0]++;
- }
- return NULL;
-}
-
-struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, u64 dir,
- const char *name, u16 name_len,
- int mod)
-{
- int ret;
- struct btrfs_key key;
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
-
- key.objectid = dir;
- btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
- key.offset = btrfs_name_hash(name, name_len);
- ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
- return NULL;
-
- return btrfs_match_dir_item_name(root, path, name, name_len);
-}
-
-/*
- * helper function to look at the directory item pointed to by 'path'
- * this walks through all the entries in a dir item and finds one
- * for a specific name.
- */
-struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
- struct btrfs_path *path,
- const char *name, int name_len)
-{
- struct btrfs_dir_item *dir_item;
- unsigned long name_ptr;
- u32 total_len;
- u32 cur = 0;
- u32 this_len;
- struct extent_buffer *leaf;
-
- leaf = path->nodes[0];
- dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
- if (verify_dir_item(root, leaf, dir_item))
- return NULL;
-
- total_len = btrfs_item_size_nr(leaf, path->slots[0]);
- while (cur < total_len) {
- this_len = sizeof(*dir_item) +
- btrfs_dir_name_len(leaf, dir_item) +
- btrfs_dir_data_len(leaf, dir_item);
- name_ptr = (unsigned long)(dir_item + 1);
-
- if (btrfs_dir_name_len(leaf, dir_item) == name_len &&
- memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
- return dir_item;
-
- cur += this_len;
- dir_item = (struct btrfs_dir_item *)((char *)dir_item +
- this_len);
- }
- return NULL;
-}
-
-/*
- * given a pointer into a directory item, delete it. This
- * handles items that have more than one entry in them.
- */
-int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_dir_item *di)
-{
-
- struct extent_buffer *leaf;
- u32 sub_item_len;
- u32 item_len;
- int ret = 0;
-
- leaf = path->nodes[0];
- sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di) +
- btrfs_dir_data_len(leaf, di);
- item_len = btrfs_item_size_nr(leaf, path->slots[0]);
- if (sub_item_len == item_len) {
- ret = btrfs_del_item(trans, root, path);
- } else {
- /* MARKER */
- unsigned long ptr = (unsigned long)di;
- unsigned long start;
-
- start = btrfs_item_ptr_offset(leaf, path->slots[0]);
- memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
- item_len - (ptr + sub_item_len - start));
- btrfs_truncate_item(trans, root, path,
- item_len - sub_item_len, 1);
- }
- return ret;
-}
-
-int verify_dir_item(struct btrfs_root *root,
- struct extent_buffer *leaf,
- struct btrfs_dir_item *dir_item)
-{
- u16 namelen = BTRFS_NAME_LEN;
- u8 type = btrfs_dir_type(leaf, dir_item);
-
- if (type >= BTRFS_FT_MAX) {
- printk(KERN_CRIT "btrfs: invalid dir item type: %d\n",
- (int)type);
- return 1;
- }
-
- if (type == BTRFS_FT_XATTR)
- namelen = XATTR_NAME_MAX;
-
- if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
- printk(KERN_CRIT "btrfs: invalid dir item name len: %u\n",
-		       (unsigned)btrfs_dir_name_len(leaf, dir_item));
- return 1;
- }
-
- /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
- if (btrfs_dir_data_len(leaf, dir_item) > BTRFS_MAX_XATTR_SIZE(root)) {
- printk(KERN_CRIT "btrfs: invalid dir item data len: %u\n",
- (unsigned)btrfs_dir_data_len(leaf, dir_item));
- return 1;
- }
-
- return 0;
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/disk-io.c b/ANDROID_3.4.5/fs/btrfs/disk-io.c
deleted file mode 100644
index a7ffc88a..00000000
--- a/ANDROID_3.4.5/fs/btrfs/disk-io.c
+++ /dev/null
@@ -1,3693 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/fs.h>
-#include <linux/blkdev.h>
-#include <linux/scatterlist.h>
-#include <linux/swap.h>
-#include <linux/radix-tree.h>
-#include <linux/writeback.h>
-#include <linux/buffer_head.h>
-#include <linux/workqueue.h>
-#include <linux/kthread.h>
-#include <linux/freezer.h>
-#include <linux/crc32c.h>
-#include <linux/slab.h>
-#include <linux/migrate.h>
-#include <linux/ratelimit.h>
-#include <asm/unaligned.h>
-#include "compat.h"
-#include "ctree.h"
-#include "disk-io.h"
-#include "transaction.h"
-#include "btrfs_inode.h"
-#include "volumes.h"
-#include "print-tree.h"
-#include "async-thread.h"
-#include "locking.h"
-#include "tree-log.h"
-#include "free-space-cache.h"
-#include "inode-map.h"
-#include "check-integrity.h"
-
-static struct extent_io_ops btree_extent_io_ops;
-static void end_workqueue_fn(struct btrfs_work *work);
-static void free_fs_root(struct btrfs_root *root);
-static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
- int read_only);
-static void btrfs_destroy_ordered_operations(struct btrfs_root *root);
-static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
-static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
- struct btrfs_root *root);
-static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
-static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
-static int btrfs_destroy_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages,
- int mark);
-static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
- struct extent_io_tree *pinned_extents);
-
-/*
- * end_io_wq structs are used to do processing in task context when an IO is
- * complete. This is used during reads to verify checksums, and it is used
- * by writes to insert metadata for new file extents after IO is complete.
- */
-struct end_io_wq {
- struct bio *bio;
- bio_end_io_t *end_io;
- void *private;
- struct btrfs_fs_info *info;
- int error;
- int metadata;
- struct list_head list;
- struct btrfs_work work;
-};
-
-/*
- * async submit bios are used to offload expensive checksumming
- * onto the worker threads. They checksum file and metadata bios
- * just before they are sent down the IO stack.
- */
-struct async_submit_bio {
- struct inode *inode;
- struct bio *bio;
- struct list_head list;
- extent_submit_bio_hook_t *submit_bio_start;
- extent_submit_bio_hook_t *submit_bio_done;
- int rw;
- int mirror_num;
- unsigned long bio_flags;
- /*
- * bio_offset is optional, can be used if the pages in the bio
- * can't tell us where in the file the bio should go
- */
- u64 bio_offset;
- struct btrfs_work work;
- int error;
-};
-
-/*
- * Lockdep class keys for extent_buffer->lock's in this root. For a given
- * eb, the lockdep key is determined by the btrfs_root it belongs to and
- * the level the eb occupies in the tree.
- *
- * Different roots are used for different purposes and may nest inside each
- * other and they require separate keysets. As lockdep keys should be
- * static, assign keysets according to the purpose of the root as indicated
- * by btrfs_root->objectid. This ensures that all special purpose roots
- * have separate keysets.
- *
- * Lock-nesting across peer nodes is always done with the immediate parent
- * node locked thus preventing deadlock. As lockdep doesn't know this, use
- * subclass to avoid triggering lockdep warning in such cases.
- *
- * The key is set by the readpage_end_io_hook after the buffer has passed
- * csum validation but before the pages are unlocked. It is also set by
- * btrfs_init_new_buffer on freshly allocated blocks.
- *
- * We also add a check to make sure the highest level of the tree is the
- * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
- * needs update as well.
- */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# if BTRFS_MAX_LEVEL != 8
-# error
-# endif
-
-static struct btrfs_lockdep_keyset {
- u64 id; /* root objectid */
- const char *name_stem; /* lock name stem */
- char names[BTRFS_MAX_LEVEL + 1][20];
- struct lock_class_key keys[BTRFS_MAX_LEVEL + 1];
-} btrfs_lockdep_keysets[] = {
- { .id = BTRFS_ROOT_TREE_OBJECTID, .name_stem = "root" },
- { .id = BTRFS_EXTENT_TREE_OBJECTID, .name_stem = "extent" },
- { .id = BTRFS_CHUNK_TREE_OBJECTID, .name_stem = "chunk" },
- { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" },
- { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" },
- { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" },
- { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" },
- { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
- { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
- { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
- { .id = 0, .name_stem = "tree" },
-};
-
-void __init btrfs_init_lockdep(void)
-{
- int i, j;
-
- /* initialize lockdep class names */
- for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) {
- struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i];
-
- for (j = 0; j < ARRAY_SIZE(ks->names); j++)
- snprintf(ks->names[j], sizeof(ks->names[j]),
- "btrfs-%s-%02d", ks->name_stem, j);
- }
-}
-
-void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
- int level)
-{
- struct btrfs_lockdep_keyset *ks;
-
- BUG_ON(level >= ARRAY_SIZE(ks->keys));
-
- /* find the matching keyset, id 0 is the default entry */
- for (ks = btrfs_lockdep_keysets; ks->id; ks++)
- if (ks->id == objectid)
- break;
-
- lockdep_set_class_and_name(&eb->lock,
- &ks->keys[level], ks->names[level]);
-}
-
-#endif
-
-/*
- * extents on the btree inode are pretty simple, there's one extent
- * that covers the entire device
- */
-static struct extent_map *btree_get_extent(struct inode *inode,
- struct page *page, size_t pg_offset, u64 start, u64 len,
- int create)
-{
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- struct extent_map *em;
- int ret;
-
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, start, len);
- if (em) {
- em->bdev =
- BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
- read_unlock(&em_tree->lock);
- goto out;
- }
- read_unlock(&em_tree->lock);
-
- em = alloc_extent_map();
- if (!em) {
- em = ERR_PTR(-ENOMEM);
- goto out;
- }
- em->start = 0;
- em->len = (u64)-1;
- em->block_len = (u64)-1;
- em->block_start = 0;
- em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
-
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- if (ret == -EEXIST) {
- u64 failed_start = em->start;
- u64 failed_len = em->len;
-
- free_extent_map(em);
- em = lookup_extent_mapping(em_tree, start, len);
- if (em) {
- ret = 0;
- } else {
- em = lookup_extent_mapping(em_tree, failed_start,
- failed_len);
- ret = -EIO;
- }
- } else if (ret) {
- free_extent_map(em);
- em = NULL;
- }
- write_unlock(&em_tree->lock);
-
- if (ret)
- em = ERR_PTR(ret);
-out:
- return em;
-}
-
-u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
-{
- return crc32c(seed, data, len);
-}
-
-void btrfs_csum_final(u32 crc, char *result)
-{
- put_unaligned_le32(~crc, result);
-}
-
-/*
- * compute the csum for a btree block, and either verify it or write it
- * into the csum field of the block.
- */
-static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
- int verify)
-{
- u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
- char *result = NULL;
- unsigned long len;
- unsigned long cur_len;
- unsigned long offset = BTRFS_CSUM_SIZE;
- char *kaddr;
- unsigned long map_start;
- unsigned long map_len;
- int err;
- u32 crc = ~(u32)0;
- unsigned long inline_result;
-
- len = buf->len - offset;
- while (len > 0) {
- err = map_private_extent_buffer(buf, offset, 32,
- &kaddr, &map_start, &map_len);
- if (err)
- return 1;
- cur_len = min(len, map_len - (offset - map_start));
- crc = btrfs_csum_data(root, kaddr + offset - map_start,
- crc, cur_len);
- len -= cur_len;
- offset += cur_len;
- }
- if (csum_size > sizeof(inline_result)) {
- result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
- if (!result)
- return 1;
- } else {
- result = (char *)&inline_result;
- }
-
- btrfs_csum_final(crc, result);
-
- if (verify) {
- if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
- u32 val;
- u32 found = 0;
- memcpy(&found, result, csum_size);
-
- read_extent_buffer(buf, &val, 0, csum_size);
- printk_ratelimited(KERN_INFO "btrfs: %s checksum verify "
- "failed on %llu wanted %X found %X "
- "level %d\n",
- root->fs_info->sb->s_id,
- (unsigned long long)buf->start, val, found,
- btrfs_header_level(buf));
- if (result != (char *)&inline_result)
- kfree(result);
- return 1;
- }
- } else {
- write_extent_buffer(buf, result, 0, csum_size);
- }
- if (result != (char *)&inline_result)
- kfree(result);
- return 0;
-}
-
-/*
- * we can't consider a given block up to date unless the transid of the
- * block matches the transid in the parent node's pointer. This is how we
- * detect blocks that either didn't get written at all or got written
- * in the wrong place.
- */
-static int verify_parent_transid(struct extent_io_tree *io_tree,
- struct extent_buffer *eb, u64 parent_transid,
- int atomic)
-{
- struct extent_state *cached_state = NULL;
- int ret;
-
- if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
- return 0;
-
- if (atomic)
- return -EAGAIN;
-
- lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
- 0, &cached_state);
- if (extent_buffer_uptodate(eb) &&
- btrfs_header_generation(eb) == parent_transid) {
- ret = 0;
- goto out;
- }
- printk_ratelimited("parent transid verify failed on %llu wanted %llu "
- "found %llu\n",
- (unsigned long long)eb->start,
- (unsigned long long)parent_transid,
- (unsigned long long)btrfs_header_generation(eb));
- ret = 1;
- clear_extent_buffer_uptodate(eb);
-out:
- unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
- &cached_state, GFP_NOFS);
- return ret;
-}
-
-/*
- * helper to read a given tree block, doing retries as required when
- * the checksums don't match and we have alternate mirrors to try.
- */
-static int btree_read_extent_buffer_pages(struct btrfs_root *root,
- struct extent_buffer *eb,
- u64 start, u64 parent_transid)
-{
- struct extent_io_tree *io_tree;
- int failed = 0;
- int ret;
- int num_copies = 0;
- int mirror_num = 0;
- int failed_mirror = 0;
-
- clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
- io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
- while (1) {
- ret = read_extent_buffer_pages(io_tree, eb, start,
- WAIT_COMPLETE,
- btree_get_extent, mirror_num);
- if (!ret && !verify_parent_transid(io_tree, eb,
- parent_transid, 0))
- break;
-
- /*
- * This buffer's crc is fine, but its contents are corrupted, so
- * there is no reason to read the other copies, they won't be
- * any less wrong.
- */
- if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
- break;
-
- num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
- eb->start, eb->len);
- if (num_copies == 1)
- break;
-
- if (!failed_mirror) {
- failed = 1;
- failed_mirror = eb->read_mirror;
- }
-
- mirror_num++;
- if (mirror_num == failed_mirror)
- mirror_num++;
-
- if (mirror_num > num_copies)
- break;
- }
-
- if (failed && !ret)
- repair_eb_io_failure(root, eb, failed_mirror);
-
- return ret;
-}
-
-/*
- * checksum a dirty tree block before IO. This has extra checks to make sure
- * we only fill in the checksum field in the first page of a multi-page block
- */
-
-static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
-{
- struct extent_io_tree *tree;
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
- u64 found_start;
- struct extent_buffer *eb;
-
- tree = &BTRFS_I(page->mapping->host)->io_tree;
-
- eb = (struct extent_buffer *)page->private;
- if (page != eb->pages[0])
- return 0;
- found_start = btrfs_header_bytenr(eb);
- if (found_start != start) {
- WARN_ON(1);
- return 0;
- }
- if (eb->pages[0] != page) {
- WARN_ON(1);
- return 0;
- }
- if (!PageUptodate(page)) {
- WARN_ON(1);
- return 0;
- }
- csum_tree_block(root, eb, 0);
- return 0;
-}
-
-static int check_tree_block_fsid(struct btrfs_root *root,
- struct extent_buffer *eb)
-{
- struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
- u8 fsid[BTRFS_UUID_SIZE];
- int ret = 1;
-
- read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb),
- BTRFS_FSID_SIZE);
- while (fs_devices) {
- if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) {
- ret = 0;
- break;
- }
- fs_devices = fs_devices->seed;
- }
- return ret;
-}
-
-#define CORRUPT(reason, eb, root, slot) \
-	printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu, "	\
- "root=%llu, slot=%d\n", reason, \
- (unsigned long long)btrfs_header_bytenr(eb), \
- (unsigned long long)root->objectid, slot)
-
-static noinline int check_leaf(struct btrfs_root *root,
- struct extent_buffer *leaf)
-{
- struct btrfs_key key;
- struct btrfs_key leaf_key;
- u32 nritems = btrfs_header_nritems(leaf);
- int slot;
-
- if (nritems == 0)
- return 0;
-
- /* Check the 0 item */
- if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
- BTRFS_LEAF_DATA_SIZE(root)) {
- CORRUPT("invalid item offset size pair", leaf, root, 0);
- return -EIO;
- }
-
- /*
-	 * Check to make sure each item's keys are in the correct order and their
-	 * offsets make sense.  We only have to loop through nritems-1 because
-	 * we check the current slot against the next slot, which verifies the
-	 * next slot's offset+size makes sense and that the current slot's
-	 * offset is correct.
- */
- for (slot = 0; slot < nritems - 1; slot++) {
- btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
- btrfs_item_key_to_cpu(leaf, &key, slot + 1);
-
- /* Make sure the keys are in the right order */
- if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
- CORRUPT("bad key order", leaf, root, slot);
- return -EIO;
- }
-
- /*
- * Make sure the offset and ends are right, remember that the
- * item data starts at the end of the leaf and grows towards the
- * front.
- */
- if (btrfs_item_offset_nr(leaf, slot) !=
- btrfs_item_end_nr(leaf, slot + 1)) {
- CORRUPT("slot offset bad", leaf, root, slot);
- return -EIO;
- }
-
- /*
- * Check to make sure that we don't point outside of the leaf,
-		 * just in case all the items are consistent with each other, but
- * all point outside of the leaf.
- */
- if (btrfs_item_end_nr(leaf, slot) >
- BTRFS_LEAF_DATA_SIZE(root)) {
- CORRUPT("slot end outside of leaf", leaf, root, slot);
- return -EIO;
- }
- }
-
- return 0;
-}
-
-struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
- struct page *page, int max_walk)
-{
- struct extent_buffer *eb;
- u64 start = page_offset(page);
- u64 target = start;
- u64 min_start;
-
- if (start < max_walk)
- min_start = 0;
- else
- min_start = start - max_walk;
-
- while (start >= min_start) {
- eb = find_extent_buffer(tree, start, 0);
- if (eb) {
- /*
- * we found an extent buffer and it contains our page
- * horray!
-			 * hooray!
- if (eb->start <= target &&
- eb->start + eb->len > target)
- return eb;
-
- /* we found an extent buffer that wasn't for us */
- free_extent_buffer(eb);
- return NULL;
- }
- if (start == 0)
- break;
- start -= PAGE_CACHE_SIZE;
- }
- return NULL;
-}
-
-static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
- struct extent_state *state, int mirror)
-{
- struct extent_io_tree *tree;
- u64 found_start;
- int found_level;
- struct extent_buffer *eb;
- struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
- int ret = 0;
- int reads_done;
-
- if (!page->private)
- goto out;
-
- tree = &BTRFS_I(page->mapping->host)->io_tree;
- eb = (struct extent_buffer *)page->private;
-
- /* the pending IO might have been the only thing that kept this buffer
-	 * in memory.  Make sure we have a ref for all these other checks
- */
- extent_buffer_get(eb);
-
- reads_done = atomic_dec_and_test(&eb->io_pages);
- if (!reads_done)
- goto err;
-
- eb->read_mirror = mirror;
- if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
- ret = -EIO;
- goto err;
- }
-
- found_start = btrfs_header_bytenr(eb);
- if (found_start != eb->start) {
- printk_ratelimited(KERN_INFO "btrfs bad tree block start "
- "%llu %llu\n",
- (unsigned long long)found_start,
- (unsigned long long)eb->start);
- ret = -EIO;
- goto err;
- }
- if (check_tree_block_fsid(root, eb)) {
- printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
- (unsigned long long)eb->start);
- ret = -EIO;
- goto err;
- }
- found_level = btrfs_header_level(eb);
-
- btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
- eb, found_level);
-
- ret = csum_tree_block(root, eb, 1);
- if (ret) {
- ret = -EIO;
- goto err;
- }
-
- /*
- * If this is a leaf block and it is corrupt, set the corrupt bit so
- * that we don't try and read the other copies of this block, just
- * return -EIO.
- */
- if (found_level == 0 && check_leaf(root, eb)) {
- set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
- ret = -EIO;
- }
-
- if (!ret)
- set_extent_buffer_uptodate(eb);
-err:
- if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
- clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
- btree_readahead_hook(root, eb, eb->start, ret);
- }
-
- if (ret)
- clear_extent_buffer_uptodate(eb);
- free_extent_buffer(eb);
-out:
- return ret;
-}
-
-static int btree_io_failed_hook(struct page *page, int failed_mirror)
-{
- struct extent_buffer *eb;
- struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
-
- eb = (struct extent_buffer *)page->private;
- set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
- eb->read_mirror = failed_mirror;
- if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
- btree_readahead_hook(root, eb, eb->start, -EIO);
- return -EIO; /* we fixed nothing */
-}
-
-static void end_workqueue_bio(struct bio *bio, int err)
-{
- struct end_io_wq *end_io_wq = bio->bi_private;
- struct btrfs_fs_info *fs_info;
-
- fs_info = end_io_wq->info;
- end_io_wq->error = err;
- end_io_wq->work.func = end_workqueue_fn;
- end_io_wq->work.flags = 0;
-
- if (bio->bi_rw & REQ_WRITE) {
- if (end_io_wq->metadata == 1)
- btrfs_queue_worker(&fs_info->endio_meta_write_workers,
- &end_io_wq->work);
- else if (end_io_wq->metadata == 2)
- btrfs_queue_worker(&fs_info->endio_freespace_worker,
- &end_io_wq->work);
- else
- btrfs_queue_worker(&fs_info->endio_write_workers,
- &end_io_wq->work);
- } else {
- if (end_io_wq->metadata)
- btrfs_queue_worker(&fs_info->endio_meta_workers,
- &end_io_wq->work);
- else
- btrfs_queue_worker(&fs_info->endio_workers,
- &end_io_wq->work);
- }
-}
-
-/*
- * For the metadata arg you want
- *
- * 0 - if data
- * 1 - if normal metadata
- * 2 - if writing to the free space cache area
- */
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
- int metadata)
-{
- struct end_io_wq *end_io_wq;
- end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS);
- if (!end_io_wq)
- return -ENOMEM;
-
- end_io_wq->private = bio->bi_private;
- end_io_wq->end_io = bio->bi_end_io;
- end_io_wq->info = info;
- end_io_wq->error = 0;
- end_io_wq->bio = bio;
- end_io_wq->metadata = metadata;
-
- bio->bi_private = end_io_wq;
- bio->bi_end_io = end_workqueue_bio;
- return 0;
-}
-
-unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
-{
- unsigned long limit = min_t(unsigned long,
- info->workers.max_workers,
- info->fs_devices->open_devices);
- return 256 * limit;
-}
-
-static void run_one_async_start(struct btrfs_work *work)
-{
- struct async_submit_bio *async;
- int ret;
-
- async = container_of(work, struct async_submit_bio, work);
- ret = async->submit_bio_start(async->inode, async->rw, async->bio,
- async->mirror_num, async->bio_flags,
- async->bio_offset);
- if (ret)
- async->error = ret;
-}
-
-static void run_one_async_done(struct btrfs_work *work)
-{
- struct btrfs_fs_info *fs_info;
- struct async_submit_bio *async;
- int limit;
-
- async = container_of(work, struct async_submit_bio, work);
- fs_info = BTRFS_I(async->inode)->root->fs_info;
-
- limit = btrfs_async_submit_limit(fs_info);
- limit = limit * 2 / 3;
-
- atomic_dec(&fs_info->nr_async_submits);
-
- if (atomic_read(&fs_info->nr_async_submits) < limit &&
- waitqueue_active(&fs_info->async_submit_wait))
- wake_up(&fs_info->async_submit_wait);
-
-	/* If an error occurred we just want to clean up the bio and move on */
- if (async->error) {
- bio_endio(async->bio, async->error);
- return;
- }
-
- async->submit_bio_done(async->inode, async->rw, async->bio,
- async->mirror_num, async->bio_flags,
- async->bio_offset);
-}
-
-static void run_one_async_free(struct btrfs_work *work)
-{
- struct async_submit_bio *async;
-
- async = container_of(work, struct async_submit_bio, work);
- kfree(async);
-}
-
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
- int rw, struct bio *bio, int mirror_num,
- unsigned long bio_flags,
- u64 bio_offset,
- extent_submit_bio_hook_t *submit_bio_start,
- extent_submit_bio_hook_t *submit_bio_done)
-{
- struct async_submit_bio *async;
-
- async = kmalloc(sizeof(*async), GFP_NOFS);
- if (!async)
- return -ENOMEM;
-
- async->inode = inode;
- async->rw = rw;
- async->bio = bio;
- async->mirror_num = mirror_num;
- async->submit_bio_start = submit_bio_start;
- async->submit_bio_done = submit_bio_done;
-
- async->work.func = run_one_async_start;
- async->work.ordered_func = run_one_async_done;
- async->work.ordered_free = run_one_async_free;
-
- async->work.flags = 0;
- async->bio_flags = bio_flags;
- async->bio_offset = bio_offset;
-
- async->error = 0;
-
- atomic_inc(&fs_info->nr_async_submits);
-
- if (rw & REQ_SYNC)
- btrfs_set_work_high_prio(&async->work);
-
- btrfs_queue_worker(&fs_info->workers, &async->work);
-
- while (atomic_read(&fs_info->async_submit_draining) &&
- atomic_read(&fs_info->nr_async_submits)) {
- wait_event(fs_info->async_submit_wait,
- (atomic_read(&fs_info->nr_async_submits) == 0));
- }
-
- return 0;
-}
-
-static int btree_csum_one_bio(struct bio *bio)
-{
- struct bio_vec *bvec = bio->bi_io_vec;
- int bio_index = 0;
- struct btrfs_root *root;
- int ret = 0;
-
- WARN_ON(bio->bi_vcnt <= 0);
- while (bio_index < bio->bi_vcnt) {
- root = BTRFS_I(bvec->bv_page->mapping->host)->root;
- ret = csum_dirty_buffer(root, bvec->bv_page);
- if (ret)
- break;
- bio_index++;
- bvec++;
- }
- return ret;
-}
-
-static int __btree_submit_bio_start(struct inode *inode, int rw,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags,
- u64 bio_offset)
-{
- /*
-	 * when we're called for a write, we're already in the async
-	 * submission context.  Just checksum the bio contents here;
-	 * __btree_submit_bio_done will map and submit it afterwards.
- */
- return btree_csum_one_bio(bio);
-}
-
-static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
-{
- /*
- * when we're called for a write, we're already in the async
- * submission context. Just jump into btrfs_map_bio
- */
- return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
-}
-
-static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
-{
- int ret;
-
- if (!(rw & REQ_WRITE)) {
-
- /*
- * called for a read, do the setup so that checksum validation
- * can happen in the async kernel threads
- */
- ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
- bio, 1);
- if (ret)
- return ret;
- return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
- mirror_num, 0);
- }
-
- /*
- * kthread helpers are used to submit writes so that checksumming
- * can happen in parallel across all CPUs
- */
- return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
- inode, rw, bio, mirror_num, 0,
- bio_offset,
- __btree_submit_bio_start,
- __btree_submit_bio_done);
-}
-
-#ifdef CONFIG_MIGRATION
-static int btree_migratepage(struct address_space *mapping,
- struct page *newpage, struct page *page,
- enum migrate_mode mode)
-{
- /*
- * we can't safely write a btree page from here,
- * we haven't done the locking hook
- */
- if (PageDirty(page))
- return -EAGAIN;
- /*
- * Buffers may be managed in a filesystem specific way.
- * We must have no buffers or drop them.
- */
- if (page_has_private(page) &&
- !try_to_release_page(page, GFP_KERNEL))
- return -EAGAIN;
- return migrate_page(mapping, newpage, page, mode);
-}
-#endif
-
-
-static int btree_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
-{
- struct extent_io_tree *tree;
- tree = &BTRFS_I(mapping->host)->io_tree;
- if (wbc->sync_mode == WB_SYNC_NONE) {
- struct btrfs_root *root = BTRFS_I(mapping->host)->root;
- u64 num_dirty;
- unsigned long thresh = 32 * 1024 * 1024;
-
- if (wbc->for_kupdate)
- return 0;
-
- /* this is a bit racy, but that's ok */
- num_dirty = root->fs_info->dirty_metadata_bytes;
- if (num_dirty < thresh)
- return 0;
- }
- return btree_write_cache_pages(mapping, wbc);
-}
-
-static int btree_readpage(struct file *file, struct page *page)
-{
- struct extent_io_tree *tree;
- tree = &BTRFS_I(page->mapping->host)->io_tree;
- return extent_read_full_page(tree, page, btree_get_extent, 0);
-}
-
-static int btree_releasepage(struct page *page, gfp_t gfp_flags)
-{
- if (PageWriteback(page) || PageDirty(page))
- return 0;
- /*
- * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
- * slab allocation from alloc_extent_state down the callchain where
- * it'd hit a BUG_ON as those flags are not allowed.
- */
- gfp_flags &= ~GFP_SLAB_BUG_MASK;
-
- return try_release_extent_buffer(page, gfp_flags);
-}
-
-static void btree_invalidatepage(struct page *page, unsigned long offset)
-{
- struct extent_io_tree *tree;
- tree = &BTRFS_I(page->mapping->host)->io_tree;
- extent_invalidatepage(tree, page, offset);
- btree_releasepage(page, GFP_NOFS);
- if (PagePrivate(page)) {
- printk(KERN_WARNING "btrfs warning page private not zero "
- "on page %llu\n", (unsigned long long)page_offset(page));
- ClearPagePrivate(page);
- set_page_private(page, 0);
- page_cache_release(page);
- }
-}
-
-static int btree_set_page_dirty(struct page *page)
-{
- struct extent_buffer *eb;
-
- BUG_ON(!PagePrivate(page));
- eb = (struct extent_buffer *)page->private;
- BUG_ON(!eb);
- BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
- BUG_ON(!atomic_read(&eb->refs));
- btrfs_assert_tree_locked(eb);
- return __set_page_dirty_nobuffers(page);
-}
-
-static const struct address_space_operations btree_aops = {
- .readpage = btree_readpage,
- .writepages = btree_writepages,
- .releasepage = btree_releasepage,
- .invalidatepage = btree_invalidatepage,
-#ifdef CONFIG_MIGRATION
- .migratepage = btree_migratepage,
-#endif
- .set_page_dirty = btree_set_page_dirty,
-};
-
-int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
- u64 parent_transid)
-{
- struct extent_buffer *buf = NULL;
- struct inode *btree_inode = root->fs_info->btree_inode;
- int ret = 0;
-
- buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
- if (!buf)
- return 0;
- read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
- buf, 0, WAIT_NONE, btree_get_extent, 0);
- free_extent_buffer(buf);
- return ret;
-}
-
-int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
- int mirror_num, struct extent_buffer **eb)
-{
- struct extent_buffer *buf = NULL;
- struct inode *btree_inode = root->fs_info->btree_inode;
- struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree;
- int ret;
-
- buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
- if (!buf)
- return 0;
-
- set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
-
- ret = read_extent_buffer_pages(io_tree, buf, 0, WAIT_PAGE_LOCK,
- btree_get_extent, mirror_num);
- if (ret) {
- free_extent_buffer(buf);
- return ret;
- }
-
- if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
- free_extent_buffer(buf);
- return -EIO;
- } else if (extent_buffer_uptodate(buf)) {
- *eb = buf;
- } else {
- free_extent_buffer(buf);
- }
- return 0;
-}
-
-struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
- u64 bytenr, u32 blocksize)
-{
- struct inode *btree_inode = root->fs_info->btree_inode;
- struct extent_buffer *eb;
- eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
- bytenr, blocksize);
- return eb;
-}
-
-struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
- u64 bytenr, u32 blocksize)
-{
- struct inode *btree_inode = root->fs_info->btree_inode;
- struct extent_buffer *eb;
-
- eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
- bytenr, blocksize);
- return eb;
-}
-
-
-int btrfs_write_tree_block(struct extent_buffer *buf)
-{
- return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start,
- buf->start + buf->len - 1);
-}
-
-int btrfs_wait_tree_block_writeback(struct extent_buffer *buf)
-{
- return filemap_fdatawait_range(buf->pages[0]->mapping,
- buf->start, buf->start + buf->len - 1);
-}
-
-struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
- u32 blocksize, u64 parent_transid)
-{
- struct extent_buffer *buf = NULL;
- int ret;
-
- buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
- if (!buf)
- return NULL;
-
- ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
- return buf;
-
-}
-
-void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf)
-{
- if (btrfs_header_generation(buf) ==
- root->fs_info->running_transaction->transid) {
- btrfs_assert_tree_locked(buf);
-
- if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
- spin_lock(&root->fs_info->delalloc_lock);
- if (root->fs_info->dirty_metadata_bytes >= buf->len)
- root->fs_info->dirty_metadata_bytes -= buf->len;
- else {
- spin_unlock(&root->fs_info->delalloc_lock);
- btrfs_panic(root->fs_info, -EOVERFLOW,
- "Can't clear %lu bytes from "
-					    "dirty_metadata_bytes (%lu)",
- buf->len,
- root->fs_info->dirty_metadata_bytes);
- }
- spin_unlock(&root->fs_info->delalloc_lock);
- }
-
- /* ugh, clear_extent_buffer_dirty needs to lock the page */
- btrfs_set_lock_blocking(buf);
- clear_extent_buffer_dirty(buf);
- }
-}
-
-static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
- u32 stripesize, struct btrfs_root *root,
- struct btrfs_fs_info *fs_info,
- u64 objectid)
-{
- root->node = NULL;
- root->commit_root = NULL;
- root->sectorsize = sectorsize;
- root->nodesize = nodesize;
- root->leafsize = leafsize;
- root->stripesize = stripesize;
- root->ref_cows = 0;
- root->track_dirty = 0;
- root->in_radix = 0;
- root->orphan_item_inserted = 0;
- root->orphan_cleanup_state = 0;
-
- root->objectid = objectid;
- root->last_trans = 0;
- root->highest_objectid = 0;
- root->name = NULL;
- root->inode_tree = RB_ROOT;
- INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
- root->block_rsv = NULL;
- root->orphan_block_rsv = NULL;
-
- INIT_LIST_HEAD(&root->dirty_list);
- INIT_LIST_HEAD(&root->orphan_list);
- INIT_LIST_HEAD(&root->root_list);
- spin_lock_init(&root->orphan_lock);
- spin_lock_init(&root->inode_lock);
- spin_lock_init(&root->accounting_lock);
- mutex_init(&root->objectid_mutex);
- mutex_init(&root->log_mutex);
- init_waitqueue_head(&root->log_writer_wait);
- init_waitqueue_head(&root->log_commit_wait[0]);
- init_waitqueue_head(&root->log_commit_wait[1]);
- atomic_set(&root->log_commit[0], 0);
- atomic_set(&root->log_commit[1], 0);
- atomic_set(&root->log_writers, 0);
- root->log_batch = 0;
- root->log_transid = 0;
- root->last_log_commit = 0;
- extent_io_tree_init(&root->dirty_log_pages,
- fs_info->btree_inode->i_mapping);
-
- memset(&root->root_key, 0, sizeof(root->root_key));
- memset(&root->root_item, 0, sizeof(root->root_item));
- memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
- memset(&root->root_kobj, 0, sizeof(root->root_kobj));
- root->defrag_trans_start = fs_info->generation;
- init_completion(&root->kobj_unregister);
- root->defrag_running = 0;
- root->root_key.objectid = objectid;
- root->anon_dev = 0;
-}
-
-static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
- struct btrfs_fs_info *fs_info,
- u64 objectid,
- struct btrfs_root *root)
-{
- int ret;
- u32 blocksize;
- u64 generation;
-
- __setup_root(tree_root->nodesize, tree_root->leafsize,
- tree_root->sectorsize, tree_root->stripesize,
- root, fs_info, objectid);
- ret = btrfs_find_last_root(tree_root, objectid,
- &root->root_item, &root->root_key);
- if (ret > 0)
- return -ENOENT;
- else if (ret < 0)
- return ret;
-
- generation = btrfs_root_generation(&root->root_item);
- blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
- root->commit_root = NULL;
- root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
- blocksize, generation);
- if (!root->node || !btrfs_buffer_uptodate(root->node, generation, 0)) {
- free_extent_buffer(root->node);
- root->node = NULL;
- return -EIO;
- }
- root->commit_root = btrfs_root_node(root);
- return 0;
-}
-
-static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS);
- if (root)
- root->fs_info = fs_info;
- return root;
-}
-
-static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
-{
- struct btrfs_root *root;
- struct btrfs_root *tree_root = fs_info->tree_root;
- struct extent_buffer *leaf;
-
- root = btrfs_alloc_root(fs_info);
- if (!root)
- return ERR_PTR(-ENOMEM);
-
- __setup_root(tree_root->nodesize, tree_root->leafsize,
- tree_root->sectorsize, tree_root->stripesize,
- root, fs_info, BTRFS_TREE_LOG_OBJECTID);
-
- root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
- root->root_key.type = BTRFS_ROOT_ITEM_KEY;
- root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
- /*
- * log trees do not get reference counted because they go away
- * before a real commit is actually done. They do store pointers
- * to file data extents, and those reference counts still get
- * updated (along with back refs to the log tree).
- */
- root->ref_cows = 0;
-
- leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
- BTRFS_TREE_LOG_OBJECTID, NULL,
- 0, 0, 0, 0);
- if (IS_ERR(leaf)) {
- kfree(root);
- return ERR_CAST(leaf);
- }
-
- memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
- btrfs_set_header_bytenr(leaf, leaf->start);
- btrfs_set_header_generation(leaf, trans->transid);
- btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
- root->node = leaf;
-
- write_extent_buffer(root->node, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(root->node),
- BTRFS_FSID_SIZE);
- btrfs_mark_buffer_dirty(root->node);
- btrfs_tree_unlock(root->node);
- return root;
-}
-
-int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
-{
- struct btrfs_root *log_root;
-
- log_root = alloc_log_tree(trans, fs_info);
- if (IS_ERR(log_root))
- return PTR_ERR(log_root);
- WARN_ON(fs_info->log_root_tree);
- fs_info->log_root_tree = log_root;
- return 0;
-}
-
-int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_root *log_root;
- struct btrfs_inode_item *inode_item;
-
- log_root = alloc_log_tree(trans, root->fs_info);
- if (IS_ERR(log_root))
- return PTR_ERR(log_root);
-
- log_root->last_trans = trans->transid;
- log_root->root_key.offset = root->root_key.objectid;
-
- inode_item = &log_root->root_item.inode;
- inode_item->generation = cpu_to_le64(1);
- inode_item->size = cpu_to_le64(3);
- inode_item->nlink = cpu_to_le32(1);
- inode_item->nbytes = cpu_to_le64(root->leafsize);
- inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
-
- btrfs_set_root_node(&log_root->root_item, log_root->node);
-
- WARN_ON(root->log_root);
- root->log_root = log_root;
- root->log_transid = 0;
- root->last_log_commit = 0;
- return 0;
-}
-
-struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
- struct btrfs_key *location)
-{
- struct btrfs_root *root;
- struct btrfs_fs_info *fs_info = tree_root->fs_info;
- struct btrfs_path *path;
- struct extent_buffer *l;
- u64 generation;
- u32 blocksize;
- int ret = 0;
-
- root = btrfs_alloc_root(fs_info);
- if (!root)
- return ERR_PTR(-ENOMEM);
- if (location->offset == (u64)-1) {
- ret = find_and_setup_root(tree_root, fs_info,
- location->objectid, root);
- if (ret) {
- kfree(root);
- return ERR_PTR(ret);
- }
- goto out;
- }
-
- __setup_root(tree_root->nodesize, tree_root->leafsize,
- tree_root->sectorsize, tree_root->stripesize,
- root, fs_info, location->objectid);
-
- path = btrfs_alloc_path();
- if (!path) {
- kfree(root);
- return ERR_PTR(-ENOMEM);
- }
- ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
- if (ret == 0) {
- l = path->nodes[0];
- read_extent_buffer(l, &root->root_item,
- btrfs_item_ptr_offset(l, path->slots[0]),
- sizeof(root->root_item));
- memcpy(&root->root_key, location, sizeof(*location));
- }
- btrfs_free_path(path);
- if (ret) {
- kfree(root);
- if (ret > 0)
- ret = -ENOENT;
- return ERR_PTR(ret);
- }
-
- generation = btrfs_root_generation(&root->root_item);
- blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
- root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
- blocksize, generation);
- root->commit_root = btrfs_root_node(root);
- BUG_ON(!root->node); /* -ENOMEM */
-out:
- if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
- root->ref_cows = 1;
- btrfs_check_and_init_root_item(&root->root_item);
- }
-
- return root;
-}
-
-struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
- struct btrfs_key *location)
-{
- struct btrfs_root *root;
- int ret;
-
- if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
- return fs_info->tree_root;
- if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID)
- return fs_info->extent_root;
- if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
- return fs_info->chunk_root;
- if (location->objectid == BTRFS_DEV_TREE_OBJECTID)
- return fs_info->dev_root;
- if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
- return fs_info->csum_root;
-again:
- spin_lock(&fs_info->fs_roots_radix_lock);
- root = radix_tree_lookup(&fs_info->fs_roots_radix,
- (unsigned long)location->objectid);
- spin_unlock(&fs_info->fs_roots_radix_lock);
- if (root)
- return root;
-
- root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
- if (IS_ERR(root))
- return root;
-
- root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
- root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
- GFP_NOFS);
- if (!root->free_ino_pinned || !root->free_ino_ctl) {
- ret = -ENOMEM;
- goto fail;
- }
-
- btrfs_init_free_ino_ctl(root);
- mutex_init(&root->fs_commit_mutex);
- spin_lock_init(&root->cache_lock);
- init_waitqueue_head(&root->cache_wait);
-
- ret = get_anon_bdev(&root->anon_dev);
- if (ret)
- goto fail;
-
- if (btrfs_root_refs(&root->root_item) == 0) {
- ret = -ENOENT;
- goto fail;
- }
-
- ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
- if (ret < 0)
- goto fail;
- if (ret == 0)
- root->orphan_item_inserted = 1;
-
- ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
- if (ret)
- goto fail;
-
- spin_lock(&fs_info->fs_roots_radix_lock);
- ret = radix_tree_insert(&fs_info->fs_roots_radix,
- (unsigned long)root->root_key.objectid,
- root);
- if (ret == 0)
- root->in_radix = 1;
-
- spin_unlock(&fs_info->fs_roots_radix_lock);
- radix_tree_preload_end();
- if (ret) {
- if (ret == -EEXIST) {
- free_fs_root(root);
- goto again;
- }
- goto fail;
- }
-
- ret = btrfs_find_dead_roots(fs_info->tree_root,
- root->root_key.objectid);
- WARN_ON(ret);
- return root;
-fail:
- free_fs_root(root);
- return ERR_PTR(ret);
-}
-
-static int btrfs_congested_fn(void *congested_data, int bdi_bits)
-{
- struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
- int ret = 0;
- struct btrfs_device *device;
- struct backing_dev_info *bdi;
-
- rcu_read_lock();
- list_for_each_entry_rcu(device, &info->fs_devices->devices, dev_list) {
- if (!device->bdev)
- continue;
- bdi = blk_get_backing_dev_info(device->bdev);
- if (bdi && bdi_congested(bdi, bdi_bits)) {
- ret = 1;
- break;
- }
- }
- rcu_read_unlock();
- return ret;
-}
-
-/*
- * If this fails, caller must call bdi_destroy() to get rid of the
- * bdi again.
- */
-static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
-{
- int err;
-
- bdi->capabilities = BDI_CAP_MAP_COPY;
- err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY);
- if (err)
- return err;
-
- bdi->ra_pages = default_backing_dev_info.ra_pages;
- bdi->congested_fn = btrfs_congested_fn;
- bdi->congested_data = info;
- return 0;
-}
-
-/*
- * called by the kthread helper functions to finally call the bio end_io
- * functions. This is where read checksum verification actually happens
- */
-static void end_workqueue_fn(struct btrfs_work *work)
-{
- struct bio *bio;
- struct end_io_wq *end_io_wq;
- struct btrfs_fs_info *fs_info;
- int error;
-
- end_io_wq = container_of(work, struct end_io_wq, work);
- bio = end_io_wq->bio;
- fs_info = end_io_wq->info;
-
- error = end_io_wq->error;
- bio->bi_private = end_io_wq->private;
- bio->bi_end_io = end_io_wq->end_io;
- kfree(end_io_wq);
- bio_endio(bio, error);
-}
-
-static int cleaner_kthread(void *arg)
-{
- struct btrfs_root *root = arg;
-
- do {
- vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
-
- if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
- mutex_trylock(&root->fs_info->cleaner_mutex)) {
- btrfs_run_delayed_iputs(root);
- btrfs_clean_old_snapshots(root);
- mutex_unlock(&root->fs_info->cleaner_mutex);
- btrfs_run_defrag_inodes(root->fs_info);
- }
-
- if (!try_to_freeze()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (!kthread_should_stop())
- schedule();
- __set_current_state(TASK_RUNNING);
- }
- } while (!kthread_should_stop());
- return 0;
-}
-
-static int transaction_kthread(void *arg)
-{
- struct btrfs_root *root = arg;
- struct btrfs_trans_handle *trans;
- struct btrfs_transaction *cur;
- u64 transid;
- unsigned long now;
- unsigned long delay;
- bool cannot_commit;
-
- do {
- cannot_commit = false;
- delay = HZ * 30;
- vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
- mutex_lock(&root->fs_info->transaction_kthread_mutex);
-
- spin_lock(&root->fs_info->trans_lock);
- cur = root->fs_info->running_transaction;
- if (!cur) {
- spin_unlock(&root->fs_info->trans_lock);
- goto sleep;
- }
-
- now = get_seconds();
- if (!cur->blocked &&
- (now < cur->start_time || now - cur->start_time < 30)) {
- spin_unlock(&root->fs_info->trans_lock);
- delay = HZ * 5;
- goto sleep;
- }
- transid = cur->transid;
- spin_unlock(&root->fs_info->trans_lock);
-
- /* If the file system is aborted, this will always fail. */
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- cannot_commit = true;
- goto sleep;
- }
- if (transid == trans->transid) {
- btrfs_commit_transaction(trans, root);
- } else {
- btrfs_end_transaction(trans, root);
- }
-sleep:
- wake_up_process(root->fs_info->cleaner_kthread);
- mutex_unlock(&root->fs_info->transaction_kthread_mutex);
-
- if (!try_to_freeze()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (!kthread_should_stop() &&
- (!btrfs_transaction_blocked(root->fs_info) ||
- cannot_commit))
- schedule_timeout(delay);
- __set_current_state(TASK_RUNNING);
- }
- } while (!kthread_should_stop());
- return 0;
-}
-
-/*
- * this will find the highest generation in the array of
- * root backups. The index of the newest entry is returned,
- * or -1 if we can't find anything.
- *
- * We check to make sure the array is valid by comparing the
- * generation of the latest root in the array with the generation
- * in the super block. If they don't match we pitch it.
- */
-static int find_newest_super_backup(struct btrfs_fs_info *info, u64 newest_gen)
-{
- u64 cur;
- int newest_index = -1;
- struct btrfs_root_backup *root_backup;
- int i;
-
- for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
- root_backup = info->super_copy->super_roots + i;
- cur = btrfs_backup_tree_root_gen(root_backup);
- if (cur == newest_gen)
- newest_index = i;
- }
-
- /* check to see if we actually wrapped around */
- if (newest_index == BTRFS_NUM_BACKUP_ROOTS - 1) {
- root_backup = info->super_copy->super_roots;
- cur = btrfs_backup_tree_root_gen(root_backup);
- if (cur == newest_gen)
- newest_index = 0;
- }
- return newest_index;
-}
-
-
-/*
- * find the oldest backup so we know where to store new entries
- * in the backup array. This will set the backup_root_index
- * field in the fs_info struct
- */
-static void find_oldest_super_backup(struct btrfs_fs_info *info,
- u64 newest_gen)
-{
- int newest_index = -1;
-
- newest_index = find_newest_super_backup(info, newest_gen);
- /* if there was garbage in there, just move along */
- if (newest_index == -1) {
- info->backup_root_index = 0;
- } else {
- info->backup_root_index = (newest_index + 1) % BTRFS_NUM_BACKUP_ROOTS;
- }
-}
-
-/*
- * copy all the root pointers into the super backup array.
- * this will bump the backup pointer by one when it is
- * done
- */
-static void backup_super_roots(struct btrfs_fs_info *info)
-{
- int next_backup;
- struct btrfs_root_backup *root_backup;
- int last_backup;
-
- next_backup = info->backup_root_index;
- last_backup = (next_backup + BTRFS_NUM_BACKUP_ROOTS - 1) %
- BTRFS_NUM_BACKUP_ROOTS;
-
- /*
-	 * just overwrite the last backup if we're at the same generation;
- * this happens only at umount
- */
- root_backup = info->super_for_commit->super_roots + last_backup;
- if (btrfs_backup_tree_root_gen(root_backup) ==
- btrfs_header_generation(info->tree_root->node))
- next_backup = last_backup;
-
- root_backup = info->super_for_commit->super_roots + next_backup;
-
- /*
- * make sure all of our padding and empty slots get zero filled
- * regardless of which ones we use today
- */
- memset(root_backup, 0, sizeof(*root_backup));
-
- info->backup_root_index = (next_backup + 1) % BTRFS_NUM_BACKUP_ROOTS;
-
- btrfs_set_backup_tree_root(root_backup, info->tree_root->node->start);
- btrfs_set_backup_tree_root_gen(root_backup,
- btrfs_header_generation(info->tree_root->node));
-
- btrfs_set_backup_tree_root_level(root_backup,
- btrfs_header_level(info->tree_root->node));
-
- btrfs_set_backup_chunk_root(root_backup, info->chunk_root->node->start);
- btrfs_set_backup_chunk_root_gen(root_backup,
- btrfs_header_generation(info->chunk_root->node));
- btrfs_set_backup_chunk_root_level(root_backup,
- btrfs_header_level(info->chunk_root->node));
-
- btrfs_set_backup_extent_root(root_backup, info->extent_root->node->start);
- btrfs_set_backup_extent_root_gen(root_backup,
- btrfs_header_generation(info->extent_root->node));
- btrfs_set_backup_extent_root_level(root_backup,
- btrfs_header_level(info->extent_root->node));
-
- /*
- * we might commit during log recovery, which happens before we set
- * the fs_root. Make sure it is valid before we fill it in.
- */
- if (info->fs_root && info->fs_root->node) {
- btrfs_set_backup_fs_root(root_backup,
- info->fs_root->node->start);
- btrfs_set_backup_fs_root_gen(root_backup,
- btrfs_header_generation(info->fs_root->node));
- btrfs_set_backup_fs_root_level(root_backup,
- btrfs_header_level(info->fs_root->node));
- }
-
- btrfs_set_backup_dev_root(root_backup, info->dev_root->node->start);
- btrfs_set_backup_dev_root_gen(root_backup,
- btrfs_header_generation(info->dev_root->node));
- btrfs_set_backup_dev_root_level(root_backup,
- btrfs_header_level(info->dev_root->node));
-
- btrfs_set_backup_csum_root(root_backup, info->csum_root->node->start);
- btrfs_set_backup_csum_root_gen(root_backup,
- btrfs_header_generation(info->csum_root->node));
- btrfs_set_backup_csum_root_level(root_backup,
- btrfs_header_level(info->csum_root->node));
-
- btrfs_set_backup_total_bytes(root_backup,
- btrfs_super_total_bytes(info->super_copy));
- btrfs_set_backup_bytes_used(root_backup,
- btrfs_super_bytes_used(info->super_copy));
- btrfs_set_backup_num_devices(root_backup,
- btrfs_super_num_devices(info->super_copy));
-
- /*
- * if we don't copy this out to the super_copy, it won't get remembered
- * for the next commit
- */
- memcpy(&info->super_copy->super_roots,
- &info->super_for_commit->super_roots,
- sizeof(*root_backup) * BTRFS_NUM_BACKUP_ROOTS);
-}
-
-/*
- * this copies info out of the root backup array and back into
- * the in-memory super block. It is meant to help iterate through
- * the array, so you send it the number of backups you've already
- * tried and the last backup index you used.
- *
- * this returns -1 when it has tried all the backups
- */
-static noinline int next_root_backup(struct btrfs_fs_info *info,
- struct btrfs_super_block *super,
- int *num_backups_tried, int *backup_index)
-{
- struct btrfs_root_backup *root_backup;
- int newest = *backup_index;
-
- if (*num_backups_tried == 0) {
- u64 gen = btrfs_super_generation(super);
-
- newest = find_newest_super_backup(info, gen);
- if (newest == -1)
- return -1;
-
- *backup_index = newest;
- *num_backups_tried = 1;
- } else if (*num_backups_tried == BTRFS_NUM_BACKUP_ROOTS) {
- /* we've tried all the backups, all done */
- return -1;
- } else {
- /* jump to the next oldest backup */
- newest = (*backup_index + BTRFS_NUM_BACKUP_ROOTS - 1) %
- BTRFS_NUM_BACKUP_ROOTS;
- *backup_index = newest;
- *num_backups_tried += 1;
- }
- root_backup = super->super_roots + newest;
-
- btrfs_set_super_generation(super,
- btrfs_backup_tree_root_gen(root_backup));
- btrfs_set_super_root(super, btrfs_backup_tree_root(root_backup));
- btrfs_set_super_root_level(super,
- btrfs_backup_tree_root_level(root_backup));
- btrfs_set_super_bytes_used(super, btrfs_backup_bytes_used(root_backup));
-
- /*
-	 * fixme: the total bytes and num_devices need to match, otherwise
-	 * we need a fsck
- */
- btrfs_set_super_total_bytes(super, btrfs_backup_total_bytes(root_backup));
- btrfs_set_super_num_devices(super, btrfs_backup_num_devices(root_backup));
- return 0;
-}
-
-/* helper to cleanup tree roots */
-static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
-{
- free_extent_buffer(info->tree_root->node);
- free_extent_buffer(info->tree_root->commit_root);
- free_extent_buffer(info->dev_root->node);
- free_extent_buffer(info->dev_root->commit_root);
- free_extent_buffer(info->extent_root->node);
- free_extent_buffer(info->extent_root->commit_root);
- free_extent_buffer(info->csum_root->node);
- free_extent_buffer(info->csum_root->commit_root);
-
- info->tree_root->node = NULL;
- info->tree_root->commit_root = NULL;
- info->dev_root->node = NULL;
- info->dev_root->commit_root = NULL;
- info->extent_root->node = NULL;
- info->extent_root->commit_root = NULL;
- info->csum_root->node = NULL;
- info->csum_root->commit_root = NULL;
-
- if (chunk_root) {
- free_extent_buffer(info->chunk_root->node);
- free_extent_buffer(info->chunk_root->commit_root);
- info->chunk_root->node = NULL;
- info->chunk_root->commit_root = NULL;
- }
-}
-
-
-int open_ctree(struct super_block *sb,
- struct btrfs_fs_devices *fs_devices,
- char *options)
-{
- u32 sectorsize;
- u32 nodesize;
- u32 leafsize;
- u32 blocksize;
- u32 stripesize;
- u64 generation;
- u64 features;
- struct btrfs_key location;
- struct buffer_head *bh;
- struct btrfs_super_block *disk_super;
- struct btrfs_fs_info *fs_info = btrfs_sb(sb);
- struct btrfs_root *tree_root;
- struct btrfs_root *extent_root;
- struct btrfs_root *csum_root;
- struct btrfs_root *chunk_root;
- struct btrfs_root *dev_root;
- struct btrfs_root *log_tree_root;
- int ret;
- int err = -EINVAL;
- int num_backups_tried = 0;
- int backup_index = 0;
-
- tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
- extent_root = fs_info->extent_root = btrfs_alloc_root(fs_info);
- csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info);
- chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
- dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info);
-
- if (!tree_root || !extent_root || !csum_root ||
- !chunk_root || !dev_root) {
- err = -ENOMEM;
- goto fail;
- }
-
- ret = init_srcu_struct(&fs_info->subvol_srcu);
- if (ret) {
- err = ret;
- goto fail;
- }
-
- ret = setup_bdi(fs_info, &fs_info->bdi);
- if (ret) {
- err = ret;
- goto fail_srcu;
- }
-
- fs_info->btree_inode = new_inode(sb);
- if (!fs_info->btree_inode) {
- err = -ENOMEM;
- goto fail_bdi;
- }
-
- mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
-
- INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
- INIT_LIST_HEAD(&fs_info->trans_list);
- INIT_LIST_HEAD(&fs_info->dead_roots);
- INIT_LIST_HEAD(&fs_info->delayed_iputs);
- INIT_LIST_HEAD(&fs_info->hashers);
- INIT_LIST_HEAD(&fs_info->delalloc_inodes);
- INIT_LIST_HEAD(&fs_info->ordered_operations);
- INIT_LIST_HEAD(&fs_info->caching_block_groups);
- spin_lock_init(&fs_info->delalloc_lock);
- spin_lock_init(&fs_info->trans_lock);
- spin_lock_init(&fs_info->ref_cache_lock);
- spin_lock_init(&fs_info->fs_roots_radix_lock);
- spin_lock_init(&fs_info->delayed_iput_lock);
- spin_lock_init(&fs_info->defrag_inodes_lock);
- spin_lock_init(&fs_info->free_chunk_lock);
- mutex_init(&fs_info->reloc_mutex);
-
- init_completion(&fs_info->kobj_unregister);
- INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
- INIT_LIST_HEAD(&fs_info->space_info);
- btrfs_mapping_init(&fs_info->mapping_tree);
- btrfs_init_block_rsv(&fs_info->global_block_rsv);
- btrfs_init_block_rsv(&fs_info->delalloc_block_rsv);
- btrfs_init_block_rsv(&fs_info->trans_block_rsv);
- btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
- btrfs_init_block_rsv(&fs_info->empty_block_rsv);
- btrfs_init_block_rsv(&fs_info->delayed_block_rsv);
- atomic_set(&fs_info->nr_async_submits, 0);
- atomic_set(&fs_info->async_delalloc_pages, 0);
- atomic_set(&fs_info->async_submit_draining, 0);
- atomic_set(&fs_info->nr_async_bios, 0);
- atomic_set(&fs_info->defrag_running, 0);
- fs_info->sb = sb;
- fs_info->max_inline = 8192 * 1024;
- fs_info->metadata_ratio = 0;
- fs_info->defrag_inodes = RB_ROOT;
- fs_info->trans_no_join = 0;
- fs_info->free_chunk_space = 0;
-
- /* readahead state */
- INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
- spin_lock_init(&fs_info->reada_lock);
-
- fs_info->thread_pool_size = min_t(unsigned long,
- num_online_cpus() + 2, 8);
-
- INIT_LIST_HEAD(&fs_info->ordered_extents);
- spin_lock_init(&fs_info->ordered_extent_lock);
- fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
- GFP_NOFS);
- if (!fs_info->delayed_root) {
- err = -ENOMEM;
- goto fail_iput;
- }
- btrfs_init_delayed_root(fs_info->delayed_root);
-
- mutex_init(&fs_info->scrub_lock);
- atomic_set(&fs_info->scrubs_running, 0);
- atomic_set(&fs_info->scrub_pause_req, 0);
- atomic_set(&fs_info->scrubs_paused, 0);
- atomic_set(&fs_info->scrub_cancel_req, 0);
- init_waitqueue_head(&fs_info->scrub_pause_wait);
- init_rwsem(&fs_info->scrub_super_lock);
- fs_info->scrub_workers_refcnt = 0;
-#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- fs_info->check_integrity_print_mask = 0;
-#endif
-
- spin_lock_init(&fs_info->balance_lock);
- mutex_init(&fs_info->balance_mutex);
- atomic_set(&fs_info->balance_running, 0);
- atomic_set(&fs_info->balance_pause_req, 0);
- atomic_set(&fs_info->balance_cancel_req, 0);
- fs_info->balance_ctl = NULL;
- init_waitqueue_head(&fs_info->balance_wait_q);
-
- sb->s_blocksize = 4096;
- sb->s_blocksize_bits = blksize_bits(4096);
- sb->s_bdi = &fs_info->bdi;
-
- fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
- set_nlink(fs_info->btree_inode, 1);
- /*
- * we set the i_size on the btree inode to the max possible int.
- * the real end of the address space is determined by all of
- * the devices in the system
- */
- fs_info->btree_inode->i_size = OFFSET_MAX;
- fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
- fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;
-
- RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
- extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
- fs_info->btree_inode->i_mapping);
- BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
- extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
-
- BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
-
- BTRFS_I(fs_info->btree_inode)->root = tree_root;
- memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
- sizeof(struct btrfs_key));
- BTRFS_I(fs_info->btree_inode)->dummy_inode = 1;
- insert_inode_hash(fs_info->btree_inode);
-
- spin_lock_init(&fs_info->block_group_cache_lock);
- fs_info->block_group_cache_tree = RB_ROOT;
-
- extent_io_tree_init(&fs_info->freed_extents[0],
- fs_info->btree_inode->i_mapping);
- extent_io_tree_init(&fs_info->freed_extents[1],
- fs_info->btree_inode->i_mapping);
- fs_info->pinned_extents = &fs_info->freed_extents[0];
- fs_info->do_barriers = 1;
-
-
- mutex_init(&fs_info->ordered_operations_mutex);
- mutex_init(&fs_info->tree_log_mutex);
- mutex_init(&fs_info->chunk_mutex);
- mutex_init(&fs_info->transaction_kthread_mutex);
- mutex_init(&fs_info->cleaner_mutex);
- mutex_init(&fs_info->volume_mutex);
- init_rwsem(&fs_info->extent_commit_sem);
- init_rwsem(&fs_info->cleanup_work_sem);
- init_rwsem(&fs_info->subvol_sem);
-
- btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
- btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
-
- init_waitqueue_head(&fs_info->transaction_throttle);
- init_waitqueue_head(&fs_info->transaction_wait);
- init_waitqueue_head(&fs_info->transaction_blocked_wait);
- init_waitqueue_head(&fs_info->async_submit_wait);
-
- __setup_root(4096, 4096, 4096, 4096, tree_root,
- fs_info, BTRFS_ROOT_TREE_OBJECTID);
-
- invalidate_bdev(fs_devices->latest_bdev);
- bh = btrfs_read_dev_super(fs_devices->latest_bdev);
- if (!bh) {
- err = -EINVAL;
- goto fail_alloc;
- }
-
- memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
- memcpy(fs_info->super_for_commit, fs_info->super_copy,
- sizeof(*fs_info->super_for_commit));
- brelse(bh);
-
- memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
-
- disk_super = fs_info->super_copy;
- if (!btrfs_super_root(disk_super))
- goto fail_alloc;
-
- /* check FS state, whether FS is broken. */
- fs_info->fs_state |= btrfs_super_flags(disk_super);
-
- ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
- if (ret) {
- printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
- err = ret;
- goto fail_alloc;
- }
-
- /*
- * run through our array of backup supers and setup
- * our ring pointer to the oldest one
- */
- generation = btrfs_super_generation(disk_super);
- find_oldest_super_backup(fs_info, generation);
-
- /*
- * In the long term, we'll store the compression type in the super
- * block, and it'll be used for per file compression control.
- */
- fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
-
- ret = btrfs_parse_options(tree_root, options);
- if (ret) {
- err = ret;
- goto fail_alloc;
- }
-
- features = btrfs_super_incompat_flags(disk_super) &
- ~BTRFS_FEATURE_INCOMPAT_SUPP;
- if (features) {
- printk(KERN_ERR "BTRFS: couldn't mount because of "
- "unsupported optional features (%Lx).\n",
- (unsigned long long)features);
- err = -EINVAL;
- goto fail_alloc;
- }
-
- if (btrfs_super_leafsize(disk_super) !=
- btrfs_super_nodesize(disk_super)) {
- printk(KERN_ERR "BTRFS: couldn't mount because metadata "
- "blocksizes don't match. node %d leaf %d\n",
- btrfs_super_nodesize(disk_super),
- btrfs_super_leafsize(disk_super));
- err = -EINVAL;
- goto fail_alloc;
- }
- if (btrfs_super_leafsize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
- printk(KERN_ERR "BTRFS: couldn't mount because metadata "
- "blocksize (%d) was too large\n",
- btrfs_super_leafsize(disk_super));
- err = -EINVAL;
- goto fail_alloc;
- }
-
- features = btrfs_super_incompat_flags(disk_super);
- features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
- if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
- features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
-
- /*
- * flag our filesystem as having big metadata blocks if
- * they are bigger than the page size
- */
- if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) {
- if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
- printk(KERN_INFO "btrfs flagging fs with big metadata feature\n");
- features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
- }
-
- nodesize = btrfs_super_nodesize(disk_super);
- leafsize = btrfs_super_leafsize(disk_super);
- sectorsize = btrfs_super_sectorsize(disk_super);
- stripesize = btrfs_super_stripesize(disk_super);
-
- /*
- * mixed block groups end up with duplicate but slightly offset
- * extent buffers for the same range. This leads to corruption.
- */
- if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
- (sectorsize != leafsize)) {
- printk(KERN_WARNING "btrfs: unequal leaf/node/sector sizes "
- "are not allowed for mixed block groups on %s\n",
- sb->s_id);
- goto fail_alloc;
- }
-
- btrfs_set_super_incompat_flags(disk_super, features);
-
- features = btrfs_super_compat_ro_flags(disk_super) &
- ~BTRFS_FEATURE_COMPAT_RO_SUPP;
- if (!(sb->s_flags & MS_RDONLY) && features) {
- printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
- "unsupported option features (%Lx).\n",
- (unsigned long long)features);
- err = -EINVAL;
- goto fail_alloc;
- }
-
- btrfs_init_workers(&fs_info->generic_worker,
- "genwork", 1, NULL);
-
- btrfs_init_workers(&fs_info->workers, "worker",
- fs_info->thread_pool_size,
- &fs_info->generic_worker);
-
- btrfs_init_workers(&fs_info->delalloc_workers, "delalloc",
- fs_info->thread_pool_size,
- &fs_info->generic_worker);
-
- btrfs_init_workers(&fs_info->submit_workers, "submit",
- min_t(u64, fs_devices->num_devices,
- fs_info->thread_pool_size),
- &fs_info->generic_worker);
-
- btrfs_init_workers(&fs_info->caching_workers, "cache",
- 2, &fs_info->generic_worker);
-
- /* a higher idle thresh on the submit workers makes it much more
- * likely that bios will be sent down in a sane order to the
- * devices
- */
- fs_info->submit_workers.idle_thresh = 64;
-
- fs_info->workers.idle_thresh = 16;
- fs_info->workers.ordered = 1;
-
- fs_info->delalloc_workers.idle_thresh = 2;
- fs_info->delalloc_workers.ordered = 1;
-
- btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1,
- &fs_info->generic_worker);
- btrfs_init_workers(&fs_info->endio_workers, "endio",
- fs_info->thread_pool_size,
- &fs_info->generic_worker);
- btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta",
- fs_info->thread_pool_size,
- &fs_info->generic_worker);
- btrfs_init_workers(&fs_info->endio_meta_write_workers,
- "endio-meta-write", fs_info->thread_pool_size,
- &fs_info->generic_worker);
- btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
- fs_info->thread_pool_size,
- &fs_info->generic_worker);
- btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
- 1, &fs_info->generic_worker);
- btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta",
- fs_info->thread_pool_size,
- &fs_info->generic_worker);
- btrfs_init_workers(&fs_info->readahead_workers, "readahead",
- fs_info->thread_pool_size,
- &fs_info->generic_worker);
-
- /*
- * endios are largely parallel and should have a very
- * low idle thresh
- */
- fs_info->endio_workers.idle_thresh = 4;
- fs_info->endio_meta_workers.idle_thresh = 4;
-
- fs_info->endio_write_workers.idle_thresh = 2;
- fs_info->endio_meta_write_workers.idle_thresh = 2;
- fs_info->readahead_workers.idle_thresh = 2;
-
- /*
- * btrfs_start_workers can really only fail because of ENOMEM so just
- * return -ENOMEM if any of these fail.
- */
- ret = btrfs_start_workers(&fs_info->workers);
- ret |= btrfs_start_workers(&fs_info->generic_worker);
- ret |= btrfs_start_workers(&fs_info->submit_workers);
- ret |= btrfs_start_workers(&fs_info->delalloc_workers);
- ret |= btrfs_start_workers(&fs_info->fixup_workers);
- ret |= btrfs_start_workers(&fs_info->endio_workers);
- ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
- ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
- ret |= btrfs_start_workers(&fs_info->endio_write_workers);
- ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
- ret |= btrfs_start_workers(&fs_info->delayed_workers);
- ret |= btrfs_start_workers(&fs_info->caching_workers);
- ret |= btrfs_start_workers(&fs_info->readahead_workers);
- if (ret) {
- ret = -ENOMEM;
- goto fail_sb_buffer;
- }
-
- fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
- fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
- 4 * 1024 * 1024 / PAGE_CACHE_SIZE);
-
- tree_root->nodesize = nodesize;
- tree_root->leafsize = leafsize;
- tree_root->sectorsize = sectorsize;
- tree_root->stripesize = stripesize;
-
- sb->s_blocksize = sectorsize;
- sb->s_blocksize_bits = blksize_bits(sectorsize);
-
- if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
- sizeof(disk_super->magic))) {
- printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
- goto fail_sb_buffer;
- }
-
- if (sectorsize != PAGE_SIZE) {
- printk(KERN_WARNING "btrfs: Incompatible sector size(%lu) "
- "found on %s\n", (unsigned long)sectorsize, sb->s_id);
- goto fail_sb_buffer;
- }
-
- mutex_lock(&fs_info->chunk_mutex);
- ret = btrfs_read_sys_array(tree_root);
- mutex_unlock(&fs_info->chunk_mutex);
- if (ret) {
- printk(KERN_WARNING "btrfs: failed to read the system "
- "array on %s\n", sb->s_id);
- goto fail_sb_buffer;
- }
-
- blocksize = btrfs_level_size(tree_root,
- btrfs_super_chunk_root_level(disk_super));
- generation = btrfs_super_chunk_root_generation(disk_super);
-
- __setup_root(nodesize, leafsize, sectorsize, stripesize,
- chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
-
- chunk_root->node = read_tree_block(chunk_root,
- btrfs_super_chunk_root(disk_super),
- blocksize, generation);
- BUG_ON(!chunk_root->node); /* -ENOMEM */
- if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
- printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
- sb->s_id);
- goto fail_tree_roots;
- }
- btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
- chunk_root->commit_root = btrfs_root_node(chunk_root);
-
- read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
- BTRFS_UUID_SIZE);
-
- ret = btrfs_read_chunk_tree(chunk_root);
- if (ret) {
- printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n",
- sb->s_id);
- goto fail_tree_roots;
- }
-
- btrfs_close_extra_devices(fs_devices);
-
- if (!fs_devices->latest_bdev) {
- printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
- sb->s_id);
- goto fail_tree_roots;
- }
-
-retry_root_backup:
- blocksize = btrfs_level_size(tree_root,
- btrfs_super_root_level(disk_super));
- generation = btrfs_super_generation(disk_super);
-
- tree_root->node = read_tree_block(tree_root,
- btrfs_super_root(disk_super),
- blocksize, generation);
- if (!tree_root->node ||
- !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
- printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
- sb->s_id);
-
- goto recovery_tree_root;
- }
-
- btrfs_set_root_node(&tree_root->root_item, tree_root->node);
- tree_root->commit_root = btrfs_root_node(tree_root);
-
- ret = find_and_setup_root(tree_root, fs_info,
- BTRFS_EXTENT_TREE_OBJECTID, extent_root);
- if (ret)
- goto recovery_tree_root;
- extent_root->track_dirty = 1;
-
- ret = find_and_setup_root(tree_root, fs_info,
- BTRFS_DEV_TREE_OBJECTID, dev_root);
- if (ret)
- goto recovery_tree_root;
- dev_root->track_dirty = 1;
-
- ret = find_and_setup_root(tree_root, fs_info,
- BTRFS_CSUM_TREE_OBJECTID, csum_root);
- if (ret)
- goto recovery_tree_root;
-
- csum_root->track_dirty = 1;
-
- fs_info->generation = generation;
- fs_info->last_trans_committed = generation;
-
- ret = btrfs_init_space_info(fs_info);
- if (ret) {
- printk(KERN_ERR "Failed to initial space info: %d\n", ret);
- goto fail_block_groups;
- }
-
- ret = btrfs_read_block_groups(extent_root);
- if (ret) {
- printk(KERN_ERR "Failed to read block groups: %d\n", ret);
- goto fail_block_groups;
- }
-
- fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
- "btrfs-cleaner");
- if (IS_ERR(fs_info->cleaner_kthread))
- goto fail_block_groups;
-
- fs_info->transaction_kthread = kthread_run(transaction_kthread,
- tree_root,
- "btrfs-transaction");
- if (IS_ERR(fs_info->transaction_kthread))
- goto fail_cleaner;
-
- if (!btrfs_test_opt(tree_root, SSD) &&
- !btrfs_test_opt(tree_root, NOSSD) &&
- !fs_info->fs_devices->rotating) {
- printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD "
- "mode\n");
- btrfs_set_opt(fs_info->mount_opt, SSD);
- }
-
-#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) {
- ret = btrfsic_mount(tree_root, fs_devices,
- btrfs_test_opt(tree_root,
- CHECK_INTEGRITY_INCLUDING_EXTENT_DATA) ?
- 1 : 0,
- fs_info->check_integrity_print_mask);
- if (ret)
- printk(KERN_WARNING "btrfs: failed to initialize"
- " integrity check module %s\n", sb->s_id);
- }
-#endif
-
- /* do not make disk changes in broken FS */
- if (btrfs_super_log_root(disk_super) != 0 &&
- !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
- u64 bytenr = btrfs_super_log_root(disk_super);
-
- if (fs_devices->rw_devices == 0) {
- printk(KERN_WARNING "Btrfs log replay required "
- "on RO media\n");
- err = -EIO;
- goto fail_trans_kthread;
- }
- blocksize =
- btrfs_level_size(tree_root,
- btrfs_super_log_root_level(disk_super));
-
- log_tree_root = btrfs_alloc_root(fs_info);
- if (!log_tree_root) {
- err = -ENOMEM;
- goto fail_trans_kthread;
- }
-
- __setup_root(nodesize, leafsize, sectorsize, stripesize,
- log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
-
- log_tree_root->node = read_tree_block(tree_root, bytenr,
- blocksize,
- generation + 1);
- /* returns with log_tree_root freed on success */
- ret = btrfs_recover_log_trees(log_tree_root);
- if (ret) {
- btrfs_error(tree_root->fs_info, ret,
- "Failed to recover log tree");
- free_extent_buffer(log_tree_root->node);
- kfree(log_tree_root);
- goto fail_trans_kthread;
- }
-
- if (sb->s_flags & MS_RDONLY) {
- ret = btrfs_commit_super(tree_root);
- if (ret)
- goto fail_trans_kthread;
- }
- }
-
- ret = btrfs_find_orphan_roots(tree_root);
- if (ret)
- goto fail_trans_kthread;
-
- if (!(sb->s_flags & MS_RDONLY)) {
- ret = btrfs_cleanup_fs_roots(fs_info);
- if (ret) {
- /* errors from cleaning up the fs roots are ignored here */
- }
-
- ret = btrfs_recover_relocation(tree_root);
- if (ret < 0) {
- printk(KERN_WARNING
- "btrfs: failed to recover relocation\n");
- err = -EINVAL;
- goto fail_trans_kthread;
- }
- }
-
- location.objectid = BTRFS_FS_TREE_OBJECTID;
- location.type = BTRFS_ROOT_ITEM_KEY;
- location.offset = (u64)-1;
-
- fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
- if (!fs_info->fs_root)
- goto fail_trans_kthread;
- if (IS_ERR(fs_info->fs_root)) {
- err = PTR_ERR(fs_info->fs_root);
- goto fail_trans_kthread;
- }
-
- if (!(sb->s_flags & MS_RDONLY)) {
- down_read(&fs_info->cleanup_work_sem);
- err = btrfs_orphan_cleanup(fs_info->fs_root);
- if (!err)
- err = btrfs_orphan_cleanup(fs_info->tree_root);
- up_read(&fs_info->cleanup_work_sem);
-
- if (!err)
- err = btrfs_recover_balance(fs_info->tree_root);
-
- if (err) {
- close_ctree(tree_root);
- return err;
- }
- }
-
- return 0;
-
-fail_trans_kthread:
- kthread_stop(fs_info->transaction_kthread);
-fail_cleaner:
- kthread_stop(fs_info->cleaner_kthread);
-
- /*
- * make sure we're done with the btree inode before we stop our
- * kthreads
- */
- filemap_write_and_wait(fs_info->btree_inode->i_mapping);
- invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
-
-fail_block_groups:
- btrfs_free_block_groups(fs_info);
-
-fail_tree_roots:
- free_root_pointers(fs_info, 1);
-
-fail_sb_buffer:
- btrfs_stop_workers(&fs_info->generic_worker);
- btrfs_stop_workers(&fs_info->readahead_workers);
- btrfs_stop_workers(&fs_info->fixup_workers);
- btrfs_stop_workers(&fs_info->delalloc_workers);
- btrfs_stop_workers(&fs_info->workers);
- btrfs_stop_workers(&fs_info->endio_workers);
- btrfs_stop_workers(&fs_info->endio_meta_workers);
- btrfs_stop_workers(&fs_info->endio_meta_write_workers);
- btrfs_stop_workers(&fs_info->endio_write_workers);
- btrfs_stop_workers(&fs_info->endio_freespace_worker);
- btrfs_stop_workers(&fs_info->submit_workers);
- btrfs_stop_workers(&fs_info->delayed_workers);
- btrfs_stop_workers(&fs_info->caching_workers);
-fail_alloc:
-fail_iput:
- btrfs_mapping_tree_free(&fs_info->mapping_tree);
-
- invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
- iput(fs_info->btree_inode);
-fail_bdi:
- bdi_destroy(&fs_info->bdi);
-fail_srcu:
- cleanup_srcu_struct(&fs_info->subvol_srcu);
-fail:
- btrfs_close_devices(fs_info->fs_devices);
- return err;
-
-recovery_tree_root:
- if (!btrfs_test_opt(tree_root, RECOVERY))
- goto fail_tree_roots;
-
- free_root_pointers(fs_info, 0);
-
- /* don't use the log in recovery mode, it won't be valid */
- btrfs_set_super_log_root(disk_super, 0);
-
- /* we can't trust the free space cache either */
- btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE);
-
- ret = next_root_backup(fs_info, fs_info->super_copy,
- &num_backups_tried, &backup_index);
- if (ret == -1)
- goto fail_block_groups;
- goto retry_root_backup;
-}
-
-static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
-{
- char b[BDEVNAME_SIZE];
-
- if (uptodate) {
- set_buffer_uptodate(bh);
- } else {
- printk_ratelimited(KERN_WARNING "lost page write due to "
- "I/O error on %s\n",
- bdevname(bh->b_bdev, b));
- /* note, we don't set_buffer_write_io_error because we have
- * our own ways of dealing with the IO errors
- */
- clear_buffer_uptodate(bh);
- }
- unlock_buffer(bh);
- put_bh(bh);
-}
-
-struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
-{
- struct buffer_head *bh;
- struct buffer_head *latest = NULL;
- struct btrfs_super_block *super;
- int i;
- u64 transid = 0;
- u64 bytenr;
-
- /* we would like to check all the supers, but that would make
- * a btrfs mount succeed after a mkfs from a different FS.
- * So, we need to add a special mount option to scan for
- * later supers, using BTRFS_SUPER_MIRROR_MAX instead
- */
- for (i = 0; i < 1; i++) {
- bytenr = btrfs_sb_offset(i);
- if (bytenr + 4096 >= i_size_read(bdev->bd_inode))
- break;
- bh = __bread(bdev, bytenr / 4096, 4096);
- if (!bh)
- continue;
-
- super = (struct btrfs_super_block *)bh->b_data;
- if (btrfs_super_bytenr(super) != bytenr ||
- strncmp((char *)(&super->magic), BTRFS_MAGIC,
- sizeof(super->magic))) {
- brelse(bh);
- continue;
- }
-
- if (!latest || btrfs_super_generation(super) > transid) {
- brelse(latest);
- latest = bh;
- transid = btrfs_super_generation(super);
- } else {
- brelse(bh);
- }
- }
- return latest;
-}
-
-/*
- * this should be called twice, once with wait == 0 and
- * once with wait == 1. When wait == 0 is done, all the buffer heads
- * we write are pinned.
- *
- * They are released when wait == 1 is done.
- * max_mirrors must be the same for both runs, and it indicates how
- * many supers on this one device should be written.
- *
- * max_mirrors == 0 means to write them all.
- */
-static int write_dev_supers(struct btrfs_device *device,
- struct btrfs_super_block *sb,
- int do_barriers, int wait, int max_mirrors)
-{
- struct buffer_head *bh;
- int i;
- int ret;
- int errors = 0;
- u32 crc;
- u64 bytenr;
-
- if (max_mirrors == 0)
- max_mirrors = BTRFS_SUPER_MIRROR_MAX;
-
- for (i = 0; i < max_mirrors; i++) {
- bytenr = btrfs_sb_offset(i);
- if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
- break;
-
- if (wait) {
- bh = __find_get_block(device->bdev, bytenr / 4096,
- BTRFS_SUPER_INFO_SIZE);
- BUG_ON(!bh);
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh))
- errors++;
-
- /* drop our reference */
- brelse(bh);
-
- /* drop the reference from the wait == 0 run */
- brelse(bh);
- continue;
- } else {
- btrfs_set_super_bytenr(sb, bytenr);
-
- crc = ~(u32)0;
- crc = btrfs_csum_data(NULL, (char *)sb +
- BTRFS_CSUM_SIZE, crc,
- BTRFS_SUPER_INFO_SIZE -
- BTRFS_CSUM_SIZE);
- btrfs_csum_final(crc, sb->csum);
-
- /*
- * one reference for us, and we leave it for the
- * caller
- */
- bh = __getblk(device->bdev, bytenr / 4096,
- BTRFS_SUPER_INFO_SIZE);
- memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
-
- /* one reference for submit_bh */
- get_bh(bh);
-
- set_buffer_uptodate(bh);
- lock_buffer(bh);
- bh->b_end_io = btrfs_end_buffer_write_sync;
- }
-
- /*
- * we fua the first super. The others we allow
- * to go down lazy.
- */
- ret = btrfsic_submit_bh(WRITE_FUA, bh);
- if (ret)
- errors++;
- }
- return errors < i ? 0 : -1;
-}
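As the comment above the function notes, callers are expected to invoke it twice per device with the same max_mirrors; write_all_supers() further down follows exactly that pattern. A condensed sketch of the calling convention (error handling omitted; dev, sb, do_barriers and max_mirrors assumed to be set up as in write_all_supers()):

/* pass 1 (wait == 0): checksum the super, copy it into buffer heads for
 * each mirror and submit them; the buffer heads stay pinned */
ret = write_dev_supers(dev, sb, do_barriers, 0, max_mirrors);

/* ... submit the supers for the remaining devices the same way ... */

/* pass 2 (wait == 1): wait on the same buffer heads and drop both the
 * reference taken here and the one left over from the first pass */
ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors);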
-
-/*
- * endio for write_dev_flush; this wakes anyone waiting
- * for the barrier when it is done
- */
-static void btrfs_end_empty_barrier(struct bio *bio, int err)
-{
- if (err) {
- if (err == -EOPNOTSUPP)
- set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
- }
- if (bio->bi_private)
- complete(bio->bi_private);
- bio_put(bio);
-}
-
-/*
- * trigger flushes for one of the devices. If you pass wait == 0, the flushes are
- * sent down. With wait == 1, it waits for the previous flush.
- *
- * any device where the flush fails with eopnotsupp is flagged as not-barrier
- * capable
- */
-static int write_dev_flush(struct btrfs_device *device, int wait)
-{
- struct bio *bio;
- int ret = 0;
-
- if (device->nobarriers)
- return 0;
-
- if (wait) {
- bio = device->flush_bio;
- if (!bio)
- return 0;
-
- wait_for_completion(&device->flush_wait);
-
- if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
- printk("btrfs: disabling barriers on dev %s\n",
- device->name);
- device->nobarriers = 1;
- }
- if (!bio_flagged(bio, BIO_UPTODATE)) {
- ret = -EIO;
- }
-
- /* drop the reference from the wait == 0 run */
- bio_put(bio);
- device->flush_bio = NULL;
-
- return ret;
- }
-
- /*
- * one reference for us, and we leave it for the
- * caller
- */
- device->flush_bio = NULL;;
- bio = bio_alloc(GFP_NOFS, 0);
- if (!bio)
- return -ENOMEM;
-
- bio->bi_end_io = btrfs_end_empty_barrier;
- bio->bi_bdev = device->bdev;
- init_completion(&device->flush_wait);
- bio->bi_private = &device->flush_wait;
- device->flush_bio = bio;
-
- bio_get(bio);
- btrfsic_submit_bio(WRITE_FLUSH, bio);
-
- return 0;
-}
-
-/*
- * send an empty flush down to each device in parallel,
- * then wait for them
- */
-static int barrier_all_devices(struct btrfs_fs_info *info)
-{
- struct list_head *head;
- struct btrfs_device *dev;
- int errors = 0;
- int ret;
-
- /* send down all the barriers */
- head = &info->fs_devices->devices;
- list_for_each_entry_rcu(dev, head, dev_list) {
- if (!dev->bdev) {
- errors++;
- continue;
- }
- if (!dev->in_fs_metadata || !dev->writeable)
- continue;
-
- ret = write_dev_flush(dev, 0);
- if (ret)
- errors++;
- }
-
- /* wait for all the barriers */
- list_for_each_entry_rcu(dev, head, dev_list) {
- if (!dev->bdev) {
- errors++;
- continue;
- }
- if (!dev->in_fs_metadata || !dev->writeable)
- continue;
-
- ret = write_dev_flush(dev, 1);
- if (ret)
- errors++;
- }
- if (errors)
- return -EIO;
- return 0;
-}
-
-int write_all_supers(struct btrfs_root *root, int max_mirrors)
-{
- struct list_head *head;
- struct btrfs_device *dev;
- struct btrfs_super_block *sb;
- struct btrfs_dev_item *dev_item;
- int ret;
- int do_barriers;
- int max_errors;
- int total_errors = 0;
- u64 flags;
-
- max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
- do_barriers = !btrfs_test_opt(root, NOBARRIER);
- backup_super_roots(root->fs_info);
-
- sb = root->fs_info->super_for_commit;
- dev_item = &sb->dev_item;
-
- mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
- head = &root->fs_info->fs_devices->devices;
-
- if (do_barriers)
- barrier_all_devices(root->fs_info);
-
- list_for_each_entry_rcu(dev, head, dev_list) {
- if (!dev->bdev) {
- total_errors++;
- continue;
- }
- if (!dev->in_fs_metadata || !dev->writeable)
- continue;
-
- btrfs_set_stack_device_generation(dev_item, 0);
- btrfs_set_stack_device_type(dev_item, dev->type);
- btrfs_set_stack_device_id(dev_item, dev->devid);
- btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes);
- btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used);
- btrfs_set_stack_device_io_align(dev_item, dev->io_align);
- btrfs_set_stack_device_io_width(dev_item, dev->io_width);
- btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
- memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
- memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);
-
- flags = btrfs_super_flags(sb);
- btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
-
- ret = write_dev_supers(dev, sb, do_barriers, 0, max_mirrors);
- if (ret)
- total_errors++;
- }
- if (total_errors > max_errors) {
- printk(KERN_ERR "btrfs: %d errors while writing supers\n",
- total_errors);
-
- /* This shouldn't happen. FUA is masked off if unsupported */
- BUG();
- }
-
- total_errors = 0;
- list_for_each_entry_rcu(dev, head, dev_list) {
- if (!dev->bdev)
- continue;
- if (!dev->in_fs_metadata || !dev->writeable)
- continue;
-
- ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors);
- if (ret)
- total_errors++;
- }
- mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
- if (total_errors > max_errors) {
- btrfs_error(root->fs_info, -EIO,
- "%d errors while writing supers", total_errors);
- return -EIO;
- }
- return 0;
-}
-
-int write_ctree_super(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int max_mirrors)
-{
- int ret;
-
- ret = write_all_supers(root, max_mirrors);
- return ret;
-}
-
-/* Kill all outstanding I/O */
-void btrfs_abort_devices(struct btrfs_root *root)
-{
- struct list_head *head;
- struct btrfs_device *dev;
- mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
- head = &root->fs_info->fs_devices->devices;
- list_for_each_entry_rcu(dev, head, dev_list) {
- blk_abort_queue(dev->bdev->bd_disk->queue);
- }
- mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
-}
-
-void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
-{
- spin_lock(&fs_info->fs_roots_radix_lock);
- radix_tree_delete(&fs_info->fs_roots_radix,
- (unsigned long)root->root_key.objectid);
- spin_unlock(&fs_info->fs_roots_radix_lock);
-
- if (btrfs_root_refs(&root->root_item) == 0)
- synchronize_srcu(&fs_info->subvol_srcu);
-
- __btrfs_remove_free_space_cache(root->free_ino_pinned);
- __btrfs_remove_free_space_cache(root->free_ino_ctl);
- free_fs_root(root);
-}
-
-static void free_fs_root(struct btrfs_root *root)
-{
- iput(root->cache_inode);
- WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
- if (root->anon_dev)
- free_anon_bdev(root->anon_dev);
- free_extent_buffer(root->node);
- free_extent_buffer(root->commit_root);
- kfree(root->free_ino_ctl);
- kfree(root->free_ino_pinned);
- kfree(root->name);
- kfree(root);
-}
-
-static void del_fs_roots(struct btrfs_fs_info *fs_info)
-{
- int ret;
- struct btrfs_root *gang[8];
- int i;
-
- while (!list_empty(&fs_info->dead_roots)) {
- gang[0] = list_entry(fs_info->dead_roots.next,
- struct btrfs_root, root_list);
- list_del(&gang[0]->root_list);
-
- if (gang[0]->in_radix) {
- btrfs_free_fs_root(fs_info, gang[0]);
- } else {
- free_extent_buffer(gang[0]->node);
- free_extent_buffer(gang[0]->commit_root);
- kfree(gang[0]);
- }
- }
-
- while (1) {
- ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
- (void **)gang, 0,
- ARRAY_SIZE(gang));
- if (!ret)
- break;
- for (i = 0; i < ret; i++)
- btrfs_free_fs_root(fs_info, gang[i]);
- }
-}
-
-int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
-{
- u64 root_objectid = 0;
- struct btrfs_root *gang[8];
- int i;
- int ret;
-
- while (1) {
- ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
- (void **)gang, root_objectid,
- ARRAY_SIZE(gang));
- if (!ret)
- break;
-
- root_objectid = gang[ret - 1]->root_key.objectid + 1;
- for (i = 0; i < ret; i++) {
- int err;
-
- root_objectid = gang[i]->root_key.objectid;
- err = btrfs_orphan_cleanup(gang[i]);
- if (err)
- return err;
- }
- root_objectid++;
- }
- return 0;
-}
-
-int btrfs_commit_super(struct btrfs_root *root)
-{
- struct btrfs_trans_handle *trans;
- int ret;
-
- mutex_lock(&root->fs_info->cleaner_mutex);
- btrfs_run_delayed_iputs(root);
- btrfs_clean_old_snapshots(root);
- mutex_unlock(&root->fs_info->cleaner_mutex);
-
- /* wait until ongoing cleanup work done */
- down_write(&root->fs_info->cleanup_work_sem);
- up_write(&root->fs_info->cleanup_work_sem);
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans, root);
- if (ret)
- return ret;
- /* run commit again to drop the original snapshot */
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans, root);
- if (ret)
- return ret;
- ret = btrfs_write_and_wait_transaction(NULL, root);
- if (ret) {
- btrfs_error(root->fs_info, ret,
- "Failed to sync btree inode to disk.");
- return ret;
- }
-
- ret = write_ctree_super(NULL, root, 0);
- return ret;
-}
-
-int close_ctree(struct btrfs_root *root)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- int ret;
-
- fs_info->closing = 1;
- smp_mb();
-
- /* pause restriper - we want to resume on mount */
- btrfs_pause_balance(root->fs_info);
-
- btrfs_scrub_cancel(root);
-
- /* wait for any defraggers to finish */
- wait_event(fs_info->transaction_wait,
- (atomic_read(&fs_info->defrag_running) == 0));
-
- /* clear out the rbtree of defraggable inodes */
- btrfs_run_defrag_inodes(fs_info);
-
- /*
- * There are two situations in which an error can flip btrfs read-only:
- *
- * 1. btrfs flips read-only somewhere else before btrfs_commit_super;
- *    sb->s_flags already has MS_RDONLY set, so btrfs skips writing the
- *    sb directly in order to keep the ERROR state on disk.
- *
- * 2. btrfs flips read-only inside btrfs_commit_super itself; in that
- *    case btrfs cannot write the sb via btrfs_commit_super, and since
- *    fs_state already has BTRFS_SUPER_FLAG_ERROR set, btrfs cleans up
- *    all FS resources first and then writes the sb.
- */
- if (!(fs_info->sb->s_flags & MS_RDONLY)) {
- ret = btrfs_commit_super(root);
- if (ret)
- printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
- }
-
- if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
- ret = btrfs_error_commit_super(root);
- if (ret)
- printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
- }
-
- btrfs_put_block_group_cache(fs_info);
-
- kthread_stop(fs_info->transaction_kthread);
- kthread_stop(fs_info->cleaner_kthread);
-
- fs_info->closing = 2;
- smp_mb();
-
- if (fs_info->delalloc_bytes) {
- printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
- (unsigned long long)fs_info->delalloc_bytes);
- }
- if (fs_info->total_ref_cache_size) {
- printk(KERN_INFO "btrfs: at umount reference cache size %llu\n",
- (unsigned long long)fs_info->total_ref_cache_size);
- }
-
- free_extent_buffer(fs_info->extent_root->node);
- free_extent_buffer(fs_info->extent_root->commit_root);
- free_extent_buffer(fs_info->tree_root->node);
- free_extent_buffer(fs_info->tree_root->commit_root);
- free_extent_buffer(fs_info->chunk_root->node);
- free_extent_buffer(fs_info->chunk_root->commit_root);
- free_extent_buffer(fs_info->dev_root->node);
- free_extent_buffer(fs_info->dev_root->commit_root);
- free_extent_buffer(fs_info->csum_root->node);
- free_extent_buffer(fs_info->csum_root->commit_root);
-
- btrfs_free_block_groups(fs_info);
-
- del_fs_roots(fs_info);
-
- iput(fs_info->btree_inode);
-
- btrfs_stop_workers(&fs_info->generic_worker);
- btrfs_stop_workers(&fs_info->fixup_workers);
- btrfs_stop_workers(&fs_info->delalloc_workers);
- btrfs_stop_workers(&fs_info->workers);
- btrfs_stop_workers(&fs_info->endio_workers);
- btrfs_stop_workers(&fs_info->endio_meta_workers);
- btrfs_stop_workers(&fs_info->endio_meta_write_workers);
- btrfs_stop_workers(&fs_info->endio_write_workers);
- btrfs_stop_workers(&fs_info->endio_freespace_worker);
- btrfs_stop_workers(&fs_info->submit_workers);
- btrfs_stop_workers(&fs_info->delayed_workers);
- btrfs_stop_workers(&fs_info->caching_workers);
- btrfs_stop_workers(&fs_info->readahead_workers);
-
-#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- if (btrfs_test_opt(root, CHECK_INTEGRITY))
- btrfsic_unmount(root, fs_info->fs_devices);
-#endif
-
- btrfs_close_devices(fs_info->fs_devices);
- btrfs_mapping_tree_free(&fs_info->mapping_tree);
-
- bdi_destroy(&fs_info->bdi);
- cleanup_srcu_struct(&fs_info->subvol_srcu);
-
- return 0;
-}
-
-int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
- int atomic)
-{
- int ret;
- struct inode *btree_inode = buf->pages[0]->mapping->host;
-
- ret = extent_buffer_uptodate(buf);
- if (!ret)
- return ret;
-
- ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf,
- parent_transid, atomic);
- if (ret == -EAGAIN)
- return ret;
- return !ret;
-}
-
-int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
-{
- return set_extent_buffer_uptodate(buf);
-}
-
-void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
-{
- struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
- u64 transid = btrfs_header_generation(buf);
- int was_dirty;
-
- btrfs_assert_tree_locked(buf);
- if (transid != root->fs_info->generation) {
- printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
- "found %llu running %llu\n",
- (unsigned long long)buf->start,
- (unsigned long long)transid,
- (unsigned long long)root->fs_info->generation);
- WARN_ON(1);
- }
- was_dirty = set_extent_buffer_dirty(buf);
- if (!was_dirty) {
- spin_lock(&root->fs_info->delalloc_lock);
- root->fs_info->dirty_metadata_bytes += buf->len;
- spin_unlock(&root->fs_info->delalloc_lock);
- }
-}
-
-void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
-{
- /*
- * looks as though older kernels can get into trouble with
- * this code; they end up stuck in balance_dirty_pages forever
- */
- u64 num_dirty;
- unsigned long thresh = 32 * 1024 * 1024;
-
- if (current->flags & PF_MEMALLOC)
- return;
-
- btrfs_balance_delayed_items(root);
-
- num_dirty = root->fs_info->dirty_metadata_bytes;
-
- if (num_dirty > thresh) {
- balance_dirty_pages_ratelimited_nr(
- root->fs_info->btree_inode->i_mapping, 1);
- }
- return;
-}
-
-void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
-{
- /*
- * looks as though older kernels can get into trouble with
- * this code; they end up stuck in balance_dirty_pages forever
- */
- u64 num_dirty;
- unsigned long thresh = 32 * 1024 * 1024;
-
- if (current->flags & PF_MEMALLOC)
- return;
-
- num_dirty = root->fs_info->dirty_metadata_bytes;
-
- if (num_dirty > thresh) {
- balance_dirty_pages_ratelimited_nr(
- root->fs_info->btree_inode->i_mapping, 1);
- }
- return;
-}
-
-int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
-{
- struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
- return btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
-}
-
-static int btree_lock_page_hook(struct page *page, void *data,
- void (*flush_fn)(void *))
-{
- struct inode *inode = page->mapping->host;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_buffer *eb;
-
- /*
- * We culled this eb but the page is still hanging out on the mapping,
- * carry on.
- */
- if (!PagePrivate(page))
- goto out;
-
- eb = (struct extent_buffer *)page->private;
- if (!eb) {
- WARN_ON(1);
- goto out;
- }
- if (page != eb->pages[0])
- goto out;
-
- if (!btrfs_try_tree_write_lock(eb)) {
- flush_fn(data);
- btrfs_tree_lock(eb);
- }
- btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
-
- if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
- spin_lock(&root->fs_info->delalloc_lock);
- if (root->fs_info->dirty_metadata_bytes >= eb->len)
- root->fs_info->dirty_metadata_bytes -= eb->len;
- else
- WARN_ON(1);
- spin_unlock(&root->fs_info->delalloc_lock);
- }
-
- btrfs_tree_unlock(eb);
-out:
- if (!trylock_page(page)) {
- flush_fn(data);
- lock_page(page);
- }
- return 0;
-}
-
-static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
- int read_only)
-{
- if (btrfs_super_csum_type(fs_info->super_copy) >= ARRAY_SIZE(btrfs_csum_sizes)) {
- printk(KERN_ERR "btrfs: unsupported checksum algorithm\n");
- return -EINVAL;
- }
-
- if (read_only)
- return 0;
-
- if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
- printk(KERN_WARNING "warning: mount fs with errors, "
- "running btrfsck is recommended\n");
- }
-
- return 0;
-}
-
-int btrfs_error_commit_super(struct btrfs_root *root)
-{
- int ret;
-
- mutex_lock(&root->fs_info->cleaner_mutex);
- btrfs_run_delayed_iputs(root);
- mutex_unlock(&root->fs_info->cleaner_mutex);
-
- down_write(&root->fs_info->cleanup_work_sem);
- up_write(&root->fs_info->cleanup_work_sem);
-
- /* cleanup FS via transaction */
- btrfs_cleanup_transaction(root);
-
- ret = write_ctree_super(NULL, root, 0);
-
- return ret;
-}
-
-static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
-{
- struct btrfs_inode *btrfs_inode;
- struct list_head splice;
-
- INIT_LIST_HEAD(&splice);
-
- mutex_lock(&root->fs_info->ordered_operations_mutex);
- spin_lock(&root->fs_info->ordered_extent_lock);
-
- list_splice_init(&root->fs_info->ordered_operations, &splice);
- while (!list_empty(&splice)) {
- btrfs_inode = list_entry(splice.next, struct btrfs_inode,
- ordered_operations);
-
- list_del_init(&btrfs_inode->ordered_operations);
-
- btrfs_invalidate_inodes(btrfs_inode->root);
- }
-
- spin_unlock(&root->fs_info->ordered_extent_lock);
- mutex_unlock(&root->fs_info->ordered_operations_mutex);
-}
-
-static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
-{
- struct list_head splice;
- struct btrfs_ordered_extent *ordered;
- struct inode *inode;
-
- INIT_LIST_HEAD(&splice);
-
- spin_lock(&root->fs_info->ordered_extent_lock);
-
- list_splice_init(&root->fs_info->ordered_extents, &splice);
- while (!list_empty(&splice)) {
- ordered = list_entry(splice.next, struct btrfs_ordered_extent,
- root_extent_list);
-
- list_del_init(&ordered->root_extent_list);
- atomic_inc(&ordered->refs);
-
- /* the inode may be getting freed (in sys_unlink path). */
- inode = igrab(ordered->inode);
-
- spin_unlock(&root->fs_info->ordered_extent_lock);
- if (inode)
- iput(inode);
-
- atomic_set(&ordered->refs, 1);
- btrfs_put_ordered_extent(ordered);
-
- spin_lock(&root->fs_info->ordered_extent_lock);
- }
-
- spin_unlock(&root->fs_info->ordered_extent_lock);
-}
-
-int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
- struct btrfs_root *root)
-{
- struct rb_node *node;
- struct btrfs_delayed_ref_root *delayed_refs;
- struct btrfs_delayed_ref_node *ref;
- int ret = 0;
-
- delayed_refs = &trans->delayed_refs;
-
-again:
- spin_lock(&delayed_refs->lock);
- if (delayed_refs->num_entries == 0) {
- spin_unlock(&delayed_refs->lock);
- printk(KERN_INFO "delayed_refs has NO entry\n");
- return ret;
- }
-
- node = rb_first(&delayed_refs->root);
- while (node) {
- ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- node = rb_next(node);
-
- ref->in_tree = 0;
- rb_erase(&ref->rb_node, &delayed_refs->root);
- delayed_refs->num_entries--;
-
- atomic_set(&ref->refs, 1);
- if (btrfs_delayed_ref_is_head(ref)) {
- struct btrfs_delayed_ref_head *head;
-
- head = btrfs_delayed_node_to_head(ref);
- spin_unlock(&delayed_refs->lock);
- mutex_lock(&head->mutex);
- kfree(head->extent_op);
- delayed_refs->num_heads--;
- if (list_empty(&head->cluster))
- delayed_refs->num_heads_ready--;
- list_del_init(&head->cluster);
- mutex_unlock(&head->mutex);
- btrfs_put_delayed_ref(ref);
- goto again;
- }
- spin_unlock(&delayed_refs->lock);
- btrfs_put_delayed_ref(ref);
-
- cond_resched();
- spin_lock(&delayed_refs->lock);
- }
-
- spin_unlock(&delayed_refs->lock);
-
- return ret;
-}
-
-static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
-{
- struct btrfs_pending_snapshot *snapshot;
- struct list_head splice;
-
- INIT_LIST_HEAD(&splice);
-
- list_splice_init(&t->pending_snapshots, &splice);
-
- while (!list_empty(&splice)) {
- snapshot = list_entry(splice.next,
- struct btrfs_pending_snapshot,
- list);
-
- list_del_init(&snapshot->list);
-
- kfree(snapshot);
- }
-}
-
-static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
-{
- struct btrfs_inode *btrfs_inode;
- struct list_head splice;
-
- INIT_LIST_HEAD(&splice);
-
- spin_lock(&root->fs_info->delalloc_lock);
- list_splice_init(&root->fs_info->delalloc_inodes, &splice);
-
- while (!list_empty(&splice)) {
- btrfs_inode = list_entry(splice.next, struct btrfs_inode,
- delalloc_inodes);
-
- list_del_init(&btrfs_inode->delalloc_inodes);
-
- btrfs_invalidate_inodes(btrfs_inode->root);
- }
-
- spin_unlock(&root->fs_info->delalloc_lock);
-}
-
-static int btrfs_destroy_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages,
- int mark)
-{
- int ret;
- struct page *page;
- struct inode *btree_inode = root->fs_info->btree_inode;
- struct extent_buffer *eb;
- u64 start = 0;
- u64 end;
- u64 offset;
- unsigned long index;
-
- while (1) {
- ret = find_first_extent_bit(dirty_pages, start, &start, &end,
- mark);
- if (ret)
- break;
-
- clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
- while (start <= end) {
- index = start >> PAGE_CACHE_SHIFT;
- start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
- page = find_get_page(btree_inode->i_mapping, index);
- if (!page)
- continue;
- offset = page_offset(page);
-
- spin_lock(&dirty_pages->buffer_lock);
- eb = radix_tree_lookup(
- &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
- offset >> PAGE_CACHE_SHIFT);
- spin_unlock(&dirty_pages->buffer_lock);
- if (eb) {
- ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
- &eb->bflags);
- atomic_set(&eb->refs, 1);
- }
- if (PageWriteback(page))
- end_page_writeback(page);
-
- lock_page(page);
- if (PageDirty(page)) {
- clear_page_dirty_for_io(page);
- spin_lock_irq(&page->mapping->tree_lock);
- radix_tree_tag_clear(&page->mapping->page_tree,
- page_index(page),
- PAGECACHE_TAG_DIRTY);
- spin_unlock_irq(&page->mapping->tree_lock);
- }
-
- page->mapping->a_ops->invalidatepage(page, 0);
- unlock_page(page);
- }
- }
-
- return ret;
-}
-
-static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
- struct extent_io_tree *pinned_extents)
-{
- struct extent_io_tree *unpin;
- u64 start;
- u64 end;
- int ret;
-
- unpin = pinned_extents;
- while (1) {
- ret = find_first_extent_bit(unpin, 0, &start, &end,
- EXTENT_DIRTY);
- if (ret)
- break;
-
- /* opt_discard */
- if (btrfs_test_opt(root, DISCARD))
- ret = btrfs_error_discard_extent(root, start,
- end + 1 - start,
- NULL);
-
- clear_extent_dirty(unpin, start, end, GFP_NOFS);
- btrfs_error_unpin_extent_range(root, start, end);
- cond_resched();
- }
-
- return 0;
-}
-
-void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
- struct btrfs_root *root)
-{
- btrfs_destroy_delayed_refs(cur_trans, root);
- btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
- cur_trans->dirty_pages.dirty_bytes);
-
- /* FIXME: cleanup wait for commit */
- cur_trans->in_commit = 1;
- cur_trans->blocked = 1;
- if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
- wake_up(&root->fs_info->transaction_blocked_wait);
-
- cur_trans->blocked = 0;
- if (waitqueue_active(&root->fs_info->transaction_wait))
- wake_up(&root->fs_info->transaction_wait);
-
- cur_trans->commit_done = 1;
- if (waitqueue_active(&cur_trans->commit_wait))
- wake_up(&cur_trans->commit_wait);
-
- btrfs_destroy_pending_snapshots(cur_trans);
-
- btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,
- EXTENT_DIRTY);
-
- /*
- memset(cur_trans, 0, sizeof(*cur_trans));
- kmem_cache_free(btrfs_transaction_cachep, cur_trans);
- */
-}
-
-int btrfs_cleanup_transaction(struct btrfs_root *root)
-{
- struct btrfs_transaction *t;
- LIST_HEAD(list);
-
- mutex_lock(&root->fs_info->transaction_kthread_mutex);
-
- spin_lock(&root->fs_info->trans_lock);
- list_splice_init(&root->fs_info->trans_list, &list);
- root->fs_info->trans_no_join = 1;
- spin_unlock(&root->fs_info->trans_lock);
-
- while (!list_empty(&list)) {
- t = list_entry(list.next, struct btrfs_transaction, list);
- if (!t)
- break;
-
- btrfs_destroy_ordered_operations(root);
-
- btrfs_destroy_ordered_extents(root);
-
- btrfs_destroy_delayed_refs(t, root);
-
- btrfs_block_rsv_release(root,
- &root->fs_info->trans_block_rsv,
- t->dirty_pages.dirty_bytes);
-
- /* FIXME: cleanup wait for commit */
- t->in_commit = 1;
- t->blocked = 1;
- if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
- wake_up(&root->fs_info->transaction_blocked_wait);
-
- t->blocked = 0;
- if (waitqueue_active(&root->fs_info->transaction_wait))
- wake_up(&root->fs_info->transaction_wait);
-
- t->commit_done = 1;
- if (waitqueue_active(&t->commit_wait))
- wake_up(&t->commit_wait);
-
- btrfs_destroy_pending_snapshots(t);
-
- btrfs_destroy_delalloc_inodes(root);
-
- spin_lock(&root->fs_info->trans_lock);
- root->fs_info->running_transaction = NULL;
- spin_unlock(&root->fs_info->trans_lock);
-
- btrfs_destroy_marked_extents(root, &t->dirty_pages,
- EXTENT_DIRTY);
-
- btrfs_destroy_pinned_extent(root,
- root->fs_info->pinned_extents);
-
- atomic_set(&t->use_count, 0);
- list_del_init(&t->list);
- memset(t, 0, sizeof(*t));
- kmem_cache_free(btrfs_transaction_cachep, t);
- }
-
- spin_lock(&root->fs_info->trans_lock);
- root->fs_info->trans_no_join = 0;
- spin_unlock(&root->fs_info->trans_lock);
- mutex_unlock(&root->fs_info->transaction_kthread_mutex);
-
- return 0;
-}
-
-static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page,
- u64 start, u64 end,
- struct extent_state *state)
-{
- struct super_block *sb = page->mapping->host->i_sb;
- struct btrfs_fs_info *fs_info = btrfs_sb(sb);
- btrfs_error(fs_info, -EIO,
- "Error occured while writing out btree at %llu", start);
- return -EIO;
-}
-
-static struct extent_io_ops btree_extent_io_ops = {
- .write_cache_pages_lock_hook = btree_lock_page_hook,
- .readpage_end_io_hook = btree_readpage_end_io_hook,
- .readpage_io_failed_hook = btree_io_failed_hook,
- .submit_bio_hook = btree_submit_bio_hook,
- /* note we're sharing with inode.c for the merge bio hook */
- .merge_bio_hook = btrfs_merge_bio_hook,
- .writepage_io_failed_hook = btree_writepage_io_failed_hook,
-};
diff --git a/ANDROID_3.4.5/fs/btrfs/disk-io.h b/ANDROID_3.4.5/fs/btrfs/disk-io.h
deleted file mode 100644
index ab1830aa..00000000
--- a/ANDROID_3.4.5/fs/btrfs/disk-io.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __DISKIO__
-#define __DISKIO__
-
-#define BTRFS_SUPER_INFO_OFFSET (64 * 1024)
-#define BTRFS_SUPER_INFO_SIZE 4096
-
-#define BTRFS_SUPER_MIRROR_MAX 3
-#define BTRFS_SUPER_MIRROR_SHIFT 12
-
-static inline u64 btrfs_sb_offset(int mirror)
-{
- u64 start = 16 * 1024;
- if (mirror)
- return start << (BTRFS_SUPER_MIRROR_SHIFT * mirror);
- return BTRFS_SUPER_INFO_OFFSET;
-}
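For mirror 0 this returns BTRFS_SUPER_INFO_OFFSET (64KiB); for the copies it shifts the 16KiB base left by BTRFS_SUPER_MIRROR_SHIFT bits per mirror. A small standalone sketch of the resulting byte offsets (a quick check, not part of the original header):

#include <stdio.h>

int main(void)
{
	unsigned long long start = 16 * 1024;		/* base used by btrfs_sb_offset() */
	int mirror;

	for (mirror = 0; mirror < 3; mirror++) {	/* BTRFS_SUPER_MIRROR_MAX == 3 */
		unsigned long long off = mirror ?
			start << (12 * mirror) :	/* BTRFS_SUPER_MIRROR_SHIFT == 12 */
			64 * 1024;			/* BTRFS_SUPER_INFO_OFFSET */
		printf("super mirror %d at byte offset %llu\n", mirror, off);
	}
	return 0;
}

This prints 65536, 67108864 and 274877906944, i.e. the primary super at 64KiB with mirrors at 64MiB and 256GiB.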
-
-struct btrfs_device;
-struct btrfs_fs_devices;
-
-struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
- u32 blocksize, u64 parent_transid);
-int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
- u64 parent_transid);
-int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
- int mirror_num, struct extent_buffer **eb);
-struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
- u64 bytenr, u32 blocksize);
-void clean_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *buf);
-int open_ctree(struct super_block *sb,
- struct btrfs_fs_devices *fs_devices,
- char *options);
-int close_ctree(struct btrfs_root *root);
-int write_ctree_super(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int max_mirrors);
-struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
-int btrfs_commit_super(struct btrfs_root *root);
-int btrfs_error_commit_super(struct btrfs_root *root);
-struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
- u64 bytenr, u32 blocksize);
-struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
- struct btrfs_key *location);
-struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
- struct btrfs_key *location);
-int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
-void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
-void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
-void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
-void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
-int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
- int atomic);
-int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
-int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
-u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
-void btrfs_csum_final(u32 crc, char *result);
-int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
- int metadata);
-int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
- int rw, struct bio *bio, int mirror_num,
- unsigned long bio_flags, u64 bio_offset,
- extent_submit_bio_hook_t *submit_bio_start,
- extent_submit_bio_hook_t *submit_bio_done);
-unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
-int btrfs_write_tree_block(struct extent_buffer *buf);
-int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
-int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
-int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-int btrfs_cleanup_transaction(struct btrfs_root *root);
-void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
- struct btrfs_root *root);
-void btrfs_abort_devices(struct btrfs_root *root);
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void btrfs_init_lockdep(void);
-void btrfs_set_buffer_lockdep_class(u64 objectid,
- struct extent_buffer *eb, int level);
-#else
-static inline void btrfs_init_lockdep(void)
-{ }
-static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
- struct extent_buffer *eb, int level)
-{
-}
-#endif
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/export.c b/ANDROID_3.4.5/fs/btrfs/export.c
deleted file mode 100644
index e887ee62..00000000
--- a/ANDROID_3.4.5/fs/btrfs/export.c
+++ /dev/null
@@ -1,317 +0,0 @@
-#include <linux/fs.h>
-#include <linux/types.h>
-#include "ctree.h"
-#include "disk-io.h"
-#include "btrfs_inode.h"
-#include "print-tree.h"
-#include "export.h"
-#include "compat.h"
-
-#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, \
- parent_objectid) / 4)
-#define BTRFS_FID_SIZE_CONNECTABLE (offsetof(struct btrfs_fid, \
- parent_root_objectid) / 4)
-#define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid) / 4)
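The divisions by 4 express each handle length in 32-bit words, the unit the exportfs fh_len interface works in. A hedged sketch of what the three sizes evaluate to, assuming the packed struct btrfs_fid layout from export.h (that header is not part of this hunk, so the layout below is an assumption for illustration only):

#include <stdio.h>
#include <stddef.h>

/* assumed layout of struct btrfs_fid; the real definition lives in export.h */
struct btrfs_fid {
	unsigned long long objectid;
	unsigned long long root_objectid;
	unsigned int gen;
	unsigned long long parent_objectid;
	unsigned int parent_gen;
	unsigned long long parent_root_objectid;
} __attribute__((packed));

int main(void)
{
	printf("non-connectable fid:     %zu words\n",
	       offsetof(struct btrfs_fid, parent_objectid) / 4);
	printf("connectable fid:         %zu words\n",
	       offsetof(struct btrfs_fid, parent_root_objectid) / 4);
	printf("connectable fid w/ root: %zu words\n",
	       sizeof(struct btrfs_fid) / 4);
	return 0;
}

Under that assumed layout the sizes come out to 5, 8 and 10 words respectively.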
-
-static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
- int connectable)
-{
- struct btrfs_fid *fid = (struct btrfs_fid *)fh;
- struct inode *inode = dentry->d_inode;
- int len = *max_len;
- int type;
-
- if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
- *max_len = BTRFS_FID_SIZE_CONNECTABLE;
- return 255;
- } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
- *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
- return 255;
- }
-
- len = BTRFS_FID_SIZE_NON_CONNECTABLE;
- type = FILEID_BTRFS_WITHOUT_PARENT;
-
- fid->objectid = btrfs_ino(inode);
- fid->root_objectid = BTRFS_I(inode)->root->objectid;
- fid->gen = inode->i_generation;
-
- if (connectable && !S_ISDIR(inode->i_mode)) {
- struct inode *parent;
- u64 parent_root_id;
-
- spin_lock(&dentry->d_lock);
-
- parent = dentry->d_parent->d_inode;
- fid->parent_objectid = BTRFS_I(parent)->location.objectid;
- fid->parent_gen = parent->i_generation;
- parent_root_id = BTRFS_I(parent)->root->objectid;
-
- spin_unlock(&dentry->d_lock);
-
- if (parent_root_id != fid->root_objectid) {
- fid->parent_root_objectid = parent_root_id;
- len = BTRFS_FID_SIZE_CONNECTABLE_ROOT;
- type = FILEID_BTRFS_WITH_PARENT_ROOT;
- } else {
- len = BTRFS_FID_SIZE_CONNECTABLE;
- type = FILEID_BTRFS_WITH_PARENT;
- }
- }
-
- *max_len = len;
- return type;
-}
-
-static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
- u64 root_objectid, u32 generation,
- int check_generation)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(sb);
- struct btrfs_root *root;
- struct inode *inode;
- struct btrfs_key key;
- int index;
- int err = 0;
-
- if (objectid < BTRFS_FIRST_FREE_OBJECTID)
- return ERR_PTR(-ESTALE);
-
- key.objectid = root_objectid;
- btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
- key.offset = (u64)-1;
-
- index = srcu_read_lock(&fs_info->subvol_srcu);
-
- root = btrfs_read_fs_root_no_name(fs_info, &key);
- if (IS_ERR(root)) {
- err = PTR_ERR(root);
- goto fail;
- }
-
- if (btrfs_root_refs(&root->root_item) == 0) {
- err = -ENOENT;
- goto fail;
- }
-
- key.objectid = objectid;
- btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
- key.offset = 0;
-
- inode = btrfs_iget(sb, &key, root, NULL);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- goto fail;
- }
-
- srcu_read_unlock(&fs_info->subvol_srcu, index);
-
- if (check_generation && generation != inode->i_generation) {
- iput(inode);
- return ERR_PTR(-ESTALE);
- }
-
- return d_obtain_alias(inode);
-fail:
- srcu_read_unlock(&fs_info->subvol_srcu, index);
- return ERR_PTR(err);
-}
-
-static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
- int fh_len, int fh_type)
-{
- struct btrfs_fid *fid = (struct btrfs_fid *) fh;
- u64 objectid, root_objectid;
- u32 generation;
-
- if (fh_type == FILEID_BTRFS_WITH_PARENT) {
- if (fh_len != BTRFS_FID_SIZE_CONNECTABLE)
- return NULL;
- root_objectid = fid->root_objectid;
- } else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) {
- if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT)
- return NULL;
- root_objectid = fid->parent_root_objectid;
- } else
- return NULL;
-
- objectid = fid->parent_objectid;
- generation = fid->parent_gen;
-
- return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
-}
-
-static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
- int fh_len, int fh_type)
-{
- struct btrfs_fid *fid = (struct btrfs_fid *) fh;
- u64 objectid, root_objectid;
- u32 generation;
-
- if ((fh_type != FILEID_BTRFS_WITH_PARENT ||
- fh_len != BTRFS_FID_SIZE_CONNECTABLE) &&
- (fh_type != FILEID_BTRFS_WITH_PARENT_ROOT ||
- fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) &&
- (fh_type != FILEID_BTRFS_WITHOUT_PARENT ||
- fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE))
- return NULL;
-
- objectid = fid->objectid;
- root_objectid = fid->root_objectid;
- generation = fid->gen;
-
- return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
-}
-
-static struct dentry *btrfs_get_parent(struct dentry *child)
-{
- struct inode *dir = child->d_inode;
- struct btrfs_root *root = BTRFS_I(dir)->root;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_root_ref *ref;
- struct btrfs_key key;
- struct btrfs_key found_key;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return ERR_PTR(-ENOMEM);
-
- if (btrfs_ino(dir) == BTRFS_FIRST_FREE_OBJECTID) {
- key.objectid = root->root_key.objectid;
- key.type = BTRFS_ROOT_BACKREF_KEY;
- key.offset = (u64)-1;
- root = root->fs_info->tree_root;
- } else {
- key.objectid = btrfs_ino(dir);
- key.type = BTRFS_INODE_REF_KEY;
- key.offset = (u64)-1;
- }
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto fail;
-
- BUG_ON(ret == 0); /* Key with offset of -1 found */
- if (path->slots[0] == 0) {
- ret = -ENOENT;
- goto fail;
- }
-
- path->slots[0]--;
- leaf = path->nodes[0];
-
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- if (found_key.objectid != key.objectid || found_key.type != key.type) {
- ret = -ENOENT;
- goto fail;
- }
-
- if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
- ref = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_root_ref);
- key.objectid = btrfs_root_ref_dirid(leaf, ref);
- } else {
- key.objectid = found_key.offset;
- }
- btrfs_free_path(path);
-
- if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
- return btrfs_get_dentry(root->fs_info->sb, key.objectid,
- found_key.offset, 0, 0);
- }
-
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
- return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
-fail:
- btrfs_free_path(path);
- return ERR_PTR(ret);
-}
-
-static int btrfs_get_name(struct dentry *parent, char *name,
- struct dentry *child)
-{
- struct inode *inode = child->d_inode;
- struct inode *dir = parent->d_inode;
- struct btrfs_path *path;
- struct btrfs_root *root = BTRFS_I(dir)->root;
- struct btrfs_inode_ref *iref;
- struct btrfs_root_ref *rref;
- struct extent_buffer *leaf;
- unsigned long name_ptr;
- struct btrfs_key key;
- int name_len;
- int ret;
- u64 ino;
-
- if (!dir || !inode)
- return -EINVAL;
-
- if (!S_ISDIR(dir->i_mode))
- return -EINVAL;
-
- ino = btrfs_ino(inode);
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->leave_spinning = 1;
-
- if (ino == BTRFS_FIRST_FREE_OBJECTID) {
- key.objectid = BTRFS_I(inode)->root->root_key.objectid;
- key.type = BTRFS_ROOT_BACKREF_KEY;
- key.offset = (u64)-1;
- root = root->fs_info->tree_root;
- } else {
- key.objectid = ino;
- key.offset = btrfs_ino(dir);
- key.type = BTRFS_INODE_REF_KEY;
- }
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0) {
- btrfs_free_path(path);
- return ret;
- } else if (ret > 0) {
- if (ino == BTRFS_FIRST_FREE_OBJECTID) {
- path->slots[0]--;
- } else {
- btrfs_free_path(path);
- return -ENOENT;
- }
- }
- leaf = path->nodes[0];
-
- if (ino == BTRFS_FIRST_FREE_OBJECTID) {
- rref = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_root_ref);
- name_ptr = (unsigned long)(rref + 1);
- name_len = btrfs_root_ref_name_len(leaf, rref);
- } else {
- iref = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_inode_ref);
- name_ptr = (unsigned long)(iref + 1);
- name_len = btrfs_inode_ref_name_len(leaf, iref);
- }
-
- read_extent_buffer(leaf, name, name_ptr, name_len);
- btrfs_free_path(path);
-
- /*
- * We have to add the null termination to make sure that reconnect_path
- * gets the right length from strlen.
- */
- name[name_len] = '\0';
-
- return 0;
-}
-
-const struct export_operations btrfs_export_ops = {
- .encode_fh = btrfs_encode_fh,
- .fh_to_dentry = btrfs_fh_to_dentry,
- .fh_to_parent = btrfs_fh_to_parent,
- .get_parent = btrfs_get_parent,
- .get_name = btrfs_get_name,
-};
diff --git a/ANDROID_3.4.5/fs/btrfs/export.h b/ANDROID_3.4.5/fs/btrfs/export.h
deleted file mode 100644
index 074348a9..00000000
--- a/ANDROID_3.4.5/fs/btrfs/export.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef BTRFS_EXPORT_H
-#define BTRFS_EXPORT_H
-
-#include <linux/exportfs.h>
-
-extern const struct export_operations btrfs_export_ops;
-
-struct btrfs_fid {
- u64 objectid;
- u64 root_objectid;
- u32 gen;
-
- u64 parent_objectid;
- u32 parent_gen;
-
- u64 parent_root_objectid;
-} __attribute__ ((packed));
-
-#endif
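A minimal userspace sketch (not part of the original btrfs source) of the arithmetic behind the BTRFS_FID_SIZE_* macros in export.c above; struct btrfs_fid_sketch simply mirrors the packed struct btrfs_fid, so the three handle sizes come out to 5, 8 and 10 32-bit words.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Mirror of the packed struct btrfs_fid from export.h above. */
struct btrfs_fid_sketch {
	uint64_t objectid;
	uint64_t root_objectid;
	uint32_t gen;

	uint64_t parent_objectid;
	uint32_t parent_gen;

	uint64_t parent_root_objectid;
} __attribute__((packed));

int main(void)
{
	/* 8 + 8 + 4 = 20 bytes before parent_objectid -> 5 words (non-connectable) */
	assert(offsetof(struct btrfs_fid_sketch, parent_objectid) / 4 == 5);
	/* + 8 + 4 = 32 bytes before parent_root_objectid -> 8 words (connectable) */
	assert(offsetof(struct btrfs_fid_sketch, parent_root_objectid) / 4 == 8);
	/* + 8 = 40 bytes total -> 10 words (connectable + parent root) */
	assert(sizeof(struct btrfs_fid_sketch) / 4 == 10);
	return 0;
}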
diff --git a/ANDROID_3.4.5/fs/btrfs/extent-tree.c b/ANDROID_3.4.5/fs/btrfs/extent-tree.c
deleted file mode 100644
index 49fd7b66..00000000
--- a/ANDROID_3.4.5/fs/btrfs/extent-tree.c
+++ /dev/null
@@ -1,8025 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-#include <linux/sched.h>
-#include <linux/pagemap.h>
-#include <linux/writeback.h>
-#include <linux/blkdev.h>
-#include <linux/sort.h>
-#include <linux/rcupdate.h>
-#include <linux/kthread.h>
-#include <linux/slab.h>
-#include <linux/ratelimit.h>
-#include "compat.h"
-#include "hash.h"
-#include "ctree.h"
-#include "disk-io.h"
-#include "print-tree.h"
-#include "transaction.h"
-#include "volumes.h"
-#include "locking.h"
-#include "free-space-cache.h"
-
-/*
- * control flags for do_chunk_alloc's force field
- * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
- * if we really need one.
- *
- * CHUNK_ALLOC_LIMITED means to only try and allocate one
- * if we have very few chunks already allocated. This is
- * used as part of the clustering code to help make sure
- * we have a good pool of storage to cluster in, without
- * filling the FS with empty chunks
- *
- * CHUNK_ALLOC_FORCE means it must try to allocate one
- *
- */
-enum {
- CHUNK_ALLOC_NO_FORCE = 0,
- CHUNK_ALLOC_LIMITED = 1,
- CHUNK_ALLOC_FORCE = 2,
-};
-
-/*
- * Control how reservations are dealt with.
- *
- * RESERVE_FREE - freeing a reservation.
- * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
- * ENOSPC accounting
- * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
- * bytes_may_use as the ENOSPC accounting is done elsewhere
- */
-enum {
- RESERVE_FREE = 0,
- RESERVE_ALLOC = 1,
- RESERVE_ALLOC_NO_ACCOUNT = 2,
-};
-
-static int update_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, int alloc);
-static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner_objectid,
- u64 owner_offset, int refs_to_drop,
- struct btrfs_delayed_extent_op *extra_op);
-static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
- struct extent_buffer *leaf,
- struct btrfs_extent_item *ei);
-static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 parent, u64 root_objectid,
- u64 flags, u64 owner, u64 offset,
- struct btrfs_key *ins, int ref_mod);
-static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 parent, u64 root_objectid,
- u64 flags, struct btrfs_disk_key *key,
- int level, struct btrfs_key *ins);
-static int do_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root, u64 alloc_bytes,
- u64 flags, int force);
-static int find_next_key(struct btrfs_path *path, int level,
- struct btrfs_key *key);
-static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
- int dump_block_groups);
-static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve);
-
-static noinline int
-block_group_cache_done(struct btrfs_block_group_cache *cache)
-{
- smp_mb();
- return cache->cached == BTRFS_CACHE_FINISHED;
-}
-
-static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
-{
- return (cache->flags & bits) == bits;
-}
-
-static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
-{
- atomic_inc(&cache->count);
-}
-
-void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
-{
- if (atomic_dec_and_test(&cache->count)) {
- WARN_ON(cache->pinned > 0);
- WARN_ON(cache->reserved > 0);
- kfree(cache->free_space_ctl);
- kfree(cache);
- }
-}
-
-/*
- * this adds the block group to the fs_info rb tree for the block group
- * cache
- */
-static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
- struct btrfs_block_group_cache *block_group)
-{
- struct rb_node **p;
- struct rb_node *parent = NULL;
- struct btrfs_block_group_cache *cache;
-
- spin_lock(&info->block_group_cache_lock);
- p = &info->block_group_cache_tree.rb_node;
-
- while (*p) {
- parent = *p;
- cache = rb_entry(parent, struct btrfs_block_group_cache,
- cache_node);
- if (block_group->key.objectid < cache->key.objectid) {
- p = &(*p)->rb_left;
- } else if (block_group->key.objectid > cache->key.objectid) {
- p = &(*p)->rb_right;
- } else {
- spin_unlock(&info->block_group_cache_lock);
- return -EEXIST;
- }
- }
-
- rb_link_node(&block_group->cache_node, parent, p);
- rb_insert_color(&block_group->cache_node,
- &info->block_group_cache_tree);
- spin_unlock(&info->block_group_cache_lock);
-
- return 0;
-}
-
-/*
- * This will return the block group at or after bytenr if contains is 0, else
- * it will return the block group that contains the bytenr
- */
-static struct btrfs_block_group_cache *
-block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
- int contains)
-{
- struct btrfs_block_group_cache *cache, *ret = NULL;
- struct rb_node *n;
- u64 end, start;
-
- spin_lock(&info->block_group_cache_lock);
- n = info->block_group_cache_tree.rb_node;
-
- while (n) {
- cache = rb_entry(n, struct btrfs_block_group_cache,
- cache_node);
- end = cache->key.objectid + cache->key.offset - 1;
- start = cache->key.objectid;
-
- if (bytenr < start) {
- if (!contains && (!ret || start < ret->key.objectid))
- ret = cache;
- n = n->rb_left;
- } else if (bytenr > start) {
- if (contains && bytenr <= end) {
- ret = cache;
- break;
- }
- n = n->rb_right;
- } else {
- ret = cache;
- break;
- }
- }
- if (ret)
- btrfs_get_block_group(ret);
- spin_unlock(&info->block_group_cache_lock);
-
- return ret;
-}
-
-static int add_excluded_extent(struct btrfs_root *root,
- u64 start, u64 num_bytes)
-{
- u64 end = start + num_bytes - 1;
- set_extent_bits(&root->fs_info->freed_extents[0],
- start, end, EXTENT_UPTODATE, GFP_NOFS);
- set_extent_bits(&root->fs_info->freed_extents[1],
- start, end, EXTENT_UPTODATE, GFP_NOFS);
- return 0;
-}
-
-static void free_excluded_extents(struct btrfs_root *root,
- struct btrfs_block_group_cache *cache)
-{
- u64 start, end;
-
- start = cache->key.objectid;
- end = start + cache->key.offset - 1;
-
- clear_extent_bits(&root->fs_info->freed_extents[0],
- start, end, EXTENT_UPTODATE, GFP_NOFS);
- clear_extent_bits(&root->fs_info->freed_extents[1],
- start, end, EXTENT_UPTODATE, GFP_NOFS);
-}
-
-static int exclude_super_stripes(struct btrfs_root *root,
- struct btrfs_block_group_cache *cache)
-{
- u64 bytenr;
- u64 *logical;
- int stripe_len;
- int i, nr, ret;
-
- if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
- stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
- cache->bytes_super += stripe_len;
- ret = add_excluded_extent(root, cache->key.objectid,
- stripe_len);
- BUG_ON(ret); /* -ENOMEM */
- }
-
- for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
- bytenr = btrfs_sb_offset(i);
- ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
- cache->key.objectid, bytenr,
- 0, &logical, &nr, &stripe_len);
- BUG_ON(ret); /* -ENOMEM */
-
- while (nr--) {
- cache->bytes_super += stripe_len;
- ret = add_excluded_extent(root, logical[nr],
- stripe_len);
- BUG_ON(ret); /* -ENOMEM */
- }
-
- kfree(logical);
- }
- return 0;
-}
-
-static struct btrfs_caching_control *
-get_caching_control(struct btrfs_block_group_cache *cache)
-{
- struct btrfs_caching_control *ctl;
-
- spin_lock(&cache->lock);
- if (cache->cached != BTRFS_CACHE_STARTED) {
- spin_unlock(&cache->lock);
- return NULL;
- }
-
- /* We're loading it the fast way, so we don't have a caching_ctl. */
- if (!cache->caching_ctl) {
- spin_unlock(&cache->lock);
- return NULL;
- }
-
- ctl = cache->caching_ctl;
- atomic_inc(&ctl->count);
- spin_unlock(&cache->lock);
- return ctl;
-}
-
-static void put_caching_control(struct btrfs_caching_control *ctl)
-{
- if (atomic_dec_and_test(&ctl->count))
- kfree(ctl);
-}
-
-/*
- * This is only called by cache_block_group. Since we could have freed extents,
- * we need to check pinned_extents for any extents that can't be used yet,
- * because their free space will be released as soon as the transaction commits.
- */
-static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
- struct btrfs_fs_info *info, u64 start, u64 end)
-{
- u64 extent_start, extent_end, size, total_added = 0;
- int ret;
-
- while (start < end) {
- ret = find_first_extent_bit(info->pinned_extents, start,
- &extent_start, &extent_end,
- EXTENT_DIRTY | EXTENT_UPTODATE);
- if (ret)
- break;
-
- if (extent_start <= start) {
- start = extent_end + 1;
- } else if (extent_start > start && extent_start < end) {
- size = extent_start - start;
- total_added += size;
- ret = btrfs_add_free_space(block_group, start,
- size);
- BUG_ON(ret); /* -ENOMEM or logic error */
- start = extent_end + 1;
- } else {
- break;
- }
- }
-
- if (start < end) {
- size = end - start;
- total_added += size;
- ret = btrfs_add_free_space(block_group, start, size);
- BUG_ON(ret); /* -ENOMEM or logic error */
- }
-
- return total_added;
-}
-
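A standalone sketch (not from the original source) of the gap-filling logic in add_new_free_space() above: the sorted pinned ranges stand in for what find_first_extent_bit() would return, and only the space between them, plus the tail after the last one, is counted as free.

#include <assert.h>
#include <stdint.h>

struct pinned_range { uint64_t start, end; };	/* inclusive bounds, sorted by start */

static uint64_t sum_free_gaps(const struct pinned_range *p, int n,
			      uint64_t start, uint64_t end)
{
	uint64_t total = 0;
	int i;

	for (i = 0; i < n && start < end; i++) {
		if (p[i].end < start)		/* pinned range entirely behind us */
			continue;
		if (p[i].start <= start) {	/* pinned range covers 'start': skip over it */
			start = p[i].end + 1;
		} else if (p[i].start < end) {	/* gap before the pinned range is free */
			total += p[i].start - start;
			start = p[i].end + 1;
		} else {
			break;			/* pinned range lies past the block group */
		}
	}
	if (start < end)			/* tail after the last pinned range */
		total += end - start;
	return total;
}

int main(void)
{
	struct pinned_range pinned[] = { { 30, 49 }, { 70, 79 } };

	/* free gaps are [0,30), [50,70) and [80,100): 30 + 20 + 20 = 70 */
	assert(sum_free_gaps(pinned, 2, 0, 100) == 70);
	return 0;
}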
-static noinline void caching_thread(struct btrfs_work *work)
-{
- struct btrfs_block_group_cache *block_group;
- struct btrfs_fs_info *fs_info;
- struct btrfs_caching_control *caching_ctl;
- struct btrfs_root *extent_root;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_key key;
- u64 total_found = 0;
- u64 last = 0;
- u32 nritems;
- int ret = 0;
-
- caching_ctl = container_of(work, struct btrfs_caching_control, work);
- block_group = caching_ctl->block_group;
- fs_info = block_group->fs_info;
- extent_root = fs_info->extent_root;
-
- path = btrfs_alloc_path();
- if (!path)
- goto out;
-
- last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
-
- /*
- * We don't want to deadlock with somebody trying to allocate a new
- * extent for the extent root while also trying to search the extent
- * root to add free space. So we skip locking and search the commit
- * root, since it's read-only.
- */
- path->skip_locking = 1;
- path->search_commit_root = 1;
- path->reada = 1;
-
- key.objectid = last;
- key.offset = 0;
- key.type = BTRFS_EXTENT_ITEM_KEY;
-again:
- mutex_lock(&caching_ctl->mutex);
- /* need to make sure the commit_root doesn't disappear */
- down_read(&fs_info->extent_commit_sem);
-
- ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
- if (ret < 0)
- goto err;
-
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
-
- while (1) {
- if (btrfs_fs_closing(fs_info) > 1) {
- last = (u64)-1;
- break;
- }
-
- if (path->slots[0] < nritems) {
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- } else {
- ret = find_next_key(path, 0, &key);
- if (ret)
- break;
-
- if (need_resched() ||
- btrfs_next_leaf(extent_root, path)) {
- caching_ctl->progress = last;
- btrfs_release_path(path);
- up_read(&fs_info->extent_commit_sem);
- mutex_unlock(&caching_ctl->mutex);
- cond_resched();
- goto again;
- }
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- continue;
- }
-
- if (key.objectid < block_group->key.objectid) {
- path->slots[0]++;
- continue;
- }
-
- if (key.objectid >= block_group->key.objectid +
- block_group->key.offset)
- break;
-
- if (key.type == BTRFS_EXTENT_ITEM_KEY) {
- total_found += add_new_free_space(block_group,
- fs_info, last,
- key.objectid);
- last = key.objectid + key.offset;
-
- if (total_found > (1024 * 1024 * 2)) {
- total_found = 0;
- wake_up(&caching_ctl->wait);
- }
- }
- path->slots[0]++;
- }
- ret = 0;
-
- total_found += add_new_free_space(block_group, fs_info, last,
- block_group->key.objectid +
- block_group->key.offset);
- caching_ctl->progress = (u64)-1;
-
- spin_lock(&block_group->lock);
- block_group->caching_ctl = NULL;
- block_group->cached = BTRFS_CACHE_FINISHED;
- spin_unlock(&block_group->lock);
-
-err:
- btrfs_free_path(path);
- up_read(&fs_info->extent_commit_sem);
-
- free_excluded_extents(extent_root, block_group);
-
- mutex_unlock(&caching_ctl->mutex);
-out:
- wake_up(&caching_ctl->wait);
-
- put_caching_control(caching_ctl);
- btrfs_put_block_group(block_group);
-}
-
-static int cache_block_group(struct btrfs_block_group_cache *cache,
- struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- int load_cache_only)
-{
- DEFINE_WAIT(wait);
- struct btrfs_fs_info *fs_info = cache->fs_info;
- struct btrfs_caching_control *caching_ctl;
- int ret = 0;
-
- caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
- if (!caching_ctl)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&caching_ctl->list);
- mutex_init(&caching_ctl->mutex);
- init_waitqueue_head(&caching_ctl->wait);
- caching_ctl->block_group = cache;
- caching_ctl->progress = cache->key.objectid;
- atomic_set(&caching_ctl->count, 1);
- caching_ctl->work.func = caching_thread;
-
- spin_lock(&cache->lock);
- /*
- * This should be a rare occasion, but it could happen in the
- * case where one thread starts to load the space cache info, and then
- * some other thread starts a transaction commit which tries to do an
- * allocation while the other thread is still loading the space cache
- * info. The previous loop should have kept us from choosing this block
- * group, but if we've moved to the state where we will wait on caching
- * block groups, we need to first check if we're doing a fast load here
- * so we can wait for it to finish; otherwise we could end up allocating
- * from a block group whose cache gets evicted for one reason or
- * another.
- */
- while (cache->cached == BTRFS_CACHE_FAST) {
- struct btrfs_caching_control *ctl;
-
- ctl = cache->caching_ctl;
- atomic_inc(&ctl->count);
- prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
- spin_unlock(&cache->lock);
-
- schedule();
-
- finish_wait(&ctl->wait, &wait);
- put_caching_control(ctl);
- spin_lock(&cache->lock);
- }
-
- if (cache->cached != BTRFS_CACHE_NO) {
- spin_unlock(&cache->lock);
- kfree(caching_ctl);
- return 0;
- }
- WARN_ON(cache->caching_ctl);
- cache->caching_ctl = caching_ctl;
- cache->cached = BTRFS_CACHE_FAST;
- spin_unlock(&cache->lock);
-
- /*
- * We can't do the read from on-disk cache during a commit since we need
- * to have the normal tree locking. Also if we are currently trying to
- * allocate blocks for the tree root we can't do the fast caching since
- * we likely hold important locks.
- */
- if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
- ret = load_free_space_cache(fs_info, cache);
-
- spin_lock(&cache->lock);
- if (ret == 1) {
- cache->caching_ctl = NULL;
- cache->cached = BTRFS_CACHE_FINISHED;
- cache->last_byte_to_unpin = (u64)-1;
- } else {
- if (load_cache_only) {
- cache->caching_ctl = NULL;
- cache->cached = BTRFS_CACHE_NO;
- } else {
- cache->cached = BTRFS_CACHE_STARTED;
- }
- }
- spin_unlock(&cache->lock);
- wake_up(&caching_ctl->wait);
- if (ret == 1) {
- put_caching_control(caching_ctl);
- free_excluded_extents(fs_info->extent_root, cache);
- return 0;
- }
- } else {
- /*
- * We are not going to do the fast caching; set cached to the
- * appropriate value and wake up any waiters.
- */
- spin_lock(&cache->lock);
- if (load_cache_only) {
- cache->caching_ctl = NULL;
- cache->cached = BTRFS_CACHE_NO;
- } else {
- cache->cached = BTRFS_CACHE_STARTED;
- }
- spin_unlock(&cache->lock);
- wake_up(&caching_ctl->wait);
- }
-
- if (load_cache_only) {
- put_caching_control(caching_ctl);
- return 0;
- }
-
- down_write(&fs_info->extent_commit_sem);
- atomic_inc(&caching_ctl->count);
- list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
- up_write(&fs_info->extent_commit_sem);
-
- btrfs_get_block_group(cache);
-
- btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);
-
- return ret;
-}
-
-/*
- * return the block group that starts at or after bytenr
- */
-static struct btrfs_block_group_cache *
-btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
-{
- struct btrfs_block_group_cache *cache;
-
- cache = block_group_cache_tree_search(info, bytenr, 0);
-
- return cache;
-}
-
-/*
- * return the block group that contains the given bytenr
- */
-struct btrfs_block_group_cache *btrfs_lookup_block_group(
- struct btrfs_fs_info *info,
- u64 bytenr)
-{
- struct btrfs_block_group_cache *cache;
-
- cache = block_group_cache_tree_search(info, bytenr, 1);
-
- return cache;
-}
-
-static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
- u64 flags)
-{
- struct list_head *head = &info->space_info;
- struct btrfs_space_info *found;
-
- flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
-
- rcu_read_lock();
- list_for_each_entry_rcu(found, head, list) {
- if (found->flags & flags) {
- rcu_read_unlock();
- return found;
- }
- }
- rcu_read_unlock();
- return NULL;
-}
-
-/*
- * after adding space to the filesystem, we need to clear the full flags
- * on all the space infos.
- */
-void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
-{
- struct list_head *head = &info->space_info;
- struct btrfs_space_info *found;
-
- rcu_read_lock();
- list_for_each_entry_rcu(found, head, list)
- found->full = 0;
- rcu_read_unlock();
-}
-
-static u64 div_factor(u64 num, int factor)
-{
- if (factor == 10)
- return num;
- num *= factor;
- do_div(num, 10);
- return num;
-}
-
-static u64 div_factor_fine(u64 num, int factor)
-{
- if (factor == 100)
- return num;
- num *= factor;
- do_div(num, 100);
- return num;
-}
-
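A quick worked example (not from the original source) of the two percentage helpers above, which btrfs_find_block_group() below uses to ask whether a block group is less than roughly 90% full.

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* div_factor(num, 9): nine tenths of num, i.e. ~90% */
	assert(1024ULL * 9 / 10 == 921);
	/* div_factor_fine(num, 85): 85 hundredths of num, i.e. 85% */
	assert(1024ULL * 85 / 100 == 870);
	return 0;
}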
-u64 btrfs_find_block_group(struct btrfs_root *root,
- u64 search_start, u64 search_hint, int owner)
-{
- struct btrfs_block_group_cache *cache;
- u64 used;
- u64 last = max(search_hint, search_start);
- u64 group_start = 0;
- int full_search = 0;
- int factor = 9;
- int wrapped = 0;
-again:
- while (1) {
- cache = btrfs_lookup_first_block_group(root->fs_info, last);
- if (!cache)
- break;
-
- spin_lock(&cache->lock);
- last = cache->key.objectid + cache->key.offset;
- used = btrfs_block_group_used(&cache->item);
-
- if ((full_search || !cache->ro) &&
- block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
- if (used + cache->pinned + cache->reserved <
- div_factor(cache->key.offset, factor)) {
- group_start = cache->key.objectid;
- spin_unlock(&cache->lock);
- btrfs_put_block_group(cache);
- goto found;
- }
- }
- spin_unlock(&cache->lock);
- btrfs_put_block_group(cache);
- cond_resched();
- }
- if (!wrapped) {
- last = search_start;
- wrapped = 1;
- goto again;
- }
- if (!full_search && factor < 10) {
- last = search_start;
- full_search = 1;
- factor = 10;
- goto again;
- }
-found:
- return group_start;
-}
-
-/* simple helper to search for an existing extent at a given offset */
-int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
-{
- int ret;
- struct btrfs_key key;
- struct btrfs_path *path;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = start;
- key.offset = len;
- btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
- ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
- 0, 0);
- btrfs_free_path(path);
- return ret;
-}
-
-/*
- * helper function to look up the reference count and flags of an extent.
- *
- * The head node for a delayed ref is used to store the sum of all the
- * reference count modifications queued up in the rbtree. The head
- * node may also store the extent flags to set. This way you can check
- * what the reference count and extent flags will be, even though the
- * delayed refs have not been processed yet.
- */
-int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u64 *refs, u64 *flags)
-{
- struct btrfs_delayed_ref_head *head;
- struct btrfs_delayed_ref_root *delayed_refs;
- struct btrfs_path *path;
- struct btrfs_extent_item *ei;
- struct extent_buffer *leaf;
- struct btrfs_key key;
- u32 item_size;
- u64 num_refs;
- u64 extent_flags;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = bytenr;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = num_bytes;
- if (!trans) {
- path->skip_locking = 1;
- path->search_commit_root = 1;
- }
-again:
- ret = btrfs_search_slot(trans, root->fs_info->extent_root,
- &key, path, 0, 0);
- if (ret < 0)
- goto out_free;
-
- if (ret == 0) {
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
- if (item_size >= sizeof(*ei)) {
- ei = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_item);
- num_refs = btrfs_extent_refs(leaf, ei);
- extent_flags = btrfs_extent_flags(leaf, ei);
- } else {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- struct btrfs_extent_item_v0 *ei0;
- BUG_ON(item_size != sizeof(*ei0));
- ei0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_item_v0);
- num_refs = btrfs_extent_refs_v0(leaf, ei0);
- /* FIXME: this isn't correct for data */
- extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
-#else
- BUG();
-#endif
- }
- BUG_ON(num_refs == 0);
- } else {
- num_refs = 0;
- extent_flags = 0;
- ret = 0;
- }
-
- if (!trans)
- goto out;
-
- delayed_refs = &trans->transaction->delayed_refs;
- spin_lock(&delayed_refs->lock);
- head = btrfs_find_delayed_ref_head(trans, bytenr);
- if (head) {
- if (!mutex_trylock(&head->mutex)) {
- atomic_inc(&head->node.refs);
- spin_unlock(&delayed_refs->lock);
-
- btrfs_release_path(path);
-
- /*
- * Mutex was contended, block until it's released and try
- * again
- */
- mutex_lock(&head->mutex);
- mutex_unlock(&head->mutex);
- btrfs_put_delayed_ref(&head->node);
- goto again;
- }
- if (head->extent_op && head->extent_op->update_flags)
- extent_flags |= head->extent_op->flags_to_set;
- else
- BUG_ON(num_refs == 0);
-
- num_refs += head->node.ref_mod;
- mutex_unlock(&head->mutex);
- }
- spin_unlock(&delayed_refs->lock);
-out:
- WARN_ON(num_refs == 0);
- if (refs)
- *refs = num_refs;
- if (flags)
- *flags = extent_flags;
-out_free:
- btrfs_free_path(path);
- return ret;
-}
-
-/*
- * Back reference rules. Back refs have three main goals:
- *
- * 1) differentiate between all holders of references to an extent so that
- * when a reference is dropped we can make sure it was a valid reference
- * before freeing the extent.
- *
- * 2) Provide enough information to quickly find the holders of an extent
- * if we notice a given block is corrupted or bad.
- *
- * 3) Make it easy to migrate blocks for FS shrinking or storage pool
- * maintenance. This is actually the same as #2, but with a slightly
- * different use case.
- *
- * There are two kinds of back refs. Implicit back refs are optimized
- * for pointers in non-shared tree blocks. For a given pointer in a block,
- * back refs of this kind provide information about the block's owner tree
- * and the pointer's key. This information allows us to find the block by
- * b-tree search. Full back refs are for pointers in tree blocks not
- * referenced by their owner trees. The location of the tree block is recorded
- * in the back refs. Full back refs are actually generic and can be
- * used in all cases where implicit back refs are used. The major shortcoming
- * of full back refs is their overhead: every time a tree block gets
- * COWed, we have to update the back ref entries for all pointers in it.
- *
- * For a newly allocated tree block, we use implicit back refs for
- * pointers in it. This means most tree-related operations only involve
- * implicit back refs. For a tree block created in an old transaction, the
- * only way to drop a reference to it is to COW it. So we can detect the
- * event that a tree block loses its owner tree's reference and do the
- * back ref conversion.
- *
- * When a tree block is COW'd through a tree, there are four cases:
- *
- * The reference count of the block is one and the tree is the block's
- * owner tree. Nothing to do in this case.
- *
- * The reference count of the block is one and the tree is not the
- * block's owner tree. In this case, full back refs are used for pointers
- * in the block. Remove these full back refs and add implicit back refs for
- * every pointer in the new block.
- *
- * The reference count of the block is greater than one and the tree is
- * the block's owner tree. In this case, implicit back refs are used for
- * pointers in the block. Add full back refs for every pointer in the
- * block and increase the lower level extents' reference counts. The original
- * implicit back refs are carried over to the new block.
- *
- * The reference count of the block is greater than one and the tree is
- * not the block's owner tree. Add implicit back refs for every pointer in
- * the new block, increase lower level extents' reference count.
- *
- * Back Reference Key composing:
- *
- * The key objectid corresponds to the first byte in the extent,
- * The key type is used to differentiate between types of back refs.
- * There are different meanings of the key offset for different types
- * of back refs.
- *
- * File extents can be referenced by:
- *
- * - multiple snapshots, subvolumes, or different generations in one subvol
- * - different files inside a single subvolume
- * - different offsets inside a file (bookend extents in file.c)
- *
- * The extent ref structure for the implicit back refs has fields for:
- *
- * - Objectid of the subvolume root
- * - objectid of the file holding the reference
- * - original offset in the file
- * - how many bookend extents
- *
- * The key offset for the implicit back refs is the hash of the first
- * three fields.
- *
- * The extent ref structure for the full back refs has a field for:
- *
- * - number of pointers in the tree leaf
- *
- * The key offset for the full back refs is the first byte of
- * the tree leaf.
- *
- * When a file extent is allocated, the implicit back refs are used;
- * the fields are filled in:
- *
- * (root_key.objectid, inode objectid, offset in file, 1)
- *
- * When a file extent is removed by file truncation, we find the
- * corresponding implicit back refs and check the following fields:
- *
- * (btrfs_header_owner(leaf), inode objectid, offset in file)
- *
- * Btree extents can be referenced by:
- *
- * - Different subvolumes
- *
- * Both the implicit back refs and the full back refs for tree blocks
- * only consist of a key. The key offset for the implicit back refs is the
- * objectid of the block's owner tree. The key offset for the full back refs
- * is the first byte of the parent block.
- *
- * When implicit back refs are used, information about the lowest key and
- * the level of the tree block is required. This information is stored in
- * the tree block info structure.
- */
-
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 owner, u32 extra_size)
-{
- struct btrfs_extent_item *item;
- struct btrfs_extent_item_v0 *ei0;
- struct btrfs_extent_ref_v0 *ref0;
- struct btrfs_tree_block_info *bi;
- struct extent_buffer *leaf;
- struct btrfs_key key;
- struct btrfs_key found_key;
- u32 new_size = sizeof(*item);
- u64 refs;
- int ret;
-
- leaf = path->nodes[0];
- BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- ei0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_item_v0);
- refs = btrfs_extent_refs_v0(leaf, ei0);
-
- if (owner == (u64)-1) {
- while (1) {
- if (path->slots[0] >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- return ret;
- BUG_ON(ret > 0); /* Corruption */
- leaf = path->nodes[0];
- }
- btrfs_item_key_to_cpu(leaf, &found_key,
- path->slots[0]);
- BUG_ON(key.objectid != found_key.objectid);
- if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
- path->slots[0]++;
- continue;
- }
- ref0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_ref_v0);
- owner = btrfs_ref_objectid_v0(leaf, ref0);
- break;
- }
- }
- btrfs_release_path(path);
-
- if (owner < BTRFS_FIRST_FREE_OBJECTID)
- new_size += sizeof(*bi);
-
- new_size -= sizeof(*ei0);
- ret = btrfs_search_slot(trans, root, &key, path,
- new_size + extra_size, 1);
- if (ret < 0)
- return ret;
- BUG_ON(ret); /* Corruption */
-
- btrfs_extend_item(trans, root, path, new_size);
-
- leaf = path->nodes[0];
- item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
- btrfs_set_extent_refs(leaf, item, refs);
- /* FIXME: get real generation */
- btrfs_set_extent_generation(leaf, item, 0);
- if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- btrfs_set_extent_flags(leaf, item,
- BTRFS_EXTENT_FLAG_TREE_BLOCK |
- BTRFS_BLOCK_FLAG_FULL_BACKREF);
- bi = (struct btrfs_tree_block_info *)(item + 1);
- /* FIXME: get first key of the block */
- memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
- btrfs_set_tree_block_level(leaf, bi, (int)owner);
- } else {
- btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
- }
- btrfs_mark_buffer_dirty(leaf);
- return 0;
-}
-#endif
-
-static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
-{
- u32 high_crc = ~(u32)0;
- u32 low_crc = ~(u32)0;
- __le64 lenum;
-
- lenum = cpu_to_le64(root_objectid);
- high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
- lenum = cpu_to_le64(owner);
- low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
- lenum = cpu_to_le64(offset);
- low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
-
- return ((u64)high_crc << 31) ^ (u64)low_crc;
-}
-
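A self-contained sketch (not part of the original source) of how the EXTENT_DATA_REF key offset is derived by hash_extent_data_ref() above. crc32c_sw() is a plain bitwise Castagnoli CRC used only as a stand-in for the kernel's crc32c() helper, and the sketch assumes a little-endian host in place of cpu_to_le64().

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Bitwise CRC32C (Castagnoli, reflected polynomial 0x82F63B78): a simple
 * stand-in for the kernel's crc32c() helper. */
static uint32_t crc32c_sw(uint32_t crc, const void *data, size_t len)
{
	const uint8_t *p = data;

	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
	}
	return crc;
}

/* Same shape as hash_extent_data_ref(): hash the subvolume root id, the
 * inode number and the file offset into a single 64-bit key offset. */
static uint64_t hash_data_ref(uint64_t root_objectid, uint64_t owner,
			      uint64_t offset)
{
	uint32_t high_crc = ~(uint32_t)0;
	uint32_t low_crc = ~(uint32_t)0;
	uint64_t le;

	le = root_objectid;		/* little-endian host assumed for the sketch */
	high_crc = crc32c_sw(high_crc, &le, sizeof(le));
	le = owner;
	low_crc = crc32c_sw(low_crc, &le, sizeof(le));
	le = offset;
	low_crc = crc32c_sw(low_crc, &le, sizeof(le));

	return ((uint64_t)high_crc << 31) ^ (uint64_t)low_crc;
}

int main(void)
{
	/* e.g. root 5 (the default FS tree), inode 257, file offset 0 */
	printf("key offset = %llu\n",
	       (unsigned long long)hash_data_ref(5, 257, 0));
	return 0;
}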
-static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
- struct btrfs_extent_data_ref *ref)
-{
- return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
- btrfs_extent_data_ref_objectid(leaf, ref),
- btrfs_extent_data_ref_offset(leaf, ref));
-}
-
-static int match_extent_data_ref(struct extent_buffer *leaf,
- struct btrfs_extent_data_ref *ref,
- u64 root_objectid, u64 owner, u64 offset)
-{
- if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
- btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
- btrfs_extent_data_ref_offset(leaf, ref) != offset)
- return 0;
- return 1;
-}
-
-static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 bytenr, u64 parent,
- u64 root_objectid,
- u64 owner, u64 offset)
-{
- struct btrfs_key key;
- struct btrfs_extent_data_ref *ref;
- struct extent_buffer *leaf;
- u32 nritems;
- int ret;
- int recow;
- int err = -ENOENT;
-
- key.objectid = bytenr;
- if (parent) {
- key.type = BTRFS_SHARED_DATA_REF_KEY;
- key.offset = parent;
- } else {
- key.type = BTRFS_EXTENT_DATA_REF_KEY;
- key.offset = hash_extent_data_ref(root_objectid,
- owner, offset);
- }
-again:
- recow = 0;
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0) {
- err = ret;
- goto fail;
- }
-
- if (parent) {
- if (!ret)
- return 0;
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- key.type = BTRFS_EXTENT_REF_V0_KEY;
- btrfs_release_path(path);
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0) {
- err = ret;
- goto fail;
- }
- if (!ret)
- return 0;
-#endif
- goto fail;
- }
-
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- while (1) {
- if (path->slots[0] >= nritems) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- err = ret;
- if (ret)
- goto fail;
-
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- recow = 1;
- }
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.objectid != bytenr ||
- key.type != BTRFS_EXTENT_DATA_REF_KEY)
- goto fail;
-
- ref = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_data_ref);
-
- if (match_extent_data_ref(leaf, ref, root_objectid,
- owner, offset)) {
- if (recow) {
- btrfs_release_path(path);
- goto again;
- }
- err = 0;
- break;
- }
- path->slots[0]++;
- }
-fail:
- return err;
-}
-
-static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 bytenr, u64 parent,
- u64 root_objectid, u64 owner,
- u64 offset, int refs_to_add)
-{
- struct btrfs_key key;
- struct extent_buffer *leaf;
- u32 size;
- u32 num_refs;
- int ret;
-
- key.objectid = bytenr;
- if (parent) {
- key.type = BTRFS_SHARED_DATA_REF_KEY;
- key.offset = parent;
- size = sizeof(struct btrfs_shared_data_ref);
- } else {
- key.type = BTRFS_EXTENT_DATA_REF_KEY;
- key.offset = hash_extent_data_ref(root_objectid,
- owner, offset);
- size = sizeof(struct btrfs_extent_data_ref);
- }
-
- ret = btrfs_insert_empty_item(trans, root, path, &key, size);
- if (ret && ret != -EEXIST)
- goto fail;
-
- leaf = path->nodes[0];
- if (parent) {
- struct btrfs_shared_data_ref *ref;
- ref = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_shared_data_ref);
- if (ret == 0) {
- btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
- } else {
- num_refs = btrfs_shared_data_ref_count(leaf, ref);
- num_refs += refs_to_add;
- btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
- }
- } else {
- struct btrfs_extent_data_ref *ref;
- while (ret == -EEXIST) {
- ref = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_data_ref);
- if (match_extent_data_ref(leaf, ref, root_objectid,
- owner, offset))
- break;
- btrfs_release_path(path);
- key.offset++;
- ret = btrfs_insert_empty_item(trans, root, path, &key,
- size);
- if (ret && ret != -EEXIST)
- goto fail;
-
- leaf = path->nodes[0];
- }
- ref = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_data_ref);
- if (ret == 0) {
- btrfs_set_extent_data_ref_root(leaf, ref,
- root_objectid);
- btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
- btrfs_set_extent_data_ref_offset(leaf, ref, offset);
- btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
- } else {
- num_refs = btrfs_extent_data_ref_count(leaf, ref);
- num_refs += refs_to_add;
- btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
- }
- }
- btrfs_mark_buffer_dirty(leaf);
- ret = 0;
-fail:
- btrfs_release_path(path);
- return ret;
-}
-
-static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- int refs_to_drop)
-{
- struct btrfs_key key;
- struct btrfs_extent_data_ref *ref1 = NULL;
- struct btrfs_shared_data_ref *ref2 = NULL;
- struct extent_buffer *leaf;
- u32 num_refs = 0;
- int ret = 0;
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-
- if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
- ref1 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_data_ref);
- num_refs = btrfs_extent_data_ref_count(leaf, ref1);
- } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
- ref2 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_shared_data_ref);
- num_refs = btrfs_shared_data_ref_count(leaf, ref2);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
- struct btrfs_extent_ref_v0 *ref0;
- ref0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_ref_v0);
- num_refs = btrfs_ref_count_v0(leaf, ref0);
-#endif
- } else {
- BUG();
- }
-
- BUG_ON(num_refs < refs_to_drop);
- num_refs -= refs_to_drop;
-
- if (num_refs == 0) {
- ret = btrfs_del_item(trans, root, path);
- } else {
- if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
- btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
- else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
- btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- else {
- struct btrfs_extent_ref_v0 *ref0;
- ref0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_ref_v0);
- btrfs_set_ref_count_v0(leaf, ref0, num_refs);
- }
-#endif
- btrfs_mark_buffer_dirty(leaf);
- }
- return ret;
-}
-
-static noinline u32 extent_data_ref_count(struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_extent_inline_ref *iref)
-{
- struct btrfs_key key;
- struct extent_buffer *leaf;
- struct btrfs_extent_data_ref *ref1;
- struct btrfs_shared_data_ref *ref2;
- u32 num_refs = 0;
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (iref) {
- if (btrfs_extent_inline_ref_type(leaf, iref) ==
- BTRFS_EXTENT_DATA_REF_KEY) {
- ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
- num_refs = btrfs_extent_data_ref_count(leaf, ref1);
- } else {
- ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
- num_refs = btrfs_shared_data_ref_count(leaf, ref2);
- }
- } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
- ref1 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_data_ref);
- num_refs = btrfs_extent_data_ref_count(leaf, ref1);
- } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
- ref2 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_shared_data_ref);
- num_refs = btrfs_shared_data_ref_count(leaf, ref2);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
- struct btrfs_extent_ref_v0 *ref0;
- ref0 = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_ref_v0);
- num_refs = btrfs_ref_count_v0(leaf, ref0);
-#endif
- } else {
- WARN_ON(1);
- }
- return num_refs;
-}
-
-static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 bytenr, u64 parent,
- u64 root_objectid)
-{
- struct btrfs_key key;
- int ret;
-
- key.objectid = bytenr;
- if (parent) {
- key.type = BTRFS_SHARED_BLOCK_REF_KEY;
- key.offset = parent;
- } else {
- key.type = BTRFS_TREE_BLOCK_REF_KEY;
- key.offset = root_objectid;
- }
-
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret > 0)
- ret = -ENOENT;
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (ret == -ENOENT && parent) {
- btrfs_release_path(path);
- key.type = BTRFS_EXTENT_REF_V0_KEY;
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret > 0)
- ret = -ENOENT;
- }
-#endif
- return ret;
-}
-
-static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 bytenr, u64 parent,
- u64 root_objectid)
-{
- struct btrfs_key key;
- int ret;
-
- key.objectid = bytenr;
- if (parent) {
- key.type = BTRFS_SHARED_BLOCK_REF_KEY;
- key.offset = parent;
- } else {
- key.type = BTRFS_TREE_BLOCK_REF_KEY;
- key.offset = root_objectid;
- }
-
- ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
- btrfs_release_path(path);
- return ret;
-}
-
-static inline int extent_ref_type(u64 parent, u64 owner)
-{
- int type;
- if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- if (parent > 0)
- type = BTRFS_SHARED_BLOCK_REF_KEY;
- else
- type = BTRFS_TREE_BLOCK_REF_KEY;
- } else {
- if (parent > 0)
- type = BTRFS_SHARED_DATA_REF_KEY;
- else
- type = BTRFS_EXTENT_DATA_REF_KEY;
- }
- return type;
-}
-
-static int find_next_key(struct btrfs_path *path, int level,
- struct btrfs_key *key)
-
-{
- for (; level < BTRFS_MAX_LEVEL; level++) {
- if (!path->nodes[level])
- break;
- if (path->slots[level] + 1 >=
- btrfs_header_nritems(path->nodes[level]))
- continue;
- if (level == 0)
- btrfs_item_key_to_cpu(path->nodes[level], key,
- path->slots[level] + 1);
- else
- btrfs_node_key_to_cpu(path->nodes[level], key,
- path->slots[level] + 1);
- return 0;
- }
- return 1;
-}
-
-/*
- * look for inline back ref. if back ref is found, *ref_ret is set
- * to the address of inline back ref, and 0 is returned.
- *
- * if back ref isn't found, *ref_ret is set to the address where it
- * should be inserted, and -ENOENT is returned.
- *
- * if insert is true and there are too many inline back refs, the path
- * points to the extent item, and -EAGAIN is returned.
- *
- * NOTE: inline back refs are ordered in the same way that back ref
- * items in the tree are ordered.
- */
-static noinline_for_stack
-int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_extent_inline_ref **ref_ret,
- u64 bytenr, u64 num_bytes,
- u64 parent, u64 root_objectid,
- u64 owner, u64 offset, int insert)
-{
- struct btrfs_key key;
- struct extent_buffer *leaf;
- struct btrfs_extent_item *ei;
- struct btrfs_extent_inline_ref *iref;
- u64 flags;
- u64 item_size;
- unsigned long ptr;
- unsigned long end;
- int extra_size;
- int type;
- int want;
- int ret;
- int err = 0;
-
- key.objectid = bytenr;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = num_bytes;
-
- want = extent_ref_type(parent, owner);
- if (insert) {
- extra_size = btrfs_extent_inline_ref_size(want);
- path->keep_locks = 1;
- } else
- extra_size = -1;
- ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- if (ret && !insert) {
- err = -ENOENT;
- goto out;
- }
- BUG_ON(ret); /* Corruption */
-
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (item_size < sizeof(*ei)) {
- if (!insert) {
- err = -ENOENT;
- goto out;
- }
- ret = convert_extent_item_v0(trans, root, path, owner,
- extra_size);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
- }
-#endif
- BUG_ON(item_size < sizeof(*ei));
-
- ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
- flags = btrfs_extent_flags(leaf, ei);
-
- ptr = (unsigned long)(ei + 1);
- end = (unsigned long)ei + item_size;
-
- if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- ptr += sizeof(struct btrfs_tree_block_info);
- BUG_ON(ptr > end);
- } else {
- BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
- }
-
- err = -ENOENT;
- while (1) {
- if (ptr >= end) {
- WARN_ON(ptr > end);
- break;
- }
- iref = (struct btrfs_extent_inline_ref *)ptr;
- type = btrfs_extent_inline_ref_type(leaf, iref);
- if (want < type)
- break;
- if (want > type) {
- ptr += btrfs_extent_inline_ref_size(type);
- continue;
- }
-
- if (type == BTRFS_EXTENT_DATA_REF_KEY) {
- struct btrfs_extent_data_ref *dref;
- dref = (struct btrfs_extent_data_ref *)(&iref->offset);
- if (match_extent_data_ref(leaf, dref, root_objectid,
- owner, offset)) {
- err = 0;
- break;
- }
- if (hash_extent_data_ref_item(leaf, dref) <
- hash_extent_data_ref(root_objectid, owner, offset))
- break;
- } else {
- u64 ref_offset;
- ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
- if (parent > 0) {
- if (parent == ref_offset) {
- err = 0;
- break;
- }
- if (ref_offset < parent)
- break;
- } else {
- if (root_objectid == ref_offset) {
- err = 0;
- break;
- }
- if (ref_offset < root_objectid)
- break;
- }
- }
- ptr += btrfs_extent_inline_ref_size(type);
- }
- if (err == -ENOENT && insert) {
- if (item_size + extra_size >=
- BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
- err = -EAGAIN;
- goto out;
- }
- /*
- * To add a new inline back ref, we have to make sure
- * there is no corresponding back ref item.
- * For simplicity, we just do not add a new inline back
- * ref if there is any kind of item for this block.
- */
- if (find_next_key(path, 0, &key) == 0 &&
- key.objectid == bytenr &&
- key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
- err = -EAGAIN;
- goto out;
- }
- }
- *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
-out:
- if (insert) {
- path->keep_locks = 0;
- btrfs_unlock_up_safe(path, 1);
- }
- return err;
-}
-
-/*
- * helper to add new inline back ref
- */
-static noinline_for_stack
-void setup_inline_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_extent_inline_ref *iref,
- u64 parent, u64 root_objectid,
- u64 owner, u64 offset, int refs_to_add,
- struct btrfs_delayed_extent_op *extent_op)
-{
- struct extent_buffer *leaf;
- struct btrfs_extent_item *ei;
- unsigned long ptr;
- unsigned long end;
- unsigned long item_offset;
- u64 refs;
- int size;
- int type;
-
- leaf = path->nodes[0];
- ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
- item_offset = (unsigned long)iref - (unsigned long)ei;
-
- type = extent_ref_type(parent, owner);
- size = btrfs_extent_inline_ref_size(type);
-
- btrfs_extend_item(trans, root, path, size);
-
- ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
- refs = btrfs_extent_refs(leaf, ei);
- refs += refs_to_add;
- btrfs_set_extent_refs(leaf, ei, refs);
- if (extent_op)
- __run_delayed_extent_op(extent_op, leaf, ei);
-
- ptr = (unsigned long)ei + item_offset;
- end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
- if (ptr < end - size)
- memmove_extent_buffer(leaf, ptr + size, ptr,
- end - size - ptr);
-
- iref = (struct btrfs_extent_inline_ref *)ptr;
- btrfs_set_extent_inline_ref_type(leaf, iref, type);
- if (type == BTRFS_EXTENT_DATA_REF_KEY) {
- struct btrfs_extent_data_ref *dref;
- dref = (struct btrfs_extent_data_ref *)(&iref->offset);
- btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
- btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
- btrfs_set_extent_data_ref_offset(leaf, dref, offset);
- btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
- } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
- struct btrfs_shared_data_ref *sref;
- sref = (struct btrfs_shared_data_ref *)(iref + 1);
- btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
- btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
- } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
- btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
- } else {
- btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
- }
- btrfs_mark_buffer_dirty(leaf);
-}
-
-static int lookup_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_extent_inline_ref **ref_ret,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset)
-{
- int ret;
-
- ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
- bytenr, num_bytes, parent,
- root_objectid, owner, offset, 0);
- if (ret != -ENOENT)
- return ret;
-
- btrfs_release_path(path);
- *ref_ret = NULL;
-
- if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
- root_objectid);
- } else {
- ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
- root_objectid, owner, offset);
- }
- return ret;
-}
-
-/*
- * helper to update/remove inline back ref
- */
-static noinline_for_stack
-void update_inline_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_extent_inline_ref *iref,
- int refs_to_mod,
- struct btrfs_delayed_extent_op *extent_op)
-{
- struct extent_buffer *leaf;
- struct btrfs_extent_item *ei;
- struct btrfs_extent_data_ref *dref = NULL;
- struct btrfs_shared_data_ref *sref = NULL;
- unsigned long ptr;
- unsigned long end;
- u32 item_size;
- int size;
- int type;
- u64 refs;
-
- leaf = path->nodes[0];
- ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
- refs = btrfs_extent_refs(leaf, ei);
- WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
- refs += refs_to_mod;
- btrfs_set_extent_refs(leaf, ei, refs);
- if (extent_op)
- __run_delayed_extent_op(extent_op, leaf, ei);
-
- type = btrfs_extent_inline_ref_type(leaf, iref);
-
- if (type == BTRFS_EXTENT_DATA_REF_KEY) {
- dref = (struct btrfs_extent_data_ref *)(&iref->offset);
- refs = btrfs_extent_data_ref_count(leaf, dref);
- } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
- sref = (struct btrfs_shared_data_ref *)(iref + 1);
- refs = btrfs_shared_data_ref_count(leaf, sref);
- } else {
- refs = 1;
- BUG_ON(refs_to_mod != -1);
- }
-
- BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
- refs += refs_to_mod;
-
- if (refs > 0) {
- if (type == BTRFS_EXTENT_DATA_REF_KEY)
- btrfs_set_extent_data_ref_count(leaf, dref, refs);
- else
- btrfs_set_shared_data_ref_count(leaf, sref, refs);
- } else {
- size = btrfs_extent_inline_ref_size(type);
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
- ptr = (unsigned long)iref;
- end = (unsigned long)ei + item_size;
- if (ptr + size < end)
- memmove_extent_buffer(leaf, ptr, ptr + size,
- end - ptr - size);
- item_size -= size;
- btrfs_truncate_item(trans, root, path, item_size, 1);
- }
- btrfs_mark_buffer_dirty(leaf);
-}
-
-static noinline_for_stack
-int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner,
- u64 offset, int refs_to_add,
- struct btrfs_delayed_extent_op *extent_op)
-{
- struct btrfs_extent_inline_ref *iref;
- int ret;
-
- ret = lookup_inline_extent_backref(trans, root, path, &iref,
- bytenr, num_bytes, parent,
- root_objectid, owner, offset, 1);
- if (ret == 0) {
- BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
- update_inline_extent_backref(trans, root, path, iref,
- refs_to_add, extent_op);
- } else if (ret == -ENOENT) {
- setup_inline_extent_backref(trans, root, path, iref, parent,
- root_objectid, owner, offset,
- refs_to_add, extent_op);
- ret = 0;
- }
- return ret;
-}
-
-static int insert_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 bytenr, u64 parent, u64 root_objectid,
- u64 owner, u64 offset, int refs_to_add)
-{
- int ret;
- if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- BUG_ON(refs_to_add != 1);
- ret = insert_tree_block_ref(trans, root, path, bytenr,
- parent, root_objectid);
- } else {
- ret = insert_extent_data_ref(trans, root, path, bytenr,
- parent, root_objectid,
- owner, offset, refs_to_add);
- }
- return ret;
-}
-
-static int remove_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_extent_inline_ref *iref,
- int refs_to_drop, int is_data)
-{
- int ret = 0;
-
- BUG_ON(!is_data && refs_to_drop != 1);
- if (iref) {
- update_inline_extent_backref(trans, root, path, iref,
- -refs_to_drop, NULL);
- } else if (is_data) {
- ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
- } else {
- ret = btrfs_del_item(trans, root, path);
- }
- return ret;
-}
-
-static int btrfs_issue_discard(struct block_device *bdev,
- u64 start, u64 len)
-{
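- /* blkdev_issue_discard() takes 512-byte sector offsets/counts, hence the >> 9 */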
- return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
-}
-
-static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u64 *actual_bytes)
-{
- int ret;
- u64 discarded_bytes = 0;
- struct btrfs_bio *bbio = NULL;
-
-
- /* Tell the block device(s) that the sectors can be discarded */
- ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
- bytenr, &num_bytes, &bbio, 0);
- /* Error condition is -ENOMEM */
- if (!ret) {
- struct btrfs_bio_stripe *stripe = bbio->stripes;
- int i;
-
-
- for (i = 0; i < bbio->num_stripes; i++, stripe++) {
- if (!stripe->dev->can_discard)
- continue;
-
- ret = btrfs_issue_discard(stripe->dev->bdev,
- stripe->physical,
- stripe->length);
- if (!ret)
- discarded_bytes += stripe->length;
- else if (ret != -EOPNOTSUPP)
- break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */
-
- /*
- * Just in case we get back EOPNOTSUPP for some reason,
- * just ignore the return value so we don't screw up
- * people calling discard_extent.
- */
- ret = 0;
- }
- kfree(bbio);
- }
-
- if (actual_bytes)
- *actual_bytes = discarded_bytes;
-
-
- return ret;
-}
-
-/* Can return -ENOMEM */
-int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner, u64 offset, int for_cow)
-{
- int ret;
- struct btrfs_fs_info *fs_info = root->fs_info;
-
- BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
- root_objectid == BTRFS_TREE_LOG_OBJECTID);
-
- if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
- num_bytes,
- parent, root_objectid, (int)owner,
- BTRFS_ADD_DELAYED_REF, NULL, for_cow);
- } else {
- ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
- num_bytes,
- parent, root_objectid, owner, offset,
- BTRFS_ADD_DELAYED_REF, NULL, for_cow);
- }
- return ret;
-}
-
-static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes,
- u64 parent, u64 root_objectid,
- u64 owner, u64 offset, int refs_to_add,
- struct btrfs_delayed_extent_op *extent_op)
-{
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_extent_item *item;
- u64 refs;
- int ret;
- int err = 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- path->reada = 1;
- path->leave_spinning = 1;
- /* this will setup the path even if it fails to insert the back ref */
- ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
- path, bytenr, num_bytes, parent,
- root_objectid, owner, offset,
- refs_to_add, extent_op);
- if (ret == 0)
- goto out;
-
- if (ret != -EAGAIN) {
- err = ret;
- goto out;
- }
-
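- /* -EAGAIN: an inline backref could not be added; bump the
- * extent item's refcount here and insert a keyed backref below
- */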
- leaf = path->nodes[0];
- item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
- refs = btrfs_extent_refs(leaf, item);
- btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
- if (extent_op)
- __run_delayed_extent_op(extent_op, leaf, item);
-
- btrfs_mark_buffer_dirty(leaf);
- btrfs_release_path(path);
-
- path->reada = 1;
- path->leave_spinning = 1;
-
- /* now insert the actual backref */
- ret = insert_extent_backref(trans, root->fs_info->extent_root,
- path, bytenr, parent, root_objectid,
- owner, offset, refs_to_add);
- if (ret)
- btrfs_abort_transaction(trans, root, ret);
-out:
- btrfs_free_path(path);
- return err;
-}
-
-static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_delayed_ref_node *node,
- struct btrfs_delayed_extent_op *extent_op,
- int insert_reserved)
-{
- int ret = 0;
- struct btrfs_delayed_data_ref *ref;
- struct btrfs_key ins;
- u64 parent = 0;
- u64 ref_root = 0;
- u64 flags = 0;
-
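- /* key for the extent item, used if we must insert the reserved extent */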
- ins.objectid = node->bytenr;
- ins.offset = node->num_bytes;
- ins.type = BTRFS_EXTENT_ITEM_KEY;
-
- ref = btrfs_delayed_node_to_data_ref(node);
- if (node->type == BTRFS_SHARED_DATA_REF_KEY)
- parent = ref->parent;
- else
- ref_root = ref->root;
-
- if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
- if (extent_op) {
- BUG_ON(extent_op->update_key);
- flags |= extent_op->flags_to_set;
- }
- ret = alloc_reserved_file_extent(trans, root,
- parent, ref_root, flags,
- ref->objectid, ref->offset,
- &ins, node->ref_mod);
- } else if (node->action == BTRFS_ADD_DELAYED_REF) {
- ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
- node->num_bytes, parent,
- ref_root, ref->objectid,
- ref->offset, node->ref_mod,
- extent_op);
- } else if (node->action == BTRFS_DROP_DELAYED_REF) {
- ret = __btrfs_free_extent(trans, root, node->bytenr,
- node->num_bytes, parent,
- ref_root, ref->objectid,
- ref->offset, node->ref_mod,
- extent_op);
- } else {
- BUG();
- }
- return ret;
-}
-
-static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
- struct extent_buffer *leaf,
- struct btrfs_extent_item *ei)
-{
- u64 flags = btrfs_extent_flags(leaf, ei);
- if (extent_op->update_flags) {
- flags |= extent_op->flags_to_set;
- btrfs_set_extent_flags(leaf, ei, flags);
- }
-
- if (extent_op->update_key) {
- struct btrfs_tree_block_info *bi;
- BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
- bi = (struct btrfs_tree_block_info *)(ei + 1);
- btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
- }
-}
-
-static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_delayed_ref_node *node,
- struct btrfs_delayed_extent_op *extent_op)
-{
- struct btrfs_key key;
- struct btrfs_path *path;
- struct btrfs_extent_item *ei;
- struct extent_buffer *leaf;
- u32 item_size;
- int ret;
- int err = 0;
-
- if (trans->aborted)
- return 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = node->bytenr;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = node->num_bytes;
-
- path->reada = 1;
- path->leave_spinning = 1;
- ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
- path, 0, 1);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- if (ret > 0) {
- err = -EIO;
- goto out;
- }
-
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (item_size < sizeof(*ei)) {
- ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
- path, (u64)-1, 0);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
- }
-#endif
- BUG_ON(item_size < sizeof(*ei));
- ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
- __run_delayed_extent_op(extent_op, leaf, ei);
-
- btrfs_mark_buffer_dirty(leaf);
-out:
- btrfs_free_path(path);
- return err;
-}
-
-static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_delayed_ref_node *node,
- struct btrfs_delayed_extent_op *extent_op,
- int insert_reserved)
-{
- int ret = 0;
- struct btrfs_delayed_tree_ref *ref;
- struct btrfs_key ins;
- u64 parent = 0;
- u64 ref_root = 0;
-
- ins.objectid = node->bytenr;
- ins.offset = node->num_bytes;
- ins.type = BTRFS_EXTENT_ITEM_KEY;
-
- ref = btrfs_delayed_node_to_tree_ref(node);
- if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
- parent = ref->parent;
- else
- ref_root = ref->root;
-
- BUG_ON(node->ref_mod != 1);
- if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
- BUG_ON(!extent_op || !extent_op->update_flags ||
- !extent_op->update_key);
- ret = alloc_reserved_tree_block(trans, root,
- parent, ref_root,
- extent_op->flags_to_set,
- &extent_op->key,
- ref->level, &ins);
- } else if (node->action == BTRFS_ADD_DELAYED_REF) {
- ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
- node->num_bytes, parent, ref_root,
- ref->level, 0, 1, extent_op);
- } else if (node->action == BTRFS_DROP_DELAYED_REF) {
- ret = __btrfs_free_extent(trans, root, node->bytenr,
- node->num_bytes, parent, ref_root,
- ref->level, 0, 1, extent_op);
- } else {
- BUG();
- }
- return ret;
-}
-
-/* helper function to actually process a single delayed ref entry */
-static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_delayed_ref_node *node,
- struct btrfs_delayed_extent_op *extent_op,
- int insert_reserved)
-{
- int ret = 0;
-
- if (trans->aborted)
- return 0;
-
- if (btrfs_delayed_ref_is_head(node)) {
- struct btrfs_delayed_ref_head *head;
- /*
- * we've hit the end of the chain and we were supposed
- * to insert this extent into the tree. But, it got
- * deleted before we ever needed to insert it, so all
- * we have to do is clean up the accounting
- */
- BUG_ON(extent_op);
- head = btrfs_delayed_node_to_head(node);
- if (insert_reserved) {
- btrfs_pin_extent(root, node->bytenr,
- node->num_bytes, 1);
- if (head->is_data) {
- ret = btrfs_del_csums(trans, root,
- node->bytenr,
- node->num_bytes);
- }
- }
- mutex_unlock(&head->mutex);
- return ret;
- }
-
- if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
- node->type == BTRFS_SHARED_BLOCK_REF_KEY)
- ret = run_delayed_tree_ref(trans, root, node, extent_op,
- insert_reserved);
- else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
- node->type == BTRFS_SHARED_DATA_REF_KEY)
- ret = run_delayed_data_ref(trans, root, node, extent_op,
- insert_reserved);
- else
- BUG();
- return ret;
-}
-
-static noinline struct btrfs_delayed_ref_node *
-select_delayed_ref(struct btrfs_delayed_ref_head *head)
-{
- struct rb_node *node;
- struct btrfs_delayed_ref_node *ref;
- int action = BTRFS_ADD_DELAYED_REF;
-again:
- /*
- * Select delayed refs of type BTRFS_ADD_DELAYED_REF first.
- * This prevents the ref count from going down to zero while
- * there are still pending delayed refs.
- */
- node = rb_prev(&head->node.rb_node);
- while (1) {
- if (!node)
- break;
- ref = rb_entry(node, struct btrfs_delayed_ref_node,
- rb_node);
- if (ref->bytenr != head->node.bytenr)
- break;
- if (ref->action == action)
- return ref;
- node = rb_prev(node);
- }
- if (action == BTRFS_ADD_DELAYED_REF) {
- action = BTRFS_DROP_DELAYED_REF;
- goto again;
- }
- return NULL;
-}
-
-/*
- * Returns the number of refs processed on success or if called with an
- * already aborted transaction. Returns -ENOMEM or -EIO on failure, in which
- * case the caller aborts the transaction.
- */
-static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct list_head *cluster)
-{
- struct btrfs_delayed_ref_root *delayed_refs;
- struct btrfs_delayed_ref_node *ref;
- struct btrfs_delayed_ref_head *locked_ref = NULL;
- struct btrfs_delayed_extent_op *extent_op;
- int ret;
- int count = 0;
- int must_insert_reserved = 0;
-
- delayed_refs = &trans->transaction->delayed_refs;
- while (1) {
- if (!locked_ref) {
- /* pick a new head ref from the cluster list */
- if (list_empty(cluster))
- break;
-
- locked_ref = list_entry(cluster->next,
- struct btrfs_delayed_ref_head, cluster);
-
- /* grab the lock that says we are going to process
- * all the refs for this head */
- ret = btrfs_delayed_ref_lock(trans, locked_ref);
-
- /*
- * we may have dropped the spin lock to get the head
- * mutex lock, and that might have given someone else
- * time to free the head. If that's true, it has been
- * removed from our list and we can move on.
- */
- if (ret == -EAGAIN) {
- locked_ref = NULL;
- count++;
- continue;
- }
- }
-
- /*
- * locked_ref is the head node, so we have to go one
- * node back for any delayed ref updates
- */
- ref = select_delayed_ref(locked_ref);
-
- if (ref && ref->seq &&
- btrfs_check_delayed_seq(delayed_refs, ref->seq)) {
- /*
- * there are still refs with lower seq numbers in the
- * process of being added. Don't run this ref yet.
- */
- list_del_init(&locked_ref->cluster);
- mutex_unlock(&locked_ref->mutex);
- locked_ref = NULL;
- delayed_refs->num_heads_ready++;
- spin_unlock(&delayed_refs->lock);
- cond_resched();
- spin_lock(&delayed_refs->lock);
- continue;
- }
-
- /*
- * record the must insert reserved flag before we
- * drop the spin lock.
- */
- must_insert_reserved = locked_ref->must_insert_reserved;
- locked_ref->must_insert_reserved = 0;
-
- extent_op = locked_ref->extent_op;
- locked_ref->extent_op = NULL;
-
- if (!ref) {
- /* All delayed refs have been processed, go ahead
- * and send the head node to run_one_delayed_ref,
- * so that any accounting fixes can happen.
- */
- ref = &locked_ref->node;
-
- if (extent_op && must_insert_reserved) {
- kfree(extent_op);
- extent_op = NULL;
- }
-
- if (extent_op) {
- spin_unlock(&delayed_refs->lock);
-
- ret = run_delayed_extent_op(trans, root,
- ref, extent_op);
- kfree(extent_op);
-
- if (ret) {
- printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
- spin_lock(&delayed_refs->lock);
- return ret;
- }
-
- goto next;
- }
-
- list_del_init(&locked_ref->cluster);
- locked_ref = NULL;
- }
-
- ref->in_tree = 0;
- rb_erase(&ref->rb_node, &delayed_refs->root);
- delayed_refs->num_entries--;
- /*
- * we modified num_entries, but as we're currently running
- * delayed refs, skip
- * wake_up(&delayed_refs->seq_wait);
- * here.
- */
- spin_unlock(&delayed_refs->lock);
-
- ret = run_one_delayed_ref(trans, root, ref, extent_op,
- must_insert_reserved);
-
- btrfs_put_delayed_ref(ref);
- kfree(extent_op);
- count++;
-
- if (ret) {
- printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
- spin_lock(&delayed_refs->lock);
- return ret;
- }
-
-next:
- do_chunk_alloc(trans, root->fs_info->extent_root,
- 2 * 1024 * 1024,
- btrfs_get_alloc_profile(root, 0),
- CHUNK_ALLOC_NO_FORCE);
- cond_resched();
- spin_lock(&delayed_refs->lock);
- }
- return count;
-}
-
-
-static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
- unsigned long num_refs)
-{
- struct list_head *first_seq = delayed_refs->seq_head.next;
-
- spin_unlock(&delayed_refs->lock);
- pr_debug("waiting for more refs (num %ld, first %p)\n",
- num_refs, first_seq);
- wait_event(delayed_refs->seq_wait,
- num_refs != delayed_refs->num_entries ||
- delayed_refs->seq_head.next != first_seq);
- pr_debug("done waiting for more refs (num %ld, first %p)\n",
- delayed_refs->num_entries, delayed_refs->seq_head.next);
- spin_lock(&delayed_refs->lock);
-}
-
-/*
- * this starts processing the delayed reference count updates and
- * extent insertions we have queued up so far. count can be
- * 0, which means to process everything in the tree at the start
- * of the run (but not newly added entries), or it can be some target
- * number you'd like to process.
- *
- * Returns 0 on success or if called with an aborted transaction
- * Returns <0 on error and aborts the transaction
- */
-int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, unsigned long count)
-{
- struct rb_node *node;
- struct btrfs_delayed_ref_root *delayed_refs;
- struct btrfs_delayed_ref_node *ref;
- struct list_head cluster;
- int ret;
- u64 delayed_start;
- int run_all = count == (unsigned long)-1;
- int run_most = 0;
- unsigned long num_refs = 0;
- int consider_waiting;
-
- /* We'll clean this up in btrfs_cleanup_transaction */
- if (trans->aborted)
- return 0;
-
- if (root == root->fs_info->extent_root)
- root = root->fs_info->tree_root;
-
- do_chunk_alloc(trans, root->fs_info->extent_root,
- 2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0),
- CHUNK_ALLOC_NO_FORCE);
-
- delayed_refs = &trans->transaction->delayed_refs;
- INIT_LIST_HEAD(&cluster);
-again:
- consider_waiting = 0;
- spin_lock(&delayed_refs->lock);
- if (count == 0) {
- count = delayed_refs->num_entries * 2;
- run_most = 1;
- }
- while (1) {
- if (!(run_all || run_most) &&
- delayed_refs->num_heads_ready < 64)
- break;
-
- /*
- * go find something we can process in the rbtree. We start at
- * the beginning of the tree, and then build a cluster
- * of refs to process starting at the first one we are able to
- * lock
- */
- delayed_start = delayed_refs->run_delayed_start;
- ret = btrfs_find_ref_cluster(trans, &cluster,
- delayed_refs->run_delayed_start);
- if (ret)
- break;
-
- if (delayed_start >= delayed_refs->run_delayed_start) {
- if (consider_waiting == 0) {
- /*
- * btrfs_find_ref_cluster looped. Let's do one
- * more cycle. If we don't run any delayed refs
- * during that cycle (because all of them are
- * blocked) and the number of refs doesn't
- * change, we avoid busy waiting.
- */
- consider_waiting = 1;
- num_refs = delayed_refs->num_entries;
- } else {
- wait_for_more_refs(delayed_refs, num_refs);
- /*
- * after waiting, things have changed. we
- * dropped the lock and someone else might have
- * run some refs, built new clusters and so on.
- * therefore, we restart staleness detection.
- */
- consider_waiting = 0;
- }
- }
-
- ret = run_clustered_refs(trans, root, &cluster);
- if (ret < 0) {
- spin_unlock(&delayed_refs->lock);
- btrfs_abort_transaction(trans, root, ret);
- return ret;
- }
-
- count -= min_t(unsigned long, ret, count);
-
- if (count == 0)
- break;
-
- if (ret || delayed_refs->run_delayed_start == 0) {
- /* refs were run, let's reset staleness detection */
- consider_waiting = 0;
- }
- }
-
- if (run_all) {
- node = rb_first(&delayed_refs->root);
- if (!node)
- goto out;
- count = (unsigned long)-1;
-
- while (node) {
- ref = rb_entry(node, struct btrfs_delayed_ref_node,
- rb_node);
- if (btrfs_delayed_ref_is_head(ref)) {
- struct btrfs_delayed_ref_head *head;
-
- head = btrfs_delayed_node_to_head(ref);
- atomic_inc(&ref->refs);
-
- spin_unlock(&delayed_refs->lock);
- /*
- * Mutex was contended, block until it's
- * released and try again
- */
- mutex_lock(&head->mutex);
- mutex_unlock(&head->mutex);
-
- btrfs_put_delayed_ref(ref);
- cond_resched();
- goto again;
- }
- node = rb_next(node);
- }
- spin_unlock(&delayed_refs->lock);
- schedule_timeout(1);
- goto again;
- }
-out:
- spin_unlock(&delayed_refs->lock);
- return 0;
-}
-
-int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 flags,
- int is_data)
-{
- struct btrfs_delayed_extent_op *extent_op;
- int ret;
-
- extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
- if (!extent_op)
- return -ENOMEM;
-
- extent_op->flags_to_set = flags;
- extent_op->update_flags = 1;
- extent_op->update_key = 0;
- extent_op->is_data = is_data ? 1 : 0;
-
- ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
- num_bytes, extent_op);
- if (ret)
- kfree(extent_op);
- return ret;
-}
-
-static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 objectid, u64 offset, u64 bytenr)
-{
- struct btrfs_delayed_ref_head *head;
- struct btrfs_delayed_ref_node *ref;
- struct btrfs_delayed_data_ref *data_ref;
- struct btrfs_delayed_ref_root *delayed_refs;
- struct rb_node *node;
- int ret = 0;
-
- ret = -ENOENT;
- delayed_refs = &trans->transaction->delayed_refs;
- spin_lock(&delayed_refs->lock);
- head = btrfs_find_delayed_ref_head(trans, bytenr);
- if (!head)
- goto out;
-
- if (!mutex_trylock(&head->mutex)) {
- atomic_inc(&head->node.refs);
- spin_unlock(&delayed_refs->lock);
-
- btrfs_release_path(path);
-
- /*
- * Mutex was contended, block until it's released and let
- * caller try again
- */
- mutex_lock(&head->mutex);
- mutex_unlock(&head->mutex);
- btrfs_put_delayed_ref(&head->node);
- return -EAGAIN;
- }
-
- node = rb_prev(&head->node.rb_node);
- if (!node)
- goto out_unlock;
-
- ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-
- if (ref->bytenr != bytenr)
- goto out_unlock;
-
- ret = 1;
- if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
- goto out_unlock;
-
- data_ref = btrfs_delayed_node_to_data_ref(ref);
-
- node = rb_prev(node);
- if (node) {
- ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- if (ref->bytenr == bytenr)
- goto out_unlock;
- }
-
- if (data_ref->root != root->root_key.objectid ||
- data_ref->objectid != objectid || data_ref->offset != offset)
- goto out_unlock;
-
- ret = 0;
-out_unlock:
- mutex_unlock(&head->mutex);
-out:
- spin_unlock(&delayed_refs->lock);
- return ret;
-}
-
-static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 objectid, u64 offset, u64 bytenr)
-{
- struct btrfs_root *extent_root = root->fs_info->extent_root;
- struct extent_buffer *leaf;
- struct btrfs_extent_data_ref *ref;
- struct btrfs_extent_inline_ref *iref;
- struct btrfs_extent_item *ei;
- struct btrfs_key key;
- u32 item_size;
- int ret;
-
- key.objectid = bytenr;
- key.offset = (u64)-1;
- key.type = BTRFS_EXTENT_ITEM_KEY;
-
- ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- BUG_ON(ret == 0); /* Corruption */
-
- ret = -ENOENT;
- if (path->slots[0] == 0)
- goto out;
-
- path->slots[0]--;
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-
- if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
- goto out;
-
- ret = 1;
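- /* assume a cross ref exists until every check below rules it out */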
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (item_size < sizeof(*ei)) {
- WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
- goto out;
- }
-#endif
- ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
-
- if (item_size != sizeof(*ei) +
- btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
- goto out;
-
- if (btrfs_extent_generation(leaf, ei) <=
- btrfs_root_last_snapshot(&root->root_item))
- goto out;
-
- iref = (struct btrfs_extent_inline_ref *)(ei + 1);
- if (btrfs_extent_inline_ref_type(leaf, iref) !=
- BTRFS_EXTENT_DATA_REF_KEY)
- goto out;
-
- ref = (struct btrfs_extent_data_ref *)(&iref->offset);
- if (btrfs_extent_refs(leaf, ei) !=
- btrfs_extent_data_ref_count(leaf, ref) ||
- btrfs_extent_data_ref_root(leaf, ref) !=
- root->root_key.objectid ||
- btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
- btrfs_extent_data_ref_offset(leaf, ref) != offset)
- goto out;
-
- ret = 0;
-out:
- return ret;
-}
-
-int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 objectid, u64 offset, u64 bytenr)
-{
- struct btrfs_path *path;
- int ret;
- int ret2;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOENT;
-
- do {
- ret = check_committed_ref(trans, root, path, objectid,
- offset, bytenr);
- if (ret && ret != -ENOENT)
- goto out;
-
- ret2 = check_delayed_ref(trans, root, path, objectid,
- offset, bytenr);
- } while (ret2 == -EAGAIN);
-
- if (ret2 && ret2 != -ENOENT) {
- ret = ret2;
- goto out;
- }
-
- if (ret != -ENOENT || ret2 != -ENOENT)
- ret = 0;
-out:
- btrfs_free_path(path);
- if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
- WARN_ON(ret > 0);
- return ret;
-}
-
-static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *buf,
- int full_backref, int inc, int for_cow)
-{
- u64 bytenr;
- u64 num_bytes;
- u64 parent;
- u64 ref_root;
- u32 nritems;
- struct btrfs_key key;
- struct btrfs_file_extent_item *fi;
- int i;
- int level;
- int ret = 0;
- int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
- u64, u64, u64, u64, u64, u64, int);
-
- ref_root = btrfs_header_owner(buf);
- nritems = btrfs_header_nritems(buf);
- level = btrfs_header_level(buf);
-
- if (!root->ref_cows && level == 0)
- return 0;
-
- if (inc)
- process_func = btrfs_inc_extent_ref;
- else
- process_func = btrfs_free_extent;
-
- if (full_backref)
- parent = buf->start;
- else
- parent = 0;
-
- for (i = 0; i < nritems; i++) {
- if (level == 0) {
- btrfs_item_key_to_cpu(buf, &key, i);
- if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
- continue;
- fi = btrfs_item_ptr(buf, i,
- struct btrfs_file_extent_item);
- if (btrfs_file_extent_type(buf, fi) ==
- BTRFS_FILE_EXTENT_INLINE)
- continue;
- bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
- if (bytenr == 0)
- continue;
-
- num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
- key.offset -= btrfs_file_extent_offset(buf, fi);
- ret = process_func(trans, root, bytenr, num_bytes,
- parent, ref_root, key.objectid,
- key.offset, for_cow);
- if (ret)
- goto fail;
- } else {
- bytenr = btrfs_node_blockptr(buf, i);
- num_bytes = btrfs_level_size(root, level - 1);
- ret = process_func(trans, root, bytenr, num_bytes,
- parent, ref_root, level - 1, 0,
- for_cow);
- if (ret)
- goto fail;
- }
- }
- return 0;
-fail:
- return ret;
-}
-
-int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int for_cow)
-{
- return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow);
-}
-
-int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *buf, int full_backref, int for_cow)
-{
- return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow);
-}
-
-static int write_one_cache_group(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_block_group_cache *cache)
-{
- int ret;
- struct btrfs_root *extent_root = root->fs_info->extent_root;
- unsigned long bi;
- struct extent_buffer *leaf;
-
- ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
- if (ret < 0)
- goto fail;
- BUG_ON(ret); /* Corruption */
-
- leaf = path->nodes[0];
- bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
- write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
- btrfs_mark_buffer_dirty(leaf);
- btrfs_release_path(path);
-fail:
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- return ret;
- }
- return 0;
-
-}
-
-static struct btrfs_block_group_cache *
-next_block_group(struct btrfs_root *root,
- struct btrfs_block_group_cache *cache)
-{
- struct rb_node *node;
- spin_lock(&root->fs_info->block_group_cache_lock);
- node = rb_next(&cache->cache_node);
- btrfs_put_block_group(cache);
- if (node) {
- cache = rb_entry(node, struct btrfs_block_group_cache,
- cache_node);
- btrfs_get_block_group(cache);
- } else
- cache = NULL;
- spin_unlock(&root->fs_info->block_group_cache_lock);
- return cache;
-}
-
-static int cache_save_setup(struct btrfs_block_group_cache *block_group,
- struct btrfs_trans_handle *trans,
- struct btrfs_path *path)
-{
- struct btrfs_root *root = block_group->fs_info->tree_root;
- struct inode *inode = NULL;
- u64 alloc_hint = 0;
- int dcs = BTRFS_DC_ERROR;
- int num_pages = 0;
- int retries = 0;
- int ret = 0;
-
- /*
- * If this block group is smaller than 100 megs, don't bother caching
- * the block group.
- */
- if (block_group->key.offset < (100 * 1024 * 1024)) {
- spin_lock(&block_group->lock);
- block_group->disk_cache_state = BTRFS_DC_WRITTEN;
- spin_unlock(&block_group->lock);
- return 0;
- }
-
-again:
- inode = lookup_free_space_inode(root, block_group, path);
- if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
- ret = PTR_ERR(inode);
- btrfs_release_path(path);
- goto out;
- }
-
- if (IS_ERR(inode)) {
- BUG_ON(retries);
- retries++;
-
- if (block_group->ro)
- goto out_free;
-
- ret = create_free_space_inode(root, trans, block_group, path);
- if (ret)
- goto out_free;
- goto again;
- }
-
- /* We've already setup this transaction, go ahead and exit */
- if (block_group->cache_generation == trans->transid &&
- i_size_read(inode)) {
- dcs = BTRFS_DC_SETUP;
- goto out_put;
- }
-
- /*
- * We want to set the generation to 0, that way if anything goes wrong
- * from here on out we know not to trust this cache when we load up next
- * time.
- */
- BTRFS_I(inode)->generation = 0;
- ret = btrfs_update_inode(trans, root, inode);
- WARN_ON(ret);
-
- if (i_size_read(inode) > 0) {
- ret = btrfs_truncate_free_space_cache(root, trans, path,
- inode);
- if (ret)
- goto out_put;
- }
-
- spin_lock(&block_group->lock);
- if (block_group->cached != BTRFS_CACHE_FINISHED) {
- /* We're not cached, don't bother trying to write stuff out */
- dcs = BTRFS_DC_WRITTEN;
- spin_unlock(&block_group->lock);
- goto out_put;
- }
- spin_unlock(&block_group->lock);
-
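- /* start with one page of cache space per gigabyte of block group, minimum one */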
- num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
- if (!num_pages)
- num_pages = 1;
-
- /*
- * Just to make absolutely sure we have enough space, we're going to
- * preallocate 16 pages worth of space for each block group. In
- * practice we ought to use at most 8, but we need extra space so we can
- * add our header and have a terminator between the extents and the
- * bitmaps.
- */
- num_pages *= 16;
- num_pages *= PAGE_CACHE_SIZE;
-
- ret = btrfs_check_data_free_space(inode, num_pages);
- if (ret)
- goto out_put;
-
- ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
- num_pages, num_pages,
- &alloc_hint);
- if (!ret)
- dcs = BTRFS_DC_SETUP;
- btrfs_free_reserved_data_space(inode, num_pages);
-
-out_put:
- iput(inode);
-out_free:
- btrfs_release_path(path);
-out:
- spin_lock(&block_group->lock);
- if (!ret && dcs == BTRFS_DC_SETUP)
- block_group->cache_generation = trans->transid;
- block_group->disk_cache_state = dcs;
- spin_unlock(&block_group->lock);
-
- return ret;
-}
-
-int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_block_group_cache *cache;
- int err = 0;
- struct btrfs_path *path;
- u64 last = 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
-again:
- while (1) {
- cache = btrfs_lookup_first_block_group(root->fs_info, last);
- while (cache) {
- if (cache->disk_cache_state == BTRFS_DC_CLEAR)
- break;
- cache = next_block_group(root, cache);
- }
- if (!cache) {
- if (last == 0)
- break;
- last = 0;
- continue;
- }
- err = cache_save_setup(cache, trans, path);
- last = cache->key.objectid + cache->key.offset;
- btrfs_put_block_group(cache);
- }
-
- while (1) {
- if (last == 0) {
- err = btrfs_run_delayed_refs(trans, root,
- (unsigned long)-1);
- if (err) /* File system offline */
- goto out;
- }
-
- cache = btrfs_lookup_first_block_group(root->fs_info, last);
- while (cache) {
- if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
- btrfs_put_block_group(cache);
- goto again;
- }
-
- if (cache->dirty)
- break;
- cache = next_block_group(root, cache);
- }
- if (!cache) {
- if (last == 0)
- break;
- last = 0;
- continue;
- }
-
- if (cache->disk_cache_state == BTRFS_DC_SETUP)
- cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
- cache->dirty = 0;
- last = cache->key.objectid + cache->key.offset;
-
- err = write_one_cache_group(trans, root, path, cache);
- if (err) /* File system offline */
- goto out;
-
- btrfs_put_block_group(cache);
- }
-
- while (1) {
- /*
- * I don't think this is needed since we're just marking our
- * preallocated extent as written, but just in case it can't
- * hurt.
- */
- if (last == 0) {
- err = btrfs_run_delayed_refs(trans, root,
- (unsigned long)-1);
- if (err) /* File system offline */
- goto out;
- }
-
- cache = btrfs_lookup_first_block_group(root->fs_info, last);
- while (cache) {
- /*
- * Really this shouldn't happen, but it could if we
- * couldn't write the entire preallocated extent and
- * splitting the extent resulted in a new block.
- */
- if (cache->dirty) {
- btrfs_put_block_group(cache);
- goto again;
- }
- if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
- break;
- cache = next_block_group(root, cache);
- }
- if (!cache) {
- if (last == 0)
- break;
- last = 0;
- continue;
- }
-
- err = btrfs_write_out_cache(root, trans, cache, path);
-
- /*
- * If we didn't have an error then the cache state is still
- * NEED_WRITE, so we can set it to WRITTEN.
- */
- if (!err && cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
- cache->disk_cache_state = BTRFS_DC_WRITTEN;
- last = cache->key.objectid + cache->key.offset;
- btrfs_put_block_group(cache);
- }
-out:
-
- btrfs_free_path(path);
- return err;
-}
-
-int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
-{
- struct btrfs_block_group_cache *block_group;
- int readonly = 0;
-
- block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
- if (!block_group || block_group->ro)
- readonly = 1;
- if (block_group)
- btrfs_put_block_group(block_group);
- return readonly;
-}
-
-static int update_space_info(struct btrfs_fs_info *info, u64 flags,
- u64 total_bytes, u64 bytes_used,
- struct btrfs_space_info **space_info)
-{
- struct btrfs_space_info *found;
- int i;
- int factor;
-
- if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- factor = 2;
- else
- factor = 1;
-
- found = __find_space_info(info, flags);
- if (found) {
- spin_lock(&found->lock);
- found->total_bytes += total_bytes;
- found->disk_total += total_bytes * factor;
- found->bytes_used += bytes_used;
- found->disk_used += bytes_used * factor;
- found->full = 0;
- spin_unlock(&found->lock);
- *space_info = found;
- return 0;
- }
- found = kzalloc(sizeof(*found), GFP_NOFS);
- if (!found)
- return -ENOMEM;
-
- for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
- INIT_LIST_HEAD(&found->block_groups[i]);
- init_rwsem(&found->groups_sem);
- spin_lock_init(&found->lock);
- found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
- found->total_bytes = total_bytes;
- found->disk_total = total_bytes * factor;
- found->bytes_used = bytes_used;
- found->disk_used = bytes_used * factor;
- found->bytes_pinned = 0;
- found->bytes_reserved = 0;
- found->bytes_readonly = 0;
- found->bytes_may_use = 0;
- found->full = 0;
- found->force_alloc = CHUNK_ALLOC_NO_FORCE;
- found->chunk_alloc = 0;
- found->flush = 0;
- init_waitqueue_head(&found->wait);
- *space_info = found;
- list_add_rcu(&found->list, &info->space_info);
- return 0;
-}
-
-static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
-{
- u64 extra_flags = chunk_to_extended(flags) &
- BTRFS_EXTENDED_PROFILE_MASK;
-
- if (flags & BTRFS_BLOCK_GROUP_DATA)
- fs_info->avail_data_alloc_bits |= extra_flags;
- if (flags & BTRFS_BLOCK_GROUP_METADATA)
- fs_info->avail_metadata_alloc_bits |= extra_flags;
- if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
- fs_info->avail_system_alloc_bits |= extra_flags;
-}
-
-/*
- * returns target flags in extended format or 0 if restripe for this
- * chunk_type is not in progress
- *
- * should be called with either volume_mutex or balance_lock held
- */
-static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
-{
- struct btrfs_balance_control *bctl = fs_info->balance_ctl;
- u64 target = 0;
-
- if (!bctl)
- return 0;
-
- if (flags & BTRFS_BLOCK_GROUP_DATA &&
- bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
- target = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
- } else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
- bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
- target = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
- } else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
- bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
- target = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
- }
-
- return target;
-}
-
-/*
- * @flags: available profiles in extended format (see ctree.h)
- *
- * Returns reduced profile in chunk format. If profile changing is in
- * progress (either running or paused) picks the target profile (if it's
- * already available), otherwise falls back to plain reducing.
- */
-u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
-{
- /*
- * we add in the count of missing devices because we want
- * to make sure that any RAID levels on a degraded FS
- * continue to be honored.
- */
- u64 num_devices = root->fs_info->fs_devices->rw_devices +
- root->fs_info->fs_devices->missing_devices;
- u64 target;
-
- /*
- * see if restripe for this chunk_type is in progress, if so
- * try to reduce to the target profile
- */
- spin_lock(&root->fs_info->balance_lock);
- target = get_restripe_target(root->fs_info, flags);
- if (target) {
- /* pick target profile only if it's already available */
- if ((flags & target) & BTRFS_EXTENDED_PROFILE_MASK) {
- spin_unlock(&root->fs_info->balance_lock);
- return extended_to_chunk(target);
- }
- }
- spin_unlock(&root->fs_info->balance_lock);
-
- if (num_devices == 1)
- flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
- if (num_devices < 4)
- flags &= ~BTRFS_BLOCK_GROUP_RAID10;
-
- if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
- (flags & (BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))) {
- flags &= ~BTRFS_BLOCK_GROUP_DUP;
- }
-
- if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
- (flags & BTRFS_BLOCK_GROUP_RAID10)) {
- flags &= ~BTRFS_BLOCK_GROUP_RAID1;
- }
-
- if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
- ((flags & BTRFS_BLOCK_GROUP_RAID1) |
- (flags & BTRFS_BLOCK_GROUP_RAID10) |
- (flags & BTRFS_BLOCK_GROUP_DUP))) {
- flags &= ~BTRFS_BLOCK_GROUP_RAID0;
- }
-
- return extended_to_chunk(flags);
-}
-
-static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
-{
- if (flags & BTRFS_BLOCK_GROUP_DATA)
- flags |= root->fs_info->avail_data_alloc_bits;
- else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
- flags |= root->fs_info->avail_system_alloc_bits;
- else if (flags & BTRFS_BLOCK_GROUP_METADATA)
- flags |= root->fs_info->avail_metadata_alloc_bits;
-
- return btrfs_reduce_alloc_profile(root, flags);
-}
-
-u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
-{
- u64 flags;
-
- if (data)
- flags = BTRFS_BLOCK_GROUP_DATA;
- else if (root == root->fs_info->chunk_root)
- flags = BTRFS_BLOCK_GROUP_SYSTEM;
- else
- flags = BTRFS_BLOCK_GROUP_METADATA;
-
- return get_alloc_profile(root, flags);
-}
-
-void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
-{
- BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
- BTRFS_BLOCK_GROUP_DATA);
-}
-
-/*
- * This will check the space that the inode allocates from to make sure we have
- * enough space for bytes.
- */
-int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
-{
- struct btrfs_space_info *data_sinfo;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 used;
- int ret = 0, committed = 0, alloc_chunk = 1;
-
- /* make sure bytes are sectorsize aligned */
- bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
-
- if (root == root->fs_info->tree_root ||
- BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
- alloc_chunk = 0;
- committed = 1;
- }
-
- data_sinfo = BTRFS_I(inode)->space_info;
- if (!data_sinfo)
- goto alloc;
-
-again:
- /* make sure we have enough space to handle the data first */
- spin_lock(&data_sinfo->lock);
- used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
- data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
- data_sinfo->bytes_may_use;
-
- if (used + bytes > data_sinfo->total_bytes) {
- struct btrfs_trans_handle *trans;
-
- /*
- * if we don't have enough free bytes in this space then we need
- * to alloc a new chunk.
- */
- if (!data_sinfo->full && alloc_chunk) {
- u64 alloc_target;
-
- data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
- spin_unlock(&data_sinfo->lock);
-alloc:
- alloc_target = btrfs_get_alloc_profile(root, 1);
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- ret = do_chunk_alloc(trans, root->fs_info->extent_root,
- bytes + 2 * 1024 * 1024,
- alloc_target,
- CHUNK_ALLOC_NO_FORCE);
- btrfs_end_transaction(trans, root);
- if (ret < 0) {
- if (ret != -ENOSPC)
- return ret;
- else
- goto commit_trans;
- }
-
- if (!data_sinfo) {
- btrfs_set_inode_space_info(root, inode);
- data_sinfo = BTRFS_I(inode)->space_info;
- }
- goto again;
- }
-
- /*
- * If we have less pinned bytes than we want to allocate then
- * don't bother committing the transaction, it won't help us.
- */
- if (data_sinfo->bytes_pinned < bytes)
- committed = 1;
- spin_unlock(&data_sinfo->lock);
-
- /* commit the current transaction and try again */
-commit_trans:
- if (!committed &&
- !atomic_read(&root->fs_info->open_ioctl_trans)) {
- committed = 1;
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans, root);
- if (ret)
- return ret;
- goto again;
- }
-
- return -ENOSPC;
- }
- data_sinfo->bytes_may_use += bytes;
- trace_btrfs_space_reservation(root->fs_info, "space_info",
- data_sinfo->flags, bytes, 1);
- spin_unlock(&data_sinfo->lock);
-
- return 0;
-}
-
-/*
- * Called if we need to clear a data reservation for this inode.
- */
-void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_space_info *data_sinfo;
-
- /* make sure bytes are sectorsize aligned */
- bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
-
- data_sinfo = BTRFS_I(inode)->space_info;
- spin_lock(&data_sinfo->lock);
- data_sinfo->bytes_may_use -= bytes;
- trace_btrfs_space_reservation(root->fs_info, "space_info",
- data_sinfo->flags, bytes, 0);
- spin_unlock(&data_sinfo->lock);
-}
-
-static void force_metadata_allocation(struct btrfs_fs_info *info)
-{
- struct list_head *head = &info->space_info;
- struct btrfs_space_info *found;
-
- rcu_read_lock();
- list_for_each_entry_rcu(found, head, list) {
- if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
- found->force_alloc = CHUNK_ALLOC_FORCE;
- }
- rcu_read_unlock();
-}
-
-static int should_alloc_chunk(struct btrfs_root *root,
- struct btrfs_space_info *sinfo, u64 alloc_bytes,
- int force)
-{
- struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
- u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
- u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
- u64 thresh;
-
- if (force == CHUNK_ALLOC_FORCE)
- return 1;
-
- /*
- * We need to take into account the global rsv because for all intents
- * and purposes it's used space. Don't worry about locking the
- * global_rsv, it doesn't change except when the transaction commits.
- */
- num_allocated += global_rsv->size;
-
- /*
- * in limited mode, we want to have some free space up to
- * about 1% of the FS size.
- */
- if (force == CHUNK_ALLOC_LIMITED) {
- thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
- thresh = max_t(u64, 64 * 1024 * 1024,
- div_factor_fine(thresh, 1));
-
- if (num_bytes - num_allocated < thresh)
- return 1;
- }
- thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
-
- /* 256MB or 2% of the FS */
- thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 2));
- /* system chunks need a much smaller threshold */
- if (sinfo->flags & BTRFS_BLOCK_GROUP_SYSTEM)
- thresh = 32 * 1024 * 1024;
-
- if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 8))
- return 0;
- return 1;
-}
-
-static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
-{
- u64 num_dev;
-
- if (type & BTRFS_BLOCK_GROUP_RAID10 ||
- type & BTRFS_BLOCK_GROUP_RAID0)
- num_dev = root->fs_info->fs_devices->rw_devices;
- else if (type & BTRFS_BLOCK_GROUP_RAID1)
- num_dev = 2;
- else
- num_dev = 1; /* DUP or single */
-
- /* metadata for updating devices and chunk tree */
- return btrfs_calc_trans_metadata_size(root, num_dev + 1);
-}
-
-static void check_system_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 type)
-{
- struct btrfs_space_info *info;
- u64 left;
- u64 thresh;
-
- info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
- spin_lock(&info->lock);
- left = info->total_bytes - info->bytes_used - info->bytes_pinned -
- info->bytes_reserved - info->bytes_readonly;
- spin_unlock(&info->lock);
-
- thresh = get_system_chunk_thresh(root, type);
- if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
- printk(KERN_INFO "left=%llu, need=%llu, flags=%llu\n",
- left, thresh, type);
- dump_space_info(info, 0, 0);
- }
-
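- /* not enough SYSTEM space left: allocate a new system chunk before updating the chunk tree */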
- if (left < thresh) {
- u64 flags;
-
- flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
- btrfs_alloc_chunk(trans, root, flags);
- }
-}
-
-static int do_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root, u64 alloc_bytes,
- u64 flags, int force)
-{
- struct btrfs_space_info *space_info;
- struct btrfs_fs_info *fs_info = extent_root->fs_info;
- int wait_for_alloc = 0;
- int ret = 0;
-
- space_info = __find_space_info(extent_root->fs_info, flags);
- if (!space_info) {
- ret = update_space_info(extent_root->fs_info, flags,
- 0, 0, &space_info);
- BUG_ON(ret); /* -ENOMEM */
- }
- BUG_ON(!space_info); /* Logic error */
-
-again:
- spin_lock(&space_info->lock);
- if (force < space_info->force_alloc)
- force = space_info->force_alloc;
- if (space_info->full) {
- spin_unlock(&space_info->lock);
- return 0;
- }
-
- if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
- spin_unlock(&space_info->lock);
- return 0;
- } else if (space_info->chunk_alloc) {
- wait_for_alloc = 1;
- } else {
- space_info->chunk_alloc = 1;
- }
-
- spin_unlock(&space_info->lock);
-
- mutex_lock(&fs_info->chunk_mutex);
-
- /*
- * The chunk_mutex is held throughout the entirety of a chunk
- * allocation, so once we've acquired the chunk_mutex we know that the
- * other guy is done and we need to recheck and see if we should
- * allocate.
- */
- if (wait_for_alloc) {
- mutex_unlock(&fs_info->chunk_mutex);
- wait_for_alloc = 0;
- goto again;
- }
-
- /*
- * If we have mixed data/metadata chunks we want to make sure we keep
- * allocating mixed chunks instead of individual chunks.
- */
- if (btrfs_mixed_space_info(space_info))
- flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
-
- /*
- * if we're doing a data chunk, go ahead and make sure that
- * we keep a reasonable number of metadata chunks allocated in the
- * FS as well.
- */
- if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
- fs_info->data_chunk_allocations++;
- if (!(fs_info->data_chunk_allocations %
- fs_info->metadata_ratio))
- force_metadata_allocation(fs_info);
- }
-
- /*
- * Check if we have enough space in SYSTEM chunk because we may need
- * to update devices.
- */
- check_system_chunk(trans, extent_root, flags);
-
- ret = btrfs_alloc_chunk(trans, extent_root, flags);
- if (ret < 0 && ret != -ENOSPC)
- goto out;
-
- spin_lock(&space_info->lock);
- if (ret)
- space_info->full = 1;
- else
- ret = 1;
-
- space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
- space_info->chunk_alloc = 0;
- spin_unlock(&space_info->lock);
-out:
- mutex_unlock(&extent_root->fs_info->chunk_mutex);
- return ret;
-}
-
-/*
- * shrink metadata reservation for delalloc
- */
-static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim,
- bool wait_ordered)
-{
- struct btrfs_block_rsv *block_rsv;
- struct btrfs_space_info *space_info;
- struct btrfs_trans_handle *trans;
- u64 reserved;
- u64 max_reclaim;
- u64 reclaimed = 0;
- long time_left;
- unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
- int loops = 0;
- unsigned long progress;
-
- trans = (struct btrfs_trans_handle *)current->journal_info;
- block_rsv = &root->fs_info->delalloc_block_rsv;
- space_info = block_rsv->space_info;
-
- smp_mb();
- reserved = space_info->bytes_may_use;
- progress = space_info->reservation_progress;
-
- if (reserved == 0)
- return 0;
-
- smp_mb();
- if (root->fs_info->delalloc_bytes == 0) {
- if (trans)
- return 0;
- btrfs_wait_ordered_extents(root, 0, 0);
- return 0;
- }
-
- max_reclaim = min(reserved, to_reclaim);
- nr_pages = max_t(unsigned long, nr_pages,
- max_reclaim >> PAGE_CACHE_SHIFT);
- while (loops < 1024) {
- /* have the flusher threads jump in and do some IO */
- smp_mb();
- nr_pages = min_t(unsigned long, nr_pages,
- root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
- writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
- WB_REASON_FS_FREE_SPACE);
-
- spin_lock(&space_info->lock);
- if (reserved > space_info->bytes_may_use)
- reclaimed += reserved - space_info->bytes_may_use;
- reserved = space_info->bytes_may_use;
- spin_unlock(&space_info->lock);
-
- loops++;
-
- if (reserved == 0 || reclaimed >= max_reclaim)
- break;
-
- if (trans && trans->transaction->blocked)
- return -EAGAIN;
-
- if (wait_ordered && !trans) {
- btrfs_wait_ordered_extents(root, 0, 0);
- } else {
- time_left = schedule_timeout_interruptible(1);
-
- /* We were interrupted, exit */
- if (time_left)
- break;
- }
-
- /* We've kicked the IO a few times; if anything has been freed,
- * exit. There is no sense in looping here for a long time
- * when we really need to commit the transaction, or when there
- * are just too many writers without enough free space.
- */
-
- if (loops > 3) {
- smp_mb();
- if (progress != space_info->reservation_progress)
- break;
- }
-
- }
-
- return reclaimed >= to_reclaim;
-}
-
-/**
- * may_commit_transaction - possibly commit the transaction if it's OK to
- * @root - the root we're allocating for
- * @space_info - the space_info we're trying to reserve from
- * @bytes - the number of bytes we want to reserve
- * @force - force the commit
- *
- * This will check to make sure that committing the transaction will actually
- * get us somewhere and then commit the transaction if it does. Otherwise it
- * will return -ENOSPC.
- */
-static int may_commit_transaction(struct btrfs_root *root,
- struct btrfs_space_info *space_info,
- u64 bytes, int force)
-{
- struct btrfs_block_rsv *delayed_rsv = &root->fs_info->delayed_block_rsv;
- struct btrfs_trans_handle *trans;
-
- trans = (struct btrfs_trans_handle *)current->journal_info;
- if (trans)
- return -EAGAIN;
-
- if (force)
- goto commit;
-
- /* See if there is enough pinned space to make this reservation */
- spin_lock(&space_info->lock);
- if (space_info->bytes_pinned >= bytes) {
- spin_unlock(&space_info->lock);
- goto commit;
- }
- spin_unlock(&space_info->lock);
-
- /*
- * See if there is some space in the delayed insertion reservation for
- * this reservation.
- */
- if (space_info != delayed_rsv->space_info)
- return -ENOSPC;
-
- spin_lock(&space_info->lock);
- spin_lock(&delayed_rsv->lock);
- if (space_info->bytes_pinned + delayed_rsv->size < bytes) {
- spin_unlock(&delayed_rsv->lock);
- spin_unlock(&space_info->lock);
- return -ENOSPC;
- }
- spin_unlock(&delayed_rsv->lock);
- spin_unlock(&space_info->lock);
-
-commit:
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return -ENOSPC;
-
- return btrfs_commit_transaction(trans, root);
-}
-
-/**
- * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
- * @root - the root we're allocating for
- * @block_rsv - the block_rsv we're allocating for
- * @orig_bytes - the number of bytes we want
- * @flush - whether or not we can flush to make our reservation
- *
- * This will reserve orig_bytes number of bytes from the space info associated
- * with the block_rsv. If there is not enough space it will make an attempt to
- * flush out space to make room. It will do this by flushing delalloc if
- * possible or committing the transaction. If flush is 0 then no attempts to
- * regain reservations will be made and this will fail if there is not enough
- * space already.
- */
-static int reserve_metadata_bytes(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 orig_bytes, int flush)
-{
- struct btrfs_space_info *space_info = block_rsv->space_info;
- u64 used;
- u64 num_bytes = orig_bytes;
- int retries = 0;
- int ret = 0;
- bool committed = false;
- bool flushing = false;
- bool wait_ordered = false;
-
-again:
- ret = 0;
- spin_lock(&space_info->lock);
- /*
- * We only want to wait if somebody other than us is flushing and we are
- * actually allowed to flush.
- */
- while (flush && !flushing && space_info->flush) {
- spin_unlock(&space_info->lock);
- /*
- * If we have a trans handle we can't wait because the flusher
- * may have to commit the transaction, which would mean we would
- * deadlock since we are waiting for the flusher to finish, but
- * hold the current transaction open.
- */
- if (current->journal_info)
- return -EAGAIN;
- ret = wait_event_killable(space_info->wait, !space_info->flush);
- /* Must have been killed, return */
- if (ret)
- return -EINTR;
-
- spin_lock(&space_info->lock);
- }
-
- ret = -ENOSPC;
- used = space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly +
- space_info->bytes_may_use;
-
- /*
- * The idea here is that if we've not already over-reserved the block group
- * then we can go ahead and save our reservation first and then start
- * flushing if we need to. Otherwise, if we've already overcommitted,
- * let's start flushing stuff first and then come back and try to make
- * our reservation.
- */
- if (used <= space_info->total_bytes) {
- if (used + orig_bytes <= space_info->total_bytes) {
- space_info->bytes_may_use += orig_bytes;
- trace_btrfs_space_reservation(root->fs_info,
- "space_info", space_info->flags, orig_bytes, 1);
- ret = 0;
- } else {
- /*
- * Ok, set num_bytes to orig_bytes since we aren't
- * overcommitted; this way we only try to reclaim what
- * we need.
- */
- num_bytes = orig_bytes;
- }
- } else {
- /*
- * Ok, we're overcommitted, set num_bytes to the overcommitted
- * amount plus the amount of bytes that we need for this
- * reservation.
- */
- wait_ordered = true;
- num_bytes = used - space_info->total_bytes +
- (orig_bytes * (retries + 1));
- }
-
- if (ret) {
- u64 profile = btrfs_get_alloc_profile(root, 0);
- u64 avail;
-
- /*
- * If we have a lot of space that's pinned, don't bother doing
- * the overcommit dance yet and just commit the transaction.
- */
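- /* avail = 80% of the space that is not already used */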
- avail = (space_info->total_bytes - space_info->bytes_used) * 8;
- do_div(avail, 10);
- if (space_info->bytes_pinned >= avail && flush && !committed) {
- space_info->flush = 1;
- flushing = true;
- spin_unlock(&space_info->lock);
- ret = may_commit_transaction(root, space_info,
- orig_bytes, 1);
- if (ret)
- goto out;
- committed = true;
- goto again;
- }
-
- spin_lock(&root->fs_info->free_chunk_lock);
- avail = root->fs_info->free_chunk_space;
-
- /*
- * If we have dup, raid1 or raid10 then only half of the free
- * space is actually useable.
- */
- if (profile & (BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- avail >>= 1;
-
- /*
- * If we aren't flushing don't let us overcommit too much, say
- * 1/8th of the space. If we can flush, let it overcommit up to
- * 1/2 of the space.
- */
- if (flush)
- avail >>= 3;
- else
- avail >>= 1;
- spin_unlock(&root->fs_info->free_chunk_lock);
-
- if (used + num_bytes < space_info->total_bytes + avail) {
- space_info->bytes_may_use += orig_bytes;
- trace_btrfs_space_reservation(root->fs_info,
- "space_info", space_info->flags, orig_bytes, 1);
- ret = 0;
- } else {
- wait_ordered = true;
- }
- }
-
- /*
- * Couldn't make our reservation, save our place so while we're trying
- * to reclaim space we can actually use it instead of somebody else
- * stealing it from us.
- */
- if (ret && flush) {
- flushing = true;
- space_info->flush = 1;
- }
-
- spin_unlock(&space_info->lock);
-
- if (!ret || !flush)
- goto out;
-
- /*
- * We do synchronous shrinking since we don't actually unreserve
- * metadata until after the IO is completed.
- */
- ret = shrink_delalloc(root, num_bytes, wait_ordered);
- if (ret < 0)
- goto out;
-
- ret = 0;
-
- /*
- * So if we were overcommitted it's possible that somebody else flushed
- * out enough space and we simply didn't have enough space to reclaim,
- * so go back around and try again.
- */
- if (retries < 2) {
- wait_ordered = true;
- retries++;
- goto again;
- }
-
- ret = -ENOSPC;
- if (committed)
- goto out;
-
- ret = may_commit_transaction(root, space_info, orig_bytes, 0);
- if (!ret) {
- committed = true;
- goto again;
- }
-
-out:
- if (flushing) {
- spin_lock(&space_info->lock);
- space_info->flush = 0;
- wake_up_all(&space_info->wait);
- spin_unlock(&space_info->lock);
- }
- return ret;
-}
-
-static struct btrfs_block_rsv *get_block_rsv(
- const struct btrfs_trans_handle *trans,
- const struct btrfs_root *root)
-{
- struct btrfs_block_rsv *block_rsv = NULL;
-
- if (root->ref_cows || root == root->fs_info->csum_root)
- block_rsv = trans->block_rsv;
-
- if (!block_rsv)
- block_rsv = root->block_rsv;
-
- if (!block_rsv)
- block_rsv = &root->fs_info->empty_block_rsv;
-
- return block_rsv;
-}
-
-static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
- u64 num_bytes)
-{
- int ret = -ENOSPC;
- spin_lock(&block_rsv->lock);
- if (block_rsv->reserved >= num_bytes) {
- block_rsv->reserved -= num_bytes;
- if (block_rsv->reserved < block_rsv->size)
- block_rsv->full = 0;
- ret = 0;
- }
- spin_unlock(&block_rsv->lock);
- return ret;
-}
-
-static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, int update_size)
-{
- spin_lock(&block_rsv->lock);
- block_rsv->reserved += num_bytes;
- if (update_size)
- block_rsv->size += num_bytes;
- else if (block_rsv->reserved >= block_rsv->size)
- block_rsv->full = 1;
- spin_unlock(&block_rsv->lock);
-}
-
-static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *block_rsv,
- struct btrfs_block_rsv *dest, u64 num_bytes)
-{
- struct btrfs_space_info *space_info = block_rsv->space_info;
-
- spin_lock(&block_rsv->lock);
- if (num_bytes == (u64)-1)
- num_bytes = block_rsv->size;
- block_rsv->size -= num_bytes;
- if (block_rsv->reserved >= block_rsv->size) {
- num_bytes = block_rsv->reserved - block_rsv->size;
- block_rsv->reserved = block_rsv->size;
- block_rsv->full = 1;
- } else {
- num_bytes = 0;
- }
- spin_unlock(&block_rsv->lock);
-
- if (num_bytes > 0) {
- if (dest) {
- spin_lock(&dest->lock);
- if (!dest->full) {
- u64 bytes_to_add;
-
- bytes_to_add = dest->size - dest->reserved;
- bytes_to_add = min(num_bytes, bytes_to_add);
- dest->reserved += bytes_to_add;
- if (dest->reserved >= dest->size)
- dest->full = 1;
- num_bytes -= bytes_to_add;
- }
- spin_unlock(&dest->lock);
- }
- if (num_bytes) {
- spin_lock(&space_info->lock);
- space_info->bytes_may_use -= num_bytes;
- trace_btrfs_space_reservation(fs_info, "space_info",
- space_info->flags, num_bytes, 0);
- space_info->reservation_progress++;
- spin_unlock(&space_info->lock);
- }
- }
-}
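
The release path above shrinks the reservation first and then lets any excess spill into a destination reserve (normally the global one) before handing the remainder back to the space_info. A minimal userspace sketch of that spill step, using a simplified, hypothetical stand-in struct rather than the real btrfs_block_rsv (the "full" flag and the (u64)-1 release-everything case are omitted):

#include <stdio.h>
#include <stdint.h>

/* simplified, hypothetical stand-in for struct btrfs_block_rsv */
struct toy_rsv {
	uint64_t size;		/* how much we want to keep reserved */
	uint64_t reserved;	/* how much we actually hold */
};

/* mirror of the spill step: excess flows to dest, the rest is freed */
static uint64_t toy_release(struct toy_rsv *rsv, struct toy_rsv *dest,
			    uint64_t num_bytes)
{
	uint64_t excess = 0;

	rsv->size -= num_bytes;
	if (rsv->reserved >= rsv->size) {
		excess = rsv->reserved - rsv->size;
		rsv->reserved = rsv->size;
	}
	if (dest && excess) {
		uint64_t to_add = dest->size - dest->reserved;

		if (to_add > excess)
			to_add = excess;
		dest->reserved += to_add;
		excess -= to_add;
	}
	return excess;	/* what would go back to space_info->bytes_may_use */
}

int main(void)
{
	struct toy_rsv rsv = { .size = 8192, .reserved = 8192 };
	struct toy_rsv global = { .size = 4096, .reserved = 3072 };

	/* release 4096: 1024 tops up "global", 3072 returns to the pool */
	printf("returned %llu\n",
	       (unsigned long long)toy_release(&rsv, &global, 4096));
	return 0;
}
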
-
-static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
- struct btrfs_block_rsv *dst, u64 num_bytes)
-{
- int ret;
-
- ret = block_rsv_use_bytes(src, num_bytes);
- if (ret)
- return ret;
-
- block_rsv_add_bytes(dst, num_bytes, 1);
- return 0;
-}
-
-void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
-{
- memset(rsv, 0, sizeof(*rsv));
- spin_lock_init(&rsv->lock);
-}
-
-struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
-{
- struct btrfs_block_rsv *block_rsv;
- struct btrfs_fs_info *fs_info = root->fs_info;
-
- block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
- if (!block_rsv)
- return NULL;
-
- btrfs_init_block_rsv(block_rsv);
- block_rsv->space_info = __find_space_info(fs_info,
- BTRFS_BLOCK_GROUP_METADATA);
- return block_rsv;
-}
-
-void btrfs_free_block_rsv(struct btrfs_root *root,
- struct btrfs_block_rsv *rsv)
-{
- btrfs_block_rsv_release(root, rsv, (u64)-1);
- kfree(rsv);
-}
-
-static inline int __block_rsv_add(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, int flush)
-{
- int ret;
-
- if (num_bytes == 0)
- return 0;
-
- ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
- if (!ret) {
- block_rsv_add_bytes(block_rsv, num_bytes, 1);
- return 0;
- }
-
- return ret;
-}
-
-int btrfs_block_rsv_add(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes)
-{
- return __block_rsv_add(root, block_rsv, num_bytes, 1);
-}
-
-int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes)
-{
- return __block_rsv_add(root, block_rsv, num_bytes, 0);
-}
-
-int btrfs_block_rsv_check(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv, int min_factor)
-{
- u64 num_bytes = 0;
- int ret = -ENOSPC;
-
- if (!block_rsv)
- return 0;
-
- spin_lock(&block_rsv->lock);
- num_bytes = div_factor(block_rsv->size, min_factor);
- if (block_rsv->reserved >= num_bytes)
- ret = 0;
- spin_unlock(&block_rsv->lock);
-
- return ret;
-}
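
btrfs_block_rsv_check() above only answers whether the reserve still holds at least min_factor tenths of its target size. A small illustration with made-up numbers, assuming div_factor() computes num * factor / 10 (as it is defined elsewhere in this tree):

#include <stdio.h>
#include <stdint.h>

/* assumption: behaves like div_factor() in ctree.h */
static uint64_t div_factor(uint64_t num, int factor)
{
	return num * factor / 10;
}

int main(void)
{
	uint64_t size = 10 * 1024 * 1024;	/* rsv->size, hypothetical */
	uint64_t reserved = 6 * 1024 * 1024;	/* rsv->reserved */
	int min_factor = 5;			/* require at least 5/10 */

	if (reserved >= div_factor(size, min_factor))
		printf("ok: reserve still holds %d/10 of its size\n",
		       min_factor);
	else
		printf("-ENOSPC: caller should refill the reserve\n");
	return 0;
}
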
-
-static inline int __btrfs_block_rsv_refill(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 min_reserved, int flush)
-{
- u64 num_bytes = 0;
- int ret = -ENOSPC;
-
- if (!block_rsv)
- return 0;
-
- spin_lock(&block_rsv->lock);
- num_bytes = min_reserved;
- if (block_rsv->reserved >= num_bytes)
- ret = 0;
- else
- num_bytes -= block_rsv->reserved;
- spin_unlock(&block_rsv->lock);
-
- if (!ret)
- return 0;
-
- ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
- if (!ret) {
- block_rsv_add_bytes(block_rsv, num_bytes, 0);
- return 0;
- }
-
- return ret;
-}
-
-int btrfs_block_rsv_refill(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 min_reserved)
-{
- return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 1);
-}
-
-int btrfs_block_rsv_refill_noflush(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 min_reserved)
-{
- return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 0);
-}
-
-int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
- struct btrfs_block_rsv *dst_rsv,
- u64 num_bytes)
-{
- return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
-}
-
-void btrfs_block_rsv_release(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 num_bytes)
-{
- struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
- if (global_rsv->full || global_rsv == block_rsv ||
- block_rsv->space_info != global_rsv->space_info)
- global_rsv = NULL;
- block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
- num_bytes);
-}
-
-/*
- * helper to calculate size of global block reservation.
- * the desired value is sum of space used by extent tree,
- * checksum tree and root tree
- */
-static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_space_info *sinfo;
- u64 num_bytes;
- u64 meta_used;
- u64 data_used;
- int csum_size = btrfs_super_csum_size(fs_info->super_copy);
-
- sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
- spin_lock(&sinfo->lock);
- data_used = sinfo->bytes_used;
- spin_unlock(&sinfo->lock);
-
- sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
- spin_lock(&sinfo->lock);
- if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
- data_used = 0;
- meta_used = sinfo->bytes_used;
- spin_unlock(&sinfo->lock);
-
- num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
- csum_size * 2;
- num_bytes += div64_u64(data_used + meta_used, 50);
-
- if (num_bytes * 3 > meta_used)
- num_bytes = div64_u64(meta_used, 3);
-
- return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10);
-}
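
Plugging hypothetical numbers into the sizing above: roughly two checksums' worth of bytes per data block, plus 2% of all used space, but never more than a third of the metadata already in use, rounded up to a multiple of leafsize << 10. A userspace re-statement of that arithmetic (the figures are made up):

#include <stdio.h>
#include <stdint.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) / (a) * (a))

int main(void)
{
	uint64_t data_used = 100ULL << 30;	/* 100 GiB of data */
	uint64_t meta_used = 2ULL << 30;	/* 2 GiB of metadata */
	unsigned int blocksize_bits = 12;	/* 4 KiB blocks */
	uint64_t csum_size = 4;			/* crc32c */
	uint64_t leafsize = 4096;

	uint64_t num_bytes = (data_used >> blocksize_bits) * csum_size * 2;

	num_bytes += (data_used + meta_used) / 50;	/* + 2% of used space */
	if (num_bytes * 3 > meta_used)			/* cap at 1/3 of meta */
		num_bytes = meta_used / 3;

	num_bytes = ALIGN_UP(num_bytes, leafsize << 10);
	printf("global rsv target: %llu MiB\n",
	       (unsigned long long)(num_bytes >> 20));
	return 0;
}
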
-
-static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
- struct btrfs_space_info *sinfo = block_rsv->space_info;
- u64 num_bytes;
-
- num_bytes = calc_global_metadata_size(fs_info);
-
- spin_lock(&sinfo->lock);
- spin_lock(&block_rsv->lock);
-
- block_rsv->size = num_bytes;
-
- num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
- sinfo->bytes_reserved + sinfo->bytes_readonly +
- sinfo->bytes_may_use;
-
- if (sinfo->total_bytes > num_bytes) {
- num_bytes = sinfo->total_bytes - num_bytes;
- block_rsv->reserved += num_bytes;
- sinfo->bytes_may_use += num_bytes;
- trace_btrfs_space_reservation(fs_info, "space_info",
- sinfo->flags, num_bytes, 1);
- }
-
- if (block_rsv->reserved >= block_rsv->size) {
- num_bytes = block_rsv->reserved - block_rsv->size;
- sinfo->bytes_may_use -= num_bytes;
- trace_btrfs_space_reservation(fs_info, "space_info",
- sinfo->flags, num_bytes, 0);
- sinfo->reservation_progress++;
- block_rsv->reserved = block_rsv->size;
- block_rsv->full = 1;
- }
-
- spin_unlock(&block_rsv->lock);
- spin_unlock(&sinfo->lock);
-}
-
-static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_space_info *space_info;
-
- space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
- fs_info->chunk_block_rsv.space_info = space_info;
-
- space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
- fs_info->global_block_rsv.space_info = space_info;
- fs_info->delalloc_block_rsv.space_info = space_info;
- fs_info->trans_block_rsv.space_info = space_info;
- fs_info->empty_block_rsv.space_info = space_info;
- fs_info->delayed_block_rsv.space_info = space_info;
-
- fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
- fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
- fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
- fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
- fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
-
- update_global_block_rsv(fs_info);
-}
-
-static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
-{
- block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
- (u64)-1);
- WARN_ON(fs_info->delalloc_block_rsv.size > 0);
- WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
- WARN_ON(fs_info->trans_block_rsv.size > 0);
- WARN_ON(fs_info->trans_block_rsv.reserved > 0);
- WARN_ON(fs_info->chunk_block_rsv.size > 0);
- WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
- WARN_ON(fs_info->delayed_block_rsv.size > 0);
- WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
-}
-
-void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- if (!trans->bytes_reserved)
- return;
-
- trace_btrfs_space_reservation(root->fs_info, "transaction",
- trans->transid, trans->bytes_reserved, 0);
- btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
- trans->bytes_reserved = 0;
-}
-
-/* Can only return 0 or -ENOSPC */
-int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
- struct inode *inode)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
- struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
-
- /*
- * We need to hold space in order to delete our orphan item once we've
- * added it, so this takes the reservation so we can release it later
- * when we are truly done with the orphan item.
- */
- u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
- trace_btrfs_space_reservation(root->fs_info, "orphan",
- btrfs_ino(inode), num_bytes, 1);
- return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
-}
-
-void btrfs_orphan_release_metadata(struct inode *inode)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
- trace_btrfs_space_reservation(root->fs_info, "orphan",
- btrfs_ino(inode), num_bytes, 0);
- btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
-}
-
-int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
- struct btrfs_pending_snapshot *pending)
-{
- struct btrfs_root *root = pending->root;
- struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
- struct btrfs_block_rsv *dst_rsv = &pending->block_rsv;
- /*
- * two for root back/forward refs, two for directory entries
- * and one for root of the snapshot.
- */
- u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
- dst_rsv->space_info = src_rsv->space_info;
- return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
-}
-
-/**
- * drop_outstanding_extent - drop an outstanding extent
- * @inode: the inode we're dropping the extent for
- *
- * This is called when we are freeing up an outstanding extent, either
- * after an error or after an extent is written. This will return the number of
- * reserved extents that need to be freed. This must be called with
- * BTRFS_I(inode)->lock held.
- */
-static unsigned drop_outstanding_extent(struct inode *inode)
-{
- unsigned drop_inode_space = 0;
- unsigned dropped_extents = 0;
-
- BUG_ON(!BTRFS_I(inode)->outstanding_extents);
- BTRFS_I(inode)->outstanding_extents--;
-
- if (BTRFS_I(inode)->outstanding_extents == 0 &&
- BTRFS_I(inode)->delalloc_meta_reserved) {
- drop_inode_space = 1;
- BTRFS_I(inode)->delalloc_meta_reserved = 0;
- }
-
- /*
-	 * If we have at least as many outstanding extents as we have reserved
-	 * then we need to leave the reserved extents count alone.
- */
- if (BTRFS_I(inode)->outstanding_extents >=
- BTRFS_I(inode)->reserved_extents)
- return drop_inode_space;
-
- dropped_extents = BTRFS_I(inode)->reserved_extents -
- BTRFS_I(inode)->outstanding_extents;
- BTRFS_I(inode)->reserved_extents -= dropped_extents;
- return dropped_extents + drop_inode_space;
-}
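
A quick userspace walk-through of the bookkeeping above, with hypothetical counts, showing how many reservations a single completed extent gives back:

#include <stdio.h>

int main(void)
{
	/* hypothetical inode state before one extent completes */
	unsigned int outstanding = 4;	/* outstanding_extents */
	unsigned int reserved = 6;	/* reserved_extents */
	int meta_reserved = 1;		/* delalloc_meta_reserved */
	unsigned int drop_inode_space = 0, dropped = 0;

	outstanding--;			/* the extent we are completing */
	if (outstanding == 0 && meta_reserved) {
		drop_inode_space = 1;
		meta_reserved = 0;
	}
	if (outstanding < reserved) {
		dropped = reserved - outstanding;
		reserved = outstanding;
	}
	/* 6 - 3 = 3 extent reservations come back; the inode item is kept */
	printf("reservations to free: %u\n", dropped + drop_inode_space);
	return 0;
}
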
-
-/**
- * calc_csum_metadata_size - return the amount of metadata space that must be
- * reserved/freed for the given bytes.
- * @inode: the inode we're manipulating
- * @num_bytes: the number of bytes in question
- * @reserve: 1 if we are reserving space, 0 if we are freeing space
- *
- * This adjusts the number of csum_bytes in the inode and then returns the
- * correct amount of metadata that must either be reserved or freed. We
- * calculate how many checksums we can fit into one leaf and then divide the
- * number of bytes that will need to be checksummed by this value to figure out
- * how many checksums will be required. If we are adding bytes then the number
- * may go up and we will return the number of additional bytes that must be
- * reserved. If it is going down we will return the number of bytes that must
- * be freed.
- *
- * This must be called with BTRFS_I(inode)->lock held.
- */
-static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
- int reserve)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 csum_size;
- int num_csums_per_leaf;
- int num_csums;
- int old_csums;
-
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
- BTRFS_I(inode)->csum_bytes == 0)
- return 0;
-
- old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
- if (reserve)
- BTRFS_I(inode)->csum_bytes += num_bytes;
- else
- BTRFS_I(inode)->csum_bytes -= num_bytes;
- csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
- num_csums_per_leaf = (int)div64_u64(csum_size,
- sizeof(struct btrfs_csum_item) +
- sizeof(struct btrfs_disk_key));
- num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
- num_csums = num_csums + num_csums_per_leaf - 1;
- num_csums = num_csums / num_csums_per_leaf;
-
- old_csums = old_csums + num_csums_per_leaf - 1;
- old_csums = old_csums / num_csums_per_leaf;
-
- /* No change, no need to reserve more */
- if (old_csums == num_csums)
- return 0;
-
- if (reserve)
- return btrfs_calc_trans_metadata_size(root,
- num_csums - old_csums);
-
- return btrfs_calc_trans_metadata_size(root, old_csums - num_csums);
-}
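
The accounting above only charges for whole leaves: it rounds the before and after checksum counts up to full leaves and reserves (or frees) metadata for the difference, which btrfs_calc_trans_metadata_size() then turns into bytes. A rough userspace re-statement with made-up geometry:

#include <stdio.h>
#include <stdint.h>

static uint64_t csum_leaves(uint64_t csum_bytes, uint64_t sectorsize,
			    uint64_t csums_per_leaf)
{
	uint64_t num_csums = csum_bytes / sectorsize;

	return (num_csums + csums_per_leaf - 1) / csums_per_leaf;
}

int main(void)
{
	/* hypothetical geometry; the kernel derives these from the leaf size */
	uint64_t sectorsize = 4096;
	uint64_t csums_per_leaf = 100;
	uint64_t old_bytes = 300 * 4096;	/* csum_bytes before */
	uint64_t add = 128 * 4096;		/* num_bytes being reserved */

	uint64_t old_leaves = csum_leaves(old_bytes, sectorsize,
					  csums_per_leaf);
	uint64_t new_leaves = csum_leaves(old_bytes + add, sectorsize,
					  csums_per_leaf);

	/* 300 csums -> 3 leaves, 428 csums -> 5 leaves: 2 more to reserve */
	printf("extra csum leaves to reserve: %llu\n",
	       (unsigned long long)(new_leaves - old_leaves));
	return 0;
}
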
-
-int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
- u64 to_reserve = 0;
- u64 csum_bytes;
- unsigned nr_extents = 0;
- int extra_reserve = 0;
- int flush = 1;
- int ret;
-
- /* Need to be holding the i_mutex here if we aren't free space cache */
- if (btrfs_is_free_space_inode(root, inode))
- flush = 0;
-
- if (flush && btrfs_transaction_in_commit(root->fs_info))
- schedule_timeout(1);
-
- mutex_lock(&BTRFS_I(inode)->delalloc_mutex);
- num_bytes = ALIGN(num_bytes, root->sectorsize);
-
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents++;
-
- if (BTRFS_I(inode)->outstanding_extents >
- BTRFS_I(inode)->reserved_extents)
- nr_extents = BTRFS_I(inode)->outstanding_extents -
- BTRFS_I(inode)->reserved_extents;
-
- /*
- * Add an item to reserve for updating the inode when we complete the
- * delalloc io.
- */
- if (!BTRFS_I(inode)->delalloc_meta_reserved) {
- nr_extents++;
- extra_reserve = 1;
- }
-
- to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
- to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
- csum_bytes = BTRFS_I(inode)->csum_bytes;
- spin_unlock(&BTRFS_I(inode)->lock);
-
- ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
- if (ret) {
- u64 to_free = 0;
- unsigned dropped;
-
- spin_lock(&BTRFS_I(inode)->lock);
- dropped = drop_outstanding_extent(inode);
- /*
-		 * If the inode's csum_bytes is the same as the original
-		 * csum_bytes then we know we haven't raced with any free()ers
-		 * so we can just reduce our inode's csum bytes and carry on.
- * Otherwise we have to do the normal free thing to account for
- * the case that the free side didn't free up its reserve
- * because of this outstanding reservation.
- */
- if (BTRFS_I(inode)->csum_bytes == csum_bytes)
- calc_csum_metadata_size(inode, num_bytes, 0);
- else
- to_free = calc_csum_metadata_size(inode, num_bytes, 0);
- spin_unlock(&BTRFS_I(inode)->lock);
- if (dropped)
- to_free += btrfs_calc_trans_metadata_size(root, dropped);
-
- if (to_free) {
- btrfs_block_rsv_release(root, block_rsv, to_free);
- trace_btrfs_space_reservation(root->fs_info,
- "delalloc",
- btrfs_ino(inode),
- to_free, 0);
- }
- mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
- return ret;
- }
-
- spin_lock(&BTRFS_I(inode)->lock);
- if (extra_reserve) {
- BTRFS_I(inode)->delalloc_meta_reserved = 1;
- nr_extents--;
- }
- BTRFS_I(inode)->reserved_extents += nr_extents;
- spin_unlock(&BTRFS_I(inode)->lock);
- mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
-
- if (to_reserve)
- trace_btrfs_space_reservation(root->fs_info,"delalloc",
- btrfs_ino(inode), to_reserve, 1);
- block_rsv_add_bytes(block_rsv, to_reserve, 1);
-
- return 0;
-}
-
-/**
- * btrfs_delalloc_release_metadata - release a metadata reservation for an inode
- * @inode: the inode to release the reservation for
- * @num_bytes: the number of bytes we're releasing
- *
- * This will release the metadata reservation for an inode. This can be called
- * once we complete IO for a given set of bytes to release their metadata
- * reservations.
- */
-void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 to_free = 0;
- unsigned dropped;
-
- num_bytes = ALIGN(num_bytes, root->sectorsize);
- spin_lock(&BTRFS_I(inode)->lock);
- dropped = drop_outstanding_extent(inode);
-
- to_free = calc_csum_metadata_size(inode, num_bytes, 0);
- spin_unlock(&BTRFS_I(inode)->lock);
- if (dropped > 0)
- to_free += btrfs_calc_trans_metadata_size(root, dropped);
-
- trace_btrfs_space_reservation(root->fs_info, "delalloc",
- btrfs_ino(inode), to_free, 0);
- btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
- to_free);
-}
-
-/**
- * btrfs_delalloc_reserve_space - reserve data and metadata space for delalloc
- * @inode: inode we're writing to
- * @num_bytes: the number of bytes we want to allocate
- *
- * This will do the following things
- *
- * o reserve space in the data space info for num_bytes
- * o reserve space in the metadata space info based on number of outstanding
- * extents and how much csums will be needed
- * o add to the inodes ->delalloc_bytes
- * o add it to the fs_info's delalloc inodes list.
- *
- * This will return 0 for success and -ENOSPC if there is no space left.
- */
-int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
-{
- int ret;
-
- ret = btrfs_check_data_free_space(inode, num_bytes);
- if (ret)
- return ret;
-
- ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
- if (ret) {
- btrfs_free_reserved_data_space(inode, num_bytes);
- return ret;
- }
-
- return 0;
-}
-
-/**
- * btrfs_delalloc_release_space - release data and metadata space for delalloc
- * @inode: inode we're releasing space for
- * @num_bytes: the number of bytes we want to free up
- *
- * This must be matched with a call to btrfs_delalloc_reserve_space. This is
- * called in the case that we don't need the metadata AND data reservations
- * anymore, for example when there is an error or we insert an inline extent.
- *
- * This function will release the metadata space that was not used and will
- * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
- * list if there are no delalloc bytes left.
- */
-void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
-{
- btrfs_delalloc_release_metadata(inode, num_bytes);
- btrfs_free_reserved_data_space(inode, num_bytes);
-}
-
-static int update_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, int alloc)
-{
- struct btrfs_block_group_cache *cache = NULL;
- struct btrfs_fs_info *info = root->fs_info;
- u64 total = num_bytes;
- u64 old_val;
- u64 byte_in_group;
- int factor;
-
- /* block accounting for super block */
- spin_lock(&info->delalloc_lock);
- old_val = btrfs_super_bytes_used(info->super_copy);
- if (alloc)
- old_val += num_bytes;
- else
- old_val -= num_bytes;
- btrfs_set_super_bytes_used(info->super_copy, old_val);
- spin_unlock(&info->delalloc_lock);
-
- while (total) {
- cache = btrfs_lookup_block_group(info, bytenr);
- if (!cache)
- return -ENOENT;
- if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- factor = 2;
- else
- factor = 1;
- /*
- * If this block group has free space cache written out, we
- * need to make sure to load it if we are removing space. This
- * is because we need the unpinning stage to actually add the
- * space back to the block group, otherwise we will leak space.
- */
- if (!alloc && cache->cached == BTRFS_CACHE_NO)
- cache_block_group(cache, trans, NULL, 1);
-
- byte_in_group = bytenr - cache->key.objectid;
- WARN_ON(byte_in_group > cache->key.offset);
-
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
-
- if (btrfs_test_opt(root, SPACE_CACHE) &&
- cache->disk_cache_state < BTRFS_DC_CLEAR)
- cache->disk_cache_state = BTRFS_DC_CLEAR;
-
- cache->dirty = 1;
- old_val = btrfs_block_group_used(&cache->item);
- num_bytes = min(total, cache->key.offset - byte_in_group);
- if (alloc) {
- old_val += num_bytes;
- btrfs_set_block_group_used(&cache->item, old_val);
- cache->reserved -= num_bytes;
- cache->space_info->bytes_reserved -= num_bytes;
- cache->space_info->bytes_used += num_bytes;
- cache->space_info->disk_used += num_bytes * factor;
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
- } else {
- old_val -= num_bytes;
- btrfs_set_block_group_used(&cache->item, old_val);
- cache->pinned += num_bytes;
- cache->space_info->bytes_pinned += num_bytes;
- cache->space_info->bytes_used -= num_bytes;
- cache->space_info->disk_used -= num_bytes * factor;
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
-
- set_extent_dirty(info->pinned_extents,
- bytenr, bytenr + num_bytes - 1,
- GFP_NOFS | __GFP_NOFAIL);
- }
- btrfs_put_block_group(cache);
- total -= num_bytes;
- bytenr += num_bytes;
- }
- return 0;
-}
-
-static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
-{
- struct btrfs_block_group_cache *cache;
- u64 bytenr;
-
- cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
- if (!cache)
- return 0;
-
- bytenr = cache->key.objectid;
- btrfs_put_block_group(cache);
-
- return bytenr;
-}
-
-static int pin_down_extent(struct btrfs_root *root,
- struct btrfs_block_group_cache *cache,
- u64 bytenr, u64 num_bytes, int reserved)
-{
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- cache->pinned += num_bytes;
- cache->space_info->bytes_pinned += num_bytes;
- if (reserved) {
- cache->reserved -= num_bytes;
- cache->space_info->bytes_reserved -= num_bytes;
- }
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
-
- set_extent_dirty(root->fs_info->pinned_extents, bytenr,
- bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
- return 0;
-}
-
-/*
- * this function must be called within transaction
- */
-int btrfs_pin_extent(struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, int reserved)
-{
- struct btrfs_block_group_cache *cache;
-
- cache = btrfs_lookup_block_group(root->fs_info, bytenr);
- BUG_ON(!cache); /* Logic error */
-
- pin_down_extent(root, cache, bytenr, num_bytes, reserved);
-
- btrfs_put_block_group(cache);
- return 0;
-}
-
-/*
- * this function must be called within transaction
- */
-int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes)
-{
- struct btrfs_block_group_cache *cache;
-
- cache = btrfs_lookup_block_group(root->fs_info, bytenr);
- BUG_ON(!cache); /* Logic error */
-
- /*
- * pull in the free space cache (if any) so that our pin
- * removes the free space from the cache. We have load_only set
- * to one because the slow code to read in the free extents does check
- * the pinned extents.
- */
- cache_block_group(cache, trans, root, 1);
-
- pin_down_extent(root, cache, bytenr, num_bytes, 0);
-
- /* remove us from the free space cache (if we're there at all) */
- btrfs_remove_free_space(cache, bytenr, num_bytes);
- btrfs_put_block_group(cache);
- return 0;
-}
-
-/**
- * btrfs_update_reserved_bytes - update the block_group and space info counters
- * @cache: The cache we are manipulating
- * @num_bytes: The number of bytes in question
- * @reserve: One of the reservation enums
- *
- * This is called by the allocator when it reserves space, or by somebody who is
- * freeing space that was never actually used on disk. For example if you
- * reserve some space for a new leaf in transaction A and before transaction A
- * commits you free that leaf, you call this with reserve set to 0 in order to
- * clear the reservation.
- *
- * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
- * ENOSPC accounting. For data we handle the reservation through clearing the
- * delalloc bits in the io_tree. We have to do this since we could end up
- * allocating less disk space for the amount of data we have reserved in the
- * case of compression.
- *
- * If this is a reservation and the block group has become read only we cannot
- * make the reservation and return -EAGAIN, otherwise this function always
- * succeeds.
- */
-static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve)
-{
- struct btrfs_space_info *space_info = cache->space_info;
- int ret = 0;
-
- spin_lock(&space_info->lock);
- spin_lock(&cache->lock);
- if (reserve != RESERVE_FREE) {
- if (cache->ro) {
- ret = -EAGAIN;
- } else {
- cache->reserved += num_bytes;
- space_info->bytes_reserved += num_bytes;
- if (reserve == RESERVE_ALLOC) {
- trace_btrfs_space_reservation(cache->fs_info,
- "space_info", space_info->flags,
- num_bytes, 0);
- space_info->bytes_may_use -= num_bytes;
- }
- }
- } else {
- if (cache->ro)
- space_info->bytes_readonly += num_bytes;
- cache->reserved -= num_bytes;
- space_info->bytes_reserved -= num_bytes;
- space_info->reservation_progress++;
- }
- spin_unlock(&cache->lock);
- spin_unlock(&space_info->lock);
- return ret;
-}
-
-void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_caching_control *next;
- struct btrfs_caching_control *caching_ctl;
- struct btrfs_block_group_cache *cache;
-
- down_write(&fs_info->extent_commit_sem);
-
- list_for_each_entry_safe(caching_ctl, next,
- &fs_info->caching_block_groups, list) {
- cache = caching_ctl->block_group;
- if (block_group_cache_done(cache)) {
- cache->last_byte_to_unpin = (u64)-1;
- list_del_init(&caching_ctl->list);
- put_caching_control(caching_ctl);
- } else {
- cache->last_byte_to_unpin = caching_ctl->progress;
- }
- }
-
- if (fs_info->pinned_extents == &fs_info->freed_extents[0])
- fs_info->pinned_extents = &fs_info->freed_extents[1];
- else
- fs_info->pinned_extents = &fs_info->freed_extents[0];
-
- up_write(&fs_info->extent_commit_sem);
-
- update_global_block_rsv(fs_info);
-}
-
-static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_block_group_cache *cache = NULL;
- u64 len;
-
- while (start <= end) {
- if (!cache ||
- start >= cache->key.objectid + cache->key.offset) {
- if (cache)
- btrfs_put_block_group(cache);
- cache = btrfs_lookup_block_group(fs_info, start);
- BUG_ON(!cache); /* Logic error */
- }
-
- len = cache->key.objectid + cache->key.offset - start;
- len = min(len, end + 1 - start);
-
- if (start < cache->last_byte_to_unpin) {
- len = min(len, cache->last_byte_to_unpin - start);
- btrfs_add_free_space(cache, start, len);
- }
-
- start += len;
-
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- cache->pinned -= len;
- cache->space_info->bytes_pinned -= len;
- if (cache->ro)
- cache->space_info->bytes_readonly += len;
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
- }
-
- if (cache)
- btrfs_put_block_group(cache);
- return 0;
-}
-
-int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct extent_io_tree *unpin;
- u64 start;
- u64 end;
- int ret;
-
- if (trans->aborted)
- return 0;
-
- if (fs_info->pinned_extents == &fs_info->freed_extents[0])
- unpin = &fs_info->freed_extents[1];
- else
- unpin = &fs_info->freed_extents[0];
-
- while (1) {
- ret = find_first_extent_bit(unpin, 0, &start, &end,
- EXTENT_DIRTY);
- if (ret)
- break;
-
- if (btrfs_test_opt(root, DISCARD))
- ret = btrfs_discard_extent(root, start,
- end + 1 - start, NULL);
-
- clear_extent_dirty(unpin, start, end, GFP_NOFS);
- unpin_extent_range(root, start, end);
- cond_resched();
- }
-
- return 0;
-}
-
-static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 owner_objectid,
- u64 owner_offset, int refs_to_drop,
- struct btrfs_delayed_extent_op *extent_op)
-{
- struct btrfs_key key;
- struct btrfs_path *path;
- struct btrfs_fs_info *info = root->fs_info;
- struct btrfs_root *extent_root = info->extent_root;
- struct extent_buffer *leaf;
- struct btrfs_extent_item *ei;
- struct btrfs_extent_inline_ref *iref;
- int ret;
- int is_data;
- int extent_slot = 0;
- int found_extent = 0;
- int num_to_del = 1;
- u32 item_size;
- u64 refs;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- path->reada = 1;
- path->leave_spinning = 1;
-
- is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
- BUG_ON(!is_data && refs_to_drop != 1);
-
- ret = lookup_extent_backref(trans, extent_root, path, &iref,
- bytenr, num_bytes, parent,
- root_objectid, owner_objectid,
- owner_offset);
- if (ret == 0) {
- extent_slot = path->slots[0];
- while (extent_slot >= 0) {
- btrfs_item_key_to_cpu(path->nodes[0], &key,
- extent_slot);
- if (key.objectid != bytenr)
- break;
- if (key.type == BTRFS_EXTENT_ITEM_KEY &&
- key.offset == num_bytes) {
- found_extent = 1;
- break;
- }
- if (path->slots[0] - extent_slot > 5)
- break;
- extent_slot--;
- }
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
- if (found_extent && item_size < sizeof(*ei))
- found_extent = 0;
-#endif
- if (!found_extent) {
- BUG_ON(iref);
- ret = remove_extent_backref(trans, extent_root, path,
- NULL, refs_to_drop,
- is_data);
- if (ret)
- goto abort;
- btrfs_release_path(path);
- path->leave_spinning = 1;
-
- key.objectid = bytenr;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = num_bytes;
-
- ret = btrfs_search_slot(trans, extent_root,
- &key, path, -1, 1);
- if (ret) {
- printk(KERN_ERR "umm, got %d back from search"
- ", was looking for %llu\n", ret,
- (unsigned long long)bytenr);
- if (ret > 0)
- btrfs_print_leaf(extent_root,
- path->nodes[0]);
- }
- if (ret < 0)
- goto abort;
- extent_slot = path->slots[0];
- }
- } else if (ret == -ENOENT) {
- btrfs_print_leaf(extent_root, path->nodes[0]);
- WARN_ON(1);
- printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
- "parent %llu root %llu owner %llu offset %llu\n",
- (unsigned long long)bytenr,
- (unsigned long long)parent,
- (unsigned long long)root_objectid,
- (unsigned long long)owner_objectid,
- (unsigned long long)owner_offset);
- } else {
- goto abort;
- }
-
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, extent_slot);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (item_size < sizeof(*ei)) {
- BUG_ON(found_extent || extent_slot != path->slots[0]);
- ret = convert_extent_item_v0(trans, extent_root, path,
- owner_objectid, 0);
- if (ret < 0)
- goto abort;
-
- btrfs_release_path(path);
- path->leave_spinning = 1;
-
- key.objectid = bytenr;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = num_bytes;
-
- ret = btrfs_search_slot(trans, extent_root, &key, path,
- -1, 1);
- if (ret) {
- printk(KERN_ERR "umm, got %d back from search"
- ", was looking for %llu\n", ret,
- (unsigned long long)bytenr);
- btrfs_print_leaf(extent_root, path->nodes[0]);
- }
- if (ret < 0)
- goto abort;
- extent_slot = path->slots[0];
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, extent_slot);
- }
-#endif
- BUG_ON(item_size < sizeof(*ei));
- ei = btrfs_item_ptr(leaf, extent_slot,
- struct btrfs_extent_item);
- if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
- struct btrfs_tree_block_info *bi;
- BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
- bi = (struct btrfs_tree_block_info *)(ei + 1);
- WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
- }
-
- refs = btrfs_extent_refs(leaf, ei);
- BUG_ON(refs < refs_to_drop);
- refs -= refs_to_drop;
-
- if (refs > 0) {
- if (extent_op)
- __run_delayed_extent_op(extent_op, leaf, ei);
- /*
- * In the case of inline back ref, reference count will
- * be updated by remove_extent_backref
- */
- if (iref) {
- BUG_ON(!found_extent);
- } else {
- btrfs_set_extent_refs(leaf, ei, refs);
- btrfs_mark_buffer_dirty(leaf);
- }
- if (found_extent) {
- ret = remove_extent_backref(trans, extent_root, path,
- iref, refs_to_drop,
- is_data);
- if (ret)
- goto abort;
- }
- } else {
- if (found_extent) {
- BUG_ON(is_data && refs_to_drop !=
- extent_data_ref_count(root, path, iref));
- if (iref) {
- BUG_ON(path->slots[0] != extent_slot);
- } else {
- BUG_ON(path->slots[0] != extent_slot + 1);
- path->slots[0] = extent_slot;
- num_to_del = 2;
- }
- }
-
- ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
- num_to_del);
- if (ret)
- goto abort;
- btrfs_release_path(path);
-
- if (is_data) {
- ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
- if (ret)
- goto abort;
- }
-
- ret = update_block_group(trans, root, bytenr, num_bytes, 0);
- if (ret)
- goto abort;
- }
-out:
- btrfs_free_path(path);
- return ret;
-
-abort:
- btrfs_abort_transaction(trans, extent_root, ret);
- goto out;
-}
-
-/*
- * when we free a block, it is possible (and likely) that we free the last
- * delayed ref for that extent as well. This searches the delayed ref tree for
- * a given extent, and if there are no other delayed refs to be processed, it
- * removes it from the tree.
- */
-static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytenr)
-{
- struct btrfs_delayed_ref_head *head;
- struct btrfs_delayed_ref_root *delayed_refs;
- struct btrfs_delayed_ref_node *ref;
- struct rb_node *node;
- int ret = 0;
-
- delayed_refs = &trans->transaction->delayed_refs;
- spin_lock(&delayed_refs->lock);
- head = btrfs_find_delayed_ref_head(trans, bytenr);
- if (!head)
- goto out;
-
- node = rb_prev(&head->node.rb_node);
- if (!node)
- goto out;
-
- ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-
- /* there are still entries for this ref, we can't drop it */
- if (ref->bytenr == bytenr)
- goto out;
-
- if (head->extent_op) {
- if (!head->must_insert_reserved)
- goto out;
- kfree(head->extent_op);
- head->extent_op = NULL;
- }
-
- /*
- * waiting for the lock here would deadlock. If someone else has it
- * locked they are already in the process of dropping it anyway
- */
- if (!mutex_trylock(&head->mutex))
- goto out;
-
- /*
- * at this point we have a head with no other entries. Go
- * ahead and process it.
- */
- head->node.in_tree = 0;
- rb_erase(&head->node.rb_node, &delayed_refs->root);
-
- delayed_refs->num_entries--;
- if (waitqueue_active(&delayed_refs->seq_wait))
- wake_up(&delayed_refs->seq_wait);
-
- /*
- * we don't take a ref on the node because we're removing it from the
- * tree, so we just steal the ref the tree was holding.
- */
- delayed_refs->num_heads--;
- if (list_empty(&head->cluster))
- delayed_refs->num_heads_ready--;
-
- list_del_init(&head->cluster);
- spin_unlock(&delayed_refs->lock);
-
- BUG_ON(head->extent_op);
- if (head->must_insert_reserved)
- ret = 1;
-
- mutex_unlock(&head->mutex);
- btrfs_put_delayed_ref(&head->node);
- return ret;
-out:
- spin_unlock(&delayed_refs->lock);
- return 0;
-}
-
-void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *buf,
- u64 parent, int last_ref, int for_cow)
-{
- struct btrfs_block_group_cache *cache = NULL;
- int ret;
-
- if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
- buf->start, buf->len,
- parent, root->root_key.objectid,
- btrfs_header_level(buf),
- BTRFS_DROP_DELAYED_REF, NULL, for_cow);
- BUG_ON(ret); /* -ENOMEM */
- }
-
- if (!last_ref)
- return;
-
- cache = btrfs_lookup_block_group(root->fs_info, buf->start);
-
- if (btrfs_header_generation(buf) == trans->transid) {
- if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
- ret = check_ref_cleanup(trans, root, buf->start);
- if (!ret)
- goto out;
- }
-
- if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
- pin_down_extent(root, cache, buf->start, buf->len, 1);
- goto out;
- }
-
- WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
-
- btrfs_add_free_space(cache, buf->start, buf->len);
- btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
- }
-out:
- /*
- * Deleting the buffer, clear the corrupt flag since it doesn't matter
- * anymore.
- */
- clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
- btrfs_put_block_group(cache);
-}
-
-/* Can return -ENOMEM */
-int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
- u64 owner, u64 offset, int for_cow)
-{
- int ret;
- struct btrfs_fs_info *fs_info = root->fs_info;
-
- /*
- * tree log blocks never actually go into the extent allocation
- * tree, just update pinning info and exit early.
- */
- if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
- WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
- /* unlocks the pinned mutex */
- btrfs_pin_extent(root, bytenr, num_bytes, 1);
- ret = 0;
- } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
- num_bytes,
- parent, root_objectid, (int)owner,
- BTRFS_DROP_DELAYED_REF, NULL, for_cow);
- } else {
- ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
- num_bytes,
- parent, root_objectid, owner,
- offset, BTRFS_DROP_DELAYED_REF,
- NULL, for_cow);
- }
- return ret;
-}
-
-static u64 stripe_align(struct btrfs_root *root, u64 val)
-{
- u64 mask = ((u64)root->stripesize - 1);
- u64 ret = (val + mask) & ~mask;
- return ret;
-}
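
stripe_align() is the usual round-up-to-the-next-boundary mask trick, which works because stripesize is a power of two. For example, with a hypothetical 64 KiB stripe size:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t stripesize = 64 * 1024;	/* hypothetical */
	uint64_t val = 200000;			/* offset to align */
	uint64_t mask = stripesize - 1;

	/* 200000 rounds up to 262144, the next 64 KiB boundary */
	printf("%llu\n", (unsigned long long)((val + mask) & ~mask));
	return 0;
}
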
-
-/*
- * when we wait for progress in the block group caching, it's because
- * our allocation attempt failed at least once. So, we must sleep
- * and let some progress happen before we try again.
- *
- * This function will sleep at least once waiting for new free space to
- * show up, and then it will check the block group free space numbers
- * for our min num_bytes. Another option is to have it go ahead
- * and look in the rbtree for a free extent of a given size, but this
- * is a good start.
- */
-static noinline int
-wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
- u64 num_bytes)
-{
- struct btrfs_caching_control *caching_ctl;
- DEFINE_WAIT(wait);
-
- caching_ctl = get_caching_control(cache);
- if (!caching_ctl)
- return 0;
-
- wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
- (cache->free_space_ctl->free_space >= num_bytes));
-
- put_caching_control(caching_ctl);
- return 0;
-}
-
-static noinline int
-wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
-{
- struct btrfs_caching_control *caching_ctl;
- DEFINE_WAIT(wait);
-
- caching_ctl = get_caching_control(cache);
- if (!caching_ctl)
- return 0;
-
- wait_event(caching_ctl->wait, block_group_cache_done(cache));
-
- put_caching_control(caching_ctl);
- return 0;
-}
-
-static int __get_block_group_index(u64 flags)
-{
- int index;
-
- if (flags & BTRFS_BLOCK_GROUP_RAID10)
- index = 0;
- else if (flags & BTRFS_BLOCK_GROUP_RAID1)
- index = 1;
- else if (flags & BTRFS_BLOCK_GROUP_DUP)
- index = 2;
- else if (flags & BTRFS_BLOCK_GROUP_RAID0)
- index = 3;
- else
- index = 4;
-
- return index;
-}
-
-static int get_block_group_index(struct btrfs_block_group_cache *cache)
-{
- return __get_block_group_index(cache->flags);
-}
-
-enum btrfs_loop_type {
- LOOP_CACHING_NOWAIT = 0,
- LOOP_CACHING_WAIT = 1,
- LOOP_ALLOC_CHUNK = 2,
- LOOP_NO_EMPTY_SIZE = 3,
-};
-
-/*
- * walks the btree of allocated extents and finds a hole of a given size.
- * The key ins is changed to record the hole:
- * ins->objectid == block start
- * ins->flags = BTRFS_EXTENT_ITEM_KEY
- * ins->offset == number of blocks
- * Any available blocks before search_start are skipped.
- */
-static noinline int find_free_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *orig_root,
- u64 num_bytes, u64 empty_size,
- u64 hint_byte, struct btrfs_key *ins,
- u64 data)
-{
- int ret = 0;
- struct btrfs_root *root = orig_root->fs_info->extent_root;
- struct btrfs_free_cluster *last_ptr = NULL;
- struct btrfs_block_group_cache *block_group = NULL;
- struct btrfs_block_group_cache *used_block_group;
- u64 search_start = 0;
- int empty_cluster = 2 * 1024 * 1024;
- int allowed_chunk_alloc = 0;
- int done_chunk_alloc = 0;
- struct btrfs_space_info *space_info;
- int loop = 0;
- int index = 0;
- int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
- RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
- bool found_uncached_bg = false;
- bool failed_cluster_refill = false;
- bool failed_alloc = false;
- bool use_cluster = true;
- bool have_caching_bg = false;
-
- WARN_ON(num_bytes < root->sectorsize);
- btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
- ins->objectid = 0;
- ins->offset = 0;
-
- trace_find_free_extent(orig_root, num_bytes, empty_size, data);
-
- space_info = __find_space_info(root->fs_info, data);
- if (!space_info) {
- printk(KERN_ERR "No space info for %llu\n", data);
- return -ENOSPC;
- }
-
- /*
- * If the space info is for both data and metadata it means we have a
- * small filesystem and we can't use the clustering stuff.
- */
- if (btrfs_mixed_space_info(space_info))
- use_cluster = false;
-
- if (orig_root->ref_cows || empty_size)
- allowed_chunk_alloc = 1;
-
- if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
- last_ptr = &root->fs_info->meta_alloc_cluster;
- if (!btrfs_test_opt(root, SSD))
- empty_cluster = 64 * 1024;
- }
-
- if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
- btrfs_test_opt(root, SSD)) {
- last_ptr = &root->fs_info->data_alloc_cluster;
- }
-
- if (last_ptr) {
- spin_lock(&last_ptr->lock);
- if (last_ptr->block_group)
- hint_byte = last_ptr->window_start;
- spin_unlock(&last_ptr->lock);
- }
-
- search_start = max(search_start, first_logical_byte(root, 0));
- search_start = max(search_start, hint_byte);
-
- if (!last_ptr)
- empty_cluster = 0;
-
- if (search_start == hint_byte) {
- block_group = btrfs_lookup_block_group(root->fs_info,
- search_start);
- used_block_group = block_group;
- /*
- * we don't want to use the block group if it doesn't match our
-		 * allocation bits, or if it's not cached.
- *
- * However if we are re-searching with an ideal block group
- * picked out then we don't care that the block group is cached.
- */
- if (block_group && block_group_bits(block_group, data) &&
- block_group->cached != BTRFS_CACHE_NO) {
- down_read(&space_info->groups_sem);
- if (list_empty(&block_group->list) ||
- block_group->ro) {
- /*
- * someone is removing this block group,
- * we can't jump into the have_block_group
- * target because our list pointers are not
- * valid
- */
- btrfs_put_block_group(block_group);
- up_read(&space_info->groups_sem);
- } else {
- index = get_block_group_index(block_group);
- goto have_block_group;
- }
- } else if (block_group) {
- btrfs_put_block_group(block_group);
- }
- }
-search:
- have_caching_bg = false;
- down_read(&space_info->groups_sem);
- list_for_each_entry(block_group, &space_info->block_groups[index],
- list) {
- u64 offset;
- int cached;
-
- used_block_group = block_group;
- btrfs_get_block_group(block_group);
- search_start = block_group->key.objectid;
-
- /*
- * this can happen if we end up cycling through all the
- * raid types, but we want to make sure we only allocate
- * for the proper type.
- */
- if (!block_group_bits(block_group, data)) {
- u64 extra = BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10;
-
- /*
- * if they asked for extra copies and this block group
- * doesn't provide them, bail. This does allow us to
- * fill raid0 from raid1.
- */
- if ((data & extra) && !(block_group->flags & extra))
- goto loop;
- }
-
-have_block_group:
- cached = block_group_cache_done(block_group);
- if (unlikely(!cached)) {
- found_uncached_bg = true;
- ret = cache_block_group(block_group, trans,
- orig_root, 0);
- BUG_ON(ret < 0);
- ret = 0;
- }
-
- if (unlikely(block_group->ro))
- goto loop;
-
- /*
- * Ok we want to try and use the cluster allocator, so
- * lets look there
- */
- if (last_ptr) {
- /*
- * the refill lock keeps out other
- * people trying to start a new cluster
- */
- spin_lock(&last_ptr->refill_lock);
- used_block_group = last_ptr->block_group;
- if (used_block_group != block_group &&
- (!used_block_group ||
- used_block_group->ro ||
- !block_group_bits(used_block_group, data))) {
- used_block_group = block_group;
- goto refill_cluster;
- }
-
- if (used_block_group != block_group)
- btrfs_get_block_group(used_block_group);
-
- offset = btrfs_alloc_from_cluster(used_block_group,
- last_ptr, num_bytes, used_block_group->key.objectid);
- if (offset) {
- /* we have a block, we're done */
- spin_unlock(&last_ptr->refill_lock);
- trace_btrfs_reserve_extent_cluster(root,
- block_group, search_start, num_bytes);
- goto checks;
- }
-
- WARN_ON(last_ptr->block_group != used_block_group);
- if (used_block_group != block_group) {
- btrfs_put_block_group(used_block_group);
- used_block_group = block_group;
- }
-refill_cluster:
- BUG_ON(used_block_group != block_group);
- /* If we are on LOOP_NO_EMPTY_SIZE, we can't
-			 * set up a new cluster, so let's just skip it
- * and let the allocator find whatever block
- * it can find. If we reach this point, we
- * will have tried the cluster allocator
- * plenty of times and not have found
- * anything, so we are likely way too
- * fragmented for the clustering stuff to find
- * anything.
- *
- * However, if the cluster is taken from the
- * current block group, release the cluster
- * first, so that we stand a better chance of
- * succeeding in the unclustered
- * allocation. */
- if (loop >= LOOP_NO_EMPTY_SIZE &&
- last_ptr->block_group != block_group) {
- spin_unlock(&last_ptr->refill_lock);
- goto unclustered_alloc;
- }
-
- /*
- * this cluster didn't work out, free it and
- * start over
- */
- btrfs_return_cluster_to_free_space(NULL, last_ptr);
-
- if (loop >= LOOP_NO_EMPTY_SIZE) {
- spin_unlock(&last_ptr->refill_lock);
- goto unclustered_alloc;
- }
-
- /* allocate a cluster in this block group */
- ret = btrfs_find_space_cluster(trans, root,
- block_group, last_ptr,
- search_start, num_bytes,
- empty_cluster + empty_size);
- if (ret == 0) {
- /*
- * now pull our allocation out of this
- * cluster
- */
- offset = btrfs_alloc_from_cluster(block_group,
- last_ptr, num_bytes,
- search_start);
- if (offset) {
- /* we found one, proceed */
- spin_unlock(&last_ptr->refill_lock);
- trace_btrfs_reserve_extent_cluster(root,
- block_group, search_start,
- num_bytes);
- goto checks;
- }
- } else if (!cached && loop > LOOP_CACHING_NOWAIT
- && !failed_cluster_refill) {
- spin_unlock(&last_ptr->refill_lock);
-
- failed_cluster_refill = true;
- wait_block_group_cache_progress(block_group,
- num_bytes + empty_cluster + empty_size);
- goto have_block_group;
- }
-
- /*
- * at this point we either didn't find a cluster
- * or we weren't able to allocate a block from our
- * cluster. Free the cluster we've been trying
- * to use, and go to the next block group
- */
- btrfs_return_cluster_to_free_space(NULL, last_ptr);
- spin_unlock(&last_ptr->refill_lock);
- goto loop;
- }
-
-unclustered_alloc:
- spin_lock(&block_group->free_space_ctl->tree_lock);
- if (cached &&
- block_group->free_space_ctl->free_space <
- num_bytes + empty_cluster + empty_size) {
- spin_unlock(&block_group->free_space_ctl->tree_lock);
- goto loop;
- }
- spin_unlock(&block_group->free_space_ctl->tree_lock);
-
- offset = btrfs_find_space_for_alloc(block_group, search_start,
- num_bytes, empty_size);
- /*
- * If we didn't find a chunk, and we haven't failed on this
- * block group before, and this block group is in the middle of
- * caching and we are ok with waiting, then go ahead and wait
- * for progress to be made, and set failed_alloc to true.
- *
- * If failed_alloc is true then we've already waited on this
- * block group once and should move on to the next block group.
- */
- if (!offset && !failed_alloc && !cached &&
- loop > LOOP_CACHING_NOWAIT) {
- wait_block_group_cache_progress(block_group,
- num_bytes + empty_size);
- failed_alloc = true;
- goto have_block_group;
- } else if (!offset) {
- if (!cached)
- have_caching_bg = true;
- goto loop;
- }
-checks:
- search_start = stripe_align(root, offset);
-
- /* move on to the next group */
- if (search_start + num_bytes >
- used_block_group->key.objectid + used_block_group->key.offset) {
- btrfs_add_free_space(used_block_group, offset, num_bytes);
- goto loop;
- }
-
- if (offset < search_start)
- btrfs_add_free_space(used_block_group, offset,
- search_start - offset);
- BUG_ON(offset > search_start);
-
- ret = btrfs_update_reserved_bytes(used_block_group, num_bytes,
- alloc_type);
- if (ret == -EAGAIN) {
- btrfs_add_free_space(used_block_group, offset, num_bytes);
- goto loop;
- }
-
- /* we are all good, lets return */
- ins->objectid = search_start;
- ins->offset = num_bytes;
-
- trace_btrfs_reserve_extent(orig_root, block_group,
- search_start, num_bytes);
- if (offset < search_start)
- btrfs_add_free_space(used_block_group, offset,
- search_start - offset);
- BUG_ON(offset > search_start);
- if (used_block_group != block_group)
- btrfs_put_block_group(used_block_group);
- btrfs_put_block_group(block_group);
- break;
-loop:
- failed_cluster_refill = false;
- failed_alloc = false;
- BUG_ON(index != get_block_group_index(block_group));
- if (used_block_group != block_group)
- btrfs_put_block_group(used_block_group);
- btrfs_put_block_group(block_group);
- }
- up_read(&space_info->groups_sem);
-
- if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg)
- goto search;
-
- if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
- goto search;
-
- /*
- * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
- * caching kthreads as we move along
- * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
- * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
- * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
- * again
- */
- if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
- index = 0;
- loop++;
- if (loop == LOOP_ALLOC_CHUNK) {
- if (allowed_chunk_alloc) {
- ret = do_chunk_alloc(trans, root, num_bytes +
- 2 * 1024 * 1024, data,
- CHUNK_ALLOC_LIMITED);
- if (ret < 0) {
- btrfs_abort_transaction(trans,
- root, ret);
- goto out;
- }
- allowed_chunk_alloc = 0;
- if (ret == 1)
- done_chunk_alloc = 1;
- } else if (!done_chunk_alloc &&
- space_info->force_alloc ==
- CHUNK_ALLOC_NO_FORCE) {
- space_info->force_alloc = CHUNK_ALLOC_LIMITED;
- }
-
- /*
- * We didn't allocate a chunk, go ahead and drop the
- * empty size and loop again.
- */
- if (!done_chunk_alloc)
- loop = LOOP_NO_EMPTY_SIZE;
- }
-
- if (loop == LOOP_NO_EMPTY_SIZE) {
- empty_size = 0;
- empty_cluster = 0;
- }
-
- goto search;
- } else if (!ins->objectid) {
- ret = -ENOSPC;
- } else if (ins->objectid) {
- ret = 0;
- }
-out:
-
- return ret;
-}
-
-static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
- int dump_block_groups)
-{
- struct btrfs_block_group_cache *cache;
- int index = 0;
-
- spin_lock(&info->lock);
- printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n",
- (unsigned long long)info->flags,
- (unsigned long long)(info->total_bytes - info->bytes_used -
- info->bytes_pinned - info->bytes_reserved -
- info->bytes_readonly),
- (info->full) ? "" : "not ");
- printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, "
- "reserved=%llu, may_use=%llu, readonly=%llu\n",
- (unsigned long long)info->total_bytes,
- (unsigned long long)info->bytes_used,
- (unsigned long long)info->bytes_pinned,
- (unsigned long long)info->bytes_reserved,
- (unsigned long long)info->bytes_may_use,
- (unsigned long long)info->bytes_readonly);
- spin_unlock(&info->lock);
-
- if (!dump_block_groups)
- return;
-
- down_read(&info->groups_sem);
-again:
- list_for_each_entry(cache, &info->block_groups[index], list) {
- spin_lock(&cache->lock);
- printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
- "%llu pinned %llu reserved\n",
- (unsigned long long)cache->key.objectid,
- (unsigned long long)cache->key.offset,
- (unsigned long long)btrfs_block_group_used(&cache->item),
- (unsigned long long)cache->pinned,
- (unsigned long long)cache->reserved);
- btrfs_dump_free_space(cache, bytes);
- spin_unlock(&cache->lock);
- }
- if (++index < BTRFS_NR_RAID_TYPES)
- goto again;
- up_read(&info->groups_sem);
-}
-
-int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 num_bytes, u64 min_alloc_size,
- u64 empty_size, u64 hint_byte,
- struct btrfs_key *ins, u64 data)
-{
- bool final_tried = false;
- int ret;
-
- data = btrfs_get_alloc_profile(root, data);
-again:
- /*
- * the only place that sets empty_size is btrfs_realloc_node, which
- * is not called recursively on allocations
- */
- if (empty_size || root->ref_cows) {
- ret = do_chunk_alloc(trans, root->fs_info->extent_root,
- num_bytes + 2 * 1024 * 1024, data,
- CHUNK_ALLOC_NO_FORCE);
- if (ret < 0 && ret != -ENOSPC) {
- btrfs_abort_transaction(trans, root, ret);
- return ret;
- }
- }
-
- WARN_ON(num_bytes < root->sectorsize);
- ret = find_free_extent(trans, root, num_bytes, empty_size,
- hint_byte, ins, data);
-
- if (ret == -ENOSPC) {
- if (!final_tried) {
- num_bytes = num_bytes >> 1;
- num_bytes = num_bytes & ~(root->sectorsize - 1);
- num_bytes = max(num_bytes, min_alloc_size);
- ret = do_chunk_alloc(trans, root->fs_info->extent_root,
- num_bytes, data, CHUNK_ALLOC_FORCE);
- if (ret < 0 && ret != -ENOSPC) {
- btrfs_abort_transaction(trans, root, ret);
- return ret;
- }
- if (num_bytes == min_alloc_size)
- final_tried = true;
- goto again;
- } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
- struct btrfs_space_info *sinfo;
-
- sinfo = __find_space_info(root->fs_info, data);
- printk(KERN_ERR "btrfs allocation failed flags %llu, "
- "wanted %llu\n", (unsigned long long)data,
- (unsigned long long)num_bytes);
- if (sinfo)
- dump_space_info(sinfo, num_bytes, 1);
- }
- }
-
- trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
-
- return ret;
-}
-
-static int __btrfs_free_reserved_extent(struct btrfs_root *root,
- u64 start, u64 len, int pin)
-{
- struct btrfs_block_group_cache *cache;
- int ret = 0;
-
- cache = btrfs_lookup_block_group(root->fs_info, start);
- if (!cache) {
- printk(KERN_ERR "Unable to find block group for %llu\n",
- (unsigned long long)start);
- return -ENOSPC;
- }
-
- if (btrfs_test_opt(root, DISCARD))
- ret = btrfs_discard_extent(root, start, len, NULL);
-
- if (pin)
- pin_down_extent(root, cache, start, len, 1);
- else {
- btrfs_add_free_space(cache, start, len);
- btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
- }
- btrfs_put_block_group(cache);
-
- trace_btrfs_reserved_extent_free(root, start, len);
-
- return ret;
-}
-
-int btrfs_free_reserved_extent(struct btrfs_root *root,
- u64 start, u64 len)
-{
- return __btrfs_free_reserved_extent(root, start, len, 0);
-}
-
-int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
- u64 start, u64 len)
-{
- return __btrfs_free_reserved_extent(root, start, len, 1);
-}
-
-static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 parent, u64 root_objectid,
- u64 flags, u64 owner, u64 offset,
- struct btrfs_key *ins, int ref_mod)
-{
- int ret;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_extent_item *extent_item;
- struct btrfs_extent_inline_ref *iref;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- int type;
- u32 size;
-
- if (parent > 0)
- type = BTRFS_SHARED_DATA_REF_KEY;
- else
- type = BTRFS_EXTENT_DATA_REF_KEY;
-
- size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- path->leave_spinning = 1;
- ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
- ins, size);
- if (ret) {
- btrfs_free_path(path);
- return ret;
- }
-
- leaf = path->nodes[0];
- extent_item = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_item);
- btrfs_set_extent_refs(leaf, extent_item, ref_mod);
- btrfs_set_extent_generation(leaf, extent_item, trans->transid);
- btrfs_set_extent_flags(leaf, extent_item,
- flags | BTRFS_EXTENT_FLAG_DATA);
-
- iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
- btrfs_set_extent_inline_ref_type(leaf, iref, type);
- if (parent > 0) {
- struct btrfs_shared_data_ref *ref;
- ref = (struct btrfs_shared_data_ref *)(iref + 1);
- btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
- btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
- } else {
- struct btrfs_extent_data_ref *ref;
- ref = (struct btrfs_extent_data_ref *)(&iref->offset);
- btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
- btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
- btrfs_set_extent_data_ref_offset(leaf, ref, offset);
- btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
- }
-
- btrfs_mark_buffer_dirty(path->nodes[0]);
- btrfs_free_path(path);
-
- ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
- if (ret) { /* -ENOENT, logic error */
- printk(KERN_ERR "btrfs update block group failed for %llu "
- "%llu\n", (unsigned long long)ins->objectid,
- (unsigned long long)ins->offset);
- BUG();
- }
- return ret;
-}
-
-static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 parent, u64 root_objectid,
- u64 flags, struct btrfs_disk_key *key,
- int level, struct btrfs_key *ins)
-{
- int ret;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_extent_item *extent_item;
- struct btrfs_tree_block_info *block_info;
- struct btrfs_extent_inline_ref *iref;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- path->leave_spinning = 1;
- ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
- ins, size);
- if (ret) {
- btrfs_free_path(path);
- return ret;
- }
-
- leaf = path->nodes[0];
- extent_item = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_item);
- btrfs_set_extent_refs(leaf, extent_item, 1);
- btrfs_set_extent_generation(leaf, extent_item, trans->transid);
- btrfs_set_extent_flags(leaf, extent_item,
- flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
- block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
-
- btrfs_set_tree_block_key(leaf, block_info, key);
- btrfs_set_tree_block_level(leaf, block_info, level);
-
- iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
- if (parent > 0) {
- BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
- btrfs_set_extent_inline_ref_type(leaf, iref,
- BTRFS_SHARED_BLOCK_REF_KEY);
- btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
- } else {
- btrfs_set_extent_inline_ref_type(leaf, iref,
- BTRFS_TREE_BLOCK_REF_KEY);
- btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
- }
-
- btrfs_mark_buffer_dirty(leaf);
- btrfs_free_path(path);
-
- ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
- if (ret) { /* -ENOENT, logic error */
- printk(KERN_ERR "btrfs update block group failed for %llu "
- "%llu\n", (unsigned long long)ins->objectid,
- (unsigned long long)ins->offset);
- BUG();
- }
- return ret;
-}
-
-int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 root_objectid, u64 owner,
- u64 offset, struct btrfs_key *ins)
-{
- int ret;
-
- BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
-
- ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
- ins->offset, 0,
- root_objectid, owner, offset,
- BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
- return ret;
-}
-
-/*
- * this is used by the tree logging recovery code. It records that
- * an extent has been allocated and makes sure to clear the free
- * space cache bits as well
- */
-int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 root_objectid, u64 owner, u64 offset,
- struct btrfs_key *ins)
-{
- int ret;
- struct btrfs_block_group_cache *block_group;
- struct btrfs_caching_control *caching_ctl;
- u64 start = ins->objectid;
- u64 num_bytes = ins->offset;
-
- block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
- cache_block_group(block_group, trans, NULL, 0);
- caching_ctl = get_caching_control(block_group);
-
- if (!caching_ctl) {
- BUG_ON(!block_group_cache_done(block_group));
- ret = btrfs_remove_free_space(block_group, start, num_bytes);
- BUG_ON(ret); /* -ENOMEM */
- } else {
- mutex_lock(&caching_ctl->mutex);
-
- if (start >= caching_ctl->progress) {
- ret = add_excluded_extent(root, start, num_bytes);
- BUG_ON(ret); /* -ENOMEM */
- } else if (start + num_bytes <= caching_ctl->progress) {
- ret = btrfs_remove_free_space(block_group,
- start, num_bytes);
- BUG_ON(ret); /* -ENOMEM */
- } else {
- num_bytes = caching_ctl->progress - start;
- ret = btrfs_remove_free_space(block_group,
- start, num_bytes);
- BUG_ON(ret); /* -ENOMEM */
-
- start = caching_ctl->progress;
- num_bytes = ins->objectid + ins->offset -
- caching_ctl->progress;
- ret = add_excluded_extent(root, start, num_bytes);
- BUG_ON(ret); /* -ENOMEM */
- }
-
- mutex_unlock(&caching_ctl->mutex);
- put_caching_control(caching_ctl);
- }
-
- ret = btrfs_update_reserved_bytes(block_group, ins->offset,
- RESERVE_ALLOC_NO_ACCOUNT);
- BUG_ON(ret); /* logic error */
- btrfs_put_block_group(block_group);
- ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
- 0, owner, offset, ins, 1);
- return ret;
-}
-
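The caching-cursor handling above splits the logged extent into a scanned part, which must be removed from the free-space cache, and an unscanned part, which only needs to be marked excluded. A hedged sketch of just that split, with plain integers standing in for the caching control (split_around_progress is an illustrative name):

#include <stdio.h>
#include <stdint.h>

/* Split a newly logged extent around the caching cursor: bytes below
 * 'progress' have already been scanned into the free-space cache and
 * must be removed from it; bytes at or above it only need excluding. */
static void split_around_progress(uint64_t start, uint64_t len, uint64_t progress,
				  uint64_t *remove_len, uint64_t *exclude_len)
{
	if (start >= progress) {		/* nothing scanned yet */
		*remove_len = 0;
		*exclude_len = len;
	} else if (start + len <= progress) {	/* fully scanned */
		*remove_len = len;
		*exclude_len = 0;
	} else {				/* straddles the cursor */
		*remove_len = progress - start;
		*exclude_len = start + len - progress;
	}
}

int main(void)
{
	uint64_t rm, ex;

	split_around_progress(4096, 16384, 12288, &rm, &ex);
	printf("remove %llu bytes, exclude %llu bytes\n",
	       (unsigned long long)rm, (unsigned long long)ex);
	return 0;
}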
-struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u32 blocksize,
- int level)
-{
- struct extent_buffer *buf;
-
- buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
- if (!buf)
- return ERR_PTR(-ENOMEM);
- btrfs_set_header_generation(buf, trans->transid);
- btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
- btrfs_tree_lock(buf);
- clean_tree_block(trans, root, buf);
- clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
-
- btrfs_set_lock_blocking(buf);
- btrfs_set_buffer_uptodate(buf);
-
- if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
- /*
- * we allow two log transactions at a time, use different
-		 * EXTENT bit to differentiate dirty pages.
- */
- if (root->log_transid % 2 == 0)
- set_extent_dirty(&root->dirty_log_pages, buf->start,
- buf->start + buf->len - 1, GFP_NOFS);
- else
- set_extent_new(&root->dirty_log_pages, buf->start,
- buf->start + buf->len - 1, GFP_NOFS);
- } else {
- set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
- buf->start + buf->len - 1, GFP_NOFS);
- }
- trans->blocks_used++;
- /* this returns a buffer locked for blocking */
- return buf;
-}
-
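The log-tree branch above relies on the parity of log_transid so that the two log transactions that may be in flight at once mark their dirty pages with different extent bits. A toy illustration of the parity choice (the enum is illustrative; the real code sets EXTENT_DIRTY or EXTENT_NEW in dirty_log_pages):

#include <stdio.h>

enum log_mark { LOG_MARK_DIRTY, LOG_MARK_NEW };

/* Even log transids use one bit, odd transids the other, so the dirty
 * ranges of two concurrent log transactions never mix. */
static enum log_mark pick_log_mark(unsigned long long log_transid)
{
	return (log_transid % 2 == 0) ? LOG_MARK_DIRTY : LOG_MARK_NEW;
}

int main(void)
{
	unsigned long long t;

	for (t = 10; t < 14; t++)
		printf("transid %llu -> %s\n", t,
		       pick_log_mark(t) == LOG_MARK_DIRTY ? "EXTENT_DIRTY" : "EXTENT_NEW");
	return 0;
}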
-static struct btrfs_block_rsv *
-use_block_rsv(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u32 blocksize)
-{
- struct btrfs_block_rsv *block_rsv;
- struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
- int ret;
-
- block_rsv = get_block_rsv(trans, root);
-
- if (block_rsv->size == 0) {
- ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
- /*
- * If we couldn't reserve metadata bytes try and use some from
- * the global reserve.
- */
- if (ret && block_rsv != global_rsv) {
- ret = block_rsv_use_bytes(global_rsv, blocksize);
- if (!ret)
- return global_rsv;
- return ERR_PTR(ret);
- } else if (ret) {
- return ERR_PTR(ret);
- }
- return block_rsv;
- }
-
- ret = block_rsv_use_bytes(block_rsv, blocksize);
- if (!ret)
- return block_rsv;
- if (ret) {
- static DEFINE_RATELIMIT_STATE(_rs,
- DEFAULT_RATELIMIT_INTERVAL,
- /*DEFAULT_RATELIMIT_BURST*/ 2);
- if (__ratelimit(&_rs)) {
- printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret);
- WARN_ON(1);
- }
- ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
- if (!ret) {
- return block_rsv;
- } else if (ret && block_rsv != global_rsv) {
- ret = block_rsv_use_bytes(global_rsv, blocksize);
- if (!ret)
- return global_rsv;
- }
- }
-
- return ERR_PTR(-ENOSPC);
-}
-
-static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
- struct btrfs_block_rsv *block_rsv, u32 blocksize)
-{
- block_rsv_add_bytes(block_rsv, blocksize, 0);
- block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
-}
-
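use_block_rsv() above tries the transaction's own reserve first and only falls back to the global reserve when that fails, returning -ENOSPC if neither can cover the block. A simplified model of that ordering (struct rsv and take_from_rsv are illustrative, not btrfs types):

#include <stdio.h>
#include <errno.h>

struct rsv { long long reserved; };

/* Take 'bytes' from the preferred reserve if it can cover them, else
 * fall back to the global reserve; -ENOSPC if both are short. */
static int take_from_rsv(struct rsv *preferred, struct rsv *global, long long bytes)
{
	if (preferred->reserved >= bytes) {
		preferred->reserved -= bytes;
		return 0;
	}
	if (global != preferred && global->reserved >= bytes) {
		global->reserved -= bytes;
		return 0;
	}
	return -ENOSPC;
}

int main(void)
{
	struct rsv trans_rsv = { 0 };
	struct rsv global_rsv = { 16384 };

	if (take_from_rsv(&trans_rsv, &global_rsv, 4096) == 0)
		printf("took from fallback, global now %lld\n", global_rsv.reserved);
	return 0;
}

The real function also retries reserve_metadata_bytes() and rate-limits a warning before giving up, but the fallback order is the core of it.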
-/*
- * finds a free extent and does all the dirty work required for allocation.
- * it returns the key for the extent through ins, and a tree buffer for
- * the first block of the extent through buf.
- *
- * returns the tree buffer or NULL.
- */
-struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u32 blocksize,
- u64 parent, u64 root_objectid,
- struct btrfs_disk_key *key, int level,
- u64 hint, u64 empty_size, int for_cow)
-{
- struct btrfs_key ins;
- struct btrfs_block_rsv *block_rsv;
- struct extent_buffer *buf;
- u64 flags = 0;
- int ret;
-
- block_rsv = use_block_rsv(trans, root, blocksize);
- if (IS_ERR(block_rsv))
- return ERR_CAST(block_rsv);
-
- ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
- empty_size, hint, &ins, 0);
- if (ret) {
- unuse_block_rsv(root->fs_info, block_rsv, blocksize);
- return ERR_PTR(ret);
- }
-
- buf = btrfs_init_new_buffer(trans, root, ins.objectid,
- blocksize, level);
- BUG_ON(IS_ERR(buf)); /* -ENOMEM */
-
- if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
- if (parent == 0)
- parent = ins.objectid;
- flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
- } else
- BUG_ON(parent > 0);
-
- if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
- struct btrfs_delayed_extent_op *extent_op;
- extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
- BUG_ON(!extent_op); /* -ENOMEM */
- if (key)
- memcpy(&extent_op->key, key, sizeof(extent_op->key));
- else
- memset(&extent_op->key, 0, sizeof(extent_op->key));
- extent_op->flags_to_set = flags;
- extent_op->update_key = 1;
- extent_op->update_flags = 1;
- extent_op->is_data = 0;
-
- ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
- ins.objectid,
- ins.offset, parent, root_objectid,
- level, BTRFS_ADD_DELAYED_EXTENT,
- extent_op, for_cow);
- BUG_ON(ret); /* -ENOMEM */
- }
- return buf;
-}
-
-struct walk_control {
- u64 refs[BTRFS_MAX_LEVEL];
- u64 flags[BTRFS_MAX_LEVEL];
- struct btrfs_key update_progress;
- int stage;
- int level;
- int shared_level;
- int update_ref;
- int keep_locks;
- int reada_slot;
- int reada_count;
- int for_reloc;
-};
-
-#define DROP_REFERENCE 1
-#define UPDATE_BACKREF 2
-
-static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct walk_control *wc,
- struct btrfs_path *path)
-{
- u64 bytenr;
- u64 generation;
- u64 refs;
- u64 flags;
- u32 nritems;
- u32 blocksize;
- struct btrfs_key key;
- struct extent_buffer *eb;
- int ret;
- int slot;
- int nread = 0;
-
- if (path->slots[wc->level] < wc->reada_slot) {
- wc->reada_count = wc->reada_count * 2 / 3;
- wc->reada_count = max(wc->reada_count, 2);
- } else {
- wc->reada_count = wc->reada_count * 3 / 2;
- wc->reada_count = min_t(int, wc->reada_count,
- BTRFS_NODEPTRS_PER_BLOCK(root));
- }
-
- eb = path->nodes[wc->level];
- nritems = btrfs_header_nritems(eb);
- blocksize = btrfs_level_size(root, wc->level - 1);
-
- for (slot = path->slots[wc->level]; slot < nritems; slot++) {
- if (nread >= wc->reada_count)
- break;
-
- cond_resched();
- bytenr = btrfs_node_blockptr(eb, slot);
- generation = btrfs_node_ptr_generation(eb, slot);
-
- if (slot == path->slots[wc->level])
- goto reada;
-
- if (wc->stage == UPDATE_BACKREF &&
- generation <= root->root_key.offset)
- continue;
-
- /* We don't lock the tree block, it's OK to be racy here */
- ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
- &refs, &flags);
- /* We don't care about errors in readahead. */
- if (ret < 0)
- continue;
- BUG_ON(refs == 0);
-
- if (wc->stage == DROP_REFERENCE) {
- if (refs == 1)
- goto reada;
-
- if (wc->level == 1 &&
- (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
- continue;
- if (!wc->update_ref ||
- generation <= root->root_key.offset)
- continue;
- btrfs_node_key_to_cpu(eb, &key, slot);
- ret = btrfs_comp_cpu_keys(&key,
- &wc->update_progress);
- if (ret < 0)
- continue;
- } else {
- if (wc->level == 1 &&
- (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
- continue;
- }
-reada:
- ret = readahead_tree_block(root, bytenr, blocksize,
- generation);
- if (ret)
- break;
- nread++;
- }
- wc->reada_slot = slot;
-}
-
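reada_walk_down() above sizes its readahead window adaptively: it shrinks the window to 2/3 when the walk is still behind the previously read-ahead slot and grows it to 3/2 otherwise, clamped between 2 and the number of pointers per node. A small sketch of that adjustment (max_ptrs stands in for BTRFS_NODEPTRS_PER_BLOCK()):

#include <stdio.h>

/* Grow or shrink a readahead window with 3/2 and 2/3 steps, clamped to
 * [2, max_ptrs]. 'behind' means the walk has not yet consumed the
 * previously issued readahead. */
static int adapt_reada_count(int count, int behind, int max_ptrs)
{
	if (behind) {
		count = count * 2 / 3;
		if (count < 2)
			count = 2;
	} else {
		count = count * 3 / 2;
		if (count > max_ptrs)
			count = max_ptrs;
	}
	return count;
}

int main(void)
{
	int count = 32;

	count = adapt_reada_count(count, 1, 493);	/* shrink to 21 */
	count = adapt_reada_count(count, 0, 493);	/* grow to 31 */
	printf("readahead window: %d\n", count);
	return 0;
}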
-/*
- * helper to process a tree block while walking down the tree.
- *
- * when wc->stage == UPDATE_BACKREF, this function updates
- * back refs for pointers in the block.
- *
- * NOTE: return value 1 means we should stop walking down.
- */
-static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct walk_control *wc, int lookup_info)
-{
- int level = wc->level;
- struct extent_buffer *eb = path->nodes[level];
- u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
- int ret;
-
- if (wc->stage == UPDATE_BACKREF &&
- btrfs_header_owner(eb) != root->root_key.objectid)
- return 1;
-
- /*
-	 * when the reference count of a tree block is 1, it won't increase
-	 * again. once the full backref flag is set, we never clear it.
- */
- if (lookup_info &&
- ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
- (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
- BUG_ON(!path->locks[level]);
- ret = btrfs_lookup_extent_info(trans, root,
- eb->start, eb->len,
- &wc->refs[level],
- &wc->flags[level]);
- BUG_ON(ret == -ENOMEM);
- if (ret)
- return ret;
- BUG_ON(wc->refs[level] == 0);
- }
-
- if (wc->stage == DROP_REFERENCE) {
- if (wc->refs[level] > 1)
- return 1;
-
- if (path->locks[level] && !wc->keep_locks) {
- btrfs_tree_unlock_rw(eb, path->locks[level]);
- path->locks[level] = 0;
- }
- return 0;
- }
-
- /* wc->stage == UPDATE_BACKREF */
- if (!(wc->flags[level] & flag)) {
- BUG_ON(!path->locks[level]);
- ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);
- BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
- BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
- eb->len, flag, 0);
- BUG_ON(ret); /* -ENOMEM */
- wc->flags[level] |= flag;
- }
-
- /*
- * the block is shared by multiple trees, so it's not good to
- * keep the tree lock
- */
- if (path->locks[level] && level > 0) {
- btrfs_tree_unlock_rw(eb, path->locks[level]);
- path->locks[level] = 0;
- }
- return 0;
-}
-
-/*
- * helper to process a tree block pointer.
- *
- * when wc->stage == DROP_REFERENCE, this function checks
- * reference count of the block pointed to. if the block
- * is shared and we need update back refs for the subtree
- * rooted at the block, this function changes wc->stage to
- * UPDATE_BACKREF. if the block is shared and there is no
- * need to update back, this function drops the reference
- * to the block.
- *
- * NOTE: return value 1 means we should stop walking down.
- */
-static noinline int do_walk_down(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct walk_control *wc, int *lookup_info)
-{
- u64 bytenr;
- u64 generation;
- u64 parent;
- u32 blocksize;
- struct btrfs_key key;
- struct extent_buffer *next;
- int level = wc->level;
- int reada = 0;
- int ret = 0;
-
- generation = btrfs_node_ptr_generation(path->nodes[level],
- path->slots[level]);
- /*
- * if the lower level block was created before the snapshot
- * was created, we know there is no need to update back refs
- * for the subtree
- */
- if (wc->stage == UPDATE_BACKREF &&
- generation <= root->root_key.offset) {
- *lookup_info = 1;
- return 1;
- }
-
- bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
- blocksize = btrfs_level_size(root, level - 1);
-
- next = btrfs_find_tree_block(root, bytenr, blocksize);
- if (!next) {
- next = btrfs_find_create_tree_block(root, bytenr, blocksize);
- if (!next)
- return -ENOMEM;
- reada = 1;
- }
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
-
- ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
- &wc->refs[level - 1],
- &wc->flags[level - 1]);
- if (ret < 0) {
- btrfs_tree_unlock(next);
- return ret;
- }
-
- BUG_ON(wc->refs[level - 1] == 0);
- *lookup_info = 0;
-
- if (wc->stage == DROP_REFERENCE) {
- if (wc->refs[level - 1] > 1) {
- if (level == 1 &&
- (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
- goto skip;
-
- if (!wc->update_ref ||
- generation <= root->root_key.offset)
- goto skip;
-
- btrfs_node_key_to_cpu(path->nodes[level], &key,
- path->slots[level]);
- ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
- if (ret < 0)
- goto skip;
-
- wc->stage = UPDATE_BACKREF;
- wc->shared_level = level - 1;
- }
- } else {
- if (level == 1 &&
- (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
- goto skip;
- }
-
- if (!btrfs_buffer_uptodate(next, generation, 0)) {
- btrfs_tree_unlock(next);
- free_extent_buffer(next);
- next = NULL;
- *lookup_info = 1;
- }
-
- if (!next) {
- if (reada && level == 1)
- reada_walk_down(trans, root, wc, path);
- next = read_tree_block(root, bytenr, blocksize, generation);
- if (!next)
- return -EIO;
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
- }
-
- level--;
- BUG_ON(level != btrfs_header_level(next));
- path->nodes[level] = next;
- path->slots[level] = 0;
- path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
- wc->level = level;
- if (wc->level == 1)
- wc->reada_slot = 0;
- return 0;
-skip:
- wc->refs[level - 1] = 0;
- wc->flags[level - 1] = 0;
- if (wc->stage == DROP_REFERENCE) {
- if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
- parent = path->nodes[level]->start;
- } else {
- BUG_ON(root->root_key.objectid !=
- btrfs_header_owner(path->nodes[level]));
- parent = 0;
- }
-
- ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
- root->root_key.objectid, level - 1, 0, 0);
- BUG_ON(ret); /* -ENOMEM */
- }
- btrfs_tree_unlock(next);
- free_extent_buffer(next);
- *lookup_info = 1;
- return 1;
-}
-
-/*
- * helper to process a tree block while walking up the tree.
- *
- * when wc->stage == DROP_REFERENCE, this function drops
- * reference count on the block.
- *
- * when wc->stage == UPDATE_BACKREF, this function changes
- * wc->stage back to DROP_REFERENCE if we changed wc->stage
- * to UPDATE_BACKREF previously while processing the block.
- *
- * NOTE: return value 1 means we should stop walking up.
- */
-static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct walk_control *wc)
-{
- int ret;
- int level = wc->level;
- struct extent_buffer *eb = path->nodes[level];
- u64 parent = 0;
-
- if (wc->stage == UPDATE_BACKREF) {
- BUG_ON(wc->shared_level < level);
- if (level < wc->shared_level)
- goto out;
-
- ret = find_next_key(path, level + 1, &wc->update_progress);
- if (ret > 0)
- wc->update_ref = 0;
-
- wc->stage = DROP_REFERENCE;
- wc->shared_level = -1;
- path->slots[level] = 0;
-
- /*
- * check reference count again if the block isn't locked.
- * we should start walking down the tree again if reference
- * count is one.
- */
- if (!path->locks[level]) {
- BUG_ON(level == 0);
- btrfs_tree_lock(eb);
- btrfs_set_lock_blocking(eb);
- path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
-
- ret = btrfs_lookup_extent_info(trans, root,
- eb->start, eb->len,
- &wc->refs[level],
- &wc->flags[level]);
- if (ret < 0) {
- btrfs_tree_unlock_rw(eb, path->locks[level]);
- return ret;
- }
- BUG_ON(wc->refs[level] == 0);
- if (wc->refs[level] == 1) {
- btrfs_tree_unlock_rw(eb, path->locks[level]);
- return 1;
- }
- }
- }
-
- /* wc->stage == DROP_REFERENCE */
- BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
-
- if (wc->refs[level] == 1) {
- if (level == 0) {
- if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
- ret = btrfs_dec_ref(trans, root, eb, 1,
- wc->for_reloc);
- else
- ret = btrfs_dec_ref(trans, root, eb, 0,
- wc->for_reloc);
- BUG_ON(ret); /* -ENOMEM */
- }
- /* make block locked assertion in clean_tree_block happy */
- if (!path->locks[level] &&
- btrfs_header_generation(eb) == trans->transid) {
- btrfs_tree_lock(eb);
- btrfs_set_lock_blocking(eb);
- path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
- }
- clean_tree_block(trans, root, eb);
- }
-
- if (eb == root->node) {
- if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
- parent = eb->start;
- else
- BUG_ON(root->root_key.objectid !=
- btrfs_header_owner(eb));
- } else {
- if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
- parent = path->nodes[level + 1]->start;
- else
- BUG_ON(root->root_key.objectid !=
- btrfs_header_owner(path->nodes[level + 1]));
- }
-
- btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0);
-out:
- wc->refs[level] = 0;
- wc->flags[level] = 0;
- return 0;
-}
-
-static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct walk_control *wc)
-{
- int level = wc->level;
- int lookup_info = 1;
- int ret;
-
- while (level >= 0) {
- ret = walk_down_proc(trans, root, path, wc, lookup_info);
- if (ret > 0)
- break;
-
- if (level == 0)
- break;
-
- if (path->slots[level] >=
- btrfs_header_nritems(path->nodes[level]))
- break;
-
- ret = do_walk_down(trans, root, path, wc, &lookup_info);
- if (ret > 0) {
- path->slots[level]++;
- continue;
- } else if (ret < 0)
- return ret;
- level = wc->level;
- }
- return 0;
-}
-
-static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct walk_control *wc, int max_level)
-{
- int level = wc->level;
- int ret;
-
- path->slots[level] = btrfs_header_nritems(path->nodes[level]);
- while (level < max_level && path->nodes[level]) {
- wc->level = level;
- if (path->slots[level] + 1 <
- btrfs_header_nritems(path->nodes[level])) {
- path->slots[level]++;
- return 0;
- } else {
- ret = walk_up_proc(trans, root, path, wc);
- if (ret > 0)
- return 0;
-
- if (path->locks[level]) {
- btrfs_tree_unlock_rw(path->nodes[level],
- path->locks[level]);
- path->locks[level] = 0;
- }
- free_extent_buffer(path->nodes[level]);
- path->nodes[level] = NULL;
- level++;
- }
- }
- return 1;
-}
-
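walk_down_tree() and walk_up_tree() together form an iterative, recursion-free depth-first traversal: the path's per-level slot counters replace the call stack, descending while slots remain and popping a level once its slots are exhausted. A self-contained sketch of the same down/up pattern over a toy n-ary tree (all types here are illustrative):

#include <stdio.h>

#define MAX_LEVEL 8

struct node {
	int nr;			/* number of children, 0 for a leaf */
	struct node *child[4];
	const char *name;
};

/* Iterative depth-first walk driven by explicit per-level slots, in the
 * spirit of walk_down_tree()/walk_up_tree(): descend through the current
 * slot until a leaf, then pop exhausted levels and advance the parent
 * slot. Nodes are visited in post-order. */
static void walk(struct node *root)
{
	struct node *nodes[MAX_LEVEL] = { root };
	int slots[MAX_LEVEL] = { 0 };
	int level = 0;

	while (1) {
		/* walk down */
		while (nodes[level]->nr > 0 && slots[level] < nodes[level]->nr) {
			struct node *next = nodes[level]->child[slots[level]];
			level++;
			nodes[level] = next;
			slots[level] = 0;
		}
		printf("visit %s\n", nodes[level]->name);

		/* walk up */
		while (1) {
			if (level == 0)
				return;
			level--;
			slots[level]++;
			if (slots[level] < nodes[level]->nr)
				break;
			printf("visit %s\n", nodes[level]->name);
		}
	}
}

int main(void)
{
	struct node l1 = { 0, { 0 }, "leaf1" }, l2 = { 0, { 0 }, "leaf2" };
	struct node mid = { 2, { &l1, &l2 }, "mid" };
	struct node root = { 1, { &mid }, "root" };

	walk(&root);
	return 0;
}

The btrfs version layers the DROP_REFERENCE/UPDATE_BACKREF stages, locking and readahead on top of this skeleton, but the control flow is the same.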
-/*
- * drop a subvolume tree.
- *
- * this function traverses the tree freeing any blocks that are only
- * referenced by the tree.
- *
- * when a shared tree block is found, this function decreases its
- * reference count by one. if update_ref is true, this function
- * also makes sure backrefs for the shared block and all lower level
- * blocks are properly updated.
- */
-int btrfs_drop_snapshot(struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv, int update_ref,
- int for_reloc)
-{
- struct btrfs_path *path;
- struct btrfs_trans_handle *trans;
- struct btrfs_root *tree_root = root->fs_info->tree_root;
- struct btrfs_root_item *root_item = &root->root_item;
- struct walk_control *wc;
- struct btrfs_key key;
- int err = 0;
- int ret;
- int level;
-
- path = btrfs_alloc_path();
- if (!path) {
- err = -ENOMEM;
- goto out;
- }
-
- wc = kzalloc(sizeof(*wc), GFP_NOFS);
- if (!wc) {
- btrfs_free_path(path);
- err = -ENOMEM;
- goto out;
- }
-
- trans = btrfs_start_transaction(tree_root, 0);
- if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
- goto out_free;
- }
-
- if (block_rsv)
- trans->block_rsv = block_rsv;
-
- if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
- level = btrfs_header_level(root->node);
- path->nodes[level] = btrfs_lock_root_node(root);
- btrfs_set_lock_blocking(path->nodes[level]);
- path->slots[level] = 0;
- path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
- memset(&wc->update_progress, 0,
- sizeof(wc->update_progress));
- } else {
- btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
- memcpy(&wc->update_progress, &key,
- sizeof(wc->update_progress));
-
- level = root_item->drop_level;
- BUG_ON(level == 0);
- path->lowest_level = level;
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- path->lowest_level = 0;
- if (ret < 0) {
- err = ret;
- goto out_end_trans;
- }
- WARN_ON(ret > 0);
-
- /*
- * unlock our path, this is safe because only this
- * function is allowed to delete this snapshot
- */
- btrfs_unlock_up_safe(path, 0);
-
- level = btrfs_header_level(root->node);
- while (1) {
- btrfs_tree_lock(path->nodes[level]);
- btrfs_set_lock_blocking(path->nodes[level]);
-
- ret = btrfs_lookup_extent_info(trans, root,
- path->nodes[level]->start,
- path->nodes[level]->len,
- &wc->refs[level],
- &wc->flags[level]);
- if (ret < 0) {
- err = ret;
- goto out_end_trans;
- }
- BUG_ON(wc->refs[level] == 0);
-
- if (level == root_item->drop_level)
- break;
-
- btrfs_tree_unlock(path->nodes[level]);
- WARN_ON(wc->refs[level] != 1);
- level--;
- }
- }
-
- wc->level = level;
- wc->shared_level = -1;
- wc->stage = DROP_REFERENCE;
- wc->update_ref = update_ref;
- wc->keep_locks = 0;
- wc->for_reloc = for_reloc;
- wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
-
- while (1) {
- ret = walk_down_tree(trans, root, path, wc);
- if (ret < 0) {
- err = ret;
- break;
- }
-
- ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
- if (ret < 0) {
- err = ret;
- break;
- }
-
- if (ret > 0) {
- BUG_ON(wc->stage != DROP_REFERENCE);
- break;
- }
-
- if (wc->stage == DROP_REFERENCE) {
- level = wc->level;
- btrfs_node_key(path->nodes[level],
- &root_item->drop_progress,
- path->slots[level]);
- root_item->drop_level = level;
- }
-
- BUG_ON(wc->level == 0);
- if (btrfs_should_end_transaction(trans, tree_root)) {
- ret = btrfs_update_root(trans, tree_root,
- &root->root_key,
- root_item);
- if (ret) {
- btrfs_abort_transaction(trans, tree_root, ret);
- err = ret;
- goto out_end_trans;
- }
-
- btrfs_end_transaction_throttle(trans, tree_root);
- trans = btrfs_start_transaction(tree_root, 0);
- if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
- goto out_free;
- }
- if (block_rsv)
- trans->block_rsv = block_rsv;
- }
- }
- btrfs_release_path(path);
- if (err)
- goto out_end_trans;
-
- ret = btrfs_del_root(trans, tree_root, &root->root_key);
- if (ret) {
- btrfs_abort_transaction(trans, tree_root, ret);
- goto out_end_trans;
- }
-
- if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
- ret = btrfs_find_last_root(tree_root, root->root_key.objectid,
- NULL, NULL);
- if (ret < 0) {
- btrfs_abort_transaction(trans, tree_root, ret);
- err = ret;
- goto out_end_trans;
- } else if (ret > 0) {
- /* if we fail to delete the orphan item this time
- * around, it'll get picked up the next time.
- *
- * The most common failure here is just -ENOENT.
- */
- btrfs_del_orphan_item(trans, tree_root,
- root->root_key.objectid);
- }
- }
-
- if (root->in_radix) {
- btrfs_free_fs_root(tree_root->fs_info, root);
- } else {
- free_extent_buffer(root->node);
- free_extent_buffer(root->commit_root);
- kfree(root);
- }
-out_end_trans:
- btrfs_end_transaction_throttle(trans, tree_root);
-out_free:
- kfree(wc);
- btrfs_free_path(path);
-out:
- if (err)
- btrfs_std_error(root->fs_info, err);
- return err;
-}
-
-/*
- * drop subtree rooted at tree block 'node'.
- *
- * NOTE: this function will unlock and release tree block 'node'.
- * it is only used by relocation code.
- */
-int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_buffer *node,
- struct extent_buffer *parent)
-{
- struct btrfs_path *path;
- struct walk_control *wc;
- int level;
- int parent_level;
- int ret = 0;
- int wret;
-
- BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- wc = kzalloc(sizeof(*wc), GFP_NOFS);
- if (!wc) {
- btrfs_free_path(path);
- return -ENOMEM;
- }
-
- btrfs_assert_tree_locked(parent);
- parent_level = btrfs_header_level(parent);
- extent_buffer_get(parent);
- path->nodes[parent_level] = parent;
- path->slots[parent_level] = btrfs_header_nritems(parent);
-
- btrfs_assert_tree_locked(node);
- level = btrfs_header_level(node);
- path->nodes[level] = node;
- path->slots[level] = 0;
- path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
-
- wc->refs[parent_level] = 1;
- wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
- wc->level = level;
- wc->shared_level = -1;
- wc->stage = DROP_REFERENCE;
- wc->update_ref = 0;
- wc->keep_locks = 1;
- wc->for_reloc = 1;
- wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
-
- while (1) {
- wret = walk_down_tree(trans, root, path, wc);
- if (wret < 0) {
- ret = wret;
- break;
- }
-
- wret = walk_up_tree(trans, root, path, wc, parent_level);
- if (wret < 0)
- ret = wret;
- if (wret != 0)
- break;
- }
-
- kfree(wc);
- btrfs_free_path(path);
- return ret;
-}
-
-static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
-{
- u64 num_devices;
- u64 stripped;
-
- /*
-	 * if restripe for this chunk_type is on, pick the target profile and
- * return, otherwise do the usual balance
- */
- stripped = get_restripe_target(root->fs_info, flags);
- if (stripped)
- return extended_to_chunk(stripped);
-
- /*
- * we add in the count of missing devices because we want
- * to make sure that any RAID levels on a degraded FS
- * continue to be honored.
- */
- num_devices = root->fs_info->fs_devices->rw_devices +
- root->fs_info->fs_devices->missing_devices;
-
- stripped = BTRFS_BLOCK_GROUP_RAID0 |
- BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
-
- if (num_devices == 1) {
- stripped |= BTRFS_BLOCK_GROUP_DUP;
- stripped = flags & ~stripped;
-
- /* turn raid0 into single device chunks */
- if (flags & BTRFS_BLOCK_GROUP_RAID0)
- return stripped;
-
- /* turn mirroring into duplication */
- if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- return stripped | BTRFS_BLOCK_GROUP_DUP;
- } else {
- /* they already had raid on here, just return */
- if (flags & stripped)
- return flags;
-
- stripped |= BTRFS_BLOCK_GROUP_DUP;
- stripped = flags & ~stripped;
-
- /* switch duplicated blocks with raid1 */
- if (flags & BTRFS_BLOCK_GROUP_DUP)
- return stripped | BTRFS_BLOCK_GROUP_RAID1;
-
- /* this is drive concat, leave it alone */
- }
-
- return flags;
-}
-
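update_block_group_flags() above picks the profile a relocated chunk should be rewritten with: on a single writable device RAID0 degrades to single and RAID1/RAID10 degrade to DUP, while with more devices DUP is promoted to RAID1 and existing striped/mirrored profiles are kept. A hedged sketch of that decision table (the BG_* constants are illustrative, not the real BTRFS_BLOCK_GROUP_* values):

#include <stdio.h>
#include <stdint.h>

#define BG_RAID0	(1ULL << 0)
#define BG_RAID1	(1ULL << 1)
#define BG_DUP		(1ULL << 2)
#define BG_RAID10	(1ULL << 3)

/* Choose a replacement profile for relocation, following the same rules
 * as the function above. Non-RAID flag bits in 'flags' are preserved. */
static uint64_t pick_relocation_profile(uint64_t flags, uint64_t num_devices)
{
	uint64_t stripped = BG_RAID0 | BG_RAID1 | BG_RAID10;

	if (num_devices == 1) {
		stripped |= BG_DUP;
		stripped = flags & ~stripped;
		if (flags & BG_RAID0)
			return stripped;		/* raid0 -> single */
		if (flags & (BG_RAID1 | BG_RAID10))
			return stripped | BG_DUP;	/* mirroring -> dup */
	} else {
		if (flags & stripped)
			return flags;			/* already raid */
		stripped |= BG_DUP;
		stripped = flags & ~stripped;
		if (flags & BG_DUP)
			return stripped | BG_RAID1;	/* dup -> raid1 */
	}
	return flags;
}

int main(void)
{
	printf("raid1 on 1 device -> %#llx\n",
	       (unsigned long long)pick_relocation_profile(BG_RAID1, 1));
	printf("dup on 2 devices  -> %#llx\n",
	       (unsigned long long)pick_relocation_profile(BG_DUP, 2));
	return 0;
}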
-static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
-{
- struct btrfs_space_info *sinfo = cache->space_info;
- u64 num_bytes;
- u64 min_allocable_bytes;
- int ret = -ENOSPC;
-
- /*
-	 * We need some metadata space and system metadata space for
-	 * allocating chunks in some corner cases, so keep some headroom
-	 * unless we are forced to set the group read-only.
- */
- if ((sinfo->flags &
- (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
- !force)
- min_allocable_bytes = 1 * 1024 * 1024;
- else
- min_allocable_bytes = 0;
-
- spin_lock(&sinfo->lock);
- spin_lock(&cache->lock);
-
- if (cache->ro) {
- ret = 0;
- goto out;
- }
-
- num_bytes = cache->key.offset - cache->reserved - cache->pinned -
- cache->bytes_super - btrfs_block_group_used(&cache->item);
-
- if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
- sinfo->bytes_may_use + sinfo->bytes_readonly + num_bytes +
- min_allocable_bytes <= sinfo->total_bytes) {
- sinfo->bytes_readonly += num_bytes;
- cache->ro = 1;
- ret = 0;
- }
-out:
- spin_unlock(&cache->lock);
- spin_unlock(&sinfo->lock);
- return ret;
-}
-
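set_block_group_ro() above only marks a group read-only if moving its unused bytes into bytes_readonly still leaves min_allocable_bytes of headroom in the space_info. The inequality on its own (all values are plain numbers here; the real code reads them under the space_info and block-group spinlocks):

#include <stdio.h>
#include <stdint.h>

/* The headroom check: used + reserved + pinned + may_use + readonly,
 * plus the group's unused bytes and the required minimum, must still
 * fit inside the space_info's total bytes. */
static int can_set_readonly(uint64_t total, uint64_t used, uint64_t reserved,
			    uint64_t pinned, uint64_t may_use, uint64_t readonly,
			    uint64_t group_unused, uint64_t min_allocable)
{
	return used + reserved + pinned + may_use + readonly +
	       group_unused + min_allocable <= total;
}

int main(void)
{
	uint64_t gib = 1024ULL * 1024 * 1024;

	/* 1 GiB space_info, 256 MiB of unused bytes in the group */
	printf("can set ro: %d\n",
	       can_set_readonly(gib, 300ULL << 20, 10ULL << 20, 0, 50ULL << 20,
				0, 256ULL << 20, 1ULL << 20));
	return 0;
}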
-int btrfs_set_block_group_ro(struct btrfs_root *root,
-			     struct btrfs_block_group_cache *cache)
-{
- struct btrfs_trans_handle *trans;
- u64 alloc_flags;
- int ret;
-
- BUG_ON(cache->ro);
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- alloc_flags = update_block_group_flags(root, cache->flags);
- if (alloc_flags != cache->flags) {
- ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
- CHUNK_ALLOC_FORCE);
- if (ret < 0)
- goto out;
- }
-
- ret = set_block_group_ro(cache, 0);
- if (!ret)
- goto out;
- alloc_flags = get_alloc_profile(root, cache->space_info->flags);
- ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
- CHUNK_ALLOC_FORCE);
- if (ret < 0)
- goto out;
- ret = set_block_group_ro(cache, 0);
-out:
- btrfs_end_transaction(trans, root);
- return ret;
-}
-
-int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 type)
-{
- u64 alloc_flags = get_alloc_profile(root, type);
- return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
- CHUNK_ALLOC_FORCE);
-}
-
-/*
- * helper to account the unused space of all the readonly block groups in
- * the list. takes mirrors into account.
- */
-static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
-{
- struct btrfs_block_group_cache *block_group;
- u64 free_bytes = 0;
- int factor;
-
- list_for_each_entry(block_group, groups_list, list) {
- spin_lock(&block_group->lock);
-
- if (!block_group->ro) {
- spin_unlock(&block_group->lock);
- continue;
- }
-
- if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_DUP))
- factor = 2;
- else
- factor = 1;
-
- free_bytes += (block_group->key.offset -
- btrfs_block_group_used(&block_group->item)) *
- factor;
-
- spin_unlock(&block_group->lock);
- }
-
- return free_bytes;
-}
-
-/*
- * helper to account the unused space of all the readonly block groups in
- * the space_info. takes mirrors into account.
- */
-u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
-{
- int i;
- u64 free_bytes = 0;
-
- spin_lock(&sinfo->lock);
-
-	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
- if (!list_empty(&sinfo->block_groups[i]))
- free_bytes += __btrfs_get_ro_block_group_free_space(
- &sinfo->block_groups[i]);
-
- spin_unlock(&sinfo->lock);
-
- return free_bytes;
-}
-
-void btrfs_set_block_group_rw(struct btrfs_root *root,
- struct btrfs_block_group_cache *cache)
-{
- struct btrfs_space_info *sinfo = cache->space_info;
- u64 num_bytes;
-
- BUG_ON(!cache->ro);
-
- spin_lock(&sinfo->lock);
- spin_lock(&cache->lock);
- num_bytes = cache->key.offset - cache->reserved - cache->pinned -
- cache->bytes_super - btrfs_block_group_used(&cache->item);
- sinfo->bytes_readonly -= num_bytes;
- cache->ro = 0;
- spin_unlock(&cache->lock);
- spin_unlock(&sinfo->lock);
-}
-
-/*
- * checks to see if it's even possible to relocate this block group.
- *
- * @return - -1 if it's not a good idea to relocate this block group, 0 if it's
- * ok to go ahead and try.
- */
-int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
-{
- struct btrfs_block_group_cache *block_group;
- struct btrfs_space_info *space_info;
- struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
- struct btrfs_device *device;
- u64 min_free;
- u64 dev_min = 1;
- u64 dev_nr = 0;
- u64 target;
- int index;
- int full = 0;
- int ret = 0;
-
- block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
-
- /* odd, couldn't find the block group, leave it alone */
- if (!block_group)
- return -1;
-
- min_free = btrfs_block_group_used(&block_group->item);
-
- /* no bytes used, we're good */
- if (!min_free)
- goto out;
-
- space_info = block_group->space_info;
- spin_lock(&space_info->lock);
-
- full = space_info->full;
-
- /*
- * if this is the last block group we have in this space, we can't
- * relocate it unless we're able to allocate a new chunk below.
- *
- * Otherwise, we need to make sure we have room in the space to handle
- * all of the extents from this block group. If we can, we're good
- */
- if ((space_info->total_bytes != block_group->key.offset) &&
- (space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly +
- min_free < space_info->total_bytes)) {
- spin_unlock(&space_info->lock);
- goto out;
- }
- spin_unlock(&space_info->lock);
-
- /*
- * ok we don't have enough space, but maybe we have free space on our
- * devices to allocate new chunks for relocation, so loop through our
- * alloc devices and guess if we have enough space. if this block
- * group is going to be restriped, run checks against the target
- * profile instead of the current one.
- */
- ret = -1;
-
- /*
- * index:
- * 0: raid10
- * 1: raid1
- * 2: dup
- * 3: raid0
- * 4: single
- */
- target = get_restripe_target(root->fs_info, block_group->flags);
- if (target) {
- index = __get_block_group_index(extended_to_chunk(target));
- } else {
- /*
- * this is just a balance, so if we were marked as full
- * we know there is no space for a new chunk
- */
- if (full)
- goto out;
-
- index = get_block_group_index(block_group);
- }
-
- if (index == 0) {
- dev_min = 4;
- /* Divide by 2 */
- min_free >>= 1;
- } else if (index == 1) {
- dev_min = 2;
- } else if (index == 2) {
- /* Multiply by 2 */
- min_free <<= 1;
- } else if (index == 3) {
- dev_min = fs_devices->rw_devices;
- do_div(min_free, dev_min);
- }
-
- mutex_lock(&root->fs_info->chunk_mutex);
- list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
- u64 dev_offset;
-
- /*
- * check to make sure we can actually find a chunk with enough
- * space to fit our block group in.
- */
- if (device->total_bytes > device->bytes_used + min_free) {
- ret = find_free_dev_extent(device, min_free,
- &dev_offset, NULL);
- if (!ret)
- dev_nr++;
-
- if (dev_nr >= dev_min)
- break;
-
- ret = -1;
- }
- }
- mutex_unlock(&root->fs_info->chunk_mutex);
-out:
- btrfs_put_block_group(block_group);
- return ret;
-}
-
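The index-based tuning in btrfs_can_relocate() above decides how much free space each device must offer, and on how many devices, before relocation looks feasible: RAID10 wants four devices with half the bytes each, RAID1 two devices, DUP twice the bytes on one device, and RAID0 spreads the bytes across every writable device. A standalone sketch of that sizing (the enum and function names are illustrative):

#include <stdio.h>
#include <stdint.h>

enum raid_index { IDX_RAID10, IDX_RAID1, IDX_DUP, IDX_RAID0, IDX_SINGLE };

/* Compute the per-device free space and the number of devices that must
 * provide it for a relocation of 'used_bytes' to have a chance. */
static void relocation_requirements(enum raid_index idx, uint64_t used_bytes,
				    uint64_t rw_devices,
				    uint64_t *min_free, uint64_t *dev_min)
{
	*min_free = used_bytes;
	*dev_min = 1;

	switch (idx) {
	case IDX_RAID10:
		*dev_min = 4;
		*min_free >>= 1;
		break;
	case IDX_RAID1:
		*dev_min = 2;
		break;
	case IDX_DUP:
		*min_free <<= 1;
		break;
	case IDX_RAID0:
		*dev_min = rw_devices;
		*min_free /= rw_devices;
		break;
	case IDX_SINGLE:
		break;
	}
}

int main(void)
{
	uint64_t min_free, dev_min;

	relocation_requirements(IDX_RAID10, 1024ULL << 20, 6, &min_free, &dev_min);
	printf("raid10: need %llu MiB free on each of %llu devices\n",
	       (unsigned long long)(min_free >> 20), (unsigned long long)dev_min);
	return 0;
}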
-static int find_first_block_group(struct btrfs_root *root,
- struct btrfs_path *path, struct btrfs_key *key)
-{
- int ret = 0;
- struct btrfs_key found_key;
- struct extent_buffer *leaf;
- int slot;
-
- ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
- if (ret < 0)
- goto out;
-
- while (1) {
- slot = path->slots[0];
- leaf = path->nodes[0];
- if (slot >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(root, path);
- if (ret == 0)
- continue;
- if (ret < 0)
- goto out;
- break;
- }
- btrfs_item_key_to_cpu(leaf, &found_key, slot);
-
- if (found_key.objectid >= key->objectid &&
- found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
- ret = 0;
- goto out;
- }
- path->slots[0]++;
- }
-out:
- return ret;
-}
-
-void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
-{
- struct btrfs_block_group_cache *block_group;
- u64 last = 0;
-
- while (1) {
- struct inode *inode;
-
- block_group = btrfs_lookup_first_block_group(info, last);
- while (block_group) {
- spin_lock(&block_group->lock);
- if (block_group->iref)
- break;
- spin_unlock(&block_group->lock);
- block_group = next_block_group(info->tree_root,
- block_group);
- }
- if (!block_group) {
- if (last == 0)
- break;
- last = 0;
- continue;
- }
-
- inode = block_group->inode;
- block_group->iref = 0;
- block_group->inode = NULL;
- spin_unlock(&block_group->lock);
- iput(inode);
- last = block_group->key.objectid + block_group->key.offset;
- btrfs_put_block_group(block_group);
- }
-}
-
-int btrfs_free_block_groups(struct btrfs_fs_info *info)
-{
- struct btrfs_block_group_cache *block_group;
- struct btrfs_space_info *space_info;
- struct btrfs_caching_control *caching_ctl;
- struct rb_node *n;
-
- down_write(&info->extent_commit_sem);
- while (!list_empty(&info->caching_block_groups)) {
- caching_ctl = list_entry(info->caching_block_groups.next,
- struct btrfs_caching_control, list);
- list_del(&caching_ctl->list);
- put_caching_control(caching_ctl);
- }
- up_write(&info->extent_commit_sem);
-
- spin_lock(&info->block_group_cache_lock);
- while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
- block_group = rb_entry(n, struct btrfs_block_group_cache,
- cache_node);
- rb_erase(&block_group->cache_node,
- &info->block_group_cache_tree);
- spin_unlock(&info->block_group_cache_lock);
-
- down_write(&block_group->space_info->groups_sem);
- list_del(&block_group->list);
- up_write(&block_group->space_info->groups_sem);
-
- if (block_group->cached == BTRFS_CACHE_STARTED)
- wait_block_group_cache_done(block_group);
-
- /*
- * We haven't cached this block group, which means we could
- * possibly have excluded extents on this block group.
- */
- if (block_group->cached == BTRFS_CACHE_NO)
- free_excluded_extents(info->extent_root, block_group);
-
- btrfs_remove_free_space_cache(block_group);
- btrfs_put_block_group(block_group);
-
- spin_lock(&info->block_group_cache_lock);
- }
- spin_unlock(&info->block_group_cache_lock);
-
- /* now that all the block groups are freed, go through and
- * free all the space_info structs. This is only called during
- * the final stages of unmount, and so we know nobody is
- * using them. We call synchronize_rcu() once before we start,
- * just to be on the safe side.
- */
- synchronize_rcu();
-
- release_global_block_rsv(info);
-
-	while (!list_empty(&info->space_info)) {
- space_info = list_entry(info->space_info.next,
- struct btrfs_space_info,
- list);
- if (space_info->bytes_pinned > 0 ||
- space_info->bytes_reserved > 0 ||
- space_info->bytes_may_use > 0) {
- WARN_ON(1);
- dump_space_info(space_info, 0, 0);
- }
- list_del(&space_info->list);
- kfree(space_info);
- }
- return 0;
-}
-
-static void __link_block_group(struct btrfs_space_info *space_info,
- struct btrfs_block_group_cache *cache)
-{
- int index = get_block_group_index(cache);
-
- down_write(&space_info->groups_sem);
- list_add_tail(&cache->list, &space_info->block_groups[index]);
- up_write(&space_info->groups_sem);
-}
-
-int btrfs_read_block_groups(struct btrfs_root *root)
-{
- struct btrfs_path *path;
- int ret;
- struct btrfs_block_group_cache *cache;
- struct btrfs_fs_info *info = root->fs_info;
- struct btrfs_space_info *space_info;
- struct btrfs_key key;
- struct btrfs_key found_key;
- struct extent_buffer *leaf;
- int need_clear = 0;
- u64 cache_gen;
-
- root = info->extent_root;
- key.objectid = 0;
- key.offset = 0;
- btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->reada = 1;
-
- cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
- if (btrfs_test_opt(root, SPACE_CACHE) &&
- btrfs_super_generation(root->fs_info->super_copy) != cache_gen)
- need_clear = 1;
- if (btrfs_test_opt(root, CLEAR_CACHE))
- need_clear = 1;
-
- while (1) {
- ret = find_first_block_group(root, path, &key);
- if (ret > 0)
- break;
- if (ret != 0)
- goto error;
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- cache = kzalloc(sizeof(*cache), GFP_NOFS);
- if (!cache) {
- ret = -ENOMEM;
- goto error;
- }
- cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
- GFP_NOFS);
- if (!cache->free_space_ctl) {
- kfree(cache);
- ret = -ENOMEM;
- goto error;
- }
-
- atomic_set(&cache->count, 1);
- spin_lock_init(&cache->lock);
- cache->fs_info = info;
- INIT_LIST_HEAD(&cache->list);
- INIT_LIST_HEAD(&cache->cluster_list);
-
- if (need_clear)
- cache->disk_cache_state = BTRFS_DC_CLEAR;
-
- read_extent_buffer(leaf, &cache->item,
- btrfs_item_ptr_offset(leaf, path->slots[0]),
- sizeof(cache->item));
- memcpy(&cache->key, &found_key, sizeof(found_key));
-
- key.objectid = found_key.objectid + found_key.offset;
- btrfs_release_path(path);
- cache->flags = btrfs_block_group_flags(&cache->item);
- cache->sectorsize = root->sectorsize;
-
- btrfs_init_free_space_ctl(cache);
-
- /*
- * We need to exclude the super stripes now so that the space
- * info has super bytes accounted for, otherwise we'll think
- * we have more space than we actually do.
- */
- exclude_super_stripes(root, cache);
-
- /*
- * check for two cases, either we are full, and therefore
- * don't need to bother with the caching work since we won't
- * find any space, or we are empty, and we can just add all
-		 * the space in and be done with it. This saves us a lot of
- * time, particularly in the full case.
- */
- if (found_key.offset == btrfs_block_group_used(&cache->item)) {
- cache->last_byte_to_unpin = (u64)-1;
- cache->cached = BTRFS_CACHE_FINISHED;
- free_excluded_extents(root, cache);
- } else if (btrfs_block_group_used(&cache->item) == 0) {
- cache->last_byte_to_unpin = (u64)-1;
- cache->cached = BTRFS_CACHE_FINISHED;
- add_new_free_space(cache, root->fs_info,
- found_key.objectid,
- found_key.objectid +
- found_key.offset);
- free_excluded_extents(root, cache);
- }
-
- ret = update_space_info(info, cache->flags, found_key.offset,
- btrfs_block_group_used(&cache->item),
- &space_info);
- BUG_ON(ret); /* -ENOMEM */
- cache->space_info = space_info;
- spin_lock(&cache->space_info->lock);
- cache->space_info->bytes_readonly += cache->bytes_super;
- spin_unlock(&cache->space_info->lock);
-
- __link_block_group(space_info, cache);
-
- ret = btrfs_add_block_group_cache(root->fs_info, cache);
- BUG_ON(ret); /* Logic error */
-
- set_avail_alloc_bits(root->fs_info, cache->flags);
- if (btrfs_chunk_readonly(root, cache->key.objectid))
- set_block_group_ro(cache, 1);
- }
-
- list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
- if (!(get_alloc_profile(root, space_info->flags) &
- (BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_DUP)))
- continue;
- /*
- * avoid allocating from un-mirrored block group if there are
- * mirrored block groups.
- */
- list_for_each_entry(cache, &space_info->block_groups[3], list)
- set_block_group_ro(cache, 1);
- list_for_each_entry(cache, &space_info->block_groups[4], list)
- set_block_group_ro(cache, 1);
- }
-
- init_global_block_rsv(info);
- ret = 0;
-error:
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_make_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytes_used,
- u64 type, u64 chunk_objectid, u64 chunk_offset,
- u64 size)
-{
- int ret;
- struct btrfs_root *extent_root;
- struct btrfs_block_group_cache *cache;
-
- extent_root = root->fs_info->extent_root;
-
- root->fs_info->last_trans_log_full_commit = trans->transid;
-
- cache = kzalloc(sizeof(*cache), GFP_NOFS);
- if (!cache)
- return -ENOMEM;
- cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
- GFP_NOFS);
- if (!cache->free_space_ctl) {
- kfree(cache);
- return -ENOMEM;
- }
-
- cache->key.objectid = chunk_offset;
- cache->key.offset = size;
- cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
- cache->sectorsize = root->sectorsize;
- cache->fs_info = root->fs_info;
-
- atomic_set(&cache->count, 1);
- spin_lock_init(&cache->lock);
- INIT_LIST_HEAD(&cache->list);
- INIT_LIST_HEAD(&cache->cluster_list);
-
- btrfs_init_free_space_ctl(cache);
-
- btrfs_set_block_group_used(&cache->item, bytes_used);
- btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
- cache->flags = type;
- btrfs_set_block_group_flags(&cache->item, type);
-
- cache->last_byte_to_unpin = (u64)-1;
- cache->cached = BTRFS_CACHE_FINISHED;
- exclude_super_stripes(root, cache);
-
- add_new_free_space(cache, root->fs_info, chunk_offset,
- chunk_offset + size);
-
- free_excluded_extents(root, cache);
-
- ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
- &cache->space_info);
- BUG_ON(ret); /* -ENOMEM */
- update_global_block_rsv(root->fs_info);
-
- spin_lock(&cache->space_info->lock);
- cache->space_info->bytes_readonly += cache->bytes_super;
- spin_unlock(&cache->space_info->lock);
-
- __link_block_group(cache->space_info, cache);
-
- ret = btrfs_add_block_group_cache(root->fs_info, cache);
- BUG_ON(ret); /* Logic error */
-
- ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
- sizeof(cache->item));
- if (ret) {
- btrfs_abort_transaction(trans, extent_root, ret);
- return ret;
- }
-
- set_avail_alloc_bits(extent_root->fs_info, type);
-
- return 0;
-}
-
-static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
-{
- u64 extra_flags = chunk_to_extended(flags) &
- BTRFS_EXTENDED_PROFILE_MASK;
-
- if (flags & BTRFS_BLOCK_GROUP_DATA)
- fs_info->avail_data_alloc_bits &= ~extra_flags;
- if (flags & BTRFS_BLOCK_GROUP_METADATA)
- fs_info->avail_metadata_alloc_bits &= ~extra_flags;
- if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
- fs_info->avail_system_alloc_bits &= ~extra_flags;
-}
-
-int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 group_start)
-{
- struct btrfs_path *path;
- struct btrfs_block_group_cache *block_group;
- struct btrfs_free_cluster *cluster;
- struct btrfs_root *tree_root = root->fs_info->tree_root;
- struct btrfs_key key;
- struct inode *inode;
- int ret;
- int index;
- int factor;
-
- root = root->fs_info->extent_root;
-
- block_group = btrfs_lookup_block_group(root->fs_info, group_start);
- BUG_ON(!block_group);
- BUG_ON(!block_group->ro);
-
- /*
- * Free the reserved super bytes from this block group before
-	 * removing it.
- */
- free_excluded_extents(root, block_group);
-
- memcpy(&key, &block_group->key, sizeof(key));
- index = get_block_group_index(block_group);
- if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- factor = 2;
- else
- factor = 1;
-
- /* make sure this block group isn't part of an allocation cluster */
- cluster = &root->fs_info->data_alloc_cluster;
- spin_lock(&cluster->refill_lock);
- btrfs_return_cluster_to_free_space(block_group, cluster);
- spin_unlock(&cluster->refill_lock);
-
- /*
- * make sure this block group isn't part of a metadata
- * allocation cluster
- */
- cluster = &root->fs_info->meta_alloc_cluster;
- spin_lock(&cluster->refill_lock);
- btrfs_return_cluster_to_free_space(block_group, cluster);
- spin_unlock(&cluster->refill_lock);
-
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
- inode = lookup_free_space_inode(tree_root, block_group, path);
- if (!IS_ERR(inode)) {
- ret = btrfs_orphan_add(trans, inode);
- if (ret) {
- btrfs_add_delayed_iput(inode);
- goto out;
- }
- clear_nlink(inode);
- /* One for the block groups ref */
- spin_lock(&block_group->lock);
- if (block_group->iref) {
- block_group->iref = 0;
- block_group->inode = NULL;
- spin_unlock(&block_group->lock);
- iput(inode);
- } else {
- spin_unlock(&block_group->lock);
- }
- /* One for our lookup ref */
- btrfs_add_delayed_iput(inode);
- }
-
- key.objectid = BTRFS_FREE_SPACE_OBJECTID;
- key.offset = block_group->key.objectid;
- key.type = 0;
-
- ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
- if (ret < 0)
- goto out;
- if (ret > 0)
- btrfs_release_path(path);
- if (ret == 0) {
- ret = btrfs_del_item(trans, tree_root, path);
- if (ret)
- goto out;
- btrfs_release_path(path);
- }
-
- spin_lock(&root->fs_info->block_group_cache_lock);
- rb_erase(&block_group->cache_node,
- &root->fs_info->block_group_cache_tree);
- spin_unlock(&root->fs_info->block_group_cache_lock);
-
- down_write(&block_group->space_info->groups_sem);
- /*
- * we must use list_del_init so people can check to see if they
- * are still on the list after taking the semaphore
- */
- list_del_init(&block_group->list);
- if (list_empty(&block_group->space_info->block_groups[index]))
- clear_avail_alloc_bits(root->fs_info, block_group->flags);
- up_write(&block_group->space_info->groups_sem);
-
- if (block_group->cached == BTRFS_CACHE_STARTED)
- wait_block_group_cache_done(block_group);
-
- btrfs_remove_free_space_cache(block_group);
-
- spin_lock(&block_group->space_info->lock);
- block_group->space_info->total_bytes -= block_group->key.offset;
- block_group->space_info->bytes_readonly -= block_group->key.offset;
- block_group->space_info->disk_total -= block_group->key.offset * factor;
- spin_unlock(&block_group->space_info->lock);
-
- memcpy(&key, &block_group->key, sizeof(key));
-
- btrfs_clear_space_info_full(root->fs_info);
-
- btrfs_put_block_group(block_group);
- btrfs_put_block_group(block_group);
-
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret > 0)
- ret = -EIO;
- if (ret < 0)
- goto out;
-
- ret = btrfs_del_item(trans, root, path);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_space_info *space_info;
- struct btrfs_super_block *disk_super;
- u64 features;
- u64 flags;
- int mixed = 0;
- int ret;
-
- disk_super = fs_info->super_copy;
- if (!btrfs_super_root(disk_super))
- return 1;
-
- features = btrfs_super_incompat_flags(disk_super);
- if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
- mixed = 1;
-
- flags = BTRFS_BLOCK_GROUP_SYSTEM;
- ret = update_space_info(fs_info, flags, 0, 0, &space_info);
- if (ret)
- goto out;
-
- if (mixed) {
- flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
- ret = update_space_info(fs_info, flags, 0, 0, &space_info);
- } else {
- flags = BTRFS_BLOCK_GROUP_METADATA;
- ret = update_space_info(fs_info, flags, 0, 0, &space_info);
- if (ret)
- goto out;
-
- flags = BTRFS_BLOCK_GROUP_DATA;
- ret = update_space_info(fs_info, flags, 0, 0, &space_info);
- }
-out:
- return ret;
-}
-
-int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
-{
- return unpin_extent_range(root, start, end);
-}
-
-int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u64 *actual_bytes)
-{
- return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
-}
-
-int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_block_group_cache *cache = NULL;
- u64 group_trimmed;
- u64 start;
- u64 end;
- u64 trimmed = 0;
- u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
- int ret = 0;
-
- /*
-	 * try to trim all FS space; our block groups may start at a non-zero offset.
- */
- if (range->len == total_bytes)
- cache = btrfs_lookup_first_block_group(fs_info, range->start);
- else
- cache = btrfs_lookup_block_group(fs_info, range->start);
-
- while (cache) {
- if (cache->key.objectid >= (range->start + range->len)) {
- btrfs_put_block_group(cache);
- break;
- }
-
- start = max(range->start, cache->key.objectid);
- end = min(range->start + range->len,
- cache->key.objectid + cache->key.offset);
-
- if (end - start >= range->minlen) {
- if (!block_group_cache_done(cache)) {
- ret = cache_block_group(cache, NULL, root, 0);
- if (!ret)
- wait_block_group_cache_done(cache);
- }
- ret = btrfs_trim_block_group(cache,
- &group_trimmed,
- start,
- end,
- range->minlen);
-
- trimmed += group_trimmed;
- if (ret) {
- btrfs_put_block_group(cache);
- break;
- }
- }
-
- cache = next_block_group(fs_info->tree_root, cache);
- }
-
- range->len = trimmed;
- return ret;
-}
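btrfs_trim_fs() above walks block groups and clamps the user's trim range to each group before handing it to btrfs_trim_block_group(). A minimal sketch of that clamping step (clamp_trim_range is an illustrative helper, not a btrfs function):

#include <stdio.h>
#include <stdint.h>

static uint64_t max_u64(uint64_t a, uint64_t b) { return a > b ? a : b; }
static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

/* Clamp a requested trim range to one block group; returns 1 and fills
 * [*start, *end) when the overlap is at least minlen bytes, 0 otherwise. */
static int clamp_trim_range(uint64_t range_start, uint64_t range_len,
			    uint64_t group_start, uint64_t group_len,
			    uint64_t minlen, uint64_t *start, uint64_t *end)
{
	*start = max_u64(range_start, group_start);
	*end = min_u64(range_start + range_len, group_start + group_len);
	return *end > *start && *end - *start >= minlen;
}

int main(void)
{
	uint64_t start, end;

	/* trim the first GiB of the FS against a 256 MiB group at 512 MiB */
	if (clamp_trim_range(0, 1ULL << 30, 512ULL << 20, 256ULL << 20,
			     4096, &start, &end))
		printf("trim [%llu, %llu)\n",
		       (unsigned long long)start, (unsigned long long)end);
	return 0;
}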
diff --git a/ANDROID_3.4.5/fs/btrfs/extent_io.c b/ANDROID_3.4.5/fs/btrfs/extent_io.c
deleted file mode 100644
index c9018a05..00000000
--- a/ANDROID_3.4.5/fs/btrfs/extent_io.c
+++ /dev/null
@@ -1,4891 +0,0 @@
-#include <linux/bitops.h>
-#include <linux/slab.h>
-#include <linux/bio.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/page-flags.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/blkdev.h>
-#include <linux/swap.h>
-#include <linux/writeback.h>
-#include <linux/pagevec.h>
-#include <linux/prefetch.h>
-#include <linux/cleancache.h>
-#include "extent_io.h"
-#include "extent_map.h"
-#include "compat.h"
-#include "ctree.h"
-#include "btrfs_inode.h"
-#include "volumes.h"
-#include "check-integrity.h"
-#include "locking.h"
-
-static struct kmem_cache *extent_state_cache;
-static struct kmem_cache *extent_buffer_cache;
-
-static LIST_HEAD(buffers);
-static LIST_HEAD(states);
-
-#define LEAK_DEBUG 0
-#if LEAK_DEBUG
-static DEFINE_SPINLOCK(leak_lock);
-#endif
-
-#define BUFFER_LRU_MAX 64
-
-struct tree_entry {
- u64 start;
- u64 end;
- struct rb_node rb_node;
-};
-
-struct extent_page_data {
- struct bio *bio;
- struct extent_io_tree *tree;
- get_extent_t *get_extent;
-
- /* tells writepage not to lock the state bits for this range
- * it still does the unlocking
- */
- unsigned int extent_locked:1;
-
- /* tells the submit_bio code to use a WRITE_SYNC */
- unsigned int sync_io:1;
-};
-
-static noinline void flush_write_bio(void *data);
-static inline struct btrfs_fs_info *
-tree_fs_info(struct extent_io_tree *tree)
-{
- return btrfs_sb(tree->mapping->host->i_sb);
-}
-
-int __init extent_io_init(void)
-{
- extent_state_cache = kmem_cache_create("extent_state",
- sizeof(struct extent_state), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
- if (!extent_state_cache)
- return -ENOMEM;
-
- extent_buffer_cache = kmem_cache_create("extent_buffers",
- sizeof(struct extent_buffer), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
- if (!extent_buffer_cache)
- goto free_state_cache;
- return 0;
-
-free_state_cache:
- kmem_cache_destroy(extent_state_cache);
- return -ENOMEM;
-}
-
-void extent_io_exit(void)
-{
- struct extent_state *state;
- struct extent_buffer *eb;
-
- while (!list_empty(&states)) {
- state = list_entry(states.next, struct extent_state, leak_list);
- printk(KERN_ERR "btrfs state leak: start %llu end %llu "
- "state %lu in tree %p refs %d\n",
- (unsigned long long)state->start,
- (unsigned long long)state->end,
- state->state, state->tree, atomic_read(&state->refs));
- list_del(&state->leak_list);
- kmem_cache_free(extent_state_cache, state);
-
- }
-
- while (!list_empty(&buffers)) {
- eb = list_entry(buffers.next, struct extent_buffer, leak_list);
- printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
- "refs %d\n", (unsigned long long)eb->start,
- eb->len, atomic_read(&eb->refs));
- list_del(&eb->leak_list);
- kmem_cache_free(extent_buffer_cache, eb);
- }
- if (extent_state_cache)
- kmem_cache_destroy(extent_state_cache);
- if (extent_buffer_cache)
- kmem_cache_destroy(extent_buffer_cache);
-}
-
-void extent_io_tree_init(struct extent_io_tree *tree,
- struct address_space *mapping)
-{
- tree->state = RB_ROOT;
- INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
- tree->ops = NULL;
- tree->dirty_bytes = 0;
- spin_lock_init(&tree->lock);
- spin_lock_init(&tree->buffer_lock);
- tree->mapping = mapping;
-}
-
-static struct extent_state *alloc_extent_state(gfp_t mask)
-{
- struct extent_state *state;
-#if LEAK_DEBUG
- unsigned long flags;
-#endif
-
- state = kmem_cache_alloc(extent_state_cache, mask);
- if (!state)
- return state;
- state->state = 0;
- state->private = 0;
- state->tree = NULL;
-#if LEAK_DEBUG
- spin_lock_irqsave(&leak_lock, flags);
- list_add(&state->leak_list, &states);
- spin_unlock_irqrestore(&leak_lock, flags);
-#endif
- atomic_set(&state->refs, 1);
- init_waitqueue_head(&state->wq);
- trace_alloc_extent_state(state, mask, _RET_IP_);
- return state;
-}
-
-void free_extent_state(struct extent_state *state)
-{
- if (!state)
- return;
- if (atomic_dec_and_test(&state->refs)) {
-#if LEAK_DEBUG
- unsigned long flags;
-#endif
- WARN_ON(state->tree);
-#if LEAK_DEBUG
- spin_lock_irqsave(&leak_lock, flags);
- list_del(&state->leak_list);
- spin_unlock_irqrestore(&leak_lock, flags);
-#endif
- trace_free_extent_state(state, _RET_IP_);
- kmem_cache_free(extent_state_cache, state);
- }
-}
-
-static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
- struct rb_node *node)
-{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct tree_entry *entry;
-
- while (*p) {
- parent = *p;
- entry = rb_entry(parent, struct tree_entry, rb_node);
-
- if (offset < entry->start)
- p = &(*p)->rb_left;
- else if (offset > entry->end)
- p = &(*p)->rb_right;
- else
- return parent;
- }
-
- entry = rb_entry(node, struct tree_entry, rb_node);
- rb_link_node(node, parent, p);
- rb_insert_color(node, root);
- return NULL;
-}
-
-static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
- struct rb_node **prev_ret,
- struct rb_node **next_ret)
-{
- struct rb_root *root = &tree->state;
- struct rb_node *n = root->rb_node;
- struct rb_node *prev = NULL;
- struct rb_node *orig_prev = NULL;
- struct tree_entry *entry;
- struct tree_entry *prev_entry = NULL;
-
- while (n) {
- entry = rb_entry(n, struct tree_entry, rb_node);
- prev = n;
- prev_entry = entry;
-
- if (offset < entry->start)
- n = n->rb_left;
- else if (offset > entry->end)
- n = n->rb_right;
- else
- return n;
- }
-
- if (prev_ret) {
- orig_prev = prev;
- while (prev && offset > prev_entry->end) {
- prev = rb_next(prev);
- prev_entry = rb_entry(prev, struct tree_entry, rb_node);
- }
- *prev_ret = prev;
- prev = orig_prev;
- }
-
- if (next_ret) {
- prev_entry = rb_entry(prev, struct tree_entry, rb_node);
- while (prev && offset < prev_entry->start) {
- prev = rb_prev(prev);
- prev_entry = rb_entry(prev, struct tree_entry, rb_node);
- }
- *next_ret = prev;
- }
- return NULL;
-}
-
-static inline struct rb_node *tree_search(struct extent_io_tree *tree,
- u64 offset)
-{
- struct rb_node *prev = NULL;
- struct rb_node *ret;
-
- ret = __etree_search(tree, offset, &prev, NULL);
- if (!ret)
- return prev;
- return ret;
-}
-
-static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
- struct extent_state *other)
-{
- if (tree->ops && tree->ops->merge_extent_hook)
- tree->ops->merge_extent_hook(tree->mapping->host, new,
- other);
-}
-
-/*
- * utility function to look for merge candidates inside a given range.
- * Any extents with matching state are merged together into a single
- * extent in the tree. Extents with EXTENT_IO in their state field
- * are not merged because the end_io handlers need to be able to do
- * operations on them without sleeping (or doing allocations/splits).
- *
- * This should be called with the tree lock held.
- */
-static void merge_state(struct extent_io_tree *tree,
- struct extent_state *state)
-{
- struct extent_state *other;
- struct rb_node *other_node;
-
- if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
- return;
-
- other_node = rb_prev(&state->rb_node);
- if (other_node) {
- other = rb_entry(other_node, struct extent_state, rb_node);
- if (other->end == state->start - 1 &&
- other->state == state->state) {
- merge_cb(tree, state, other);
- state->start = other->start;
- other->tree = NULL;
- rb_erase(&other->rb_node, &tree->state);
- free_extent_state(other);
- }
- }
- other_node = rb_next(&state->rb_node);
- if (other_node) {
- other = rb_entry(other_node, struct extent_state, rb_node);
- if (other->start == state->end + 1 &&
- other->state == state->state) {
- merge_cb(tree, state, other);
- state->end = other->end;
- other->tree = NULL;
- rb_erase(&other->rb_node, &tree->state);
- free_extent_state(other);
- }
- }
-}
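
For illustration, a minimal stand-alone sketch of the adjacency-and-equal-bits
test that merge_state applies to its rb-tree neighbours; the kernel types,
the tree and the locking are left out, so this is an assumption-laden toy,
not the real data structure:

#include <stdint.h>
#include <stdio.h>

struct toy_state { uint64_t start, end; unsigned long state; };

/* the same test merge_state applies to a neighbouring state */
static int can_merge(const struct toy_state *prev, const struct toy_state *next)
{
	return prev->end == next->start - 1 && prev->state == next->state;
}

int main(void)
{
	struct toy_state a = { 0, 4095, 0x1 };
	struct toy_state b = { 4096, 8191, 0x1 };

	if (can_merge(&a, &b)) {
		a.end = b.end;	/* merged range now covers [0, 8191] */
		printf("merged: [%llu, %llu]\n",
		       (unsigned long long)a.start,
		       (unsigned long long)a.end);
	}
	return 0;
}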
-
-static void set_state_cb(struct extent_io_tree *tree,
- struct extent_state *state, int *bits)
-{
- if (tree->ops && tree->ops->set_bit_hook)
- tree->ops->set_bit_hook(tree->mapping->host, state, bits);
-}
-
-static void clear_state_cb(struct extent_io_tree *tree,
- struct extent_state *state, int *bits)
-{
- if (tree->ops && tree->ops->clear_bit_hook)
- tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
-}
-
-static void set_state_bits(struct extent_io_tree *tree,
- struct extent_state *state, int *bits);
-
-/*
- * insert an extent_state struct into the tree. 'bits' are set on the
- * struct before it is inserted.
- *
- * This may return -EEXIST if the extent is already there, in which case the
- * state struct is freed.
- *
- * The tree lock is not taken internally. This is a utility function and
- * probably isn't what you want to call (see set/clear_extent_bit).
- */
-static int insert_state(struct extent_io_tree *tree,
- struct extent_state *state, u64 start, u64 end,
- int *bits)
-{
- struct rb_node *node;
-
- if (end < start) {
- printk(KERN_ERR "btrfs end < start %llu %llu\n",
- (unsigned long long)end,
- (unsigned long long)start);
- WARN_ON(1);
- }
- state->start = start;
- state->end = end;
-
- set_state_bits(tree, state, bits);
-
- node = tree_insert(&tree->state, end, &state->rb_node);
- if (node) {
- struct extent_state *found;
- found = rb_entry(node, struct extent_state, rb_node);
- printk(KERN_ERR "btrfs found node %llu %llu on insert of "
- "%llu %llu\n", (unsigned long long)found->start,
- (unsigned long long)found->end,
- (unsigned long long)start, (unsigned long long)end);
- return -EEXIST;
- }
- state->tree = tree;
- merge_state(tree, state);
- return 0;
-}
-
-static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
- u64 split)
-{
- if (tree->ops && tree->ops->split_extent_hook)
- tree->ops->split_extent_hook(tree->mapping->host, orig, split);
-}
-
-/*
- * split a given extent state struct in two, inserting the preallocated
- * struct 'prealloc' as the newly created second half. 'split' indicates an
- * offset inside 'orig' where it should be split.
- *
- * Before calling,
- * the tree has 'orig' at [orig->start, orig->end]. After calling, there
- * are two extent state structs in the tree:
- * prealloc: [orig->start, split - 1]
- * orig: [ split, orig->end ]
- *
- * The tree locks are not taken by this function. They need to be held
- * by the caller.
- */
-static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
- struct extent_state *prealloc, u64 split)
-{
- struct rb_node *node;
-
- split_cb(tree, orig, split);
-
- prealloc->start = orig->start;
- prealloc->end = split - 1;
- prealloc->state = orig->state;
- orig->start = split;
-
- node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
- if (node) {
- free_extent_state(prealloc);
- return -EEXIST;
- }
- prealloc->tree = tree;
- return 0;
-}
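
The before/after picture in the comment can be checked with a small
stand-alone sketch (plain structs only; tree insertion, callbacks and
locking are deliberately omitted):

#include <stdint.h>
#include <stdio.h>

struct toy_state { uint64_t start, end; };

/* mirrors split_state: prealloc takes the front half, orig keeps the tail */
static void toy_split(struct toy_state *orig, struct toy_state *prealloc,
		      uint64_t split)
{
	prealloc->start = orig->start;
	prealloc->end = split - 1;
	orig->start = split;
}

int main(void)
{
	struct toy_state orig = { 0, 8191 };
	struct toy_state prealloc;

	toy_split(&orig, &prealloc, 4096);
	/* prints: prealloc [0, 4095], orig [4096, 8191] */
	printf("prealloc [%llu, %llu], orig [%llu, %llu]\n",
	       (unsigned long long)prealloc.start,
	       (unsigned long long)prealloc.end,
	       (unsigned long long)orig.start,
	       (unsigned long long)orig.end);
	return 0;
}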
-
-static struct extent_state *next_state(struct extent_state *state)
-{
- struct rb_node *next = rb_next(&state->rb_node);
- if (next)
- return rb_entry(next, struct extent_state, rb_node);
- else
- return NULL;
-}
-
-/*
- * utility function to clear some bits in an extent state struct.
- * it will optionally wake up any one waiting on this state (wake == 1)
- *
- * If no bits are set on the state struct after clearing things, the
- * struct is freed and removed from the tree
- */
-static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
- struct extent_state *state,
- int *bits, int wake)
-{
- struct extent_state *next;
- int bits_to_clear = *bits & ~EXTENT_CTLBITS;
-
- if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
- u64 range = state->end - state->start + 1;
- WARN_ON(range > tree->dirty_bytes);
- tree->dirty_bytes -= range;
- }
- clear_state_cb(tree, state, bits);
- state->state &= ~bits_to_clear;
- if (wake)
- wake_up(&state->wq);
- if (state->state == 0) {
- next = next_state(state);
- if (state->tree) {
- rb_erase(&state->rb_node, &tree->state);
- state->tree = NULL;
- free_extent_state(state);
- } else {
- WARN_ON(1);
- }
- } else {
- merge_state(tree, state);
- next = next_state(state);
- }
- return next;
-}
-
-static struct extent_state *
-alloc_extent_state_atomic(struct extent_state *prealloc)
-{
- if (!prealloc)
- prealloc = alloc_extent_state(GFP_ATOMIC);
-
- return prealloc;
-}
-
-void extent_io_tree_panic(struct extent_io_tree *tree, int err)
-{
- btrfs_panic(tree_fs_info(tree), err, "Locking error: "
- "Extent tree was modified by another "
- "thread while locked.");
-}
-
-/*
- * clear some bits on a range in the tree. This may require splitting
- * or inserting elements in the tree, so the gfp mask is used to
- * indicate which allocations or sleeping are allowed.
- *
- * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
- * the given range from the tree regardless of state (ie for truncate).
- *
- * the range [start, end] is inclusive.
- *
- * This takes the tree lock, and returns 0 on success and < 0 on error.
- */
-int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int wake, int delete,
- struct extent_state **cached_state,
- gfp_t mask)
-{
- struct extent_state *state;
- struct extent_state *cached;
- struct extent_state *prealloc = NULL;
- struct rb_node *node;
- u64 last_end;
- int err;
- int clear = 0;
-
- if (delete)
- bits |= ~EXTENT_CTLBITS;
- bits |= EXTENT_FIRST_DELALLOC;
-
- if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
- clear = 1;
-again:
- if (!prealloc && (mask & __GFP_WAIT)) {
- prealloc = alloc_extent_state(mask);
- if (!prealloc)
- return -ENOMEM;
- }
-
- spin_lock(&tree->lock);
- if (cached_state) {
- cached = *cached_state;
-
- if (clear) {
- *cached_state = NULL;
- cached_state = NULL;
- }
-
- if (cached && cached->tree && cached->start <= start &&
- cached->end > start) {
- if (clear)
- atomic_dec(&cached->refs);
- state = cached;
- goto hit_next;
- }
- if (clear)
- free_extent_state(cached);
- }
- /*
- * this search will find the extents that end after
- * our range starts
- */
- node = tree_search(tree, start);
- if (!node)
- goto out;
- state = rb_entry(node, struct extent_state, rb_node);
-hit_next:
- if (state->start > end)
- goto out;
- WARN_ON(state->end < start);
- last_end = state->end;
-
- /* the state doesn't have the wanted bits, go ahead */
- if (!(state->state & bits)) {
- state = next_state(state);
- goto next;
- }
-
- /*
- * | ---- desired range ---- |
- * | state | or
- * | ------------- state -------------- |
- *
- * We need to split the extent we found, and may flip
- * bits on second half.
- *
- * If the extent we found extends past our range, we
- * just split and search again. It'll get split again
- * the next time though.
- *
- * If the extent we found is inside our range, we clear
- * the desired bit on it.
- */
-
- if (state->start < start) {
- prealloc = alloc_extent_state_atomic(prealloc);
- BUG_ON(!prealloc);
- err = split_state(tree, state, prealloc, start);
- if (err)
- extent_io_tree_panic(tree, err);
-
- prealloc = NULL;
- if (err)
- goto out;
- if (state->end <= end) {
- clear_state_bit(tree, state, &bits, wake);
- if (last_end == (u64)-1)
- goto out;
- start = last_end + 1;
- }
- goto search_again;
- }
- /*
- * | ---- desired range ---- |
- * | state |
- * We need to split the extent, and clear the bit
- * on the first half
- */
- if (state->start <= end && state->end > end) {
- prealloc = alloc_extent_state_atomic(prealloc);
- BUG_ON(!prealloc);
- err = split_state(tree, state, prealloc, end + 1);
- if (err)
- extent_io_tree_panic(tree, err);
-
- if (wake)
- wake_up(&state->wq);
-
- clear_state_bit(tree, prealloc, &bits, wake);
-
- prealloc = NULL;
- goto out;
- }
-
- state = clear_state_bit(tree, state, &bits, wake);
-next:
- if (last_end == (u64)-1)
- goto out;
- start = last_end + 1;
- if (start <= end && state && !need_resched())
- goto hit_next;
- goto search_again;
-
-out:
- spin_unlock(&tree->lock);
- if (prealloc)
- free_extent_state(prealloc);
-
- return 0;
-
-search_again:
- if (start > end)
- goto out;
- spin_unlock(&tree->lock);
- if (mask & __GFP_WAIT)
- cond_resched();
- goto again;
-}
-
-static void wait_on_state(struct extent_io_tree *tree,
- struct extent_state *state)
- __releases(tree->lock)
- __acquires(tree->lock)
-{
- DEFINE_WAIT(wait);
- prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
- spin_unlock(&tree->lock);
- schedule();
- spin_lock(&tree->lock);
- finish_wait(&state->wq, &wait);
-}
-
-/*
- * waits for one or more bits to clear on a range in the state tree.
- * The range [start, end] is inclusive.
- * The tree lock is taken by this function
- */
-void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
-{
- struct extent_state *state;
- struct rb_node *node;
-
- spin_lock(&tree->lock);
-again:
- while (1) {
- /*
- * this search will find all the extents that end after
- * our range starts
- */
- node = tree_search(tree, start);
- if (!node)
- break;
-
- state = rb_entry(node, struct extent_state, rb_node);
-
- if (state->start > end)
- goto out;
-
- if (state->state & bits) {
- start = state->start;
- atomic_inc(&state->refs);
- wait_on_state(tree, state);
- free_extent_state(state);
- goto again;
- }
- start = state->end + 1;
-
- if (start > end)
- break;
-
- cond_resched_lock(&tree->lock);
- }
-out:
- spin_unlock(&tree->lock);
-}
-
-static void set_state_bits(struct extent_io_tree *tree,
- struct extent_state *state,
- int *bits)
-{
- int bits_to_set = *bits & ~EXTENT_CTLBITS;
-
- set_state_cb(tree, state, bits);
- if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
- u64 range = state->end - state->start + 1;
- tree->dirty_bytes += range;
- }
- state->state |= bits_to_set;
-}
-
-static void cache_state(struct extent_state *state,
- struct extent_state **cached_ptr)
-{
- if (cached_ptr && !(*cached_ptr)) {
- if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) {
- *cached_ptr = state;
- atomic_inc(&state->refs);
- }
- }
-}
-
-static void uncache_state(struct extent_state **cached_ptr)
-{
- if (cached_ptr && (*cached_ptr)) {
- struct extent_state *state = *cached_ptr;
- *cached_ptr = NULL;
- free_extent_state(state);
- }
-}
-
-/*
- * set some bits on a range in the tree. This may require allocations or
- * sleeping, so the gfp mask is used to indicate what is allowed.
- *
- * If any of the exclusive bits are set, this will fail with -EEXIST if some
- * part of the range already has the desired bits set. The start of the
- * existing range is returned in failed_start in this case.
- *
- * [start, end] is inclusive.  This takes the tree lock.
- */
-
-static int __must_check
-__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int exclusive_bits, u64 *failed_start,
- struct extent_state **cached_state, gfp_t mask)
-{
- struct extent_state *state;
- struct extent_state *prealloc = NULL;
- struct rb_node *node;
- int err = 0;
- u64 last_start;
- u64 last_end;
-
- bits |= EXTENT_FIRST_DELALLOC;
-again:
- if (!prealloc && (mask & __GFP_WAIT)) {
- prealloc = alloc_extent_state(mask);
- BUG_ON(!prealloc);
- }
-
- spin_lock(&tree->lock);
- if (cached_state && *cached_state) {
- state = *cached_state;
- if (state->start <= start && state->end > start &&
- state->tree) {
- node = &state->rb_node;
- goto hit_next;
- }
- }
- /*
- * this search will find all the extents that end after
- * our range starts.
- */
- node = tree_search(tree, start);
- if (!node) {
- prealloc = alloc_extent_state_atomic(prealloc);
- BUG_ON(!prealloc);
- err = insert_state(tree, prealloc, start, end, &bits);
- if (err)
- extent_io_tree_panic(tree, err);
-
- prealloc = NULL;
- goto out;
- }
- state = rb_entry(node, struct extent_state, rb_node);
-hit_next:
- last_start = state->start;
- last_end = state->end;
-
- /*
- * | ---- desired range ---- |
- * | state |
- *
- * Just lock what we found and keep going
- */
- if (state->start == start && state->end <= end) {
- struct rb_node *next_node;
- if (state->state & exclusive_bits) {
- *failed_start = state->start;
- err = -EEXIST;
- goto out;
- }
-
- set_state_bits(tree, state, &bits);
-
- cache_state(state, cached_state);
- merge_state(tree, state);
- if (last_end == (u64)-1)
- goto out;
-
- start = last_end + 1;
- next_node = rb_next(&state->rb_node);
- if (next_node && start < end && prealloc && !need_resched()) {
- state = rb_entry(next_node, struct extent_state,
- rb_node);
- if (state->start == start)
- goto hit_next;
- }
- goto search_again;
- }
-
- /*
- * | ---- desired range ---- |
- * | state |
- * or
- * | ------------- state -------------- |
- *
- * We need to split the extent we found, and may flip bits on
- * second half.
- *
- * If the extent we found extends past our
- * range, we just split and search again. It'll get split
- * again the next time though.
- *
- * If the extent we found is inside our range, we set the
- * desired bit on it.
- */
- if (state->start < start) {
- if (state->state & exclusive_bits) {
- *failed_start = start;
- err = -EEXIST;
- goto out;
- }
-
- prealloc = alloc_extent_state_atomic(prealloc);
- BUG_ON(!prealloc);
- err = split_state(tree, state, prealloc, start);
- if (err)
- extent_io_tree_panic(tree, err);
-
- prealloc = NULL;
- if (err)
- goto out;
- if (state->end <= end) {
- set_state_bits(tree, state, &bits);
- cache_state(state, cached_state);
- merge_state(tree, state);
- if (last_end == (u64)-1)
- goto out;
- start = last_end + 1;
- }
- goto search_again;
- }
- /*
- * | ---- desired range ---- |
- * | state | or | state |
- *
- * There's a hole, we need to insert something in it and
- * ignore the extent we found.
- */
- if (state->start > start) {
- u64 this_end;
- if (end < last_start)
- this_end = end;
- else
- this_end = last_start - 1;
-
- prealloc = alloc_extent_state_atomic(prealloc);
- BUG_ON(!prealloc);
-
- /*
-		 * Avoid freeing 'prealloc' if it can be merged with
- * the later extent.
- */
- err = insert_state(tree, prealloc, start, this_end,
- &bits);
- if (err)
- extent_io_tree_panic(tree, err);
-
- cache_state(prealloc, cached_state);
- prealloc = NULL;
- start = this_end + 1;
- goto search_again;
- }
- /*
- * | ---- desired range ---- |
- * | state |
- * We need to split the extent, and set the bit
- * on the first half
- */
- if (state->start <= end && state->end > end) {
- if (state->state & exclusive_bits) {
- *failed_start = start;
- err = -EEXIST;
- goto out;
- }
-
- prealloc = alloc_extent_state_atomic(prealloc);
- BUG_ON(!prealloc);
- err = split_state(tree, state, prealloc, end + 1);
- if (err)
- extent_io_tree_panic(tree, err);
-
- set_state_bits(tree, prealloc, &bits);
- cache_state(prealloc, cached_state);
- merge_state(tree, prealloc);
- prealloc = NULL;
- goto out;
- }
-
- goto search_again;
-
-out:
- spin_unlock(&tree->lock);
- if (prealloc)
- free_extent_state(prealloc);
-
- return err;
-
-search_again:
- if (start > end)
- goto out;
- spin_unlock(&tree->lock);
- if (mask & __GFP_WAIT)
- cond_resched();
- goto again;
-}
-
-int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
- u64 *failed_start, struct extent_state **cached_state,
- gfp_t mask)
-{
- return __set_extent_bit(tree, start, end, bits, 0, failed_start,
- cached_state, mask);
-}
-
-
-/**
- * convert_extent_bit - convert all bits in a given range from one bit to another
- * @tree: the io tree to search
- * @start: the start offset in bytes
- * @end: the end offset in bytes (inclusive)
- * @bits: the bits to set in this range
- * @clear_bits: the bits to clear in this range
- * @mask: the allocation mask
- *
- * This will go through and set bits for the given range. If any states exist
- * already in this range they are set with the given bit and cleared of the
- * clear_bits. This is only meant to be used by things that are mergeable, ie
- * converting from say DELALLOC to DIRTY. This is not meant to be used with
- * boundary bits like LOCK.
- */
-int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int clear_bits, gfp_t mask)
-{
- struct extent_state *state;
- struct extent_state *prealloc = NULL;
- struct rb_node *node;
- int err = 0;
- u64 last_start;
- u64 last_end;
-
-again:
- if (!prealloc && (mask & __GFP_WAIT)) {
- prealloc = alloc_extent_state(mask);
- if (!prealloc)
- return -ENOMEM;
- }
-
- spin_lock(&tree->lock);
- /*
- * this search will find all the extents that end after
- * our range starts.
- */
- node = tree_search(tree, start);
- if (!node) {
- prealloc = alloc_extent_state_atomic(prealloc);
- if (!prealloc) {
- err = -ENOMEM;
- goto out;
- }
- err = insert_state(tree, prealloc, start, end, &bits);
- prealloc = NULL;
- if (err)
- extent_io_tree_panic(tree, err);
- goto out;
- }
- state = rb_entry(node, struct extent_state, rb_node);
-hit_next:
- last_start = state->start;
- last_end = state->end;
-
- /*
- * | ---- desired range ---- |
- * | state |
- *
- * Just lock what we found and keep going
- */
- if (state->start == start && state->end <= end) {
- struct rb_node *next_node;
-
- set_state_bits(tree, state, &bits);
- clear_state_bit(tree, state, &clear_bits, 0);
- if (last_end == (u64)-1)
- goto out;
-
- start = last_end + 1;
- next_node = rb_next(&state->rb_node);
- if (next_node && start < end && prealloc && !need_resched()) {
- state = rb_entry(next_node, struct extent_state,
- rb_node);
- if (state->start == start)
- goto hit_next;
- }
- goto search_again;
- }
-
- /*
- * | ---- desired range ---- |
- * | state |
- * or
- * | ------------- state -------------- |
- *
- * We need to split the extent we found, and may flip bits on
- * second half.
- *
- * If the extent we found extends past our
- * range, we just split and search again. It'll get split
- * again the next time though.
- *
- * If the extent we found is inside our range, we set the
- * desired bit on it.
- */
- if (state->start < start) {
- prealloc = alloc_extent_state_atomic(prealloc);
- if (!prealloc) {
- err = -ENOMEM;
- goto out;
- }
- err = split_state(tree, state, prealloc, start);
- if (err)
- extent_io_tree_panic(tree, err);
- prealloc = NULL;
- if (err)
- goto out;
- if (state->end <= end) {
- set_state_bits(tree, state, &bits);
- clear_state_bit(tree, state, &clear_bits, 0);
- if (last_end == (u64)-1)
- goto out;
- start = last_end + 1;
- }
- goto search_again;
- }
- /*
- * | ---- desired range ---- |
- * | state | or | state |
- *
- * There's a hole, we need to insert something in it and
- * ignore the extent we found.
- */
- if (state->start > start) {
- u64 this_end;
- if (end < last_start)
- this_end = end;
- else
- this_end = last_start - 1;
-
- prealloc = alloc_extent_state_atomic(prealloc);
- if (!prealloc) {
- err = -ENOMEM;
- goto out;
- }
-
- /*
-		 * Avoid freeing 'prealloc' if it can be merged with
- * the later extent.
- */
- err = insert_state(tree, prealloc, start, this_end,
- &bits);
- if (err)
- extent_io_tree_panic(tree, err);
- prealloc = NULL;
- start = this_end + 1;
- goto search_again;
- }
- /*
- * | ---- desired range ---- |
- * | state |
- * We need to split the extent, and set the bit
- * on the first half
- */
- if (state->start <= end && state->end > end) {
- prealloc = alloc_extent_state_atomic(prealloc);
- if (!prealloc) {
- err = -ENOMEM;
- goto out;
- }
-
- err = split_state(tree, state, prealloc, end + 1);
- if (err)
- extent_io_tree_panic(tree, err);
-
- set_state_bits(tree, prealloc, &bits);
- clear_state_bit(tree, prealloc, &clear_bits, 0);
- prealloc = NULL;
- goto out;
- }
-
- goto search_again;
-
-out:
- spin_unlock(&tree->lock);
- if (prealloc)
- free_extent_state(prealloc);
-
- return err;
-
-search_again:
- if (start > end)
- goto out;
- spin_unlock(&tree->lock);
- if (mask & __GFP_WAIT)
- cond_resched();
- goto again;
-}
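
As the comment above says, this is meant for mergeable bits, e.g. turning
DELALLOC into DIRTY in one pass.  A hypothetical in-kernel caller might look
like the fragment below; 'tree', 'start', 'end' and the error handling are
assumptions, only the function and the two bits come from this file:

	ret = convert_extent_bit(tree, start, end,
				 EXTENT_DIRTY, EXTENT_DELALLOC, GFP_NOFS);
	if (ret)
		return ret;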
-
-/* wrappers around set/clear extent bit */
-int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
- NULL, mask);
-}
-
-int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, gfp_t mask)
-{
- return set_extent_bit(tree, start, end, bits, NULL,
- NULL, mask);
-}
-
-int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, gfp_t mask)
-{
- return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
-}
-
-int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask)
-{
- return set_extent_bit(tree, start, end,
- EXTENT_DELALLOC | EXTENT_UPTODATE,
- NULL, cached_state, mask);
-}
-
-int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return clear_extent_bit(tree, start, end,
- EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
-}
-
-int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_NEW, NULL,
- NULL, mask);
-}
-
-int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
- cached_state, mask);
-}
-
-static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached_state,
- gfp_t mask)
-{
- return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
- cached_state, mask);
-}
-
-/*
- * either insert or lock the state struct between start and end.  Use mask to
- * tell us if waiting is desired.
- */
-int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, struct extent_state **cached_state)
-{
- int err;
- u64 failed_start;
- while (1) {
- err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
- EXTENT_LOCKED, &failed_start,
- cached_state, GFP_NOFS);
- if (err == -EEXIST) {
- wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
- start = failed_start;
- } else
- break;
- WARN_ON(start > end);
- }
- return err;
-}
-
-int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
-{
- return lock_extent_bits(tree, start, end, 0, NULL);
-}
-
-int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
-{
- int err;
- u64 failed_start;
-
- err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
- &failed_start, NULL, GFP_NOFS);
- if (err == -EEXIST) {
- if (failed_start > start)
- clear_extent_bit(tree, start, failed_start - 1,
- EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
- return 0;
- }
- return 1;
-}
-
-int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached, gfp_t mask)
-{
- return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
- mask);
-}
-
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
-{
- return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
- GFP_NOFS);
-}
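
Callers normally bracket work on a byte range with the pair above; a
hypothetical usage sketch (the io_tree variable and the work in the middle
are assumptions, the two helpers are the ones defined here):

	lock_extent(io_tree, start, end);
	/* ... inspect or modify [start, end] while it is locked ... */
	unlock_extent(io_tree, start, end);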
-
-/*
- * helper function to set both pages and extents in the tree writeback
- */
-static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
-{
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = end >> PAGE_CACHE_SHIFT;
- struct page *page;
-
- while (index <= end_index) {
- page = find_get_page(tree->mapping, index);
- BUG_ON(!page); /* Pages should be in the extent_io_tree */
- set_page_writeback(page);
- page_cache_release(page);
- index++;
- }
- return 0;
-}
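
The index arithmetic used here (and throughout the file) simply maps byte
offsets to page-cache indexes; a stand-alone sketch, assuming the common
4096-byte page size rather than the kernel's PAGE_CACHE_SHIFT:

#include <stdint.h>
#include <stdio.h>

#define TOY_PAGE_SHIFT 12	/* assumes 4096-byte pages */

int main(void)
{
	uint64_t start = 5000, end = 13000;
	unsigned long index = start >> TOY_PAGE_SHIFT;		/* page 1 */
	unsigned long end_index = end >> TOY_PAGE_SHIFT;	/* page 3 */

	printf("pages %lu..%lu cover bytes [%llu, %llu]\n",
	       index, end_index,
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}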
-
-/* find the first state struct with 'bits' set after 'start', and
- * return it.  tree->lock must be held.  NULL will be returned if
- * nothing was found after 'start'
- */
-struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
- u64 start, int bits)
-{
- struct rb_node *node;
- struct extent_state *state;
-
- /*
- * this search will find all the extents that end after
- * our range starts.
- */
- node = tree_search(tree, start);
- if (!node)
- goto out;
-
- while (1) {
- state = rb_entry(node, struct extent_state, rb_node);
- if (state->end >= start && (state->state & bits))
- return state;
-
- node = rb_next(node);
- if (!node)
- break;
- }
-out:
- return NULL;
-}
-
-/*
- * find the first offset in the io tree with 'bits' set. zero is
- * returned if we find something, and *start_ret and *end_ret are
- * set to reflect the state struct that was found.
- *
- * If nothing was found, 1 is returned, < 0 on error
- */
-int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, int bits)
-{
- struct extent_state *state;
- int ret = 1;
-
- spin_lock(&tree->lock);
- state = find_first_extent_bit_state(tree, start, bits);
- if (state) {
- *start_ret = state->start;
- *end_ret = state->end;
- ret = 0;
- }
- spin_unlock(&tree->lock);
- return ret;
-}
-
-/*
- * find a contiguous range of bytes in the file marked as delalloc, not
- * more than 'max_bytes'. start and end are used to return the range,
- *
- * 1 is returned if we find something, 0 if nothing was in the tree
- */
-static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
- u64 *start, u64 *end, u64 max_bytes,
- struct extent_state **cached_state)
-{
- struct rb_node *node;
- struct extent_state *state;
- u64 cur_start = *start;
- u64 found = 0;
- u64 total_bytes = 0;
-
- spin_lock(&tree->lock);
-
- /*
- * this search will find all the extents that end after
- * our range starts.
- */
- node = tree_search(tree, cur_start);
- if (!node) {
- if (!found)
- *end = (u64)-1;
- goto out;
- }
-
- while (1) {
- state = rb_entry(node, struct extent_state, rb_node);
- if (found && (state->start != cur_start ||
- (state->state & EXTENT_BOUNDARY))) {
- goto out;
- }
- if (!(state->state & EXTENT_DELALLOC)) {
- if (!found)
- *end = state->end;
- goto out;
- }
- if (!found) {
- *start = state->start;
- *cached_state = state;
- atomic_inc(&state->refs);
- }
- found++;
- *end = state->end;
- cur_start = state->end + 1;
- node = rb_next(node);
- if (!node)
- break;
- total_bytes += state->end - state->start + 1;
- if (total_bytes >= max_bytes)
- break;
- }
-out:
- spin_unlock(&tree->lock);
- return found;
-}
-
-static noinline void __unlock_for_delalloc(struct inode *inode,
- struct page *locked_page,
- u64 start, u64 end)
-{
- int ret;
- struct page *pages[16];
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = end >> PAGE_CACHE_SHIFT;
- unsigned long nr_pages = end_index - index + 1;
- int i;
-
- if (index == locked_page->index && end_index == index)
- return;
-
- while (nr_pages > 0) {
- ret = find_get_pages_contig(inode->i_mapping, index,
- min_t(unsigned long, nr_pages,
- ARRAY_SIZE(pages)), pages);
- for (i = 0; i < ret; i++) {
- if (pages[i] != locked_page)
- unlock_page(pages[i]);
- page_cache_release(pages[i]);
- }
- nr_pages -= ret;
- index += ret;
- cond_resched();
- }
-}
-
-static noinline int lock_delalloc_pages(struct inode *inode,
- struct page *locked_page,
- u64 delalloc_start,
- u64 delalloc_end)
-{
- unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
- unsigned long start_index = index;
- unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
- unsigned long pages_locked = 0;
- struct page *pages[16];
- unsigned long nrpages;
- int ret;
- int i;
-
- /* the caller is responsible for locking the start index */
- if (index == locked_page->index && index == end_index)
- return 0;
-
- /* skip the page at the start index */
- nrpages = end_index - index + 1;
- while (nrpages > 0) {
- ret = find_get_pages_contig(inode->i_mapping, index,
- min_t(unsigned long,
- nrpages, ARRAY_SIZE(pages)), pages);
- if (ret == 0) {
- ret = -EAGAIN;
- goto done;
- }
- /* now we have an array of pages, lock them all */
- for (i = 0; i < ret; i++) {
- /*
- * the caller is taking responsibility for
- * locked_page
- */
- if (pages[i] != locked_page) {
- lock_page(pages[i]);
- if (!PageDirty(pages[i]) ||
- pages[i]->mapping != inode->i_mapping) {
- ret = -EAGAIN;
- unlock_page(pages[i]);
- page_cache_release(pages[i]);
- goto done;
- }
- }
- page_cache_release(pages[i]);
- pages_locked++;
- }
- nrpages -= ret;
- index += ret;
- cond_resched();
- }
- ret = 0;
-done:
- if (ret && pages_locked) {
- __unlock_for_delalloc(inode, locked_page,
- delalloc_start,
- ((u64)(start_index + pages_locked - 1)) <<
- PAGE_CACHE_SHIFT);
- }
- return ret;
-}
-
-/*
- * find a contiguous range of bytes in the file marked as delalloc, not
- * more than 'max_bytes'. start and end are used to return the range,
- *
- * 1 is returned if we find something, 0 if nothing was in the tree
- */
-static noinline u64 find_lock_delalloc_range(struct inode *inode,
- struct extent_io_tree *tree,
- struct page *locked_page,
- u64 *start, u64 *end,
- u64 max_bytes)
-{
- u64 delalloc_start;
- u64 delalloc_end;
- u64 found;
- struct extent_state *cached_state = NULL;
- int ret;
- int loops = 0;
-
-again:
- /* step one, find a bunch of delalloc bytes starting at start */
- delalloc_start = *start;
- delalloc_end = 0;
- found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
- max_bytes, &cached_state);
- if (!found || delalloc_end <= *start) {
- *start = delalloc_start;
- *end = delalloc_end;
- free_extent_state(cached_state);
- return found;
- }
-
- /*
- * start comes from the offset of locked_page. We have to lock
- * pages in order, so we can't process delalloc bytes before
- * locked_page
- */
- if (delalloc_start < *start)
- delalloc_start = *start;
-
- /*
- * make sure to limit the number of pages we try to lock down
- * if we're looping.
- */
- if (delalloc_end + 1 - delalloc_start > max_bytes && loops)
- delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;
-
- /* step two, lock all the pages after the page that has start */
- ret = lock_delalloc_pages(inode, locked_page,
- delalloc_start, delalloc_end);
- if (ret == -EAGAIN) {
-		/* some of the pages are gone, let's avoid looping by
- * shortening the size of the delalloc range we're searching
- */
- free_extent_state(cached_state);
- if (!loops) {
- unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
- max_bytes = PAGE_CACHE_SIZE - offset;
- loops = 1;
- goto again;
- } else {
- found = 0;
- goto out_failed;
- }
- }
- BUG_ON(ret); /* Only valid values are 0 and -EAGAIN */
-
- /* step three, lock the state bits for the whole range */
- lock_extent_bits(tree, delalloc_start, delalloc_end, 0, &cached_state);
-
- /* then test to make sure it is all still delalloc */
- ret = test_range_bit(tree, delalloc_start, delalloc_end,
- EXTENT_DELALLOC, 1, cached_state);
- if (!ret) {
- unlock_extent_cached(tree, delalloc_start, delalloc_end,
- &cached_state, GFP_NOFS);
- __unlock_for_delalloc(inode, locked_page,
- delalloc_start, delalloc_end);
- cond_resched();
- goto again;
- }
- free_extent_state(cached_state);
- *start = delalloc_start;
- *end = delalloc_end;
-out_failed:
- return found;
-}
-
-int extent_clear_unlock_delalloc(struct inode *inode,
- struct extent_io_tree *tree,
- u64 start, u64 end, struct page *locked_page,
- unsigned long op)
-{
- int ret;
- struct page *pages[16];
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = end >> PAGE_CACHE_SHIFT;
- unsigned long nr_pages = end_index - index + 1;
- int i;
- int clear_bits = 0;
-
- if (op & EXTENT_CLEAR_UNLOCK)
- clear_bits |= EXTENT_LOCKED;
- if (op & EXTENT_CLEAR_DIRTY)
- clear_bits |= EXTENT_DIRTY;
-
- if (op & EXTENT_CLEAR_DELALLOC)
- clear_bits |= EXTENT_DELALLOC;
-
- clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
- if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
- EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
- EXTENT_SET_PRIVATE2)))
- return 0;
-
- while (nr_pages > 0) {
- ret = find_get_pages_contig(inode->i_mapping, index,
- min_t(unsigned long,
- nr_pages, ARRAY_SIZE(pages)), pages);
- for (i = 0; i < ret; i++) {
-
- if (op & EXTENT_SET_PRIVATE2)
- SetPagePrivate2(pages[i]);
-
- if (pages[i] == locked_page) {
- page_cache_release(pages[i]);
- continue;
- }
- if (op & EXTENT_CLEAR_DIRTY)
- clear_page_dirty_for_io(pages[i]);
- if (op & EXTENT_SET_WRITEBACK)
- set_page_writeback(pages[i]);
- if (op & EXTENT_END_WRITEBACK)
- end_page_writeback(pages[i]);
- if (op & EXTENT_CLEAR_UNLOCK_PAGE)
- unlock_page(pages[i]);
- page_cache_release(pages[i]);
- }
- nr_pages -= ret;
- index += ret;
- cond_resched();
- }
- return 0;
-}
-
-/*
- * count the number of bytes in the tree that have a given bit(s)
- * set. This can be fairly slow, except for EXTENT_DIRTY which is
- * cached. The total number found is returned.
- */
-u64 count_range_bits(struct extent_io_tree *tree,
- u64 *start, u64 search_end, u64 max_bytes,
- unsigned long bits, int contig)
-{
- struct rb_node *node;
- struct extent_state *state;
- u64 cur_start = *start;
- u64 total_bytes = 0;
- u64 last = 0;
- int found = 0;
-
- if (search_end <= cur_start) {
- WARN_ON(1);
- return 0;
- }
-
- spin_lock(&tree->lock);
- if (cur_start == 0 && bits == EXTENT_DIRTY) {
- total_bytes = tree->dirty_bytes;
- goto out;
- }
- /*
- * this search will find all the extents that end after
- * our range starts.
- */
- node = tree_search(tree, cur_start);
- if (!node)
- goto out;
-
- while (1) {
- state = rb_entry(node, struct extent_state, rb_node);
- if (state->start > search_end)
- break;
- if (contig && found && state->start > last + 1)
- break;
- if (state->end >= cur_start && (state->state & bits) == bits) {
- total_bytes += min(search_end, state->end) + 1 -
- max(cur_start, state->start);
- if (total_bytes >= max_bytes)
- break;
- if (!found) {
- *start = max(cur_start, state->start);
- found = 1;
- }
- last = state->end;
- } else if (contig && found) {
- break;
- }
- node = rb_next(node);
- if (!node)
- break;
- }
-out:
- spin_unlock(&tree->lock);
- return total_bytes;
-}
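
The per-state accounting in the loop above is just the size of the overlap
between the search window and one state; a stand-alone sketch of that single
expression with made-up numbers:

#include <stdint.h>
#include <stdio.h>

static uint64_t max_u64(uint64_t a, uint64_t b) { return a > b ? a : b; }
static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

int main(void)
{
	uint64_t cur_start = 1000, search_end = 9000;	/* search window */
	uint64_t st_start = 4096, st_end = 12287;	/* one extent_state */
	uint64_t bytes = min_u64(search_end, st_end) + 1 -
			 max_u64(cur_start, st_start);

	/* min(9000, 12287) + 1 - max(1000, 4096) = 4905 bytes of overlap */
	printf("overlap = %llu bytes\n", (unsigned long long)bytes);
	return 0;
}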
-
-/*
- * set the private field for a given byte offset in the tree. If there isn't
- * an extent_state there already, this does nothing.
- */
-int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
-{
- struct rb_node *node;
- struct extent_state *state;
- int ret = 0;
-
- spin_lock(&tree->lock);
- /*
- * this search will find all the extents that end after
- * our range starts.
- */
- node = tree_search(tree, start);
- if (!node) {
- ret = -ENOENT;
- goto out;
- }
- state = rb_entry(node, struct extent_state, rb_node);
- if (state->start != start) {
- ret = -ENOENT;
- goto out;
- }
- state->private = private;
-out:
- spin_unlock(&tree->lock);
- return ret;
-}
-
-int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
-{
- struct rb_node *node;
- struct extent_state *state;
- int ret = 0;
-
- spin_lock(&tree->lock);
- /*
- * this search will find all the extents that end after
- * our range starts.
- */
- node = tree_search(tree, start);
- if (!node) {
- ret = -ENOENT;
- goto out;
- }
- state = rb_entry(node, struct extent_state, rb_node);
- if (state->start != start) {
- ret = -ENOENT;
- goto out;
- }
- *private = state->private;
-out:
- spin_unlock(&tree->lock);
- return ret;
-}
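
The 'private' slot is a bare u64; the failure-handling code further down
stores a pointer in it by casting through unsigned long and casts it back on
the way out.  A stand-alone sketch of that round trip (toy struct, not the
real io_failure_record):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_record { int mirror; };

int main(void)
{
	struct toy_record *rec = malloc(sizeof(*rec));
	uint64_t private;

	if (!rec)
		return 1;
	rec->mirror = 2;
	private = (uint64_t)(unsigned long)rec;			/* store */
	rec = (struct toy_record *)(unsigned long)private;	/* load */
	printf("mirror = %d\n", rec->mirror);
	free(rec);
	return 0;
}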
-
-/*
- * searches a range in the state tree for a given mask.
- * If 'filled' == 1, this returns 1 only if every extent in the tree
- * has the bits set. Otherwise, 1 is returned if any bit in the
- * range is found set.
- */
-int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int filled, struct extent_state *cached)
-{
- struct extent_state *state = NULL;
- struct rb_node *node;
- int bitset = 0;
-
- spin_lock(&tree->lock);
- if (cached && cached->tree && cached->start <= start &&
- cached->end > start)
- node = &cached->rb_node;
- else
- node = tree_search(tree, start);
- while (node && start <= end) {
- state = rb_entry(node, struct extent_state, rb_node);
-
- if (filled && state->start > start) {
- bitset = 0;
- break;
- }
-
- if (state->start > end)
- break;
-
- if (state->state & bits) {
- bitset = 1;
- if (!filled)
- break;
- } else if (filled) {
- bitset = 0;
- break;
- }
-
- if (state->end == (u64)-1)
- break;
-
- start = state->end + 1;
- if (start > end)
- break;
- node = rb_next(node);
- if (!node) {
- if (filled)
- bitset = 0;
- break;
- }
- }
- spin_unlock(&tree->lock);
- return bitset;
-}
-
-/*
- * helper function to set a given page up to date if all the
- * extents in the tree for that page are up to date
- */
-static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
-{
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
- u64 end = start + PAGE_CACHE_SIZE - 1;
- if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
- SetPageUptodate(page);
-}
-
-/*
- * helper function to unlock a page if all the extents in the tree
- * for that page are unlocked
- */
-static void check_page_locked(struct extent_io_tree *tree, struct page *page)
-{
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
- u64 end = start + PAGE_CACHE_SIZE - 1;
- if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
- unlock_page(page);
-}
-
-/*
- * helper function to end page writeback if all the extents
- * in the tree for that page are done with writeback
- */
-static void check_page_writeback(struct extent_io_tree *tree,
- struct page *page)
-{
- end_page_writeback(page);
-}
-
-/*
- * When IO fails, either with EIO or csum verification fails, we
- * try other mirrors that might have a good copy of the data. This
- * io_failure_record is used to record state as we go through all the
- * mirrors. If another mirror has good data, the page is set up to date
- * and things continue. If a good mirror can't be found, the original
- * bio end_io callback is called to indicate things have failed.
- */
-struct io_failure_record {
- struct page *page;
- u64 start;
- u64 len;
- u64 logical;
- unsigned long bio_flags;
- int this_mirror;
- int failed_mirror;
- int in_validation;
-};
-
-static int free_io_failure(struct inode *inode, struct io_failure_record *rec,
- int did_repair)
-{
- int ret;
- int err = 0;
- struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
-
- set_state_private(failure_tree, rec->start, 0);
- ret = clear_extent_bits(failure_tree, rec->start,
- rec->start + rec->len - 1,
- EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
- if (ret)
- err = ret;
-
- if (did_repair) {
- ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start,
- rec->start + rec->len - 1,
- EXTENT_DAMAGED, GFP_NOFS);
- if (ret && !err)
- err = ret;
- }
-
- kfree(rec);
- return err;
-}
-
-static void repair_io_failure_callback(struct bio *bio, int err)
-{
- complete(bio->bi_private);
-}
-
-/*
- * this bypasses the standard btrfs submit functions deliberately, as
- * the standard behavior is to write all copies in a raid setup. here we only
- * want to write the one bad copy. so we do the mapping for ourselves and issue
- * submit_bio directly.
- * to avoid any synchronization issues, wait for the data after writing, which
- * actually prevents the read that triggered the error from finishing.
- * currently, there can be no more than two copies of every data bit. thus,
- * exactly one rewrite is required.
- */
-int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
- u64 length, u64 logical, struct page *page,
- int mirror_num)
-{
- struct bio *bio;
- struct btrfs_device *dev;
- DECLARE_COMPLETION_ONSTACK(compl);
- u64 map_length = 0;
- u64 sector;
- struct btrfs_bio *bbio = NULL;
- int ret;
-
- BUG_ON(!mirror_num);
-
- bio = bio_alloc(GFP_NOFS, 1);
- if (!bio)
- return -EIO;
- bio->bi_private = &compl;
- bio->bi_end_io = repair_io_failure_callback;
- bio->bi_size = 0;
- map_length = length;
-
- ret = btrfs_map_block(map_tree, WRITE, logical,
- &map_length, &bbio, mirror_num);
- if (ret) {
- bio_put(bio);
- return -EIO;
- }
- BUG_ON(mirror_num != bbio->mirror_num);
- sector = bbio->stripes[mirror_num-1].physical >> 9;
- bio->bi_sector = sector;
- dev = bbio->stripes[mirror_num-1].dev;
- kfree(bbio);
- if (!dev || !dev->bdev || !dev->writeable) {
- bio_put(bio);
- return -EIO;
- }
- bio->bi_bdev = dev->bdev;
- bio_add_page(bio, page, length, start-page_offset(page));
- btrfsic_submit_bio(WRITE_SYNC, bio);
- wait_for_completion(&compl);
-
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
- /* try to remap that extent elsewhere? */
- bio_put(bio);
- return -EIO;
- }
-
- printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s "
- "sector %llu)\n", page->mapping->host->i_ino, start,
- dev->name, sector);
-
- bio_put(bio);
- return 0;
-}
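
The '>> 9' above converts a byte offset on the device into a 512-byte sector
number before it is handed to the bio; a trivial stand-alone sketch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t physical = 1048576;		/* byte offset on the device */
	uint64_t sector = physical >> 9;	/* 512-byte sectors */

	/* 1048576 / 512 = 2048 */
	printf("byte %llu -> sector %llu\n",
	       (unsigned long long)physical, (unsigned long long)sector);
	return 0;
}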
-
-int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
- int mirror_num)
-{
- struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
- u64 start = eb->start;
- unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
- int ret = 0;
-
- for (i = 0; i < num_pages; i++) {
- struct page *p = extent_buffer_page(eb, i);
- ret = repair_io_failure(map_tree, start, PAGE_CACHE_SIZE,
- start, p, mirror_num);
- if (ret)
- break;
- start += PAGE_CACHE_SIZE;
- }
-
- return ret;
-}
-
-/*
- * each time an IO finishes, we do a fast check in the IO failure tree
- * to see if we need to process or clean up an io_failure_record
- */
-static int clean_io_failure(u64 start, struct page *page)
-{
- u64 private;
- u64 private_failure;
- struct io_failure_record *failrec;
- struct btrfs_mapping_tree *map_tree;
- struct extent_state *state;
- int num_copies;
- int did_repair = 0;
- int ret;
- struct inode *inode = page->mapping->host;
-
- private = 0;
- ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
- (u64)-1, 1, EXTENT_DIRTY, 0);
- if (!ret)
- return 0;
-
- ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, start,
- &private_failure);
- if (ret)
- return 0;
-
- failrec = (struct io_failure_record *)(unsigned long) private_failure;
- BUG_ON(!failrec->this_mirror);
-
- if (failrec->in_validation) {
- /* there was no real error, just free the record */
- pr_debug("clean_io_failure: freeing dummy error at %llu\n",
- failrec->start);
- did_repair = 1;
- goto out;
- }
-
- spin_lock(&BTRFS_I(inode)->io_tree.lock);
- state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
- failrec->start,
- EXTENT_LOCKED);
- spin_unlock(&BTRFS_I(inode)->io_tree.lock);
-
- if (state && state->start == failrec->start) {
- map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree;
- num_copies = btrfs_num_copies(map_tree, failrec->logical,
- failrec->len);
- if (num_copies > 1) {
- ret = repair_io_failure(map_tree, start, failrec->len,
- failrec->logical, page,
- failrec->failed_mirror);
- did_repair = !ret;
- }
- }
-
-out:
- if (!ret)
- ret = free_io_failure(inode, failrec, did_repair);
-
- return ret;
-}
-
-/*
- * this is a generic handler for readpage errors (default
- * readpage_io_failed_hook). if other copies exist, read those and write back
- * good data to the failed position. does not investigate remapping the
- * failed extent elsewhere, hoping the device will be smart enough to do this as
- * needed
- */
-
-static int bio_readpage_error(struct bio *failed_bio, struct page *page,
- u64 start, u64 end, int failed_mirror,
- struct extent_state *state)
-{
- struct io_failure_record *failrec = NULL;
- u64 private;
- struct extent_map *em;
- struct inode *inode = page->mapping->host;
- struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
- struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- struct bio *bio;
- int num_copies;
- int ret;
- int read_mode;
- u64 logical;
-
- BUG_ON(failed_bio->bi_rw & REQ_WRITE);
-
- ret = get_state_private(failure_tree, start, &private);
- if (ret) {
- failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
- if (!failrec)
- return -ENOMEM;
- failrec->start = start;
- failrec->len = end - start + 1;
- failrec->this_mirror = 0;
- failrec->bio_flags = 0;
- failrec->in_validation = 0;
-
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, start, failrec->len);
- if (!em) {
- read_unlock(&em_tree->lock);
- kfree(failrec);
- return -EIO;
- }
-
- if (em->start > start || em->start + em->len < start) {
- free_extent_map(em);
- em = NULL;
- }
- read_unlock(&em_tree->lock);
-
- if (!em || IS_ERR(em)) {
- kfree(failrec);
- return -EIO;
- }
- logical = start - em->start;
- logical = em->block_start + logical;
- if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
- logical = em->block_start;
- failrec->bio_flags = EXTENT_BIO_COMPRESSED;
- extent_set_compress_type(&failrec->bio_flags,
- em->compress_type);
- }
- pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, "
- "len=%llu\n", logical, start, failrec->len);
- failrec->logical = logical;
- free_extent_map(em);
-
- /* set the bits in the private failure tree */
- ret = set_extent_bits(failure_tree, start, end,
- EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
- if (ret >= 0)
- ret = set_state_private(failure_tree, start,
- (u64)(unsigned long)failrec);
- /* set the bits in the inode's tree */
- if (ret >= 0)
- ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED,
- GFP_NOFS);
- if (ret < 0) {
- kfree(failrec);
- return ret;
- }
- } else {
- failrec = (struct io_failure_record *)(unsigned long)private;
- pr_debug("bio_readpage_error: (found) logical=%llu, "
- "start=%llu, len=%llu, validation=%d\n",
- failrec->logical, failrec->start, failrec->len,
- failrec->in_validation);
- /*
- * when data can be on disk more than twice, add to failrec here
- * (e.g. with a list for failed_mirror) to make
- * clean_io_failure() clean all those errors at once.
- */
- }
- num_copies = btrfs_num_copies(
- &BTRFS_I(inode)->root->fs_info->mapping_tree,
- failrec->logical, failrec->len);
- if (num_copies == 1) {
- /*
- * we only have a single copy of the data, so don't bother with
- * all the retry and error correction code that follows. no
- * matter what the error is, it is very likely to persist.
- */
- pr_debug("bio_readpage_error: cannot repair, num_copies == 1. "
- "state=%p, num_copies=%d, next_mirror %d, "
- "failed_mirror %d\n", state, num_copies,
- failrec->this_mirror, failed_mirror);
- free_io_failure(inode, failrec, 0);
- return -EIO;
- }
-
- if (!state) {
- spin_lock(&tree->lock);
- state = find_first_extent_bit_state(tree, failrec->start,
- EXTENT_LOCKED);
- if (state && state->start != failrec->start)
- state = NULL;
- spin_unlock(&tree->lock);
- }
-
- /*
- * there are two premises:
- * a) deliver good data to the caller
- * b) correct the bad sectors on disk
- */
- if (failed_bio->bi_vcnt > 1) {
- /*
- * to fulfill b), we need to know the exact failing sectors, as
- * we don't want to rewrite any more than the failed ones. thus,
- * we need separate read requests for the failed bio
- *
- * if the following BUG_ON triggers, our validation request got
- * merged. we need separate requests for our algorithm to work.
- */
- BUG_ON(failrec->in_validation);
- failrec->in_validation = 1;
- failrec->this_mirror = failed_mirror;
- read_mode = READ_SYNC | REQ_FAILFAST_DEV;
- } else {
- /*
- * we're ready to fulfill a) and b) alongside. get a good copy
- * of the failed sector and if we succeed, we have setup
- * everything for repair_io_failure to do the rest for us.
- */
- if (failrec->in_validation) {
- BUG_ON(failrec->this_mirror != failed_mirror);
- failrec->in_validation = 0;
- failrec->this_mirror = 0;
- }
- failrec->failed_mirror = failed_mirror;
- failrec->this_mirror++;
- if (failrec->this_mirror == failed_mirror)
- failrec->this_mirror++;
- read_mode = READ_SYNC;
- }
-
- if (!state || failrec->this_mirror > num_copies) {
- pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, "
- "next_mirror %d, failed_mirror %d\n", state,
- num_copies, failrec->this_mirror, failed_mirror);
- free_io_failure(inode, failrec, 0);
- return -EIO;
- }
-
- bio = bio_alloc(GFP_NOFS, 1);
- if (!bio) {
- free_io_failure(inode, failrec, 0);
- return -EIO;
- }
- bio->bi_private = state;
- bio->bi_end_io = failed_bio->bi_end_io;
- bio->bi_sector = failrec->logical >> 9;
- bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
- bio->bi_size = 0;
-
- bio_add_page(bio, page, failrec->len, start - page_offset(page));
-
- pr_debug("bio_readpage_error: submitting new read[%#x] to "
- "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode,
- failrec->this_mirror, num_copies, failrec->in_validation);
-
- ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
- failrec->this_mirror,
- failrec->bio_flags, 0);
- return ret;
-}
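
The mirror rotation near the end of the function just advances to the next
copy while skipping the one that already failed; a stand-alone sketch of that
selection (mirror numbers are 1-based, as in the code above):

#include <stdio.h>

/* mirrors the advance-and-skip step used when picking the next copy */
static int next_mirror(int this_mirror, int failed_mirror)
{
	this_mirror++;
	if (this_mirror == failed_mirror)
		this_mirror++;
	return this_mirror;
}

int main(void)
{
	int failed_mirror = 2, this_mirror = 0;

	/* first retry goes to mirror 1, the second skips 2 and goes to 3 */
	this_mirror = next_mirror(this_mirror, failed_mirror);
	printf("retry on mirror %d\n", this_mirror);
	this_mirror = next_mirror(this_mirror, failed_mirror);
	printf("retry on mirror %d\n", this_mirror);
	return 0;
}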
-
-/* lots and lots of room for performance fixes in the end_bio funcs */
-
-int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
-{
- int uptodate = (err == 0);
- struct extent_io_tree *tree;
- int ret;
-
- tree = &BTRFS_I(page->mapping->host)->io_tree;
-
- if (tree->ops && tree->ops->writepage_end_io_hook) {
- ret = tree->ops->writepage_end_io_hook(page, start,
- end, NULL, uptodate);
- if (ret)
- uptodate = 0;
- }
-
- if (!uptodate && tree->ops &&
- tree->ops->writepage_io_failed_hook) {
- ret = tree->ops->writepage_io_failed_hook(NULL, page,
- start, end, NULL);
- /* Writeback already completed */
- if (ret == 0)
- return 1;
- }
-
- if (!uptodate) {
- clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
- ClearPageUptodate(page);
- SetPageError(page);
- }
- return 0;
-}
-
-/*
- * after a writepage IO is done, we need to:
- * clear the uptodate bits on error
- * clear the writeback bits in the extent tree for this IO
- * end_page_writeback if the page has no more pending IO
- *
- * Scheduling is not allowed, so the extent state tree is expected
- * to have one and only one object corresponding to this IO.
- */
-static void end_bio_extent_writepage(struct bio *bio, int err)
-{
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct extent_io_tree *tree;
- u64 start;
- u64 end;
- int whole_page;
-
- do {
- struct page *page = bvec->bv_page;
- tree = &BTRFS_I(page->mapping->host)->io_tree;
-
- start = ((u64)page->index << PAGE_CACHE_SHIFT) +
- bvec->bv_offset;
- end = start + bvec->bv_len - 1;
-
- if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
- whole_page = 1;
- else
- whole_page = 0;
-
- if (--bvec >= bio->bi_io_vec)
- prefetchw(&bvec->bv_page->flags);
-
- if (end_extent_writepage(page, err, start, end))
- continue;
-
- if (whole_page)
- end_page_writeback(page);
- else
- check_page_writeback(tree, page);
- } while (bvec >= bio->bi_io_vec);
-
- bio_put(bio);
-}
-
-/*
- * after a readpage IO is done, we need to:
- * clear the uptodate bits on error
- * set the uptodate bits if things worked
- * set the page up to date if all extents in the tree are uptodate
- * clear the lock bit in the extent tree
- * unlock the page if there are no other extents locked for it
- *
- * Scheduling is not allowed, so the extent state tree is expected
- * to have one and only one object corresponding to this IO.
- */
-static void end_bio_extent_readpage(struct bio *bio, int err)
-{
- int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct bio_vec *bvec = bio->bi_io_vec;
- struct extent_io_tree *tree;
- u64 start;
- u64 end;
- int whole_page;
- int mirror;
- int ret;
-
- if (err)
- uptodate = 0;
-
- do {
- struct page *page = bvec->bv_page;
- struct extent_state *cached = NULL;
- struct extent_state *state;
-
- pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, "
- "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err,
- (long int)bio->bi_bdev);
- tree = &BTRFS_I(page->mapping->host)->io_tree;
-
- start = ((u64)page->index << PAGE_CACHE_SHIFT) +
- bvec->bv_offset;
- end = start + bvec->bv_len - 1;
-
- if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
- whole_page = 1;
- else
- whole_page = 0;
-
- if (++bvec <= bvec_end)
- prefetchw(&bvec->bv_page->flags);
-
- spin_lock(&tree->lock);
- state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
- if (state && state->start == start) {
- /*
- * take a reference on the state, unlock will drop
- * the ref
- */
- cache_state(state, &cached);
- }
- spin_unlock(&tree->lock);
-
- mirror = (int)(unsigned long)bio->bi_bdev;
- if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
- ret = tree->ops->readpage_end_io_hook(page, start, end,
- state, mirror);
- if (ret)
- uptodate = 0;
- else
- clean_io_failure(start, page);
- }
-
- if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
- ret = tree->ops->readpage_io_failed_hook(page, mirror);
- if (!ret && !err &&
- test_bit(BIO_UPTODATE, &bio->bi_flags))
- uptodate = 1;
- } else if (!uptodate) {
- /*
- * The generic bio_readpage_error handles errors the
- * following way: If possible, new read requests are
- * created and submitted and will end up in
- * end_bio_extent_readpage as well (if we're lucky, not
- * in the !uptodate case). In that case it returns 0 and
- * we just go on with the next page in our bio. If it
- * can't handle the error it will return -EIO and we
- * remain responsible for that page.
- */
- ret = bio_readpage_error(bio, page, start, end, mirror, NULL);
- if (ret == 0) {
- uptodate =
- test_bit(BIO_UPTODATE, &bio->bi_flags);
- if (err)
- uptodate = 0;
- uncache_state(&cached);
- continue;
- }
- }
-
- if (uptodate && tree->track_uptodate) {
- set_extent_uptodate(tree, start, end, &cached,
- GFP_ATOMIC);
- }
- unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
-
- if (whole_page) {
- if (uptodate) {
- SetPageUptodate(page);
- } else {
- ClearPageUptodate(page);
- SetPageError(page);
- }
- unlock_page(page);
- } else {
- if (uptodate) {
- check_page_uptodate(tree, page);
- } else {
- ClearPageUptodate(page);
- SetPageError(page);
- }
- check_page_locked(tree, page);
- }
- } while (bvec <= bvec_end);
-
- bio_put(bio);
-}
-
-struct bio *
-btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
- gfp_t gfp_flags)
-{
- struct bio *bio;
-
- bio = bio_alloc(gfp_flags, nr_vecs);
-
- if (bio == NULL && (current->flags & PF_MEMALLOC)) {
- while (!bio && (nr_vecs /= 2))
- bio = bio_alloc(gfp_flags, nr_vecs);
- }
-
- if (bio) {
- bio->bi_size = 0;
- bio->bi_bdev = bdev;
- bio->bi_sector = first_sector;
- }
- return bio;
-}
-
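btrfs_bio_alloc() degrades gracefully under memory pressure: if the task is already in reclaim (PF_MEMALLOC) and the first allocation fails, it retries with progressively smaller vector counts. A minimal userspace sketch of the same back-off shape; try_alloc() is a made-up stand-in, not a kernel API:

	#include <stdlib.h>

	/* Hypothetical allocator: pretend large requests fail under pressure. */
	static void *try_alloc(int nr_vecs)
	{
		return nr_vecs > 16 ? NULL : malloc((size_t)nr_vecs * 64);
	}

	/* Halve the vector count until an allocation succeeds or it hits zero,
	 * mirroring the while (!bio && (nr_vecs /= 2)) loop above. */
	static void *alloc_with_backoff(int nr_vecs)
	{
		void *p = try_alloc(nr_vecs);

		while (!p && (nr_vecs /= 2))
			p = try_alloc(nr_vecs);
		return p;
	}

	int main(void)
	{
		void *p = alloc_with_backoff(256);	/* succeeds once halved down to 16 */
		free(p);
		return 0;
	}
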
-/*
- * Since writes are async, they will only return -ENOMEM.
- * Reads can return the full range of I/O error conditions.
- */
-static int __must_check submit_one_bio(int rw, struct bio *bio,
- int mirror_num, unsigned long bio_flags)
-{
- int ret = 0;
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct page *page = bvec->bv_page;
- struct extent_io_tree *tree = bio->bi_private;
- u64 start;
-
- start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
-
- bio->bi_private = NULL;
-
- bio_get(bio);
-
- if (tree->ops && tree->ops->submit_bio_hook)
- ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
- mirror_num, bio_flags, start);
- else
- btrfsic_submit_bio(rw, bio);
-
- if (bio_flagged(bio, BIO_EOPNOTSUPP))
- ret = -EOPNOTSUPP;
- bio_put(bio);
- return ret;
-}
-
-static int merge_bio(struct extent_io_tree *tree, struct page *page,
- unsigned long offset, size_t size, struct bio *bio,
- unsigned long bio_flags)
-{
- int ret = 0;
- if (tree->ops && tree->ops->merge_bio_hook)
- ret = tree->ops->merge_bio_hook(page, offset, size, bio,
- bio_flags);
- BUG_ON(ret < 0);
- return ret;
-
-}
-
-static int submit_extent_page(int rw, struct extent_io_tree *tree,
- struct page *page, sector_t sector,
- size_t size, unsigned long offset,
- struct block_device *bdev,
- struct bio **bio_ret,
- unsigned long max_pages,
- bio_end_io_t end_io_func,
- int mirror_num,
- unsigned long prev_bio_flags,
- unsigned long bio_flags)
-{
- int ret = 0;
- struct bio *bio;
- int nr;
- int contig = 0;
- int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED;
- int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
- size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
-
- if (bio_ret && *bio_ret) {
- bio = *bio_ret;
- if (old_compressed)
- contig = bio->bi_sector == sector;
- else
- contig = bio->bi_sector + (bio->bi_size >> 9) ==
- sector;
-
- if (prev_bio_flags != bio_flags || !contig ||
- merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
- bio_add_page(bio, page, page_size, offset) < page_size) {
- ret = submit_one_bio(rw, bio, mirror_num,
- prev_bio_flags);
- if (ret < 0)
- return ret;
- bio = NULL;
- } else {
- return 0;
- }
- }
- if (this_compressed)
- nr = BIO_MAX_PAGES;
- else
- nr = bio_get_nr_vecs(bdev);
-
- bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
- if (!bio)
- return -ENOMEM;
-
- bio_add_page(bio, page, page_size, offset);
- bio->bi_end_io = end_io_func;
- bio->bi_private = tree;
-
- if (bio_ret)
- *bio_ret = bio;
- else
- ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
-
- return ret;
-}
-
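submit_extent_page() only appends to an in-flight bio when the new chunk is physically contiguous with it: the bio's current end, bi_sector plus bi_size converted to 512-byte sectors, must equal the new chunk's sector. A standalone check of that arithmetic (the function and parameter names are illustrative, not the kernel's struct bio fields themselves):

	#include <stdio.h>
	#include <stdbool.h>

	static bool bio_is_contiguous(unsigned long long bi_sector,
				      unsigned int bi_size,
				      unsigned long long sector)
	{
		/* bi_size is in bytes, sectors are 512 bytes: >> 9 converts. */
		return bi_sector + (bi_size >> 9) == sector;
	}

	int main(void)
	{
		/* A bio at sector 1000 holding 8 KiB ends at sector 1016. */
		printf("%d\n", bio_is_contiguous(1000, 8192, 1016));	/* 1 */
		printf("%d\n", bio_is_contiguous(1000, 8192, 1024));	/* 0 */
		return 0;
	}
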
-void attach_extent_buffer_page(struct extent_buffer *eb, struct page *page)
-{
- if (!PagePrivate(page)) {
- SetPagePrivate(page);
- page_cache_get(page);
- set_page_private(page, (unsigned long)eb);
- } else {
- WARN_ON(page->private != (unsigned long)eb);
- }
-}
-
-void set_page_extent_mapped(struct page *page)
-{
- if (!PagePrivate(page)) {
- SetPagePrivate(page);
- page_cache_get(page);
- set_page_private(page, EXTENT_PAGE_PRIVATE);
- }
-}
-
-/*
- * basic readpage implementation. Locked extent state structs are inserted
- * into the tree that are removed when the IO is done (by the end_io
- * handlers)
- * XXX JDM: This needs looking at to ensure proper page locking
- */
-static int __extent_read_full_page(struct extent_io_tree *tree,
- struct page *page,
- get_extent_t *get_extent,
- struct bio **bio, int mirror_num,
- unsigned long *bio_flags)
-{
- struct inode *inode = page->mapping->host;
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
- u64 page_end = start + PAGE_CACHE_SIZE - 1;
- u64 end;
- u64 cur = start;
- u64 extent_offset;
- u64 last_byte = i_size_read(inode);
- u64 block_start;
- u64 cur_end;
- sector_t sector;
- struct extent_map *em;
- struct block_device *bdev;
- struct btrfs_ordered_extent *ordered;
- int ret;
- int nr = 0;
- size_t pg_offset = 0;
- size_t iosize;
- size_t disk_io_size;
- size_t blocksize = inode->i_sb->s_blocksize;
- unsigned long this_bio_flag = 0;
-
- set_page_extent_mapped(page);
-
- if (!PageUptodate(page)) {
- if (cleancache_get_page(page) == 0) {
- BUG_ON(blocksize != PAGE_SIZE);
- goto out;
- }
- }
-
- end = page_end;
- while (1) {
- lock_extent(tree, start, end);
- ordered = btrfs_lookup_ordered_extent(inode, start);
- if (!ordered)
- break;
- unlock_extent(tree, start, end);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- }
-
- if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
- char *userpage;
- size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
-
- if (zero_offset) {
- iosize = PAGE_CACHE_SIZE - zero_offset;
- userpage = kmap_atomic(page);
- memset(userpage + zero_offset, 0, iosize);
- flush_dcache_page(page);
- kunmap_atomic(userpage);
- }
- }
- while (cur <= end) {
- if (cur >= last_byte) {
- char *userpage;
- struct extent_state *cached = NULL;
-
- iosize = PAGE_CACHE_SIZE - pg_offset;
- userpage = kmap_atomic(page);
- memset(userpage + pg_offset, 0, iosize);
- flush_dcache_page(page);
- kunmap_atomic(userpage);
- set_extent_uptodate(tree, cur, cur + iosize - 1,
- &cached, GFP_NOFS);
- unlock_extent_cached(tree, cur, cur + iosize - 1,
- &cached, GFP_NOFS);
- break;
- }
- em = get_extent(inode, page, pg_offset, cur,
- end - cur + 1, 0);
- if (IS_ERR_OR_NULL(em)) {
- SetPageError(page);
- unlock_extent(tree, cur, end);
- break;
- }
- extent_offset = cur - em->start;
- BUG_ON(extent_map_end(em) <= cur);
- BUG_ON(end < cur);
-
- if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
- this_bio_flag = EXTENT_BIO_COMPRESSED;
- extent_set_compress_type(&this_bio_flag,
- em->compress_type);
- }
-
- iosize = min(extent_map_end(em) - cur, end - cur + 1);
- cur_end = min(extent_map_end(em) - 1, end);
- iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
- if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
- disk_io_size = em->block_len;
- sector = em->block_start >> 9;
- } else {
- sector = (em->block_start + extent_offset) >> 9;
- disk_io_size = iosize;
- }
- bdev = em->bdev;
- block_start = em->block_start;
- if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
- block_start = EXTENT_MAP_HOLE;
- free_extent_map(em);
- em = NULL;
-
- /* we've found a hole, just zero and go on */
- if (block_start == EXTENT_MAP_HOLE) {
- char *userpage;
- struct extent_state *cached = NULL;
-
- userpage = kmap_atomic(page);
- memset(userpage + pg_offset, 0, iosize);
- flush_dcache_page(page);
- kunmap_atomic(userpage);
-
- set_extent_uptodate(tree, cur, cur + iosize - 1,
- &cached, GFP_NOFS);
- unlock_extent_cached(tree, cur, cur + iosize - 1,
- &cached, GFP_NOFS);
- cur = cur + iosize;
- pg_offset += iosize;
- continue;
- }
- /* the get_extent function already copied into the page */
- if (test_range_bit(tree, cur, cur_end,
- EXTENT_UPTODATE, 1, NULL)) {
- check_page_uptodate(tree, page);
- unlock_extent(tree, cur, cur + iosize - 1);
- cur = cur + iosize;
- pg_offset += iosize;
- continue;
- }
- /* we have an inline extent but it didn't get marked up
- * to date. Error out
- */
- if (block_start == EXTENT_MAP_INLINE) {
- SetPageError(page);
- unlock_extent(tree, cur, cur + iosize - 1);
- cur = cur + iosize;
- pg_offset += iosize;
- continue;
- }
-
- ret = 0;
- if (tree->ops && tree->ops->readpage_io_hook) {
- ret = tree->ops->readpage_io_hook(page, cur,
- cur + iosize - 1);
- }
- if (!ret) {
- unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
- pnr -= page->index;
- ret = submit_extent_page(READ, tree, page,
- sector, disk_io_size, pg_offset,
- bdev, bio, pnr,
- end_bio_extent_readpage, mirror_num,
- *bio_flags,
- this_bio_flag);
- BUG_ON(ret == -ENOMEM);
- nr++;
- *bio_flags = this_bio_flag;
- }
- if (ret)
- SetPageError(page);
- cur = cur + iosize;
- pg_offset += iosize;
- }
-out:
- if (!nr) {
- if (!PageError(page))
- SetPageUptodate(page);
- unlock_page(page);
- }
- return 0;
-}
-
-int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
- get_extent_t *get_extent, int mirror_num)
-{
- struct bio *bio = NULL;
- unsigned long bio_flags = 0;
- int ret;
-
- ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
- &bio_flags);
- if (bio)
- ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
- return ret;
-}
-
-static noinline void update_nr_written(struct page *page,
- struct writeback_control *wbc,
- unsigned long nr_written)
-{
- wbc->nr_to_write -= nr_written;
- if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
- wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
- page->mapping->writeback_index = page->index + nr_written;
-}
-
-/*
- * the writepage semantics are similar to regular writepage. extent
- * records are inserted to lock ranges in the tree, and as dirty areas
- * are found, they are marked writeback. Then the lock bits are removed
- * and the end_io handler clears the writeback ranges
- */
-static int __extent_writepage(struct page *page, struct writeback_control *wbc,
- void *data)
-{
- struct inode *inode = page->mapping->host;
- struct extent_page_data *epd = data;
- struct extent_io_tree *tree = epd->tree;
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
- u64 delalloc_start;
- u64 page_end = start + PAGE_CACHE_SIZE - 1;
- u64 end;
- u64 cur = start;
- u64 extent_offset;
- u64 last_byte = i_size_read(inode);
- u64 block_start;
- u64 iosize;
- sector_t sector;
- struct extent_state *cached_state = NULL;
- struct extent_map *em;
- struct block_device *bdev;
- int ret;
- int nr = 0;
- size_t pg_offset = 0;
- size_t blocksize;
- loff_t i_size = i_size_read(inode);
- unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
- u64 nr_delalloc;
- u64 delalloc_end;
- int page_started;
- int compressed;
- int write_flags;
- unsigned long nr_written = 0;
- bool fill_delalloc = true;
-
- if (wbc->sync_mode == WB_SYNC_ALL)
- write_flags = WRITE_SYNC;
- else
- write_flags = WRITE;
-
- trace___extent_writepage(page, inode, wbc);
-
- WARN_ON(!PageLocked(page));
-
- ClearPageError(page);
-
- pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
- if (page->index > end_index ||
- (page->index == end_index && !pg_offset)) {
- page->mapping->a_ops->invalidatepage(page, 0);
- unlock_page(page);
- return 0;
- }
-
- if (page->index == end_index) {
- char *userpage;
-
- userpage = kmap_atomic(page);
- memset(userpage + pg_offset, 0,
- PAGE_CACHE_SIZE - pg_offset);
- kunmap_atomic(userpage);
- flush_dcache_page(page);
- }
- pg_offset = 0;
-
- set_page_extent_mapped(page);
-
- if (!tree->ops || !tree->ops->fill_delalloc)
- fill_delalloc = false;
-
- delalloc_start = start;
- delalloc_end = 0;
- page_started = 0;
- if (!epd->extent_locked && fill_delalloc) {
- u64 delalloc_to_write = 0;
- /*
- * make sure the wbc mapping index is at least updated
- * to this page.
- */
- update_nr_written(page, wbc, 0);
-
- while (delalloc_end < page_end) {
- nr_delalloc = find_lock_delalloc_range(inode, tree,
- page,
- &delalloc_start,
- &delalloc_end,
- 128 * 1024 * 1024);
- if (nr_delalloc == 0) {
- delalloc_start = delalloc_end + 1;
- continue;
- }
- ret = tree->ops->fill_delalloc(inode, page,
- delalloc_start,
- delalloc_end,
- &page_started,
- &nr_written);
- /* File system has been set read-only */
- if (ret) {
- SetPageError(page);
- goto done;
- }
- /*
- * delalloc_end is already one less than the total
- * length, so we don't subtract one from
- * PAGE_CACHE_SIZE
- */
- delalloc_to_write += (delalloc_end - delalloc_start +
- PAGE_CACHE_SIZE) >>
- PAGE_CACHE_SHIFT;
- delalloc_start = delalloc_end + 1;
- }
- if (wbc->nr_to_write < delalloc_to_write) {
- int thresh = 8192;
-
- if (delalloc_to_write < thresh * 2)
- thresh = delalloc_to_write;
- wbc->nr_to_write = min_t(u64, delalloc_to_write,
- thresh);
- }
-
- /* did the fill delalloc function already unlock and start
- * the IO?
- */
- if (page_started) {
- ret = 0;
- /*
- * we've unlocked the page, so we can't update
- * the mapping's writeback index, just update
- * nr_to_write.
- */
- wbc->nr_to_write -= nr_written;
- goto done_unlocked;
- }
- }
- if (tree->ops && tree->ops->writepage_start_hook) {
- ret = tree->ops->writepage_start_hook(page, start,
- page_end);
- if (ret) {
- /* Fixup worker will requeue */
- if (ret == -EBUSY)
- wbc->pages_skipped++;
- else
- redirty_page_for_writepage(wbc, page);
- update_nr_written(page, wbc, nr_written);
- unlock_page(page);
- ret = 0;
- goto done_unlocked;
- }
- }
-
- /*
- * we don't want to touch the inode after unlocking the page,
- * so we update the mapping writeback index now
- */
- update_nr_written(page, wbc, nr_written + 1);
-
- end = page_end;
- if (last_byte <= start) {
- if (tree->ops && tree->ops->writepage_end_io_hook)
- tree->ops->writepage_end_io_hook(page, start,
- page_end, NULL, 1);
- goto done;
- }
-
- blocksize = inode->i_sb->s_blocksize;
-
- while (cur <= end) {
- if (cur >= last_byte) {
- if (tree->ops && tree->ops->writepage_end_io_hook)
- tree->ops->writepage_end_io_hook(page, cur,
- page_end, NULL, 1);
- break;
- }
- em = epd->get_extent(inode, page, pg_offset, cur,
- end - cur + 1, 1);
- if (IS_ERR_OR_NULL(em)) {
- SetPageError(page);
- break;
- }
-
- extent_offset = cur - em->start;
- BUG_ON(extent_map_end(em) <= cur);
- BUG_ON(end < cur);
- iosize = min(extent_map_end(em) - cur, end - cur + 1);
- iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
- sector = (em->block_start + extent_offset) >> 9;
- bdev = em->bdev;
- block_start = em->block_start;
- compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
- free_extent_map(em);
- em = NULL;
-
- /*
- * compressed and inline extents are written through other
- * paths in the FS
- */
- if (compressed || block_start == EXTENT_MAP_HOLE ||
- block_start == EXTENT_MAP_INLINE) {
- /*
- * end_io notification does not happen here for
- * compressed extents
- */
- if (!compressed && tree->ops &&
- tree->ops->writepage_end_io_hook)
- tree->ops->writepage_end_io_hook(page, cur,
- cur + iosize - 1,
- NULL, 1);
- else if (compressed) {
- /* we don't want to end_page_writeback on
- * a compressed extent. this happens
- * elsewhere
- */
- nr++;
- }
-
- cur += iosize;
- pg_offset += iosize;
- continue;
- }
- /* leave this out until we have a page_mkwrite call */
- if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
- EXTENT_DIRTY, 0, NULL)) {
- cur = cur + iosize;
- pg_offset += iosize;
- continue;
- }
-
- if (tree->ops && tree->ops->writepage_io_hook) {
- ret = tree->ops->writepage_io_hook(page, cur,
- cur + iosize - 1);
- } else {
- ret = 0;
- }
- if (ret) {
- SetPageError(page);
- } else {
- unsigned long max_nr = end_index + 1;
-
- set_range_writeback(tree, cur, cur + iosize - 1);
- if (!PageWriteback(page)) {
- printk(KERN_ERR "btrfs warning page %lu not "
- "writeback, cur %llu end %llu\n",
- page->index, (unsigned long long)cur,
- (unsigned long long)end);
- }
-
- ret = submit_extent_page(write_flags, tree, page,
- sector, iosize, pg_offset,
- bdev, &epd->bio, max_nr,
- end_bio_extent_writepage,
- 0, 0, 0);
- if (ret)
- SetPageError(page);
- }
- cur = cur + iosize;
- pg_offset += iosize;
- nr++;
- }
-done:
- if (nr == 0) {
- /* make sure the mapping tag for page dirty gets cleared */
- set_page_writeback(page);
- end_page_writeback(page);
- }
- unlock_page(page);
-
-done_unlocked:
-
- /* drop our reference on any cached states */
- free_extent_state(cached_state);
- return 0;
-}
-
-static int eb_wait(void *word)
-{
- io_schedule();
- return 0;
-}
-
-static void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
-{
- wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait,
- TASK_UNINTERRUPTIBLE);
-}
-
-static int lock_extent_buffer_for_io(struct extent_buffer *eb,
- struct btrfs_fs_info *fs_info,
- struct extent_page_data *epd)
-{
- unsigned long i, num_pages;
- int flush = 0;
- int ret = 0;
-
- if (!btrfs_try_tree_write_lock(eb)) {
- flush = 1;
- flush_write_bio(epd);
- btrfs_tree_lock(eb);
- }
-
- if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
- btrfs_tree_unlock(eb);
- if (!epd->sync_io)
- return 0;
- if (!flush) {
- flush_write_bio(epd);
- flush = 1;
- }
- while (1) {
- wait_on_extent_buffer_writeback(eb);
- btrfs_tree_lock(eb);
- if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
- break;
- btrfs_tree_unlock(eb);
- }
- }
-
- if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
- set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
- btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
- spin_lock(&fs_info->delalloc_lock);
- if (fs_info->dirty_metadata_bytes >= eb->len)
- fs_info->dirty_metadata_bytes -= eb->len;
- else
- WARN_ON(1);
- spin_unlock(&fs_info->delalloc_lock);
- ret = 1;
- }
-
- btrfs_tree_unlock(eb);
-
- if (!ret)
- return ret;
-
- num_pages = num_extent_pages(eb->start, eb->len);
- for (i = 0; i < num_pages; i++) {
- struct page *p = extent_buffer_page(eb, i);
-
- if (!trylock_page(p)) {
- if (!flush) {
- flush_write_bio(epd);
- flush = 1;
- }
- lock_page(p);
- }
- }
-
- return ret;
-}
-
-static void end_extent_buffer_writeback(struct extent_buffer *eb)
-{
- clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
- smp_mb__after_clear_bit();
- wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
-}
-
-static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
-{
- int uptodate = err == 0;
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct extent_buffer *eb;
- int done;
-
- do {
- struct page *page = bvec->bv_page;
-
- bvec--;
- eb = (struct extent_buffer *)page->private;
- BUG_ON(!eb);
- done = atomic_dec_and_test(&eb->io_pages);
-
- if (!uptodate || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
- set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
- ClearPageUptodate(page);
- SetPageError(page);
- }
-
- end_page_writeback(page);
-
- if (!done)
- continue;
-
- end_extent_buffer_writeback(eb);
- } while (bvec >= bio->bi_io_vec);
-
- bio_put(bio);
-
-}
-
-static int write_one_eb(struct extent_buffer *eb,
- struct btrfs_fs_info *fs_info,
- struct writeback_control *wbc,
- struct extent_page_data *epd)
-{
- struct block_device *bdev = fs_info->fs_devices->latest_bdev;
- u64 offset = eb->start;
- unsigned long i, num_pages;
- int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
- int ret;
-
- clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
- num_pages = num_extent_pages(eb->start, eb->len);
- atomic_set(&eb->io_pages, num_pages);
- for (i = 0; i < num_pages; i++) {
- struct page *p = extent_buffer_page(eb, i);
-
- clear_page_dirty_for_io(p);
- set_page_writeback(p);
- ret = submit_extent_page(rw, eb->tree, p, offset >> 9,
- PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
- -1, end_bio_extent_buffer_writepage,
- 0, 0, 0);
- if (ret) {
- set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
- SetPageError(p);
- if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
- end_extent_buffer_writeback(eb);
- ret = -EIO;
- break;
- }
- offset += PAGE_CACHE_SIZE;
- update_nr_written(p, wbc, 1);
- unlock_page(p);
- }
-
- if (unlikely(ret)) {
- for (; i < num_pages; i++) {
- struct page *p = extent_buffer_page(eb, i);
- unlock_page(p);
- }
- }
-
- return ret;
-}
-
-int btree_write_cache_pages(struct address_space *mapping,
- struct writeback_control *wbc)
-{
- struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
- struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
- struct extent_buffer *eb, *prev_eb = NULL;
- struct extent_page_data epd = {
- .bio = NULL,
- .tree = tree,
- .extent_locked = 0,
- .sync_io = wbc->sync_mode == WB_SYNC_ALL,
- };
- int ret = 0;
- int done = 0;
- int nr_to_write_done = 0;
- struct pagevec pvec;
- int nr_pages;
- pgoff_t index;
- pgoff_t end; /* Inclusive */
- int scanned = 0;
- int tag;
-
- pagevec_init(&pvec, 0);
- if (wbc->range_cyclic) {
- index = mapping->writeback_index; /* Start from prev offset */
- end = -1;
- } else {
- index = wbc->range_start >> PAGE_CACHE_SHIFT;
- end = wbc->range_end >> PAGE_CACHE_SHIFT;
- scanned = 1;
- }
- if (wbc->sync_mode == WB_SYNC_ALL)
- tag = PAGECACHE_TAG_TOWRITE;
- else
- tag = PAGECACHE_TAG_DIRTY;
-retry:
- if (wbc->sync_mode == WB_SYNC_ALL)
- tag_pages_for_writeback(mapping, index, end);
- while (!done && !nr_to_write_done && (index <= end) &&
- (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
- unsigned i;
-
- scanned = 1;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- if (!PagePrivate(page))
- continue;
-
- if (!wbc->range_cyclic && page->index > end) {
- done = 1;
- break;
- }
-
- eb = (struct extent_buffer *)page->private;
- if (!eb) {
- WARN_ON(1);
- continue;
- }
-
- if (eb == prev_eb)
- continue;
-
- if (!atomic_inc_not_zero(&eb->refs)) {
- WARN_ON(1);
- continue;
- }
-
- prev_eb = eb;
- ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
- if (!ret) {
- free_extent_buffer(eb);
- continue;
- }
-
- ret = write_one_eb(eb, fs_info, wbc, &epd);
- if (ret) {
- done = 1;
- free_extent_buffer(eb);
- break;
- }
- free_extent_buffer(eb);
-
- /*
- * the filesystem may choose to bump up nr_to_write.
- * We have to make sure to honor the new nr_to_write
- * at any time
- */
- nr_to_write_done = wbc->nr_to_write <= 0;
- }
- pagevec_release(&pvec);
- cond_resched();
- }
- if (!scanned && !done) {
- /*
- * We hit the last page and there is more work to be done: wrap
- * back to the start of the file
- */
- scanned = 1;
- index = 0;
- goto retry;
- }
- flush_write_bio(&epd);
- return ret;
-}
-
-/**
- * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
- * @mapping: address space structure to write
- * @wbc: subtract the number of written pages from *@wbc->nr_to_write
- * @writepage: function called for each page
- * @data: data passed to writepage function
- *
- * If a page is already under I/O, write_cache_pages() skips it, even
- * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
- * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
- * and msync() need to guarantee that all the data which was dirty at the time
- * the call was made get new I/O started against them. If wbc->sync_mode is
- * WB_SYNC_ALL then we were called for data integrity and we must wait for
- * existing IO to complete.
- */
-static int extent_write_cache_pages(struct extent_io_tree *tree,
- struct address_space *mapping,
- struct writeback_control *wbc,
- writepage_t writepage, void *data,
- void (*flush_fn)(void *))
-{
- int ret = 0;
- int done = 0;
- int nr_to_write_done = 0;
- struct pagevec pvec;
- int nr_pages;
- pgoff_t index;
- pgoff_t end; /* Inclusive */
- int scanned = 0;
- int tag;
-
- pagevec_init(&pvec, 0);
- if (wbc->range_cyclic) {
- index = mapping->writeback_index; /* Start from prev offset */
- end = -1;
- } else {
- index = wbc->range_start >> PAGE_CACHE_SHIFT;
- end = wbc->range_end >> PAGE_CACHE_SHIFT;
- scanned = 1;
- }
- if (wbc->sync_mode == WB_SYNC_ALL)
- tag = PAGECACHE_TAG_TOWRITE;
- else
- tag = PAGECACHE_TAG_DIRTY;
-retry:
- if (wbc->sync_mode == WB_SYNC_ALL)
- tag_pages_for_writeback(mapping, index, end);
- while (!done && !nr_to_write_done && (index <= end) &&
- (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
- unsigned i;
-
- scanned = 1;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- /*
- * At this point we hold neither mapping->tree_lock nor
- * lock on the page itself: the page may be truncated or
- * invalidated (changing page->mapping to NULL), or even
- * swizzled back from swapper_space to tmpfs file
- * mapping
- */
- if (tree->ops &&
- tree->ops->write_cache_pages_lock_hook) {
- tree->ops->write_cache_pages_lock_hook(page,
- data, flush_fn);
- } else {
- if (!trylock_page(page)) {
- flush_fn(data);
- lock_page(page);
- }
- }
-
- if (unlikely(page->mapping != mapping)) {
- unlock_page(page);
- continue;
- }
-
- if (!wbc->range_cyclic && page->index > end) {
- done = 1;
- unlock_page(page);
- continue;
- }
-
- if (wbc->sync_mode != WB_SYNC_NONE) {
- if (PageWriteback(page))
- flush_fn(data);
- wait_on_page_writeback(page);
- }
-
- if (PageWriteback(page) ||
- !clear_page_dirty_for_io(page)) {
- unlock_page(page);
- continue;
- }
-
- ret = (*writepage)(page, wbc, data);
-
- if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
- unlock_page(page);
- ret = 0;
- }
- if (ret)
- done = 1;
-
- /*
- * the filesystem may choose to bump up nr_to_write.
- * We have to make sure to honor the new nr_to_write
- * at any time
- */
- nr_to_write_done = wbc->nr_to_write <= 0;
- }
- pagevec_release(&pvec);
- cond_resched();
- }
- if (!scanned && !done) {
- /*
- * We hit the last page and there is more work to be done: wrap
- * back to the start of the file
- */
- scanned = 1;
- index = 0;
- goto retry;
- }
- return ret;
-}
-
-static void flush_epd_write_bio(struct extent_page_data *epd)
-{
- if (epd->bio) {
- int rw = WRITE;
- int ret;
-
- if (epd->sync_io)
- rw = WRITE_SYNC;
-
- ret = submit_one_bio(rw, epd->bio, 0, 0);
- BUG_ON(ret < 0); /* -ENOMEM */
- epd->bio = NULL;
- }
-}
-
-static noinline void flush_write_bio(void *data)
-{
- struct extent_page_data *epd = data;
- flush_epd_write_bio(epd);
-}
-
-int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
- get_extent_t *get_extent,
- struct writeback_control *wbc)
-{
- int ret;
- struct extent_page_data epd = {
- .bio = NULL,
- .tree = tree,
- .get_extent = get_extent,
- .extent_locked = 0,
- .sync_io = wbc->sync_mode == WB_SYNC_ALL,
- };
-
- ret = __extent_writepage(page, wbc, &epd);
-
- flush_epd_write_bio(&epd);
- return ret;
-}
-
-int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
- u64 start, u64 end, get_extent_t *get_extent,
- int mode)
-{
- int ret = 0;
- struct address_space *mapping = inode->i_mapping;
- struct page *page;
- unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
- PAGE_CACHE_SHIFT;
-
- struct extent_page_data epd = {
- .bio = NULL,
- .tree = tree,
- .get_extent = get_extent,
- .extent_locked = 1,
- .sync_io = mode == WB_SYNC_ALL,
- };
- struct writeback_control wbc_writepages = {
- .sync_mode = mode,
- .nr_to_write = nr_pages * 2,
- .range_start = start,
- .range_end = end + 1,
- };
-
- while (start <= end) {
- page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
- if (clear_page_dirty_for_io(page))
- ret = __extent_writepage(page, &wbc_writepages, &epd);
- else {
- if (tree->ops && tree->ops->writepage_end_io_hook)
- tree->ops->writepage_end_io_hook(page, start,
- start + PAGE_CACHE_SIZE - 1,
- NULL, 1);
- unlock_page(page);
- }
- page_cache_release(page);
- start += PAGE_CACHE_SIZE;
- }
-
- flush_epd_write_bio(&epd);
- return ret;
-}
-
-int extent_writepages(struct extent_io_tree *tree,
- struct address_space *mapping,
- get_extent_t *get_extent,
- struct writeback_control *wbc)
-{
- int ret = 0;
- struct extent_page_data epd = {
- .bio = NULL,
- .tree = tree,
- .get_extent = get_extent,
- .extent_locked = 0,
- .sync_io = wbc->sync_mode == WB_SYNC_ALL,
- };
-
- ret = extent_write_cache_pages(tree, mapping, wbc,
- __extent_writepage, &epd,
- flush_write_bio);
- flush_epd_write_bio(&epd);
- return ret;
-}
-
-int extent_readpages(struct extent_io_tree *tree,
- struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages,
- get_extent_t get_extent)
-{
- struct bio *bio = NULL;
- unsigned page_idx;
- unsigned long bio_flags = 0;
-
- for (page_idx = 0; page_idx < nr_pages; page_idx++) {
- struct page *page = list_entry(pages->prev, struct page, lru);
-
- prefetchw(&page->flags);
- list_del(&page->lru);
- if (!add_to_page_cache_lru(page, mapping,
- page->index, GFP_NOFS)) {
- __extent_read_full_page(tree, page, get_extent,
- &bio, 0, &bio_flags);
- }
- page_cache_release(page);
- }
- BUG_ON(!list_empty(pages));
- if (bio)
- return submit_one_bio(READ, bio, 0, bio_flags);
- return 0;
-}
-
-/*
- * basic invalidatepage code, this waits on any locked or writeback
- * ranges corresponding to the page, and then deletes any extent state
- * records from the tree
- */
-int extent_invalidatepage(struct extent_io_tree *tree,
- struct page *page, unsigned long offset)
-{
- struct extent_state *cached_state = NULL;
- u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
- u64 end = start + PAGE_CACHE_SIZE - 1;
- size_t blocksize = page->mapping->host->i_sb->s_blocksize;
-
- start += (offset + blocksize - 1) & ~(blocksize - 1);
- if (start > end)
- return 0;
-
- lock_extent_bits(tree, start, end, 0, &cached_state);
- wait_on_page_writeback(page);
- clear_extent_bit(tree, start, end,
- EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING,
- 1, 1, &cached_state, GFP_NOFS);
- return 0;
-}
-
-/*
- * a helper for releasepage, this tests for areas of the page that
- * are locked or under IO and drops the related state bits if it is safe
- * to drop the page.
- */
-int try_release_extent_state(struct extent_map_tree *map,
- struct extent_io_tree *tree, struct page *page,
- gfp_t mask)
-{
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
- u64 end = start + PAGE_CACHE_SIZE - 1;
- int ret = 1;
-
- if (test_range_bit(tree, start, end,
- EXTENT_IOBITS, 0, NULL))
- ret = 0;
- else {
- if ((mask & GFP_NOFS) == GFP_NOFS)
- mask = GFP_NOFS;
- /*
- * at this point we can safely clear everything except the
- * locked bit and the nodatasum bit
- */
- ret = clear_extent_bit(tree, start, end,
- ~(EXTENT_LOCKED | EXTENT_NODATASUM),
- 0, 0, NULL, mask);
-
- /* if clear_extent_bit failed for enomem reasons,
- * we can't allow the release to continue.
- */
- if (ret < 0)
- ret = 0;
- else
- ret = 1;
- }
- return ret;
-}
-
-/*
- * a helper for releasepage. As long as there are no locked extents
- * in the range corresponding to the page, both state records and extent
- * map records are removed
- */
-int try_release_extent_mapping(struct extent_map_tree *map,
- struct extent_io_tree *tree, struct page *page,
- gfp_t mask)
-{
- struct extent_map *em;
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
- u64 end = start + PAGE_CACHE_SIZE - 1;
-
- if ((mask & __GFP_WAIT) &&
- page->mapping->host->i_size > 16 * 1024 * 1024) {
- u64 len;
- while (start <= end) {
- len = end - start + 1;
- write_lock(&map->lock);
- em = lookup_extent_mapping(map, start, len);
- if (!em) {
- write_unlock(&map->lock);
- break;
- }
- if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
- em->start != start) {
- write_unlock(&map->lock);
- free_extent_map(em);
- break;
- }
- if (!test_range_bit(tree, em->start,
- extent_map_end(em) - 1,
- EXTENT_LOCKED | EXTENT_WRITEBACK,
- 0, NULL)) {
- remove_extent_mapping(map, em);
- /* once for the rb tree */
- free_extent_map(em);
- }
- start = extent_map_end(em);
- write_unlock(&map->lock);
-
- /* once for us */
- free_extent_map(em);
- }
- }
- return try_release_extent_state(map, tree, page, mask);
-}
-
-/*
- * helper function for fiemap, which doesn't want to see any holes.
- * This maps until we find something past 'last'
- */
-static struct extent_map *get_extent_skip_holes(struct inode *inode,
- u64 offset,
- u64 last,
- get_extent_t *get_extent)
-{
- u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
- struct extent_map *em;
- u64 len;
-
- if (offset >= last)
- return NULL;
-
- while(1) {
- len = last - offset;
- if (len == 0)
- break;
- len = (len + sectorsize - 1) & ~(sectorsize - 1);
- em = get_extent(inode, NULL, 0, offset, len, 0);
- if (IS_ERR_OR_NULL(em))
- return em;
-
- /* if this isn't a hole return it */
- if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
- em->block_start != EXTENT_MAP_HOLE) {
- return em;
- }
-
- /* this is a hole, advance to the next extent */
- offset = extent_map_end(em);
- free_extent_map(em);
- if (offset >= last)
- break;
- }
- return NULL;
-}
-
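get_extent_skip_holes() above rounds the lookup length up to the sector size with the (x + a - 1) & ~(a - 1) idiom, and extent_fiemap() below does the same via ALIGN(). The trick works for any power-of-two alignment; a standalone sketch:

	#include <stdio.h>

	/* Round len up to the next multiple of a power-of-two alignment. */
	static unsigned long long align_up(unsigned long long len,
					   unsigned long long align)
	{
		return (len + align - 1) & ~(align - 1);
	}

	int main(void)
	{
		printf("%llu\n", align_up(1, 4096));	/* 4096 */
		printf("%llu\n", align_up(4096, 4096));	/* 4096 */
		printf("%llu\n", align_up(4097, 4096));	/* 8192 */
		return 0;
	}
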
-int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
- __u64 start, __u64 len, get_extent_t *get_extent)
-{
- int ret = 0;
- u64 off = start;
- u64 max = start + len;
- u32 flags = 0;
- u32 found_type;
- u64 last;
- u64 last_for_get_extent = 0;
- u64 disko = 0;
- u64 isize = i_size_read(inode);
- struct btrfs_key found_key;
- struct extent_map *em = NULL;
- struct extent_state *cached_state = NULL;
- struct btrfs_path *path;
- struct btrfs_file_extent_item *item;
- int end = 0;
- u64 em_start = 0;
- u64 em_len = 0;
- u64 em_end = 0;
- unsigned long emflags;
-
- if (len == 0)
- return -EINVAL;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->leave_spinning = 1;
-
- start = ALIGN(start, BTRFS_I(inode)->root->sectorsize);
- len = ALIGN(len, BTRFS_I(inode)->root->sectorsize);
-
- /*
- * lookup the last file extent. We're not using i_size here
- * because there might be preallocation past i_size
- */
- ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
- path, btrfs_ino(inode), -1, 0);
- if (ret < 0) {
- btrfs_free_path(path);
- return ret;
- }
- WARN_ON(!ret);
- path->slots[0]--;
- item = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_file_extent_item);
- btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
- found_type = btrfs_key_type(&found_key);
-
- /* No extents, but there might be delalloc bits */
- if (found_key.objectid != btrfs_ino(inode) ||
- found_type != BTRFS_EXTENT_DATA_KEY) {
- /* have to trust i_size as the end */
- last = (u64)-1;
- last_for_get_extent = isize;
- } else {
- /*
- * remember the start of the last extent. There are a
- * bunch of different factors that go into the length of the
-		 * extent, so it's much less complex to remember where it started
- */
- last = found_key.offset;
- last_for_get_extent = last + 1;
- }
- btrfs_free_path(path);
-
- /*
- * we might have some extents allocated but more delalloc past those
- * extents. so, we trust isize unless the start of the last extent is
- * beyond isize
- */
- if (last < isize) {
- last = (u64)-1;
- last_for_get_extent = isize;
- }
-
- lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
- &cached_state);
-
- em = get_extent_skip_holes(inode, start, last_for_get_extent,
- get_extent);
- if (!em)
- goto out;
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- goto out;
- }
-
- while (!end) {
- u64 offset_in_extent;
-
- /* break if the extent we found is outside the range */
- if (em->start >= max || extent_map_end(em) < off)
- break;
-
- /*
- * get_extent may return an extent that starts before our
- * requested range. We have to make sure the ranges
- * we return to fiemap always move forward and don't
- * overlap, so adjust the offsets here
- */
- em_start = max(em->start, off);
-
- /*
- * record the offset from the start of the extent
- * for adjusting the disk offset below
- */
- offset_in_extent = em_start - em->start;
- em_end = extent_map_end(em);
- em_len = em_end - em_start;
- emflags = em->flags;
- disko = 0;
- flags = 0;
-
- /*
- * bump off for our next call to get_extent
- */
- off = extent_map_end(em);
- if (off >= max)
- end = 1;
-
- if (em->block_start == EXTENT_MAP_LAST_BYTE) {
- end = 1;
- flags |= FIEMAP_EXTENT_LAST;
- } else if (em->block_start == EXTENT_MAP_INLINE) {
- flags |= (FIEMAP_EXTENT_DATA_INLINE |
- FIEMAP_EXTENT_NOT_ALIGNED);
- } else if (em->block_start == EXTENT_MAP_DELALLOC) {
- flags |= (FIEMAP_EXTENT_DELALLOC |
- FIEMAP_EXTENT_UNKNOWN);
- } else {
- disko = em->block_start + offset_in_extent;
- }
- if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
- flags |= FIEMAP_EXTENT_ENCODED;
-
- free_extent_map(em);
- em = NULL;
- if ((em_start >= last) || em_len == (u64)-1 ||
- (last == (u64)-1 && isize <= em_end)) {
- flags |= FIEMAP_EXTENT_LAST;
- end = 1;
- }
-
- /* now scan forward to see if this is really the last extent. */
- em = get_extent_skip_holes(inode, off, last_for_get_extent,
- get_extent);
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- goto out;
- }
- if (!em) {
- flags |= FIEMAP_EXTENT_LAST;
- end = 1;
- }
- ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
- em_len, flags);
- if (ret)
- goto out_free;
- }
-out_free:
- free_extent_map(em);
-out:
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len,
- &cached_state, GFP_NOFS);
- return ret;
-}
-
-inline struct page *extent_buffer_page(struct extent_buffer *eb,
- unsigned long i)
-{
- return eb->pages[i];
-}
-
-inline unsigned long num_extent_pages(u64 start, u64 len)
-{
- return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
- (start >> PAGE_CACHE_SHIFT);
-}
-
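num_extent_pages() counts every page cache page touched by the byte range [start, start + len). A worked example, assuming 4 KiB pages for the illustration:

	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define PAGE_SIZE	(1UL << PAGE_SHIFT)

	static unsigned long num_pages(unsigned long long start,
				       unsigned long long len)
	{
		return (unsigned long)(((start + len + PAGE_SIZE - 1) >> PAGE_SHIFT) -
				       (start >> PAGE_SHIFT));
	}

	int main(void)
	{
		/* 8 KiB starting 2 KiB into page 1 spans pages 1, 2 and 3. */
		printf("%lu\n", num_pages(6144, 8192));	/* 3 */
		/* An exactly page-aligned 8 KiB range spans two pages. */
		printf("%lu\n", num_pages(8192, 8192));	/* 2 */
		return 0;
	}
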
-static void __free_extent_buffer(struct extent_buffer *eb)
-{
-#if LEAK_DEBUG
- unsigned long flags;
- spin_lock_irqsave(&leak_lock, flags);
- list_del(&eb->leak_list);
- spin_unlock_irqrestore(&leak_lock, flags);
-#endif
- if (eb->pages && eb->pages != eb->inline_pages)
- kfree(eb->pages);
- kmem_cache_free(extent_buffer_cache, eb);
-}
-
-static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
- u64 start,
- unsigned long len,
- gfp_t mask)
-{
- struct extent_buffer *eb = NULL;
-#if LEAK_DEBUG
- unsigned long flags;
-#endif
-
- eb = kmem_cache_zalloc(extent_buffer_cache, mask);
- if (eb == NULL)
- return NULL;
- eb->start = start;
- eb->len = len;
- eb->tree = tree;
- rwlock_init(&eb->lock);
- atomic_set(&eb->write_locks, 0);
- atomic_set(&eb->read_locks, 0);
- atomic_set(&eb->blocking_readers, 0);
- atomic_set(&eb->blocking_writers, 0);
- atomic_set(&eb->spinning_readers, 0);
- atomic_set(&eb->spinning_writers, 0);
- eb->lock_nested = 0;
- init_waitqueue_head(&eb->write_lock_wq);
- init_waitqueue_head(&eb->read_lock_wq);
-
-#if LEAK_DEBUG
- spin_lock_irqsave(&leak_lock, flags);
- list_add(&eb->leak_list, &buffers);
- spin_unlock_irqrestore(&leak_lock, flags);
-#endif
- spin_lock_init(&eb->refs_lock);
- atomic_set(&eb->refs, 1);
- atomic_set(&eb->io_pages, 0);
-
- if (len > MAX_INLINE_EXTENT_BUFFER_SIZE) {
- struct page **pages;
- int num_pages = (len + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
- pages = kzalloc(num_pages, mask);
- if (!pages) {
- __free_extent_buffer(eb);
- return NULL;
- }
- eb->pages = pages;
- } else {
- eb->pages = eb->inline_pages;
- }
-
- return eb;
-}
-
-static int extent_buffer_under_io(struct extent_buffer *eb)
-{
- return (atomic_read(&eb->io_pages) ||
- test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
- test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
-}
-
-/*
- * Helper for releasing extent buffer page.
- */
-static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
- unsigned long start_idx)
-{
- unsigned long index;
- struct page *page;
-
- BUG_ON(extent_buffer_under_io(eb));
-
- index = num_extent_pages(eb->start, eb->len);
- if (start_idx >= index)
- return;
-
- do {
- index--;
- page = extent_buffer_page(eb, index);
- if (page) {
- spin_lock(&page->mapping->private_lock);
- /*
- * We do this since we'll remove the pages after we've
- * removed the eb from the radix tree, so we could race
- * and have this page now attached to the new eb. So
- * only clear page_private if it's still connected to
- * this eb.
- */
- if (PagePrivate(page) &&
- page->private == (unsigned long)eb) {
- BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
- BUG_ON(PageDirty(page));
- BUG_ON(PageWriteback(page));
- /*
-				 * We need to make sure we haven't been attached
- * to a new eb.
- */
- ClearPagePrivate(page);
- set_page_private(page, 0);
- /* One for the page private */
- page_cache_release(page);
- }
- spin_unlock(&page->mapping->private_lock);
-
- /* One for when we alloced the page */
- page_cache_release(page);
- }
- } while (index != start_idx);
-}
-
-/*
- * Helper for releasing the extent buffer.
- */
-static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
-{
- btrfs_release_extent_buffer_page(eb, 0);
- __free_extent_buffer(eb);
-}
-
-static void check_buffer_tree_ref(struct extent_buffer *eb)
-{
- /* the ref bit is tricky. We have to make sure it is set
- * if we have the buffer dirty. Otherwise the
- * code to free a buffer can end up dropping a dirty
- * page
- *
- * Once the ref bit is set, it won't go away while the
- * buffer is dirty or in writeback, and it also won't
- * go away while we have the reference count on the
- * eb bumped.
- *
- * We can't just set the ref bit without bumping the
- * ref on the eb because free_extent_buffer might
- * see the ref bit and try to clear it. If this happens
- * free_extent_buffer might end up dropping our original
- * ref by mistake and freeing the page before we are able
- * to add one more ref.
- *
- * So bump the ref count first, then set the bit. If someone
- * beat us to it, drop the ref we added.
- */
- if (!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
- atomic_inc(&eb->refs);
- if (test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
- atomic_dec(&eb->refs);
- }
-}
-
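The ordering that check_buffer_tree_ref() relies on (take the extra reference first, then publish the TREE_REF bit, and give the reference back if another thread won the race) can be sketched in isolation with C11 atomics; struct obj, refs and tree_ref below are illustrative stand-ins, not the btrfs structures:

	#include <stdatomic.h>
	#include <stdbool.h>

	struct obj {
		atomic_int  refs;
		atomic_bool tree_ref;
	};

	static void take_tree_ref(struct obj *o)
	{
		if (!atomic_load(&o->tree_ref)) {
			/* Bump the refcount first so a concurrent free path can
			 * never see the flag without a reference backing it. */
			atomic_fetch_add(&o->refs, 1);
			/* If someone else set the flag in the meantime, they also
			 * took a reference, so drop the one we just added. */
			if (atomic_exchange(&o->tree_ref, true))
				atomic_fetch_sub(&o->refs, 1);
		}
	}
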
-static void mark_extent_buffer_accessed(struct extent_buffer *eb)
-{
- unsigned long num_pages, i;
-
- check_buffer_tree_ref(eb);
-
- num_pages = num_extent_pages(eb->start, eb->len);
- for (i = 0; i < num_pages; i++) {
- struct page *p = extent_buffer_page(eb, i);
- mark_page_accessed(p);
- }
-}
-
-struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
- u64 start, unsigned long len)
-{
- unsigned long num_pages = num_extent_pages(start, len);
- unsigned long i;
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- struct extent_buffer *eb;
- struct extent_buffer *exists = NULL;
- struct page *p;
- struct address_space *mapping = tree->mapping;
- int uptodate = 1;
- int ret;
-
- rcu_read_lock();
- eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
- if (eb && atomic_inc_not_zero(&eb->refs)) {
- rcu_read_unlock();
- mark_extent_buffer_accessed(eb);
- return eb;
- }
- rcu_read_unlock();
-
- eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
- if (!eb)
- return NULL;
-
- for (i = 0; i < num_pages; i++, index++) {
- p = find_or_create_page(mapping, index, GFP_NOFS);
- if (!p) {
- WARN_ON(1);
- goto free_eb;
- }
-
- spin_lock(&mapping->private_lock);
- if (PagePrivate(p)) {
- /*
- * We could have already allocated an eb for this page
- * and attached one so lets see if we can get a ref on
- * the existing eb, and if we can we know it's good and
- * we can just return that one, else we know we can just
- * overwrite page->private.
- */
- exists = (struct extent_buffer *)p->private;
- if (atomic_inc_not_zero(&exists->refs)) {
- spin_unlock(&mapping->private_lock);
- unlock_page(p);
- page_cache_release(p);
- mark_extent_buffer_accessed(exists);
- goto free_eb;
- }
-
- /*
-			 * Do this so attach doesn't complain; we also need to
-			 * drop the ref the old guy had.
- */
- ClearPagePrivate(p);
- WARN_ON(PageDirty(p));
- page_cache_release(p);
- }
- attach_extent_buffer_page(eb, p);
- spin_unlock(&mapping->private_lock);
- WARN_ON(PageDirty(p));
- mark_page_accessed(p);
- eb->pages[i] = p;
- if (!PageUptodate(p))
- uptodate = 0;
-
- /*
- * see below about how we avoid a nasty race with release page
- * and why we unlock later
- */
- }
- if (uptodate)
- set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-again:
- ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
- if (ret)
- goto free_eb;
-
- spin_lock(&tree->buffer_lock);
- ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
- if (ret == -EEXIST) {
- exists = radix_tree_lookup(&tree->buffer,
- start >> PAGE_CACHE_SHIFT);
- if (!atomic_inc_not_zero(&exists->refs)) {
- spin_unlock(&tree->buffer_lock);
- radix_tree_preload_end();
- exists = NULL;
- goto again;
- }
- spin_unlock(&tree->buffer_lock);
- radix_tree_preload_end();
- mark_extent_buffer_accessed(exists);
- goto free_eb;
- }
- /* add one reference for the tree */
- spin_lock(&eb->refs_lock);
- check_buffer_tree_ref(eb);
- spin_unlock(&eb->refs_lock);
- spin_unlock(&tree->buffer_lock);
- radix_tree_preload_end();
-
- /*
- * there is a race where release page may have
- * tried to find this extent buffer in the radix
- * but failed. It will tell the VM it is safe to
-	 * reclaim the page, and it will clear the page private bit.
- * We must make sure to set the page private bit properly
- * after the extent buffer is in the radix tree so
- * it doesn't get lost
- */
- SetPageChecked(eb->pages[0]);
- for (i = 1; i < num_pages; i++) {
- p = extent_buffer_page(eb, i);
- ClearPageChecked(p);
- unlock_page(p);
- }
- unlock_page(eb->pages[0]);
- return eb;
-
-free_eb:
- for (i = 0; i < num_pages; i++) {
- if (eb->pages[i])
- unlock_page(eb->pages[i]);
- }
-
- WARN_ON(!atomic_dec_and_test(&eb->refs));
- btrfs_release_extent_buffer(eb);
- return exists;
-}
-
-struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
- u64 start, unsigned long len)
-{
- struct extent_buffer *eb;
-
- rcu_read_lock();
- eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
- if (eb && atomic_inc_not_zero(&eb->refs)) {
- rcu_read_unlock();
- mark_extent_buffer_accessed(eb);
- return eb;
- }
- rcu_read_unlock();
-
- return NULL;
-}
-
-static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
-{
- struct extent_buffer *eb =
- container_of(head, struct extent_buffer, rcu_head);
-
- __free_extent_buffer(eb);
-}
-
-/* Expects to have eb->eb_lock already held */
-static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
-{
- WARN_ON(atomic_read(&eb->refs) == 0);
- if (atomic_dec_and_test(&eb->refs)) {
- struct extent_io_tree *tree = eb->tree;
-
- spin_unlock(&eb->refs_lock);
-
- spin_lock(&tree->buffer_lock);
- radix_tree_delete(&tree->buffer,
- eb->start >> PAGE_CACHE_SHIFT);
- spin_unlock(&tree->buffer_lock);
-
- /* Should be safe to release our pages at this point */
- btrfs_release_extent_buffer_page(eb, 0);
-
- call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
- return;
- }
- spin_unlock(&eb->refs_lock);
-}
-
-void free_extent_buffer(struct extent_buffer *eb)
-{
- if (!eb)
- return;
-
- spin_lock(&eb->refs_lock);
- if (atomic_read(&eb->refs) == 2 &&
- test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
- !extent_buffer_under_io(eb) &&
- test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
- atomic_dec(&eb->refs);
-
- /*
- * I know this is terrible, but it's temporary until we stop tracking
- * the uptodate bits and such for the extent buffers.
- */
- release_extent_buffer(eb, GFP_ATOMIC);
-}
-
-void free_extent_buffer_stale(struct extent_buffer *eb)
-{
- if (!eb)
- return;
-
- spin_lock(&eb->refs_lock);
- set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
-
- if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
- test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
- atomic_dec(&eb->refs);
- release_extent_buffer(eb, GFP_NOFS);
-}
-
-void clear_extent_buffer_dirty(struct extent_buffer *eb)
-{
- unsigned long i;
- unsigned long num_pages;
- struct page *page;
-
- num_pages = num_extent_pages(eb->start, eb->len);
-
- for (i = 0; i < num_pages; i++) {
- page = extent_buffer_page(eb, i);
- if (!PageDirty(page))
- continue;
-
- lock_page(page);
- WARN_ON(!PagePrivate(page));
-
- clear_page_dirty_for_io(page);
- spin_lock_irq(&page->mapping->tree_lock);
- if (!PageDirty(page)) {
- radix_tree_tag_clear(&page->mapping->page_tree,
- page_index(page),
- PAGECACHE_TAG_DIRTY);
- }
- spin_unlock_irq(&page->mapping->tree_lock);
- ClearPageError(page);
- unlock_page(page);
- }
- WARN_ON(atomic_read(&eb->refs) == 0);
-}
-
-int set_extent_buffer_dirty(struct extent_buffer *eb)
-{
- unsigned long i;
- unsigned long num_pages;
- int was_dirty = 0;
-
- check_buffer_tree_ref(eb);
-
- was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
-
- num_pages = num_extent_pages(eb->start, eb->len);
- WARN_ON(atomic_read(&eb->refs) == 0);
- WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
-
- for (i = 0; i < num_pages; i++)
- set_page_dirty(extent_buffer_page(eb, i));
- return was_dirty;
-}
-
-static int range_straddles_pages(u64 start, u64 len)
-{
- if (len < PAGE_CACHE_SIZE)
- return 1;
- if (start & (PAGE_CACHE_SIZE - 1))
- return 1;
- if ((start + len) & (PAGE_CACHE_SIZE - 1))
- return 1;
- return 0;
-}
-
-int clear_extent_buffer_uptodate(struct extent_buffer *eb)
-{
- unsigned long i;
- struct page *page;
- unsigned long num_pages;
-
- clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
- num_pages = num_extent_pages(eb->start, eb->len);
- for (i = 0; i < num_pages; i++) {
- page = extent_buffer_page(eb, i);
- if (page)
- ClearPageUptodate(page);
- }
- return 0;
-}
-
-int set_extent_buffer_uptodate(struct extent_buffer *eb)
-{
- unsigned long i;
- struct page *page;
- unsigned long num_pages;
-
- set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
- num_pages = num_extent_pages(eb->start, eb->len);
- for (i = 0; i < num_pages; i++) {
- page = extent_buffer_page(eb, i);
- SetPageUptodate(page);
- }
- return 0;
-}
-
-int extent_range_uptodate(struct extent_io_tree *tree,
- u64 start, u64 end)
-{
- struct page *page;
- int ret;
- int pg_uptodate = 1;
- int uptodate;
- unsigned long index;
-
- if (range_straddles_pages(start, end - start + 1)) {
- ret = test_range_bit(tree, start, end,
- EXTENT_UPTODATE, 1, NULL);
- if (ret)
- return 1;
- }
- while (start <= end) {
- index = start >> PAGE_CACHE_SHIFT;
- page = find_get_page(tree->mapping, index);
- if (!page)
- return 1;
- uptodate = PageUptodate(page);
- page_cache_release(page);
- if (!uptodate) {
- pg_uptodate = 0;
- break;
- }
- start += PAGE_CACHE_SIZE;
- }
- return pg_uptodate;
-}
-
-int extent_buffer_uptodate(struct extent_buffer *eb)
-{
- return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-}
-
-int read_extent_buffer_pages(struct extent_io_tree *tree,
- struct extent_buffer *eb, u64 start, int wait,
- get_extent_t *get_extent, int mirror_num)
-{
- unsigned long i;
- unsigned long start_i;
- struct page *page;
- int err;
- int ret = 0;
- int locked_pages = 0;
- int all_uptodate = 1;
- unsigned long num_pages;
- unsigned long num_reads = 0;
- struct bio *bio = NULL;
- unsigned long bio_flags = 0;
-
- if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
- return 0;
-
- if (start) {
- WARN_ON(start < eb->start);
- start_i = (start >> PAGE_CACHE_SHIFT) -
- (eb->start >> PAGE_CACHE_SHIFT);
- } else {
- start_i = 0;
- }
-
- num_pages = num_extent_pages(eb->start, eb->len);
- for (i = start_i; i < num_pages; i++) {
- page = extent_buffer_page(eb, i);
- if (wait == WAIT_NONE) {
- if (!trylock_page(page))
- goto unlock_exit;
- } else {
- lock_page(page);
- }
- locked_pages++;
- if (!PageUptodate(page)) {
- num_reads++;
- all_uptodate = 0;
- }
- }
- if (all_uptodate) {
- if (start_i == 0)
- set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
- goto unlock_exit;
- }
-
- clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
- eb->read_mirror = 0;
- atomic_set(&eb->io_pages, num_reads);
- for (i = start_i; i < num_pages; i++) {
- page = extent_buffer_page(eb, i);
- if (!PageUptodate(page)) {
- ClearPageError(page);
- err = __extent_read_full_page(tree, page,
- get_extent, &bio,
- mirror_num, &bio_flags);
- if (err)
- ret = err;
- } else {
- unlock_page(page);
- }
- }
-
- if (bio) {
- err = submit_one_bio(READ, bio, mirror_num, bio_flags);
- if (err)
- return err;
- }
-
- if (ret || wait != WAIT_COMPLETE)
- return ret;
-
- for (i = start_i; i < num_pages; i++) {
- page = extent_buffer_page(eb, i);
- wait_on_page_locked(page);
- if (!PageUptodate(page))
- ret = -EIO;
- }
-
- return ret;
-
-unlock_exit:
- i = start_i;
- while (locked_pages > 0) {
- page = extent_buffer_page(eb, i);
- i++;
- unlock_page(page);
- locked_pages--;
- }
- return ret;
-}
-
-void read_extent_buffer(struct extent_buffer *eb, void *dstv,
- unsigned long start,
- unsigned long len)
-{
- size_t cur;
- size_t offset;
- struct page *page;
- char *kaddr;
- char *dst = (char *)dstv;
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
-
- WARN_ON(start > eb->len);
- WARN_ON(start + len > eb->start + eb->len);
-
- offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
-
- while (len > 0) {
- page = extent_buffer_page(eb, i);
-
- cur = min(len, (PAGE_CACHE_SIZE - offset));
- kaddr = page_address(page);
- memcpy(dst, kaddr + offset, cur);
-
- dst += cur;
- len -= cur;
- offset = 0;
- i++;
- }
-}
-
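read_extent_buffer() and the write/memcmp/memset helpers that follow all share the same walk: clamp each chunk to the end of the current page, copy it, then continue from offset 0 of the next page. A simplified standalone version, assuming a hypothetical pages[] array of page-sized buffers and a buffer that starts on a page boundary (so the eb->start intra-page offset handling is omitted):

	#include <string.h>

	#define PAGE_SIZE	4096UL

	static void copy_from_pages(char *dst, char * const *pages,
				    unsigned long start, unsigned long len)
	{
		unsigned long i = start / PAGE_SIZE;
		unsigned long offset = start % PAGE_SIZE;

		while (len > 0) {
			/* Never read past the end of the current page. */
			unsigned long cur = len < PAGE_SIZE - offset ?
					    len : PAGE_SIZE - offset;

			memcpy(dst, pages[i] + offset, cur);
			dst += cur;
			len -= cur;
			offset = 0;	/* later pages are read from their start */
			i++;
		}
	}
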
-int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
- unsigned long min_len, char **map,
- unsigned long *map_start,
- unsigned long *map_len)
-{
- size_t offset = start & (PAGE_CACHE_SIZE - 1);
- char *kaddr;
- struct page *p;
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
- unsigned long end_i = (start_offset + start + min_len - 1) >>
- PAGE_CACHE_SHIFT;
-
- if (i != end_i)
- return -EINVAL;
-
- if (i == 0) {
- offset = start_offset;
- *map_start = 0;
- } else {
- offset = 0;
- *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
- }
-
- if (start + min_len > eb->len) {
- printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
- "wanted %lu %lu\n", (unsigned long long)eb->start,
- eb->len, start, min_len);
- WARN_ON(1);
- return -EINVAL;
- }
-
- p = extent_buffer_page(eb, i);
- kaddr = page_address(p);
- *map = kaddr + offset;
- *map_len = PAGE_CACHE_SIZE - offset;
- return 0;
-}
-
-int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
- unsigned long start,
- unsigned long len)
-{
- size_t cur;
- size_t offset;
- struct page *page;
- char *kaddr;
- char *ptr = (char *)ptrv;
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
- int ret = 0;
-
- WARN_ON(start > eb->len);
- WARN_ON(start + len > eb->start + eb->len);
-
- offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
-
- while (len > 0) {
- page = extent_buffer_page(eb, i);
-
- cur = min(len, (PAGE_CACHE_SIZE - offset));
-
- kaddr = page_address(page);
- ret = memcmp(ptr, kaddr + offset, cur);
- if (ret)
- break;
-
- ptr += cur;
- len -= cur;
- offset = 0;
- i++;
- }
- return ret;
-}
-
-void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
- unsigned long start, unsigned long len)
-{
- size_t cur;
- size_t offset;
- struct page *page;
- char *kaddr;
- char *src = (char *)srcv;
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
-
- WARN_ON(start > eb->len);
- WARN_ON(start + len > eb->start + eb->len);
-
- offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
-
- while (len > 0) {
- page = extent_buffer_page(eb, i);
- WARN_ON(!PageUptodate(page));
-
- cur = min(len, PAGE_CACHE_SIZE - offset);
- kaddr = page_address(page);
- memcpy(kaddr + offset, src, cur);
-
- src += cur;
- len -= cur;
- offset = 0;
- i++;
- }
-}
-
-void memset_extent_buffer(struct extent_buffer *eb, char c,
- unsigned long start, unsigned long len)
-{
- size_t cur;
- size_t offset;
- struct page *page;
- char *kaddr;
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
-
- WARN_ON(start > eb->len);
- WARN_ON(start + len > eb->start + eb->len);
-
- offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
-
- while (len > 0) {
- page = extent_buffer_page(eb, i);
- WARN_ON(!PageUptodate(page));
-
- cur = min(len, PAGE_CACHE_SIZE - offset);
- kaddr = page_address(page);
- memset(kaddr + offset, c, cur);
-
- len -= cur;
- offset = 0;
- i++;
- }
-}
-
-void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
- unsigned long dst_offset, unsigned long src_offset,
- unsigned long len)
-{
- u64 dst_len = dst->len;
- size_t cur;
- size_t offset;
- struct page *page;
- char *kaddr;
- size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
-
- WARN_ON(src->len != dst_len);
-
- offset = (start_offset + dst_offset) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
-
- while (len > 0) {
- page = extent_buffer_page(dst, i);
- WARN_ON(!PageUptodate(page));
-
- cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
-
- kaddr = page_address(page);
- read_extent_buffer(src, kaddr + offset, src_offset, cur);
-
- src_offset += cur;
- len -= cur;
- offset = 0;
- i++;
- }
-}
-
-static void move_pages(struct page *dst_page, struct page *src_page,
- unsigned long dst_off, unsigned long src_off,
- unsigned long len)
-{
- char *dst_kaddr = page_address(dst_page);
- if (dst_page == src_page) {
- memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
- } else {
- char *src_kaddr = page_address(src_page);
- char *p = dst_kaddr + dst_off + len;
- char *s = src_kaddr + src_off + len;
-
- while (len--)
- *--p = *--s;
- }
-}
-
-static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
-{
- unsigned long distance = (src > dst) ? src - dst : dst - src;
- return distance < len;
-}
-
-static void copy_pages(struct page *dst_page, struct page *src_page,
- unsigned long dst_off, unsigned long src_off,
- unsigned long len)
-{
- char *dst_kaddr = page_address(dst_page);
- char *src_kaddr;
- int must_memmove = 0;
-
- if (dst_page != src_page) {
- src_kaddr = page_address(src_page);
- } else {
- src_kaddr = dst_kaddr;
- if (areas_overlap(src_off, dst_off, len))
- must_memmove = 1;
- }
-
- if (must_memmove)
- memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
- else
- memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
-}
-
-void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
- unsigned long src_offset, unsigned long len)
-{
- size_t cur;
- size_t dst_off_in_page;
- size_t src_off_in_page;
- size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long dst_i;
- unsigned long src_i;
-
- if (src_offset + len > dst->len) {
- printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
- "len %lu dst len %lu\n", src_offset, len, dst->len);
- BUG_ON(1);
- }
- if (dst_offset + len > dst->len) {
- printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
- "len %lu dst len %lu\n", dst_offset, len, dst->len);
- BUG_ON(1);
- }
-
- while (len > 0) {
- dst_off_in_page = (start_offset + dst_offset) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
- src_off_in_page = (start_offset + src_offset) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
-
- dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
- src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
-
- cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
- src_off_in_page));
- cur = min_t(unsigned long, cur,
- (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
-
- copy_pages(extent_buffer_page(dst, dst_i),
- extent_buffer_page(dst, src_i),
- dst_off_in_page, src_off_in_page, cur);
-
- src_offset += cur;
- dst_offset += cur;
- len -= cur;
- }
-}
-
-void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
- unsigned long src_offset, unsigned long len)
-{
- size_t cur;
- size_t dst_off_in_page;
- size_t src_off_in_page;
- unsigned long dst_end = dst_offset + len - 1;
- unsigned long src_end = src_offset + len - 1;
- size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long dst_i;
- unsigned long src_i;
-
- if (src_offset + len > dst->len) {
- printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
- "len %lu len %lu\n", src_offset, len, dst->len);
- BUG_ON(1);
- }
- if (dst_offset + len > dst->len) {
- printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
- "len %lu len %lu\n", dst_offset, len, dst->len);
- BUG_ON(1);
- }
- if (dst_offset < src_offset) {
- memcpy_extent_buffer(dst, dst_offset, src_offset, len);
- return;
- }
- while (len > 0) {
- dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
- src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
-
- dst_off_in_page = (start_offset + dst_end) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
- src_off_in_page = (start_offset + src_end) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
-
- cur = min_t(unsigned long, len, src_off_in_page + 1);
- cur = min(cur, dst_off_in_page + 1);
- move_pages(extent_buffer_page(dst, dst_i),
- extent_buffer_page(dst, src_i),
- dst_off_in_page - cur + 1,
- src_off_in_page - cur + 1, cur);
-
- dst_end -= cur;
- src_end -= cur;
- len -= cur;
- }
-}
-
-int try_release_extent_buffer(struct page *page, gfp_t mask)
-{
- struct extent_buffer *eb;
-
- /*
-	 * We need to make sure nobody is attaching this page to an eb right
- * now.
- */
- spin_lock(&page->mapping->private_lock);
- if (!PagePrivate(page)) {
- spin_unlock(&page->mapping->private_lock);
- return 1;
- }
-
- eb = (struct extent_buffer *)page->private;
- BUG_ON(!eb);
-
- /*
- * This is a little awful but should be ok, we need to make sure that
- * the eb doesn't disappear out from under us while we're looking at
- * this page.
- */
- spin_lock(&eb->refs_lock);
- if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
- spin_unlock(&eb->refs_lock);
- spin_unlock(&page->mapping->private_lock);
- return 0;
- }
- spin_unlock(&page->mapping->private_lock);
-
- if ((mask & GFP_NOFS) == GFP_NOFS)
- mask = GFP_NOFS;
-
- /*
- * If tree ref isn't set then we know the ref on this eb is a real ref,
- * so just return, this page will likely be freed soon anyway.
- */
- if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
- spin_unlock(&eb->refs_lock);
- return 0;
- }
- release_extent_buffer(eb, mask);
-
- return 1;
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/extent_io.h b/ANDROID_3.4.5/fs/btrfs/extent_io.h
deleted file mode 100644
index b516c3b8..00000000
--- a/ANDROID_3.4.5/fs/btrfs/extent_io.h
+++ /dev/null
@@ -1,331 +0,0 @@
-#ifndef __EXTENTIO__
-#define __EXTENTIO__
-
-#include <linux/rbtree.h>
-
-/* bits for the extent state */
-#define EXTENT_DIRTY 1
-#define EXTENT_WRITEBACK (1 << 1)
-#define EXTENT_UPTODATE (1 << 2)
-#define EXTENT_LOCKED (1 << 3)
-#define EXTENT_NEW (1 << 4)
-#define EXTENT_DELALLOC (1 << 5)
-#define EXTENT_DEFRAG (1 << 6)
-#define EXTENT_DEFRAG_DONE (1 << 7)
-#define EXTENT_BUFFER_FILLED (1 << 8)
-#define EXTENT_BOUNDARY (1 << 9)
-#define EXTENT_NODATASUM (1 << 10)
-#define EXTENT_DO_ACCOUNTING (1 << 11)
-#define EXTENT_FIRST_DELALLOC (1 << 12)
-#define EXTENT_NEED_WAIT (1 << 13)
-#define EXTENT_DAMAGED (1 << 14)
-#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
-#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
-
-/*
- * flags for bio submission. The high bits indicate the compression
- * type for this bio
- */
-#define EXTENT_BIO_COMPRESSED 1
-#define EXTENT_BIO_FLAG_SHIFT 16
-
-/* these are bit numbers for test/set bit */
-#define EXTENT_BUFFER_UPTODATE 0
-#define EXTENT_BUFFER_BLOCKING 1
-#define EXTENT_BUFFER_DIRTY 2
-#define EXTENT_BUFFER_CORRUPT 3
-#define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */
-#define EXTENT_BUFFER_TREE_REF 5
-#define EXTENT_BUFFER_STALE 6
-#define EXTENT_BUFFER_WRITEBACK 7
-#define EXTENT_BUFFER_IOERR 8
-
-/* these are flags for extent_clear_unlock_delalloc */
-#define EXTENT_CLEAR_UNLOCK_PAGE 0x1
-#define EXTENT_CLEAR_UNLOCK 0x2
-#define EXTENT_CLEAR_DELALLOC 0x4
-#define EXTENT_CLEAR_DIRTY 0x8
-#define EXTENT_SET_WRITEBACK 0x10
-#define EXTENT_END_WRITEBACK 0x20
-#define EXTENT_SET_PRIVATE2 0x40
-#define EXTENT_CLEAR_ACCOUNTING 0x80
-
-/*
- * page->private values. Every page that is controlled by the extent
- * map has page->private set to one.
- */
-#define EXTENT_PAGE_PRIVATE 1
-#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3
-
-struct extent_state;
-struct btrfs_root;
-
-typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags, u64 bio_offset);
-struct extent_io_ops {
- int (*fill_delalloc)(struct inode *inode, struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written);
- int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
- int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
- extent_submit_bio_hook_t *submit_bio_hook;
- int (*merge_bio_hook)(struct page *page, unsigned long offset,
- size_t size, struct bio *bio,
- unsigned long bio_flags);
- int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
- int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
- int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
- u64 start, u64 end,
- struct extent_state *state);
- int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
- struct extent_state *state, int mirror);
- int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
- struct extent_state *state, int uptodate);
- void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
- int *bits);
- void (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
- int *bits);
- void (*merge_extent_hook)(struct inode *inode,
- struct extent_state *new,
- struct extent_state *other);
- void (*split_extent_hook)(struct inode *inode,
- struct extent_state *orig, u64 split);
- int (*write_cache_pages_lock_hook)(struct page *page, void *data,
- void (*flush_fn)(void *));
-};
-
-struct extent_io_tree {
- struct rb_root state;
- struct radix_tree_root buffer;
- struct address_space *mapping;
- u64 dirty_bytes;
- int track_uptodate;
- spinlock_t lock;
- spinlock_t buffer_lock;
- struct extent_io_ops *ops;
-};
-
-struct extent_state {
- u64 start;
- u64 end; /* inclusive */
- struct rb_node rb_node;
-
- /* ADD NEW ELEMENTS AFTER THIS */
- struct extent_io_tree *tree;
- wait_queue_head_t wq;
- atomic_t refs;
- unsigned long state;
-
- /* for use by the FS */
- u64 private;
-
- struct list_head leak_list;
-};
-
-#define INLINE_EXTENT_BUFFER_PAGES 16
-#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_CACHE_SIZE)
-struct extent_buffer {
- u64 start;
- unsigned long len;
- unsigned long map_start;
- unsigned long map_len;
- unsigned long bflags;
- struct extent_io_tree *tree;
- spinlock_t refs_lock;
- atomic_t refs;
- atomic_t io_pages;
- int read_mirror;
- struct list_head leak_list;
- struct rcu_head rcu_head;
- pid_t lock_owner;
-
-	/* counts of lock holders on the extent buffer */
- atomic_t write_locks;
- atomic_t read_locks;
- atomic_t blocking_writers;
- atomic_t blocking_readers;
- atomic_t spinning_readers;
- atomic_t spinning_writers;
- int lock_nested;
-
- /* protects write locks */
- rwlock_t lock;
-
- /* readers use lock_wq while they wait for the write
- * lock holders to unlock
- */
- wait_queue_head_t write_lock_wq;
-
- /* writers use read_lock_wq while they wait for readers
- * to unlock
- */
- wait_queue_head_t read_lock_wq;
- wait_queue_head_t lock_wq;
- struct page *inline_pages[INLINE_EXTENT_BUFFER_PAGES];
- struct page **pages;
-};
-
-static inline void extent_set_compress_type(unsigned long *bio_flags,
- int compress_type)
-{
- *bio_flags |= compress_type << EXTENT_BIO_FLAG_SHIFT;
-}
-
-static inline int extent_compress_type(unsigned long bio_flags)
-{
- return bio_flags >> EXTENT_BIO_FLAG_SHIFT;
-}
-
-struct extent_map_tree;
-
-typedef struct extent_map *(get_extent_t)(struct inode *inode,
- struct page *page,
- size_t pg_offset,
- u64 start, u64 len,
- int create);
-
-void extent_io_tree_init(struct extent_io_tree *tree,
- struct address_space *mapping);
-int try_release_extent_mapping(struct extent_map_tree *map,
- struct extent_io_tree *tree, struct page *page,
- gfp_t mask);
-int try_release_extent_buffer(struct page *page, gfp_t mask);
-int try_release_extent_state(struct extent_map_tree *map,
- struct extent_io_tree *tree, struct page *page,
- gfp_t mask);
-int lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
-int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, struct extent_state **cached);
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end);
-int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached, gfp_t mask);
-int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
-int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
- get_extent_t *get_extent, int mirror_num);
-int __init extent_io_init(void);
-void extent_io_exit(void);
-
-u64 count_range_bits(struct extent_io_tree *tree,
- u64 *start, u64 search_end,
- u64 max_bytes, unsigned long bits, int contig);
-
-void free_extent_state(struct extent_state *state);
-int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int filled, struct extent_state *cached_state);
-int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, gfp_t mask);
-int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int wake, int delete, struct extent_state **cached,
- gfp_t mask);
-int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, gfp_t mask);
-int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, u64 *failed_start,
- struct extent_state **cached_state, gfp_t mask);
-int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask);
-int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask);
-int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask);
-int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask);
-int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int clear_bits, gfp_t mask);
-int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask);
-int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, int bits);
-struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
- u64 start, int bits);
-int extent_invalidatepage(struct extent_io_tree *tree,
- struct page *page, unsigned long offset);
-int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
- get_extent_t *get_extent,
- struct writeback_control *wbc);
-int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
- u64 start, u64 end, get_extent_t *get_extent,
- int mode);
-int extent_writepages(struct extent_io_tree *tree,
- struct address_space *mapping,
- get_extent_t *get_extent,
- struct writeback_control *wbc);
-int btree_write_cache_pages(struct address_space *mapping,
- struct writeback_control *wbc);
-int extent_readpages(struct extent_io_tree *tree,
- struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages,
- get_extent_t get_extent);
-int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
- __u64 start, __u64 len, get_extent_t *get_extent);
-int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
-int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
-void set_page_extent_mapped(struct page *page);
-
-struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
- u64 start, unsigned long len);
-struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
- u64 start, unsigned long len);
-void free_extent_buffer(struct extent_buffer *eb);
-void free_extent_buffer_stale(struct extent_buffer *eb);
-#define WAIT_NONE 0
-#define WAIT_COMPLETE 1
-#define WAIT_PAGE_LOCK 2
-int read_extent_buffer_pages(struct extent_io_tree *tree,
- struct extent_buffer *eb, u64 start, int wait,
- get_extent_t *get_extent, int mirror_num);
-unsigned long num_extent_pages(u64 start, u64 len);
-struct page *extent_buffer_page(struct extent_buffer *eb, unsigned long i);
-
-static inline void extent_buffer_get(struct extent_buffer *eb)
-{
- atomic_inc(&eb->refs);
-}
-
-int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
- unsigned long start,
- unsigned long len);
-void read_extent_buffer(struct extent_buffer *eb, void *dst,
- unsigned long start,
- unsigned long len);
-void write_extent_buffer(struct extent_buffer *eb, const void *src,
- unsigned long start, unsigned long len);
-void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
- unsigned long dst_offset, unsigned long src_offset,
- unsigned long len);
-void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
- unsigned long src_offset, unsigned long len);
-void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
- unsigned long src_offset, unsigned long len);
-void memset_extent_buffer(struct extent_buffer *eb, char c,
- unsigned long start, unsigned long len);
-void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
-void clear_extent_buffer_dirty(struct extent_buffer *eb);
-int set_extent_buffer_dirty(struct extent_buffer *eb);
-int set_extent_buffer_uptodate(struct extent_buffer *eb);
-int clear_extent_buffer_uptodate(struct extent_buffer *eb);
-int extent_buffer_uptodate(struct extent_buffer *eb);
-int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
- unsigned long min_len, char **map,
- unsigned long *map_start,
- unsigned long *map_len);
-int extent_range_uptodate(struct extent_io_tree *tree,
- u64 start, u64 end);
-int extent_clear_unlock_delalloc(struct inode *inode,
- struct extent_io_tree *tree,
- u64 start, u64 end, struct page *locked_page,
- unsigned long op);
-struct bio *
-btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
- gfp_t gfp_flags);
-
-struct btrfs_mapping_tree;
-
-int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
- u64 length, u64 logical, struct page *page,
- int mirror_num);
-int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
-int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
- int mirror_num);
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/extent_map.c b/ANDROID_3.4.5/fs/btrfs/extent_map.c
deleted file mode 100644
index 7c97b330..00000000
--- a/ANDROID_3.4.5/fs/btrfs/extent_map.c
+++ /dev/null
@@ -1,363 +0,0 @@
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/hardirq.h>
-#include "ctree.h"
-#include "extent_map.h"
-
-
-static struct kmem_cache *extent_map_cache;
-
-int __init extent_map_init(void)
-{
- extent_map_cache = kmem_cache_create("extent_map",
- sizeof(struct extent_map), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
- if (!extent_map_cache)
- return -ENOMEM;
- return 0;
-}
-
-void extent_map_exit(void)
-{
- if (extent_map_cache)
- kmem_cache_destroy(extent_map_cache);
-}
-
-/**
- * extent_map_tree_init - initialize extent map tree
- * @tree: tree to initialize
- *
- * Initialize the extent tree @tree. Should be called for each new inode
- * or other user of the extent_map interface.
- */
-void extent_map_tree_init(struct extent_map_tree *tree)
-{
- tree->map = RB_ROOT;
- rwlock_init(&tree->lock);
-}
-
-/**
- * alloc_extent_map - allocate new extent map structure
- *
- * Allocate a new extent_map structure. The new structure is
- * returned with a reference count of one and needs to be
- * freed using free_extent_map()
- */
-struct extent_map *alloc_extent_map(void)
-{
- struct extent_map *em;
- em = kmem_cache_alloc(extent_map_cache, GFP_NOFS);
- if (!em)
- return NULL;
- em->in_tree = 0;
- em->flags = 0;
- em->compress_type = BTRFS_COMPRESS_NONE;
- atomic_set(&em->refs, 1);
- return em;
-}
-
-/**
- * free_extent_map - drop reference count of an extent_map
- * @em:		extent map being released
- *
- * Drops the reference on @em by one and frees the structure
- * if the reference count hits zero.
- */
-void free_extent_map(struct extent_map *em)
-{
- if (!em)
- return;
- WARN_ON(atomic_read(&em->refs) == 0);
- if (atomic_dec_and_test(&em->refs)) {
- WARN_ON(em->in_tree);
- kmem_cache_free(extent_map_cache, em);
- }
-}
-
-static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
- struct rb_node *node)
-{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct extent_map *entry;
-
- while (*p) {
- parent = *p;
- entry = rb_entry(parent, struct extent_map, rb_node);
-
- WARN_ON(!entry->in_tree);
-
- if (offset < entry->start)
- p = &(*p)->rb_left;
- else if (offset >= extent_map_end(entry))
- p = &(*p)->rb_right;
- else
- return parent;
- }
-
- entry = rb_entry(node, struct extent_map, rb_node);
- entry->in_tree = 1;
- rb_link_node(node, parent, p);
- rb_insert_color(node, root);
- return NULL;
-}
-
-/*
- * search through the tree for an extent_map with a given offset. If
- * it can't be found, try to find some neighboring extents
- */
-static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
- struct rb_node **prev_ret,
- struct rb_node **next_ret)
-{
- struct rb_node *n = root->rb_node;
- struct rb_node *prev = NULL;
- struct rb_node *orig_prev = NULL;
- struct extent_map *entry;
- struct extent_map *prev_entry = NULL;
-
- while (n) {
- entry = rb_entry(n, struct extent_map, rb_node);
- prev = n;
- prev_entry = entry;
-
- WARN_ON(!entry->in_tree);
-
- if (offset < entry->start)
- n = n->rb_left;
- else if (offset >= extent_map_end(entry))
- n = n->rb_right;
- else
- return n;
- }
-
- if (prev_ret) {
- orig_prev = prev;
- while (prev && offset >= extent_map_end(prev_entry)) {
- prev = rb_next(prev);
- prev_entry = rb_entry(prev, struct extent_map, rb_node);
- }
- *prev_ret = prev;
- prev = orig_prev;
- }
-
- if (next_ret) {
- prev_entry = rb_entry(prev, struct extent_map, rb_node);
- while (prev && offset < prev_entry->start) {
- prev = rb_prev(prev);
- prev_entry = rb_entry(prev, struct extent_map, rb_node);
- }
- *next_ret = prev;
- }
- return NULL;
-}
-
-/* check to see if two extent_map structs are adjacent and safe to merge */
-static int mergable_maps(struct extent_map *prev, struct extent_map *next)
-{
- if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
- return 0;
-
- /*
- * don't merge compressed extents, we need to know their
- * actual size
- */
- if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
- return 0;
-
- if (extent_map_end(prev) == next->start &&
- prev->flags == next->flags &&
- prev->bdev == next->bdev &&
- ((next->block_start == EXTENT_MAP_HOLE &&
- prev->block_start == EXTENT_MAP_HOLE) ||
- (next->block_start == EXTENT_MAP_INLINE &&
- prev->block_start == EXTENT_MAP_INLINE) ||
- (next->block_start == EXTENT_MAP_DELALLOC &&
- prev->block_start == EXTENT_MAP_DELALLOC) ||
- (next->block_start < EXTENT_MAP_LAST_BYTE - 1 &&
- next->block_start == extent_map_block_end(prev)))) {
- return 1;
- }
- return 0;
-}
-
-static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
-{
- struct extent_map *merge = NULL;
- struct rb_node *rb;
-
- if (em->start != 0) {
- rb = rb_prev(&em->rb_node);
- if (rb)
- merge = rb_entry(rb, struct extent_map, rb_node);
- if (rb && mergable_maps(merge, em)) {
- em->start = merge->start;
- em->len += merge->len;
- em->block_len += merge->block_len;
- em->block_start = merge->block_start;
- merge->in_tree = 0;
- rb_erase(&merge->rb_node, &tree->map);
- free_extent_map(merge);
- }
- }
-
- rb = rb_next(&em->rb_node);
- if (rb)
- merge = rb_entry(rb, struct extent_map, rb_node);
- if (rb && mergable_maps(em, merge)) {
- em->len += merge->len;
- em->block_len += merge->len;
- rb_erase(&merge->rb_node, &tree->map);
- merge->in_tree = 0;
- free_extent_map(merge);
- }
-}
-
-int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
-{
- int ret = 0;
- struct extent_map *em;
-
- write_lock(&tree->lock);
- em = lookup_extent_mapping(tree, start, len);
-
- WARN_ON(!em || em->start != start);
-
- if (!em)
- goto out;
-
- clear_bit(EXTENT_FLAG_PINNED, &em->flags);
-
- try_merge_map(tree, em);
-
- free_extent_map(em);
-out:
- write_unlock(&tree->lock);
- return ret;
-
-}
-
-/**
- * add_extent_mapping - add new extent map to the extent tree
- * @tree: tree to insert new map in
- * @em: map to insert
- *
- * Insert @em into @tree or perform a simple forward/backward merge with
- * existing mappings. The extent_map struct passed in will be inserted
- * into the tree directly, with an additional reference taken, or a
- * reference dropped if the merge attempt was successful.
- */
-int add_extent_mapping(struct extent_map_tree *tree,
- struct extent_map *em)
-{
- int ret = 0;
- struct rb_node *rb;
- struct extent_map *exist;
-
- exist = lookup_extent_mapping(tree, em->start, em->len);
- if (exist) {
- free_extent_map(exist);
- ret = -EEXIST;
- goto out;
- }
- rb = tree_insert(&tree->map, em->start, &em->rb_node);
- if (rb) {
- ret = -EEXIST;
- goto out;
- }
- atomic_inc(&em->refs);
-
- try_merge_map(tree, em);
-out:
- return ret;
-}
-
-/* simple helper to do math around the end of an extent, handling wrap */
-static u64 range_end(u64 start, u64 len)
-{
- if (start + len < start)
- return (u64)-1;
- return start + len;
-}
-
-struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree,
- u64 start, u64 len, int strict)
-{
- struct extent_map *em;
- struct rb_node *rb_node;
- struct rb_node *prev = NULL;
- struct rb_node *next = NULL;
- u64 end = range_end(start, len);
-
- rb_node = __tree_search(&tree->map, start, &prev, &next);
- if (!rb_node) {
- if (prev)
- rb_node = prev;
- else if (next)
- rb_node = next;
- else
- return NULL;
- }
-
- em = rb_entry(rb_node, struct extent_map, rb_node);
-
- if (strict && !(end > em->start && start < extent_map_end(em)))
- return NULL;
-
- atomic_inc(&em->refs);
- return em;
-}
-
-/**
- * lookup_extent_mapping - lookup extent_map
- * @tree: tree to lookup in
- * @start: byte offset to start the search
- * @len: length of the lookup range
- *
- * Find and return the first extent_map struct in @tree that intersects the
- * [start, len] range. There may be additional objects in the tree that
- * intersect, so check the object returned carefully to make sure that no
- * additional lookups are needed.
- */
-struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
- u64 start, u64 len)
-{
- return __lookup_extent_mapping(tree, start, len, 1);
-}
-
-/**
- * search_extent_mapping - find a nearby extent map
- * @tree: tree to lookup in
- * @start: byte offset to start the search
- * @len: length of the lookup range
- *
- * Find and return the first extent_map struct in @tree that intersects the
- * [start, len] range.
- *
- * If one can't be found, any nearby extent may be returned
- */
-struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
- u64 start, u64 len)
-{
- return __lookup_extent_mapping(tree, start, len, 0);
-}
-
-/**
- * remove_extent_mapping - removes an extent_map from the extent tree
- * @tree: extent tree to remove from
- * @em:	extent map being removed
- *
- * Removes @em from @tree. No reference counts are dropped, and no checks
- * are done to see if the range is in use
- */
-int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
-{
- int ret = 0;
-
- WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
- rb_erase(&em->rb_node, &tree->map);
- em->in_tree = 0;
- return ret;
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/extent_map.h b/ANDROID_3.4.5/fs/btrfs/extent_map.h
deleted file mode 100644
index 1195f097..00000000
--- a/ANDROID_3.4.5/fs/btrfs/extent_map.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#ifndef __EXTENTMAP__
-#define __EXTENTMAP__
-
-#include <linux/rbtree.h>
-
-#define EXTENT_MAP_LAST_BYTE (u64)-4
-#define EXTENT_MAP_HOLE (u64)-3
-#define EXTENT_MAP_INLINE (u64)-2
-#define EXTENT_MAP_DELALLOC (u64)-1
-
-/* bits for the flags field */
-#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
-#define EXTENT_FLAG_COMPRESSED 1
-#define EXTENT_FLAG_VACANCY 2 /* no file extent item found */
-#define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */
-
-struct extent_map {
- struct rb_node rb_node;
-
- /* all of these are in bytes */
- u64 start;
- u64 len;
- u64 orig_start;
- u64 block_start;
- u64 block_len;
- unsigned long flags;
- struct block_device *bdev;
- atomic_t refs;
- unsigned int in_tree;
- unsigned int compress_type;
-};
-
-struct extent_map_tree {
- struct rb_root map;
- rwlock_t lock;
-};
-
-static inline u64 extent_map_end(struct extent_map *em)
-{
- if (em->start + em->len < em->start)
- return (u64)-1;
- return em->start + em->len;
-}
-
-static inline u64 extent_map_block_end(struct extent_map *em)
-{
- if (em->block_start + em->block_len < em->block_start)
- return (u64)-1;
- return em->block_start + em->block_len;
-}
-
-void extent_map_tree_init(struct extent_map_tree *tree);
-struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
- u64 start, u64 len);
-int add_extent_mapping(struct extent_map_tree *tree,
- struct extent_map *em);
-int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
-
-struct extent_map *alloc_extent_map(void);
-void free_extent_map(struct extent_map *em);
-int __init extent_map_init(void);
-void extent_map_exit(void);
-int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len);
-struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
- u64 start, u64 len);
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/file-item.c b/ANDROID_3.4.5/fs/btrfs/file-item.c
deleted file mode 100644
index 5d158d32..00000000
--- a/ANDROID_3.4.5/fs/btrfs/file-item.c
+++ /dev/null
@@ -1,861 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/bio.h>
-#include <linux/slab.h>
-#include <linux/pagemap.h>
-#include <linux/highmem.h>
-#include "ctree.h"
-#include "disk-io.h"
-#include "transaction.h"
-#include "print-tree.h"
-
-#define __MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \
- sizeof(struct btrfs_item) * 2) / \
- size) - 1))
-
-#define MAX_CSUM_ITEMS(r, size) (min(__MAX_CSUM_ITEMS(r, size), PAGE_CACHE_SIZE))
-
-#define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \
- sizeof(struct btrfs_ordered_sum)) / \
- sizeof(struct btrfs_sector_sum) * \
- (r)->sectorsize - (r)->sectorsize)
-
-int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 objectid, u64 pos,
- u64 disk_offset, u64 disk_num_bytes,
- u64 num_bytes, u64 offset, u64 ram_bytes,
- u8 compression, u8 encryption, u16 other_encoding)
-{
- int ret = 0;
- struct btrfs_file_extent_item *item;
- struct btrfs_key file_key;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- file_key.objectid = objectid;
- file_key.offset = pos;
- btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
-
- path->leave_spinning = 1;
- ret = btrfs_insert_empty_item(trans, root, path, &file_key,
- sizeof(*item));
- if (ret < 0)
- goto out;
- BUG_ON(ret); /* Can't happen */
- leaf = path->nodes[0];
- item = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset);
- btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes);
- btrfs_set_file_extent_offset(leaf, item, offset);
- btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
- btrfs_set_file_extent_ram_bytes(leaf, item, ram_bytes);
- btrfs_set_file_extent_generation(leaf, item, trans->transid);
- btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
- btrfs_set_file_extent_compression(leaf, item, compression);
- btrfs_set_file_extent_encryption(leaf, item, encryption);
- btrfs_set_file_extent_other_encoding(leaf, item, other_encoding);
-
- btrfs_mark_buffer_dirty(leaf);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 bytenr, int cow)
-{
- int ret;
- struct btrfs_key file_key;
- struct btrfs_key found_key;
- struct btrfs_csum_item *item;
- struct extent_buffer *leaf;
- u64 csum_offset = 0;
- u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
- int csums_in_item;
-
- file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
- file_key.offset = bytenr;
- btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
- ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
- if (ret < 0)
- goto fail;
- leaf = path->nodes[0];
- if (ret > 0) {
- ret = 1;
- if (path->slots[0] == 0)
- goto fail;
- path->slots[0]--;
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY)
- goto fail;
-
- csum_offset = (bytenr - found_key.offset) >>
- root->fs_info->sb->s_blocksize_bits;
- csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
- csums_in_item /= csum_size;
-
- if (csum_offset >= csums_in_item) {
- ret = -EFBIG;
- goto fail;
- }
- }
- item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
- item = (struct btrfs_csum_item *)((unsigned char *)item +
- csum_offset * csum_size);
- return item;
-fail:
- if (ret > 0)
- ret = -ENOENT;
- return ERR_PTR(ret);
-}
-
-
-int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, u64 objectid,
- u64 offset, int mod)
-{
- int ret;
- struct btrfs_key file_key;
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
-
- file_key.objectid = objectid;
- file_key.offset = offset;
- btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
- ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow);
- return ret;
-}
-
-
-static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
- struct inode *inode, struct bio *bio,
- u64 logical_offset, u32 *dst, int dio)
-{
- u32 sum;
- struct bio_vec *bvec = bio->bi_io_vec;
- int bio_index = 0;
- u64 offset = 0;
- u64 item_start_offset = 0;
- u64 item_last_offset = 0;
- u64 disk_bytenr;
- u32 diff;
- u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
- int ret;
- struct btrfs_path *path;
- struct btrfs_csum_item *item = NULL;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- if (bio->bi_size > PAGE_CACHE_SIZE * 8)
- path->reada = 2;
-
- WARN_ON(bio->bi_vcnt <= 0);
-
- /*
- * the free space stuff is only read when it hasn't been
- * updated in the current transaction. So, we can safely
- * read from the commit root and sidestep a nasty deadlock
- * between reading the free space cache and updating the csum tree.
- */
- if (btrfs_is_free_space_inode(root, inode)) {
- path->search_commit_root = 1;
- path->skip_locking = 1;
- }
-
- disk_bytenr = (u64)bio->bi_sector << 9;
- if (dio)
- offset = logical_offset;
- while (bio_index < bio->bi_vcnt) {
- if (!dio)
- offset = page_offset(bvec->bv_page) + bvec->bv_offset;
- ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum);
- if (ret == 0)
- goto found;
-
- if (!item || disk_bytenr < item_start_offset ||
- disk_bytenr >= item_last_offset) {
- struct btrfs_key found_key;
- u32 item_size;
-
- if (item)
- btrfs_release_path(path);
- item = btrfs_lookup_csum(NULL, root->fs_info->csum_root,
- path, disk_bytenr, 0);
- if (IS_ERR(item)) {
- ret = PTR_ERR(item);
- if (ret == -ENOENT || ret == -EFBIG)
- ret = 0;
- sum = 0;
- if (BTRFS_I(inode)->root->root_key.objectid ==
- BTRFS_DATA_RELOC_TREE_OBJECTID) {
- set_extent_bits(io_tree, offset,
- offset + bvec->bv_len - 1,
- EXTENT_NODATASUM, GFP_NOFS);
- } else {
- printk(KERN_INFO "btrfs no csum found "
- "for inode %llu start %llu\n",
- (unsigned long long)
- btrfs_ino(inode),
- (unsigned long long)offset);
- }
- item = NULL;
- btrfs_release_path(path);
- goto found;
- }
- btrfs_item_key_to_cpu(path->nodes[0], &found_key,
- path->slots[0]);
-
- item_start_offset = found_key.offset;
- item_size = btrfs_item_size_nr(path->nodes[0],
- path->slots[0]);
- item_last_offset = item_start_offset +
- (item_size / csum_size) *
- root->sectorsize;
- item = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_csum_item);
- }
- /*
- * this byte range must be able to fit inside
- * a single leaf so it will also fit inside a u32
- */
- diff = disk_bytenr - item_start_offset;
- diff = diff / root->sectorsize;
- diff = diff * csum_size;
-
- read_extent_buffer(path->nodes[0], &sum,
- ((unsigned long)item) + diff,
- csum_size);
-found:
- if (dst)
- *dst++ = sum;
- else
- set_state_private(io_tree, offset, sum);
- disk_bytenr += bvec->bv_len;
- offset += bvec->bv_len;
- bio_index++;
- bvec++;
- }
- btrfs_free_path(path);
- return 0;
-}
-
-int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
- struct bio *bio, u32 *dst)
-{
- return __btrfs_lookup_bio_sums(root, inode, bio, 0, dst, 0);
-}
-
-int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
- struct bio *bio, u64 offset, u32 *dst)
-{
- return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1);
-}
-
-int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
- struct list_head *list, int search_commit)
-{
- struct btrfs_key key;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_ordered_sum *sums;
- struct btrfs_sector_sum *sector_sum;
- struct btrfs_csum_item *item;
- LIST_HEAD(tmplist);
- unsigned long offset;
- int ret;
- size_t size;
- u64 csum_end;
- u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- if (search_commit) {
- path->skip_locking = 1;
- path->reada = 2;
- path->search_commit_root = 1;
- }
-
- key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
- key.offset = start;
- key.type = BTRFS_EXTENT_CSUM_KEY;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto fail;
- if (ret > 0 && path->slots[0] > 0) {
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
- if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
- key.type == BTRFS_EXTENT_CSUM_KEY) {
- offset = (start - key.offset) >>
- root->fs_info->sb->s_blocksize_bits;
- if (offset * csum_size <
- btrfs_item_size_nr(leaf, path->slots[0] - 1))
- path->slots[0]--;
- }
- }
-
- while (start <= end) {
- leaf = path->nodes[0];
- if (path->slots[0] >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- goto fail;
- if (ret > 0)
- break;
- leaf = path->nodes[0];
- }
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
- key.type != BTRFS_EXTENT_CSUM_KEY)
- break;
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.offset > end)
- break;
-
- if (key.offset > start)
- start = key.offset;
-
- size = btrfs_item_size_nr(leaf, path->slots[0]);
- csum_end = key.offset + (size / csum_size) * root->sectorsize;
- if (csum_end <= start) {
- path->slots[0]++;
- continue;
- }
-
- csum_end = min(csum_end, end + 1);
- item = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_csum_item);
- while (start < csum_end) {
- size = min_t(size_t, csum_end - start,
- MAX_ORDERED_SUM_BYTES(root));
- sums = kzalloc(btrfs_ordered_sum_size(root, size),
- GFP_NOFS);
- if (!sums) {
- ret = -ENOMEM;
- goto fail;
- }
-
- sector_sum = sums->sums;
- sums->bytenr = start;
- sums->len = size;
-
- offset = (start - key.offset) >>
- root->fs_info->sb->s_blocksize_bits;
- offset *= csum_size;
-
- while (size > 0) {
- read_extent_buffer(path->nodes[0],
- &sector_sum->sum,
- ((unsigned long)item) +
- offset, csum_size);
- sector_sum->bytenr = start;
-
- size -= root->sectorsize;
- start += root->sectorsize;
- offset += csum_size;
- sector_sum++;
- }
- list_add_tail(&sums->list, &tmplist);
- }
- path->slots[0]++;
- }
- ret = 0;
-fail:
- while (ret < 0 && !list_empty(&tmplist)) {
- sums = list_entry(&tmplist, struct btrfs_ordered_sum, list);
- list_del(&sums->list);
- kfree(sums);
- }
- list_splice_tail(&tmplist, list);
-
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
- struct bio *bio, u64 file_start, int contig)
-{
- struct btrfs_ordered_sum *sums;
- struct btrfs_sector_sum *sector_sum;
- struct btrfs_ordered_extent *ordered;
- char *data;
- struct bio_vec *bvec = bio->bi_io_vec;
- int bio_index = 0;
- unsigned long total_bytes = 0;
- unsigned long this_sum_bytes = 0;
- u64 offset;
- u64 disk_bytenr;
-
- WARN_ON(bio->bi_vcnt <= 0);
- sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
- if (!sums)
- return -ENOMEM;
-
- sector_sum = sums->sums;
- disk_bytenr = (u64)bio->bi_sector << 9;
- sums->len = bio->bi_size;
- INIT_LIST_HEAD(&sums->list);
-
- if (contig)
- offset = file_start;
- else
- offset = page_offset(bvec->bv_page) + bvec->bv_offset;
-
- ordered = btrfs_lookup_ordered_extent(inode, offset);
- BUG_ON(!ordered); /* Logic error */
- sums->bytenr = ordered->start;
-
- while (bio_index < bio->bi_vcnt) {
- if (!contig)
- offset = page_offset(bvec->bv_page) + bvec->bv_offset;
-
- if (!contig && (offset >= ordered->file_offset + ordered->len ||
- offset < ordered->file_offset)) {
- unsigned long bytes_left;
- sums->len = this_sum_bytes;
- this_sum_bytes = 0;
- btrfs_add_ordered_sum(inode, ordered, sums);
- btrfs_put_ordered_extent(ordered);
-
- bytes_left = bio->bi_size - total_bytes;
-
- sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
- GFP_NOFS);
- BUG_ON(!sums); /* -ENOMEM */
- sector_sum = sums->sums;
- sums->len = bytes_left;
- ordered = btrfs_lookup_ordered_extent(inode, offset);
- BUG_ON(!ordered); /* Logic error */
- sums->bytenr = ordered->start;
- }
-
- data = kmap_atomic(bvec->bv_page);
- sector_sum->sum = ~(u32)0;
- sector_sum->sum = btrfs_csum_data(root,
- data + bvec->bv_offset,
- sector_sum->sum,
- bvec->bv_len);
- kunmap_atomic(data);
- btrfs_csum_final(sector_sum->sum,
- (char *)&sector_sum->sum);
- sector_sum->bytenr = disk_bytenr;
-
- sector_sum++;
- bio_index++;
- total_bytes += bvec->bv_len;
- this_sum_bytes += bvec->bv_len;
- disk_bytenr += bvec->bv_len;
- offset += bvec->bv_len;
- bvec++;
- }
- this_sum_bytes = 0;
- btrfs_add_ordered_sum(inode, ordered, sums);
- btrfs_put_ordered_extent(ordered);
- return 0;
-}
-
-/*
- * helper function for csum removal, this expects the
- * key to describe the csum pointed to by the path, and it expects
- * the csum to overlap the range [bytenr, len]
- *
- * The csum should not be entirely contained in the range and the
- * range should not be entirely contained in the csum.
- *
- * This calls btrfs_truncate_item with the correct args based on the
- * overlap, and fixes up the key as required.
- */
-static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *key,
- u64 bytenr, u64 len)
-{
- struct extent_buffer *leaf;
- u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
- u64 csum_end;
- u64 end_byte = bytenr + len;
- u32 blocksize_bits = root->fs_info->sb->s_blocksize_bits;
-
- leaf = path->nodes[0];
- csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
- csum_end <<= root->fs_info->sb->s_blocksize_bits;
- csum_end += key->offset;
-
- if (key->offset < bytenr && csum_end <= end_byte) {
- /*
- * [ bytenr - len ]
- * [ ]
- * [csum ]
- * A simple truncate off the end of the item
- */
- u32 new_size = (bytenr - key->offset) >> blocksize_bits;
- new_size *= csum_size;
- btrfs_truncate_item(trans, root, path, new_size, 1);
- } else if (key->offset >= bytenr && csum_end > end_byte &&
- end_byte > key->offset) {
- /*
- * [ bytenr - len ]
- * [ ]
- * [csum ]
- * we need to truncate from the beginning of the csum
- */
- u32 new_size = (csum_end - end_byte) >> blocksize_bits;
- new_size *= csum_size;
-
- btrfs_truncate_item(trans, root, path, new_size, 0);
-
- key->offset = end_byte;
- btrfs_set_item_key_safe(trans, root, path, key);
- } else {
- BUG();
- }
-}
-
-/*
- * deletes the csum items from the csum tree for a given
- * range of bytes.
- */
-int btrfs_del_csums(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytenr, u64 len)
-{
- struct btrfs_path *path;
- struct btrfs_key key;
- u64 end_byte = bytenr + len;
- u64 csum_end;
- struct extent_buffer *leaf;
- int ret;
- u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
- int blocksize_bits = root->fs_info->sb->s_blocksize_bits;
-
- root = root->fs_info->csum_root;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- while (1) {
- key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
- key.offset = end_byte - 1;
- key.type = BTRFS_EXTENT_CSUM_KEY;
-
- path->leave_spinning = 1;
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret > 0) {
- if (path->slots[0] == 0)
- break;
- path->slots[0]--;
- } else if (ret < 0) {
- break;
- }
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-
- if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
- key.type != BTRFS_EXTENT_CSUM_KEY) {
- break;
- }
-
- if (key.offset >= end_byte)
- break;
-
- csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
- csum_end <<= blocksize_bits;
- csum_end += key.offset;
-
- /* this csum ends before we start, we're done */
- if (csum_end <= bytenr)
- break;
-
- /* delete the entire item, it is inside our range */
- if (key.offset >= bytenr && csum_end <= end_byte) {
- ret = btrfs_del_item(trans, root, path);
- if (ret)
- goto out;
- if (key.offset == bytenr)
- break;
- } else if (key.offset < bytenr && csum_end > end_byte) {
- unsigned long offset;
- unsigned long shift_len;
- unsigned long item_offset;
- /*
- * [ bytenr - len ]
- * [csum ]
- *
- * Our bytes are in the middle of the csum,
- * we need to split this item and insert a new one.
- *
- * But we can't drop the path because the
- * csum could change, get removed, extended etc.
- *
- * The trick here is the max size of a csum item leaves
- * enough room in the tree block for a single
- * item header. So, we split the item in place,
- * adding a new header pointing to the existing
- * bytes. Then we loop around again and we have
- * a nicely formed csum item that we can neatly
- * truncate.
- */
- offset = (bytenr - key.offset) >> blocksize_bits;
- offset *= csum_size;
-
- shift_len = (len >> blocksize_bits) * csum_size;
-
- item_offset = btrfs_item_ptr_offset(leaf,
- path->slots[0]);
-
- memset_extent_buffer(leaf, 0, item_offset + offset,
- shift_len);
- key.offset = bytenr;
-
- /*
- * btrfs_split_item returns -EAGAIN when the
- * item changed size or key
- */
- ret = btrfs_split_item(trans, root, path, &key, offset);
- if (ret && ret != -EAGAIN) {
- btrfs_abort_transaction(trans, root, ret);
- goto out;
- }
-
- key.offset = end_byte - 1;
- } else {
- truncate_one_csum(trans, root, path, &key, bytenr, len);
- if (key.offset < bytenr)
- break;
- }
- btrfs_release_path(path);
- }
- ret = 0;
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_ordered_sum *sums)
-{
- u64 bytenr;
- int ret;
- struct btrfs_key file_key;
- struct btrfs_key found_key;
- u64 next_offset;
- u64 total_bytes = 0;
- int found_next;
- struct btrfs_path *path;
- struct btrfs_csum_item *item;
- struct btrfs_csum_item *item_end;
- struct extent_buffer *leaf = NULL;
- u64 csum_offset;
- struct btrfs_sector_sum *sector_sum;
- u32 nritems;
- u32 ins_size;
- u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- sector_sum = sums->sums;
-again:
- next_offset = (u64)-1;
- found_next = 0;
- file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
- file_key.offset = sector_sum->bytenr;
- bytenr = sector_sum->bytenr;
- btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
-
- item = btrfs_lookup_csum(trans, root, path, sector_sum->bytenr, 1);
- if (!IS_ERR(item)) {
- leaf = path->nodes[0];
- ret = 0;
- goto found;
- }
- ret = PTR_ERR(item);
- if (ret != -EFBIG && ret != -ENOENT)
- goto fail_unlock;
-
- if (ret == -EFBIG) {
- u32 item_size;
- /* we found one, but it isn't big enough yet */
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
- if ((item_size / csum_size) >=
- MAX_CSUM_ITEMS(root, csum_size)) {
- /* already at max size, make a new one */
- goto insert;
- }
- } else {
- int slot = path->slots[0] + 1;
- /* we didn't find a csum item, insert one */
- nritems = btrfs_header_nritems(path->nodes[0]);
- if (path->slots[0] >= nritems - 1) {
- ret = btrfs_next_leaf(root, path);
- if (ret == 1)
- found_next = 1;
- if (ret != 0)
- goto insert;
- slot = 0;
- }
- btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
- if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
- found_key.type != BTRFS_EXTENT_CSUM_KEY) {
- found_next = 1;
- goto insert;
- }
- next_offset = found_key.offset;
- found_next = 1;
- goto insert;
- }
-
- /*
- * at this point, we know the tree has an item, but it isn't big
- * enough yet to put our csum in. Grow it
- */
- btrfs_release_path(path);
- ret = btrfs_search_slot(trans, root, &file_key, path,
- csum_size, 1);
- if (ret < 0)
- goto fail_unlock;
-
- if (ret > 0) {
- if (path->slots[0] == 0)
- goto insert;
- path->slots[0]--;
- }
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- csum_offset = (bytenr - found_key.offset) >>
- root->fs_info->sb->s_blocksize_bits;
-
- if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY ||
- found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
- csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) {
- goto insert;
- }
-
- if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) /
- csum_size) {
- u32 diff = (csum_offset + 1) * csum_size;
-
- /*
- * is the item big enough already? we dropped our lock
- * before and need to recheck
- */
- if (diff < btrfs_item_size_nr(leaf, path->slots[0]))
- goto csum;
-
- diff = diff - btrfs_item_size_nr(leaf, path->slots[0]);
- if (diff != csum_size)
- goto insert;
-
- btrfs_extend_item(trans, root, path, diff);
- goto csum;
- }
-
-insert:
- btrfs_release_path(path);
- csum_offset = 0;
- if (found_next) {
- u64 tmp = total_bytes + root->sectorsize;
- u64 next_sector = sector_sum->bytenr;
- struct btrfs_sector_sum *next = sector_sum + 1;
-
- while (tmp < sums->len) {
- if (next_sector + root->sectorsize != next->bytenr)
- break;
- tmp += root->sectorsize;
- next_sector = next->bytenr;
- next++;
- }
- tmp = min(tmp, next_offset - file_key.offset);
- tmp >>= root->fs_info->sb->s_blocksize_bits;
- tmp = max((u64)1, tmp);
- tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size));
- ins_size = csum_size * tmp;
- } else {
- ins_size = csum_size;
- }
- path->leave_spinning = 1;
- ret = btrfs_insert_empty_item(trans, root, path, &file_key,
- ins_size);
- path->leave_spinning = 0;
- if (ret < 0)
- goto fail_unlock;
- if (ret != 0) {
- WARN_ON(1);
- goto fail_unlock;
- }
-csum:
- leaf = path->nodes[0];
- item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
- ret = 0;
- item = (struct btrfs_csum_item *)((unsigned char *)item +
- csum_offset * csum_size);
-found:
- item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
- item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
- btrfs_item_size_nr(leaf, path->slots[0]));
-next_sector:
-
- write_extent_buffer(leaf, &sector_sum->sum, (unsigned long)item, csum_size);
-
- total_bytes += root->sectorsize;
- sector_sum++;
- if (total_bytes < sums->len) {
- item = (struct btrfs_csum_item *)((char *)item +
- csum_size);
- if (item < item_end && bytenr + PAGE_CACHE_SIZE ==
- sector_sum->bytenr) {
- bytenr = sector_sum->bytenr;
- goto next_sector;
- }
- }
-
- btrfs_mark_buffer_dirty(path->nodes[0]);
- if (total_bytes < sums->len) {
- btrfs_release_path(path);
- cond_resched();
- goto again;
- }
-out:
- btrfs_free_path(path);
- return ret;
-
-fail_unlock:
- goto out;
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/file.c b/ANDROID_3.4.5/fs/btrfs/file.c
deleted file mode 100644
index 53bf2d76..00000000
--- a/ANDROID_3.4.5/fs/btrfs/file.c
+++ /dev/null
@@ -1,1908 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-#include <linux/highmem.h>
-#include <linux/time.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/backing-dev.h>
-#include <linux/mpage.h>
-#include <linux/falloc.h>
-#include <linux/swap.h>
-#include <linux/writeback.h>
-#include <linux/statfs.h>
-#include <linux/compat.h>
-#include <linux/slab.h>
-#include "ctree.h"
-#include "disk-io.h"
-#include "transaction.h"
-#include "btrfs_inode.h"
-#include "ioctl.h"
-#include "print-tree.h"
-#include "tree-log.h"
-#include "locking.h"
-#include "compat.h"
-
-/*
- * when auto defrag is enabled we
- * queue up these defrag structs to remember which
- * inodes need defragging passes
- */
-struct inode_defrag {
- struct rb_node rb_node;
- /* objectid */
- u64 ino;
- /*
- * transid where the defrag was added, we search for
- * extents newer than this
- */
- u64 transid;
-
- /* root objectid */
- u64 root;
-
- /* last offset we were able to defrag */
- u64 last_offset;
-
- /* if we've wrapped around back to zero once already */
- int cycled;
-};
-
-/* pop a record for an inode into the defrag tree. The lock
- * must be held already
- *
- * If you're inserting a record for an older transid than an
- * existing record, the transid already in the tree is lowered
- *
- * If an existing record is found the defrag item you
- * pass in is freed
- */
-static void __btrfs_add_inode_defrag(struct inode *inode,
- struct inode_defrag *defrag)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct inode_defrag *entry;
- struct rb_node **p;
- struct rb_node *parent = NULL;
-
- p = &root->fs_info->defrag_inodes.rb_node;
- while (*p) {
- parent = *p;
- entry = rb_entry(parent, struct inode_defrag, rb_node);
-
- if (defrag->ino < entry->ino)
- p = &parent->rb_left;
- else if (defrag->ino > entry->ino)
- p = &parent->rb_right;
- else {
- /* if we're reinserting an entry for
- * an old defrag run, make sure to
- * lower the transid of our existing record
- */
- if (defrag->transid < entry->transid)
- entry->transid = defrag->transid;
- if (defrag->last_offset > entry->last_offset)
- entry->last_offset = defrag->last_offset;
- goto exists;
- }
- }
- BTRFS_I(inode)->in_defrag = 1;
- rb_link_node(&defrag->rb_node, parent, p);
- rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
- return;
-
-exists:
- kfree(defrag);
- return;
-
-}
-
-/*
- * insert a defrag record for this inode if auto defrag is
- * enabled
- */
-int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
- struct inode *inode)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct inode_defrag *defrag;
- u64 transid;
-
- if (!btrfs_test_opt(root, AUTO_DEFRAG))
- return 0;
-
- if (btrfs_fs_closing(root->fs_info))
- return 0;
-
- if (BTRFS_I(inode)->in_defrag)
- return 0;
-
- if (trans)
- transid = trans->transid;
- else
- transid = BTRFS_I(inode)->root->last_trans;
-
- defrag = kzalloc(sizeof(*defrag), GFP_NOFS);
- if (!defrag)
- return -ENOMEM;
-
- defrag->ino = btrfs_ino(inode);
- defrag->transid = transid;
- defrag->root = root->root_key.objectid;
-
- spin_lock(&root->fs_info->defrag_inodes_lock);
- if (!BTRFS_I(inode)->in_defrag)
- __btrfs_add_inode_defrag(inode, defrag);
- else
- kfree(defrag);
- spin_unlock(&root->fs_info->defrag_inodes_lock);
- return 0;
-}
-
-/*
- * must be called with the defrag_inodes lock held
- */
-struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino,
- struct rb_node **next)
-{
- struct inode_defrag *entry = NULL;
- struct rb_node *p;
- struct rb_node *parent = NULL;
-
- p = info->defrag_inodes.rb_node;
- while (p) {
- parent = p;
- entry = rb_entry(parent, struct inode_defrag, rb_node);
-
- if (ino < entry->ino)
- p = parent->rb_left;
- else if (ino > entry->ino)
- p = parent->rb_right;
- else
- return entry;
- }
-
- if (next) {
- while (parent && ino > entry->ino) {
- parent = rb_next(parent);
- entry = rb_entry(parent, struct inode_defrag, rb_node);
- }
- *next = parent;
- }
- return NULL;
-}
-
-/*
- * run through the list of inodes in the FS that need
- * defragging
- */
-int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
-{
- struct inode_defrag *defrag;
- struct btrfs_root *inode_root;
- struct inode *inode;
- struct rb_node *n;
- struct btrfs_key key;
- struct btrfs_ioctl_defrag_range_args range;
- u64 first_ino = 0;
- int num_defrag;
- int defrag_batch = 1024;
-
- memset(&range, 0, sizeof(range));
- range.len = (u64)-1;
-
- atomic_inc(&fs_info->defrag_running);
- spin_lock(&fs_info->defrag_inodes_lock);
-	while (1) {
- n = NULL;
-
- /* find an inode to defrag */
- defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n);
- if (!defrag) {
- if (n)
- defrag = rb_entry(n, struct inode_defrag, rb_node);
- else if (first_ino) {
- first_ino = 0;
- continue;
- } else {
- break;
- }
- }
-
- /* remove it from the rbtree */
- first_ino = defrag->ino + 1;
- rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
-
- if (btrfs_fs_closing(fs_info))
- goto next_free;
-
- spin_unlock(&fs_info->defrag_inodes_lock);
-
- /* get the inode */
- key.objectid = defrag->root;
- btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
- key.offset = (u64)-1;
- inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
- if (IS_ERR(inode_root))
- goto next;
-
- key.objectid = defrag->ino;
- btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
- key.offset = 0;
-
- inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
- if (IS_ERR(inode))
- goto next;
-
- /* do a chunk of defrag */
- BTRFS_I(inode)->in_defrag = 0;
- range.start = defrag->last_offset;
- num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
- defrag_batch);
- /*
- * if we filled the whole defrag batch, there
- * must be more work to do. Queue this defrag
- * again
- */
- if (num_defrag == defrag_batch) {
- defrag->last_offset = range.start;
- __btrfs_add_inode_defrag(inode, defrag);
- /*
- * we don't want to kfree defrag, we added it back to
- * the rbtree
- */
- defrag = NULL;
- } else if (defrag->last_offset && !defrag->cycled) {
- /*
- * we didn't fill our defrag batch, but
- * we didn't start at zero. Make sure we loop
- * around to the start of the file.
- */
- defrag->last_offset = 0;
- defrag->cycled = 1;
- __btrfs_add_inode_defrag(inode, defrag);
- defrag = NULL;
- }
-
- iput(inode);
-next:
- spin_lock(&fs_info->defrag_inodes_lock);
-next_free:
- kfree(defrag);
- }
- spin_unlock(&fs_info->defrag_inodes_lock);
-
- atomic_dec(&fs_info->defrag_running);
-
- /*
- * during unmount, we use the transaction_wait queue to
- * wait for the defragger to stop
- */
- wake_up(&fs_info->transaction_wait);
- return 0;
-}
-
-/* simple helper to fault in pages and copy. This should go away
- * and be replaced with calls into generic code.
- */
-static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
- size_t write_bytes,
- struct page **prepared_pages,
- struct iov_iter *i)
-{
- size_t copied = 0;
- size_t total_copied = 0;
- int pg = 0;
- int offset = pos & (PAGE_CACHE_SIZE - 1);
-
- while (write_bytes > 0) {
- size_t count = min_t(size_t,
- PAGE_CACHE_SIZE - offset, write_bytes);
- struct page *page = prepared_pages[pg];
- /*
- * Copy data from userspace to the current page
- *
- * Disable pagefault to avoid recursive lock since
- * the pages are already locked
- */
- pagefault_disable();
- copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
- pagefault_enable();
-
- /* Flush processor's dcache for this page */
- flush_dcache_page(page);
-
- /*
- * if we get a partial write, we can end up with
- * partially up to date pages. These add
- * a lot of complexity, so make sure they don't
- * happen by forcing this copy to be retried.
- *
- * The rest of the btrfs_file_write code will fall
- * back to page at a time copies after we return 0.
- */
- if (!PageUptodate(page) && copied < count)
- copied = 0;
-
- iov_iter_advance(i, copied);
- write_bytes -= copied;
- total_copied += copied;
-
- /* Return to btrfs_file_aio_write to fault page */
- if (unlikely(copied == 0))
- break;
-
- if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
- offset += copied;
- } else {
- pg++;
- offset = 0;
- }
- }
- return total_copied;
-}
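
The helper above walks the destination pages in order, copying at most one page per iteration, with only the first chunk allowed to start mid-page. Below is a rough userspace sketch of that split (not part of the original file; walk_write and PAGE_SIZE_EXAMPLE are made-up names, PAGE_SIZE_EXAMPLE stands in for PAGE_CACHE_SIZE, and every chunk is assumed to copy in full):

#include <stdio.h>

#define PAGE_SIZE_EXAMPLE 4096UL	/* stands in for PAGE_CACHE_SIZE */

/* print the per-page chunks a write of write_bytes at file position pos
 * would be split into, assuming every chunk copies in full */
static void walk_write(unsigned long pos, unsigned long write_bytes)
{
	unsigned long offset = pos & (PAGE_SIZE_EXAMPLE - 1);
	int pg = 0;

	while (write_bytes > 0) {
		unsigned long count = PAGE_SIZE_EXAMPLE - offset;

		if (count > write_bytes)
			count = write_bytes;

		printf("page %d: offset %lu, %lu bytes\n", pg, offset, count);

		write_bytes -= count;
		pg++;
		offset = 0;		/* only the first chunk starts mid-page */
	}
}

int main(void)
{
	/* a 10000-byte write starting 100 bytes into a page spans three
	 * pages: 3996 + 4096 + 1908 bytes */
	walk_write(100, 10000);
	return 0;
}
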
-
-/*
- * unlocks pages after btrfs_file_write is done with them
- */
-void btrfs_drop_pages(struct page **pages, size_t num_pages)
-{
- size_t i;
- for (i = 0; i < num_pages; i++) {
-		/* PageChecked is some magic around finding pages that
-		 * have been modified without going through btrfs_set_page_dirty;
-		 * clear it here
-		 */
- ClearPageChecked(pages[i]);
- unlock_page(pages[i]);
- mark_page_accessed(pages[i]);
- page_cache_release(pages[i]);
- }
-}
-
-/*
- * after copy_from_user, pages need to be dirtied and we need to make
- * sure holes are created between the current EOF and the start of
- * any next extents (if required).
- *
- * this also makes the decision about creating an inline extent vs
- * doing real data extents, marking pages dirty and delalloc as required.
- */
-int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
- struct page **pages, size_t num_pages,
- loff_t pos, size_t write_bytes,
- struct extent_state **cached)
-{
- int err = 0;
- int i;
- u64 num_bytes;
- u64 start_pos;
- u64 end_of_last_block;
- u64 end_pos = pos + write_bytes;
- loff_t isize = i_size_read(inode);
-
- start_pos = pos & ~((u64)root->sectorsize - 1);
- num_bytes = (write_bytes + pos - start_pos +
- root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
-
- end_of_last_block = start_pos + num_bytes - 1;
- err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
- cached);
- if (err)
- return err;
-
- for (i = 0; i < num_pages; i++) {
- struct page *p = pages[i];
- SetPageUptodate(p);
- ClearPageChecked(p);
- set_page_dirty(p);
- }
-
- /*
- * we've only changed i_size in ram, and we haven't updated
- * the disk i_size. There is no need to log the inode
- * at this time.
- */
- if (end_pos > isize)
- i_size_write(inode, end_pos);
- return 0;
-}
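
For reference, the rounding in btrfs_dirty_pages() expands the byte range [pos, pos + write_bytes) outward to whole sectors before setting delalloc. A small standalone sketch of the same power-of-two arithmetic follows (illustrative only; the 4096-byte sectorsize and the numeric inputs are arbitrary example values):

#include <stdio.h>

int main(void)
{
	unsigned long long sectorsize = 4096;	/* assumed sector size */
	unsigned long long pos = 5000, write_bytes = 3000;

	/* round the start down and the length up to sector boundaries,
	 * mirroring the computation in btrfs_dirty_pages above */
	unsigned long long start_pos = pos & ~(sectorsize - 1);
	unsigned long long num_bytes = (write_bytes + pos - start_pos +
					sectorsize - 1) & ~(sectorsize - 1);

	/* prints start_pos=4096 num_bytes=4096, i.e. [4096, 8192), which
	 * contains the written range [5000, 8000) */
	printf("start_pos=%llu num_bytes=%llu\n", start_pos, num_bytes);
	return 0;
}
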
-
-/*
- * this drops all the extents in the cache that intersect the range
- * [start, end]. Existing extents are split as required.
- */
-int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
- int skip_pinned)
-{
- struct extent_map *em;
- struct extent_map *split = NULL;
- struct extent_map *split2 = NULL;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- u64 len = end - start + 1;
- int ret;
- int testend = 1;
- unsigned long flags;
- int compressed = 0;
-
- WARN_ON(end < start);
- if (end == (u64)-1) {
- len = (u64)-1;
- testend = 0;
- }
- while (1) {
- if (!split)
- split = alloc_extent_map();
- if (!split2)
- split2 = alloc_extent_map();
- BUG_ON(!split || !split2); /* -ENOMEM */
-
- write_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, start, len);
- if (!em) {
- write_unlock(&em_tree->lock);
- break;
- }
- flags = em->flags;
- if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
- if (testend && em->start + em->len >= start + len) {
- free_extent_map(em);
- write_unlock(&em_tree->lock);
- break;
- }
- start = em->start + em->len;
- if (testend)
- len = start + len - (em->start + em->len);
- free_extent_map(em);
- write_unlock(&em_tree->lock);
- continue;
- }
- compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
- clear_bit(EXTENT_FLAG_PINNED, &em->flags);
- remove_extent_mapping(em_tree, em);
-
- if (em->block_start < EXTENT_MAP_LAST_BYTE &&
- em->start < start) {
- split->start = em->start;
- split->len = start - em->start;
- split->orig_start = em->orig_start;
- split->block_start = em->block_start;
-
- if (compressed)
- split->block_len = em->block_len;
- else
- split->block_len = split->len;
-
- split->bdev = em->bdev;
- split->flags = flags;
- split->compress_type = em->compress_type;
- ret = add_extent_mapping(em_tree, split);
- BUG_ON(ret); /* Logic error */
- free_extent_map(split);
- split = split2;
- split2 = NULL;
- }
- if (em->block_start < EXTENT_MAP_LAST_BYTE &&
- testend && em->start + em->len > start + len) {
- u64 diff = start + len - em->start;
-
- split->start = start + len;
- split->len = em->start + em->len - (start + len);
- split->bdev = em->bdev;
- split->flags = flags;
- split->compress_type = em->compress_type;
-
- if (compressed) {
- split->block_len = em->block_len;
- split->block_start = em->block_start;
- split->orig_start = em->orig_start;
- } else {
- split->block_len = split->len;
- split->block_start = em->block_start + diff;
- split->orig_start = split->start;
- }
-
- ret = add_extent_mapping(em_tree, split);
- BUG_ON(ret); /* Logic error */
- free_extent_map(split);
- split = NULL;
- }
- write_unlock(&em_tree->lock);
-
- /* once for us */
- free_extent_map(em);
-	/* once for the tree */
- free_extent_map(em);
- }
- if (split)
- free_extent_map(split);
- if (split2)
- free_extent_map(split2);
- return 0;
-}
-
-/*
- * this is very complex, but the basic idea is to drop all extents
- * in the range start - end. hint_byte is filled in with a block number
- * that would be a good hint to the block allocator for this file.
- *
- * If an extent intersects the range but is not entirely inside the range
- * it is either truncated or split. Anything entirely inside the range
- * is deleted from the tree.
- */
-int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
- u64 start, u64 end, u64 *hint_byte, int drop_cache)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_buffer *leaf;
- struct btrfs_file_extent_item *fi;
- struct btrfs_path *path;
- struct btrfs_key key;
- struct btrfs_key new_key;
- u64 ino = btrfs_ino(inode);
- u64 search_start = start;
- u64 disk_bytenr = 0;
- u64 num_bytes = 0;
- u64 extent_offset = 0;
- u64 extent_end = 0;
- int del_nr = 0;
- int del_slot = 0;
- int extent_type;
- int recow;
- int ret;
- int modify_tree = -1;
-
- if (drop_cache)
- btrfs_drop_extent_cache(inode, start, end - 1, 0);
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- if (start >= BTRFS_I(inode)->disk_i_size)
- modify_tree = 0;
-
- while (1) {
- recow = 0;
- ret = btrfs_lookup_file_extent(trans, root, path, ino,
- search_start, modify_tree);
- if (ret < 0)
- break;
- if (ret > 0 && path->slots[0] > 0 && search_start == start) {
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
- if (key.objectid == ino &&
- key.type == BTRFS_EXTENT_DATA_KEY)
- path->slots[0]--;
- }
- ret = 0;
-next_slot:
- leaf = path->nodes[0];
- if (path->slots[0] >= btrfs_header_nritems(leaf)) {
- BUG_ON(del_nr > 0);
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- break;
- if (ret > 0) {
- ret = 0;
- break;
- }
- leaf = path->nodes[0];
- recow = 1;
- }
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.objectid > ino ||
- key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
- break;
-
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- extent_type = btrfs_file_extent_type(leaf, fi);
-
- if (extent_type == BTRFS_FILE_EXTENT_REG ||
- extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
- disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
- num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
- extent_offset = btrfs_file_extent_offset(leaf, fi);
- extent_end = key.offset +
- btrfs_file_extent_num_bytes(leaf, fi);
- } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
- extent_end = key.offset +
- btrfs_file_extent_inline_len(leaf, fi);
- } else {
- WARN_ON(1);
- extent_end = search_start;
- }
-
- if (extent_end <= search_start) {
- path->slots[0]++;
- goto next_slot;
- }
-
- search_start = max(key.offset, start);
- if (recow || !modify_tree) {
- modify_tree = -1;
- btrfs_release_path(path);
- continue;
- }
-
- /*
- * | - range to drop - |
- * | -------- extent -------- |
- */
- if (start > key.offset && end < extent_end) {
- BUG_ON(del_nr > 0);
- BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
-
- memcpy(&new_key, &key, sizeof(new_key));
- new_key.offset = start;
- ret = btrfs_duplicate_item(trans, root, path,
- &new_key);
- if (ret == -EAGAIN) {
- btrfs_release_path(path);
- continue;
- }
- if (ret < 0)
- break;
-
- leaf = path->nodes[0];
- fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_num_bytes(leaf, fi,
- start - key.offset);
-
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
-
- extent_offset += start - key.offset;
- btrfs_set_file_extent_offset(leaf, fi, extent_offset);
- btrfs_set_file_extent_num_bytes(leaf, fi,
- extent_end - start);
- btrfs_mark_buffer_dirty(leaf);
-
- if (disk_bytenr > 0) {
- ret = btrfs_inc_extent_ref(trans, root,
- disk_bytenr, num_bytes, 0,
- root->root_key.objectid,
- new_key.objectid,
- start - extent_offset, 0);
- BUG_ON(ret); /* -ENOMEM */
- *hint_byte = disk_bytenr;
- }
- key.offset = start;
- }
- /*
- * | ---- range to drop ----- |
- * | -------- extent -------- |
- */
- if (start <= key.offset && end < extent_end) {
- BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
-
- memcpy(&new_key, &key, sizeof(new_key));
- new_key.offset = end;
- btrfs_set_item_key_safe(trans, root, path, &new_key);
-
- extent_offset += end - key.offset;
- btrfs_set_file_extent_offset(leaf, fi, extent_offset);
- btrfs_set_file_extent_num_bytes(leaf, fi,
- extent_end - end);
- btrfs_mark_buffer_dirty(leaf);
- if (disk_bytenr > 0) {
- inode_sub_bytes(inode, end - key.offset);
- *hint_byte = disk_bytenr;
- }
- break;
- }
-
- search_start = extent_end;
- /*
- * | ---- range to drop ----- |
- * | -------- extent -------- |
- */
- if (start > key.offset && end >= extent_end) {
- BUG_ON(del_nr > 0);
- BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
-
- btrfs_set_file_extent_num_bytes(leaf, fi,
- start - key.offset);
- btrfs_mark_buffer_dirty(leaf);
- if (disk_bytenr > 0) {
- inode_sub_bytes(inode, extent_end - start);
- *hint_byte = disk_bytenr;
- }
- if (end == extent_end)
- break;
-
- path->slots[0]++;
- goto next_slot;
- }
-
- /*
- * | ---- range to drop ----- |
- * | ------ extent ------ |
- */
- if (start <= key.offset && end >= extent_end) {
- if (del_nr == 0) {
- del_slot = path->slots[0];
- del_nr = 1;
- } else {
- BUG_ON(del_slot + del_nr != path->slots[0]);
- del_nr++;
- }
-
- if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
- inode_sub_bytes(inode,
- extent_end - key.offset);
- extent_end = ALIGN(extent_end,
- root->sectorsize);
- } else if (disk_bytenr > 0) {
- ret = btrfs_free_extent(trans, root,
- disk_bytenr, num_bytes, 0,
- root->root_key.objectid,
- key.objectid, key.offset -
- extent_offset, 0);
- BUG_ON(ret); /* -ENOMEM */
- inode_sub_bytes(inode,
- extent_end - key.offset);
- *hint_byte = disk_bytenr;
- }
-
- if (end == extent_end)
- break;
-
- if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
- path->slots[0]++;
- goto next_slot;
- }
-
- ret = btrfs_del_items(trans, root, path, del_slot,
- del_nr);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto out;
- }
-
- del_nr = 0;
- del_slot = 0;
-
- btrfs_release_path(path);
- continue;
- }
-
- BUG_ON(1);
- }
-
- if (!ret && del_nr > 0) {
- ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
- if (ret)
- btrfs_abort_transaction(trans, root, ret);
- }
-
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-static int extent_mergeable(struct extent_buffer *leaf, int slot,
- u64 objectid, u64 bytenr, u64 orig_offset,
- u64 *start, u64 *end)
-{
- struct btrfs_file_extent_item *fi;
- struct btrfs_key key;
- u64 extent_end;
-
- if (slot < 0 || slot >= btrfs_header_nritems(leaf))
- return 0;
-
- btrfs_item_key_to_cpu(leaf, &key, slot);
- if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
- return 0;
-
- fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
- if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
- btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
- btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
- btrfs_file_extent_compression(leaf, fi) ||
- btrfs_file_extent_encryption(leaf, fi) ||
- btrfs_file_extent_other_encoding(leaf, fi))
- return 0;
-
- extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
- if ((*start && *start != key.offset) || (*end && *end != extent_end))
- return 0;
-
- *start = key.offset;
- *end = extent_end;
- return 1;
-}
-
-/*
- * Mark extent in the range start - end as written.
- *
- * This changes the extent type from 'pre-allocated' to 'regular'. If only
- * part of the extent is marked as written, the extent will be split into
- * two or three.
- */
-int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
- struct inode *inode, u64 start, u64 end)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_buffer *leaf;
- struct btrfs_path *path;
- struct btrfs_file_extent_item *fi;
- struct btrfs_key key;
- struct btrfs_key new_key;
- u64 bytenr;
- u64 num_bytes;
- u64 extent_end;
- u64 orig_offset;
- u64 other_start;
- u64 other_end;
- u64 split;
- int del_nr = 0;
- int del_slot = 0;
- int recow;
- int ret;
- u64 ino = btrfs_ino(inode);
-
- btrfs_drop_extent_cache(inode, start, end - 1, 0);
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-again:
- recow = 0;
- split = start;
- key.objectid = ino;
- key.type = BTRFS_EXTENT_DATA_KEY;
- key.offset = split;
-
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0)
- goto out;
- if (ret > 0 && path->slots[0] > 0)
- path->slots[0]--;
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY);
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- BUG_ON(btrfs_file_extent_type(leaf, fi) !=
- BTRFS_FILE_EXTENT_PREALLOC);
- extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
- BUG_ON(key.offset > start || extent_end < end);
-
- bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
- num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
- orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
- memcpy(&new_key, &key, sizeof(new_key));
-
- if (start == key.offset && end < extent_end) {
- other_start = 0;
- other_end = start;
- if (extent_mergeable(leaf, path->slots[0] - 1,
- ino, bytenr, orig_offset,
- &other_start, &other_end)) {
- new_key.offset = end;
- btrfs_set_item_key_safe(trans, root, path, &new_key);
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_num_bytes(leaf, fi,
- extent_end - end);
- btrfs_set_file_extent_offset(leaf, fi,
- end - orig_offset);
- fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_num_bytes(leaf, fi,
- end - other_start);
- btrfs_mark_buffer_dirty(leaf);
- goto out;
- }
- }
-
- if (start > key.offset && end == extent_end) {
- other_start = end;
- other_end = 0;
- if (extent_mergeable(leaf, path->slots[0] + 1,
- ino, bytenr, orig_offset,
- &other_start, &other_end)) {
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_num_bytes(leaf, fi,
- start - key.offset);
- path->slots[0]++;
- new_key.offset = start;
- btrfs_set_item_key_safe(trans, root, path, &new_key);
-
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_num_bytes(leaf, fi,
- other_end - start);
- btrfs_set_file_extent_offset(leaf, fi,
- start - orig_offset);
- btrfs_mark_buffer_dirty(leaf);
- goto out;
- }
- }
-
- while (start > key.offset || end < extent_end) {
- if (key.offset == start)
- split = end;
-
- new_key.offset = split;
- ret = btrfs_duplicate_item(trans, root, path, &new_key);
- if (ret == -EAGAIN) {
- btrfs_release_path(path);
- goto again;
- }
- if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
- goto out;
- }
-
- leaf = path->nodes[0];
- fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_num_bytes(leaf, fi,
- split - key.offset);
-
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
-
- btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
- btrfs_set_file_extent_num_bytes(leaf, fi,
- extent_end - split);
- btrfs_mark_buffer_dirty(leaf);
-
- ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
- root->root_key.objectid,
- ino, orig_offset, 0);
- BUG_ON(ret); /* -ENOMEM */
-
- if (split == start) {
- key.offset = start;
- } else {
- BUG_ON(start != key.offset);
- path->slots[0]--;
- extent_end = end;
- }
- recow = 1;
- }
-
- other_start = end;
- other_end = 0;
- if (extent_mergeable(leaf, path->slots[0] + 1,
- ino, bytenr, orig_offset,
- &other_start, &other_end)) {
- if (recow) {
- btrfs_release_path(path);
- goto again;
- }
- extent_end = other_end;
- del_slot = path->slots[0] + 1;
- del_nr++;
- ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
- 0, root->root_key.objectid,
- ino, orig_offset, 0);
- BUG_ON(ret); /* -ENOMEM */
- }
- other_start = 0;
- other_end = start;
- if (extent_mergeable(leaf, path->slots[0] - 1,
- ino, bytenr, orig_offset,
- &other_start, &other_end)) {
- if (recow) {
- btrfs_release_path(path);
- goto again;
- }
- key.offset = other_start;
- del_slot = path->slots[0];
- del_nr++;
- ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
- 0, root->root_key.objectid,
- ino, orig_offset, 0);
- BUG_ON(ret); /* -ENOMEM */
- }
- if (del_nr == 0) {
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_type(leaf, fi,
- BTRFS_FILE_EXTENT_REG);
- btrfs_mark_buffer_dirty(leaf);
- } else {
- fi = btrfs_item_ptr(leaf, del_slot - 1,
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_type(leaf, fi,
- BTRFS_FILE_EXTENT_REG);
- btrfs_set_file_extent_num_bytes(leaf, fi,
- extent_end - key.offset);
- btrfs_mark_buffer_dirty(leaf);
-
- ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
- if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
- goto out;
- }
- }
-out:
- btrfs_free_path(path);
- return 0;
-}
-
-/*
- * on error we return an unlocked page and the error value;
- * on success we return a locked page and 0
- */
-static int prepare_uptodate_page(struct page *page, u64 pos,
- bool force_uptodate)
-{
- int ret = 0;
-
- if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) &&
- !PageUptodate(page)) {
- ret = btrfs_readpage(NULL, page);
- if (ret)
- return ret;
- lock_page(page);
- if (!PageUptodate(page)) {
- unlock_page(page);
- return -EIO;
- }
- }
- return 0;
-}
-
-/*
- * this gets pages into the page cache and locks them down; it also properly
- * waits for data=ordered extents to finish before allowing the pages to be
- * modified.
- */
-static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
- struct page **pages, size_t num_pages,
- loff_t pos, unsigned long first_index,
- size_t write_bytes, bool force_uptodate)
-{
- struct extent_state *cached_state = NULL;
- int i;
- unsigned long index = pos >> PAGE_CACHE_SHIFT;
- struct inode *inode = fdentry(file)->d_inode;
- gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
- int err = 0;
- int faili = 0;
- u64 start_pos;
- u64 last_pos;
-
- start_pos = pos & ~((u64)root->sectorsize - 1);
- last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
-
-again:
- for (i = 0; i < num_pages; i++) {
- pages[i] = find_or_create_page(inode->i_mapping, index + i,
- mask | __GFP_WRITE);
- if (!pages[i]) {
- faili = i - 1;
- err = -ENOMEM;
- goto fail;
- }
-
- if (i == 0)
- err = prepare_uptodate_page(pages[i], pos,
- force_uptodate);
- if (i == num_pages - 1)
- err = prepare_uptodate_page(pages[i],
- pos + write_bytes, false);
- if (err) {
- page_cache_release(pages[i]);
- faili = i - 1;
- goto fail;
- }
- wait_on_page_writeback(pages[i]);
- }
- err = 0;
- if (start_pos < inode->i_size) {
- struct btrfs_ordered_extent *ordered;
- lock_extent_bits(&BTRFS_I(inode)->io_tree,
- start_pos, last_pos - 1, 0, &cached_state);
- ordered = btrfs_lookup_first_ordered_extent(inode,
- last_pos - 1);
- if (ordered &&
- ordered->file_offset + ordered->len > start_pos &&
- ordered->file_offset < last_pos) {
- btrfs_put_ordered_extent(ordered);
- unlock_extent_cached(&BTRFS_I(inode)->io_tree,
- start_pos, last_pos - 1,
- &cached_state, GFP_NOFS);
- for (i = 0; i < num_pages; i++) {
- unlock_page(pages[i]);
- page_cache_release(pages[i]);
- }
- btrfs_wait_ordered_range(inode, start_pos,
- last_pos - start_pos);
- goto again;
- }
- if (ordered)
- btrfs_put_ordered_extent(ordered);
-
- clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
- last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
- GFP_NOFS);
- unlock_extent_cached(&BTRFS_I(inode)->io_tree,
- start_pos, last_pos - 1, &cached_state,
- GFP_NOFS);
- }
- for (i = 0; i < num_pages; i++) {
- if (clear_page_dirty_for_io(pages[i]))
- account_page_redirty(pages[i]);
- set_page_extent_mapped(pages[i]);
- WARN_ON(!PageLocked(pages[i]));
- }
- return 0;
-fail:
- while (faili >= 0) {
- unlock_page(pages[faili]);
- page_cache_release(pages[faili]);
- faili--;
- }
- return err;
-
-}
-
-static noinline ssize_t __btrfs_buffered_write(struct file *file,
- struct iov_iter *i,
- loff_t pos)
-{
- struct inode *inode = fdentry(file)->d_inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct page **pages = NULL;
- unsigned long first_index;
- size_t num_written = 0;
- int nrptrs;
- int ret = 0;
- bool force_page_uptodate = false;
-
- nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
- PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
- (sizeof(struct page *)));
- nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
- nrptrs = max(nrptrs, 8);
- pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
-
- first_index = pos >> PAGE_CACHE_SHIFT;
-
- while (iov_iter_count(i) > 0) {
- size_t offset = pos & (PAGE_CACHE_SIZE - 1);
- size_t write_bytes = min(iov_iter_count(i),
- nrptrs * (size_t)PAGE_CACHE_SIZE -
- offset);
- size_t num_pages = (write_bytes + offset +
- PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- size_t dirty_pages;
- size_t copied;
-
- WARN_ON(num_pages > nrptrs);
-
- /*
- * Fault pages before locking them in prepare_pages
- * to avoid recursive lock
- */
- if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
- ret = -EFAULT;
- break;
- }
-
- ret = btrfs_delalloc_reserve_space(inode,
- num_pages << PAGE_CACHE_SHIFT);
- if (ret)
- break;
-
- /*
- * This is going to setup the pages array with the number of
-		 * This is going to set up the pages array with the number of
- * contents of pages from loop to loop
- */
- ret = prepare_pages(root, file, pages, num_pages,
- pos, first_index, write_bytes,
- force_page_uptodate);
- if (ret) {
- btrfs_delalloc_release_space(inode,
- num_pages << PAGE_CACHE_SHIFT);
- break;
- }
-
- copied = btrfs_copy_from_user(pos, num_pages,
- write_bytes, pages, i);
-
- /*
- * if we have trouble faulting in the pages, fall
- * back to one page at a time
- */
- if (copied < write_bytes)
- nrptrs = 1;
-
- if (copied == 0) {
- force_page_uptodate = true;
- dirty_pages = 0;
- } else {
- force_page_uptodate = false;
- dirty_pages = (copied + offset +
- PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
- }
-
- /*
-		 * If we had a short copy, we need to release the excess delalloc
- * bytes we reserved. We need to increment outstanding_extents
- * because btrfs_delalloc_release_space will decrement it, but
- * we still have an outstanding extent for the chunk we actually
- * managed to copy.
- */
- if (num_pages > dirty_pages) {
- if (copied > 0) {
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents++;
- spin_unlock(&BTRFS_I(inode)->lock);
- }
- btrfs_delalloc_release_space(inode,
- (num_pages - dirty_pages) <<
- PAGE_CACHE_SHIFT);
- }
-
- if (copied > 0) {
- ret = btrfs_dirty_pages(root, inode, pages,
- dirty_pages, pos, copied,
- NULL);
- if (ret) {
- btrfs_delalloc_release_space(inode,
- dirty_pages << PAGE_CACHE_SHIFT);
- btrfs_drop_pages(pages, num_pages);
- break;
- }
- }
-
- btrfs_drop_pages(pages, num_pages);
-
- cond_resched();
-
- balance_dirty_pages_ratelimited_nr(inode->i_mapping,
- dirty_pages);
- if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
- btrfs_btree_balance_dirty(root, 1);
-
- pos += copied;
- num_written += copied;
- }
-
- kfree(pages);
-
- return num_written ? num_written : ret;
-}
-
-static ssize_t __btrfs_direct_write(struct kiocb *iocb,
- const struct iovec *iov,
- unsigned long nr_segs, loff_t pos,
- loff_t *ppos, size_t count, size_t ocount)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = fdentry(file)->d_inode;
- struct iov_iter i;
- ssize_t written;
- ssize_t written_buffered;
- loff_t endbyte;
- int err;
-
- written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
- count, ocount);
-
- /*
- * the generic O_DIRECT will update in-memory i_size after the
- * DIOs are done. But our endio handlers that update the on
- * disk i_size never update past the in memory i_size. So we
- * need one more update here to catch any additions to the
- * file
- */
- if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
- btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
- mark_inode_dirty(inode);
- }
-
- if (written < 0 || written == count)
- return written;
-
- pos += written;
- count -= written;
- iov_iter_init(&i, iov, nr_segs, count, written);
- written_buffered = __btrfs_buffered_write(file, &i, pos);
- if (written_buffered < 0) {
- err = written_buffered;
- goto out;
- }
- endbyte = pos + written_buffered - 1;
- err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
- if (err)
- goto out;
- written += written_buffered;
- *ppos = pos + written_buffered;
- invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
- endbyte >> PAGE_CACHE_SHIFT);
-out:
- return written ? written : err;
-}
-
-static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
- const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = fdentry(file)->d_inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- loff_t *ppos = &iocb->ki_pos;
- u64 start_pos;
- ssize_t num_written = 0;
- ssize_t err = 0;
- size_t count, ocount;
-
- vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
-
- mutex_lock(&inode->i_mutex);
-
- err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
- if (err) {
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
- count = ocount;
-
- current->backing_dev_info = inode->i_mapping->backing_dev_info;
- err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
- if (err) {
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
-
- if (count == 0) {
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
-
- err = file_remove_suid(file);
- if (err) {
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
-
- /*
- * If BTRFS flips readonly due to some impossible error
- * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
- * although we have opened a file as writable, we have
- * to stop this write operation to ensure FS consistency.
- */
- if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
- mutex_unlock(&inode->i_mutex);
- err = -EROFS;
- goto out;
- }
-
- err = btrfs_update_time(file);
- if (err) {
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
- BTRFS_I(inode)->sequence++;
-
- start_pos = round_down(pos, root->sectorsize);
- if (start_pos > i_size_read(inode)) {
- err = btrfs_cont_expand(inode, i_size_read(inode), start_pos);
- if (err) {
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
- }
-
- if (unlikely(file->f_flags & O_DIRECT)) {
- num_written = __btrfs_direct_write(iocb, iov, nr_segs,
- pos, ppos, count, ocount);
- } else {
- struct iov_iter i;
-
- iov_iter_init(&i, iov, nr_segs, count, num_written);
-
- num_written = __btrfs_buffered_write(file, &i, pos);
- if (num_written > 0)
- *ppos = pos + num_written;
- }
-
- mutex_unlock(&inode->i_mutex);
-
- /*
- * we want to make sure fsync finds this change
- * but we haven't joined a transaction running right now.
- *
- * Later on, someone is sure to update the inode and get the
- * real transid recorded.
- *
- * We set last_trans now to the fs_info generation + 1,
- * this will either be one more than the running transaction
- * or the generation used for the next transaction if there isn't
- * one running right now.
- */
- BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
- if (num_written > 0 || num_written == -EIOCBQUEUED) {
- err = generic_write_sync(file, pos, num_written);
- if (err < 0 && num_written > 0)
- num_written = err;
- }
-out:
- current->backing_dev_info = NULL;
- return num_written ? num_written : err;
-}
-
-int btrfs_release_file(struct inode *inode, struct file *filp)
-{
- /*
-	 * ordered_data_close is set by setattr when we are about to truncate
- * a file from a non-zero size to a zero size. This tries to
- * flush down new bytes that may have been written if the
- * application were using truncate to replace a file in place.
- */
- if (BTRFS_I(inode)->ordered_data_close) {
- BTRFS_I(inode)->ordered_data_close = 0;
- btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
- if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
- filemap_flush(inode->i_mapping);
- }
- if (filp->private_data)
- btrfs_ioctl_trans_end(filp);
- return 0;
-}
-
-/*
- * fsync call for both files and directories. This logs the inode into
- * the tree log instead of forcing full commits whenever possible.
- *
- * It needs to call filemap_fdatawait so that all ordered extent updates
- * in the metadata btree are up to date for copying to the log.
- *
- * It drops the inode mutex before doing the tree log commit. This is an
- * important optimization for directories because holding the mutex prevents
- * new operations on the dir while we write to disk.
- */
-int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
-{
- struct dentry *dentry = file->f_path.dentry;
- struct inode *inode = dentry->d_inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret = 0;
- struct btrfs_trans_handle *trans;
-
- trace_btrfs_sync_file(file, datasync);
-
- ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
- if (ret)
- return ret;
- mutex_lock(&inode->i_mutex);
-
- /* we wait first, since the writeback may change the inode */
- root->log_batch++;
- btrfs_wait_ordered_range(inode, 0, (u64)-1);
- root->log_batch++;
-
- /*
- * check the transaction that last modified this inode
-	 * and see if it's already been committed
- */
- if (!BTRFS_I(inode)->last_trans) {
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
-
- /*
- * if the last transaction that changed this file was before
- * the current transaction, we can bail out now without any
- * syncing
- */
- smp_mb();
- if (BTRFS_I(inode)->last_trans <=
- root->fs_info->last_trans_committed) {
- BTRFS_I(inode)->last_trans = 0;
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
-
- /*
-	 * ok we haven't committed the transaction yet, let's do a commit
- */
- if (file->private_data)
- btrfs_ioctl_trans_end(file);
-
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
-
- ret = btrfs_log_dentry_safe(trans, root, dentry);
- if (ret < 0) {
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
-
- /* we've logged all the items and now have a consistent
- * version of the file in the log. It is possible that
- * someone will come in and modify the file, but that's
- * fine because the log is consistent on disk, and we
- * have references to all of the file's extents
- *
- * It is possible that someone will come in and log the
- * file again, but that will end up using the synchronization
- * inside btrfs_sync_log to keep things safe.
- */
- mutex_unlock(&inode->i_mutex);
-
- if (ret != BTRFS_NO_LOG_SYNC) {
- if (ret > 0) {
- ret = btrfs_commit_transaction(trans, root);
- } else {
- ret = btrfs_sync_log(trans, root);
- if (ret == 0)
- ret = btrfs_end_transaction(trans, root);
- else
- ret = btrfs_commit_transaction(trans, root);
- }
- } else {
- ret = btrfs_end_transaction(trans, root);
- }
-out:
- return ret > 0 ? -EIO : ret;
-}
-
-static const struct vm_operations_struct btrfs_file_vm_ops = {
- .fault = filemap_fault,
- .page_mkwrite = btrfs_page_mkwrite,
-};
-
-static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
-{
- struct address_space *mapping = filp->f_mapping;
-
- if (!mapping->a_ops->readpage)
- return -ENOEXEC;
-
- file_accessed(filp);
- vma->vm_ops = &btrfs_file_vm_ops;
- vma->vm_flags |= VM_CAN_NONLINEAR;
-
- return 0;
-}
-
-static long btrfs_fallocate(struct file *file, int mode,
- loff_t offset, loff_t len)
-{
- struct inode *inode = file->f_path.dentry->d_inode;
- struct extent_state *cached_state = NULL;
- u64 cur_offset;
- u64 last_byte;
- u64 alloc_start;
- u64 alloc_end;
- u64 alloc_hint = 0;
- u64 locked_end;
- u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
- struct extent_map *em;
- int ret;
-
- alloc_start = offset & ~mask;
- alloc_end = (offset + len + mask) & ~mask;
-
- /* We only support the FALLOC_FL_KEEP_SIZE mode */
- if (mode & ~FALLOC_FL_KEEP_SIZE)
- return -EOPNOTSUPP;
-
- /*
- * Make sure we have enough space before we do the
- * allocation.
- */
- ret = btrfs_check_data_free_space(inode, len);
- if (ret)
- return ret;
-
- /*
- * wait for ordered IO before we have any locks. We'll loop again
- * below with the locks held.
- */
- btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
-
- mutex_lock(&inode->i_mutex);
- ret = inode_newsize_ok(inode, alloc_end);
- if (ret)
- goto out;
-
- if (alloc_start > inode->i_size) {
- ret = btrfs_cont_expand(inode, i_size_read(inode),
- alloc_start);
- if (ret)
- goto out;
- }
-
- locked_end = alloc_end - 1;
- while (1) {
- struct btrfs_ordered_extent *ordered;
-
- /* the extent lock is ordered inside the running
- * transaction
- */
- lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
- locked_end, 0, &cached_state);
- ordered = btrfs_lookup_first_ordered_extent(inode,
- alloc_end - 1);
- if (ordered &&
- ordered->file_offset + ordered->len > alloc_start &&
- ordered->file_offset < alloc_end) {
- btrfs_put_ordered_extent(ordered);
- unlock_extent_cached(&BTRFS_I(inode)->io_tree,
- alloc_start, locked_end,
- &cached_state, GFP_NOFS);
- /*
- * we can't wait on the range with the transaction
- * running or with the extent lock held
- */
- btrfs_wait_ordered_range(inode, alloc_start,
- alloc_end - alloc_start);
- } else {
- if (ordered)
- btrfs_put_ordered_extent(ordered);
- break;
- }
- }
-
- cur_offset = alloc_start;
- while (1) {
- u64 actual_end;
-
- em = btrfs_get_extent(inode, NULL, 0, cur_offset,
- alloc_end - cur_offset, 0);
- if (IS_ERR_OR_NULL(em)) {
- if (!em)
- ret = -ENOMEM;
- else
- ret = PTR_ERR(em);
- break;
- }
- last_byte = min(extent_map_end(em), alloc_end);
- actual_end = min_t(u64, extent_map_end(em), offset + len);
- last_byte = (last_byte + mask) & ~mask;
-
- if (em->block_start == EXTENT_MAP_HOLE ||
- (cur_offset >= inode->i_size &&
- !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
- ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
- last_byte - cur_offset,
- 1 << inode->i_blkbits,
- offset + len,
- &alloc_hint);
-
- if (ret < 0) {
- free_extent_map(em);
- break;
- }
- } else if (actual_end > inode->i_size &&
- !(mode & FALLOC_FL_KEEP_SIZE)) {
- /*
- * We didn't need to allocate any more space, but we
- * still extended the size of the file so we need to
- * update i_size.
- */
- inode->i_ctime = CURRENT_TIME;
- i_size_write(inode, actual_end);
- btrfs_ordered_update_i_size(inode, actual_end, NULL);
- }
- free_extent_map(em);
-
- cur_offset = last_byte;
- if (cur_offset >= alloc_end) {
- ret = 0;
- break;
- }
- }
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
- &cached_state, GFP_NOFS);
-out:
- mutex_unlock(&inode->i_mutex);
- /* Let go of our reservation. */
- btrfs_free_reserved_data_space(inode, len);
- return ret;
-}
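
btrfs_fallocate() above is the handler behind the fallocate(2) system call, and since it rejects every flag except FALLOC_FL_KEEP_SIZE, a caller on this kernel can only preallocate space without growing the file size. A minimal userspace sketch of such a call (illustrative only, not part of the original file; fallocate(2) and FALLOC_FL_KEEP_SIZE are the standard glibc/Linux interfaces):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>

int main(int argc, char **argv)
{
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDWR | O_CREAT, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* preallocate 1 MiB at offset 0 without changing the file size;
	 * KEEP_SIZE is the only mode the 3.4 btrfs handler accepts */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) < 0)
		perror("fallocate");

	return 0;
}
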
-
-static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_map *em;
- struct extent_state *cached_state = NULL;
- u64 lockstart = *offset;
- u64 lockend = i_size_read(inode);
- u64 start = *offset;
- u64 orig_start = *offset;
- u64 len = i_size_read(inode);
- u64 last_end = 0;
- int ret = 0;
-
- lockend = max_t(u64, root->sectorsize, lockend);
- if (lockend <= lockstart)
- lockend = lockstart + root->sectorsize;
-
- len = lockend - lockstart + 1;
-
- len = max_t(u64, len, root->sectorsize);
- if (inode->i_size == 0)
- return -ENXIO;
-
- lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
- &cached_state);
-
- /*
- * Delalloc is such a pain. If we have a hole and we have pending
- * delalloc for a portion of the hole we will get back a hole that
- * exists for the entire range since it hasn't been actually written
- * yet. So to take care of this case we need to look for an extent just
- * before the position we want in case there is outstanding delalloc
- * going on here.
- */
- if (origin == SEEK_HOLE && start != 0) {
- if (start <= root->sectorsize)
- em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
- root->sectorsize, 0);
- else
- em = btrfs_get_extent_fiemap(inode, NULL, 0,
- start - root->sectorsize,
- root->sectorsize, 0);
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- goto out;
- }
- last_end = em->start + em->len;
- if (em->block_start == EXTENT_MAP_DELALLOC)
- last_end = min_t(u64, last_end, inode->i_size);
- free_extent_map(em);
- }
-
- while (1) {
- em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- break;
- }
-
- if (em->block_start == EXTENT_MAP_HOLE) {
- if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
- if (last_end <= orig_start) {
- free_extent_map(em);
- ret = -ENXIO;
- break;
- }
- }
-
- if (origin == SEEK_HOLE) {
- *offset = start;
- free_extent_map(em);
- break;
- }
- } else {
- if (origin == SEEK_DATA) {
- if (em->block_start == EXTENT_MAP_DELALLOC) {
- if (start >= inode->i_size) {
- free_extent_map(em);
- ret = -ENXIO;
- break;
- }
- }
-
- *offset = start;
- free_extent_map(em);
- break;
- }
- }
-
- start = em->start + em->len;
- last_end = em->start + em->len;
-
- if (em->block_start == EXTENT_MAP_DELALLOC)
- last_end = min_t(u64, last_end, inode->i_size);
-
- if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
- free_extent_map(em);
- ret = -ENXIO;
- break;
- }
- free_extent_map(em);
- cond_resched();
- }
- if (!ret)
- *offset = min(*offset, inode->i_size);
-out:
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- &cached_state, GFP_NOFS);
- return ret;
-}
-
-static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
-{
- struct inode *inode = file->f_mapping->host;
- int ret;
-
- mutex_lock(&inode->i_mutex);
- switch (origin) {
- case SEEK_END:
- case SEEK_CUR:
- offset = generic_file_llseek(file, offset, origin);
- goto out;
- case SEEK_DATA:
- case SEEK_HOLE:
- if (offset >= i_size_read(inode)) {
- mutex_unlock(&inode->i_mutex);
- return -ENXIO;
- }
-
- ret = find_desired_extent(inode, &offset, origin);
- if (ret) {
- mutex_unlock(&inode->i_mutex);
- return ret;
- }
- }
-
- if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) {
- offset = -EINVAL;
- goto out;
- }
- if (offset > inode->i_sb->s_maxbytes) {
- offset = -EINVAL;
- goto out;
- }
-
- /* Special lock needed here? */
- if (offset != file->f_pos) {
- file->f_pos = offset;
- file->f_version = 0;
- }
-out:
- mutex_unlock(&inode->i_mutex);
- return offset;
-}
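
The SEEK_DATA/SEEK_HOLE cases handled by btrfs_file_llseek() above are what back a userspace sparse-file walk with lseek(2). A minimal sketch of such a walk (illustrative only, not part of the original file; it assumes a filesystem that reports holes, otherwise the whole file comes back as one data region):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	int fd;
	off_t end, pos = 0;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	end = lseek(fd, 0, SEEK_END);

	while (pos < end) {
		off_t data = lseek(fd, pos, SEEK_DATA);
		if (data < 0)
			break;	/* only a trailing hole is left */
		off_t hole = lseek(fd, data, SEEK_HOLE);
		printf("data: %lld..%lld\n", (long long)data, (long long)hole);
		pos = hole;
	}
	close(fd);
	return 0;
}
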
-
-const struct file_operations btrfs_file_operations = {
- .llseek = btrfs_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = generic_file_aio_read,
- .splice_read = generic_file_splice_read,
- .aio_write = btrfs_file_aio_write,
- .mmap = btrfs_file_mmap,
- .open = generic_file_open,
- .release = btrfs_release_file,
- .fsync = btrfs_sync_file,
- .fallocate = btrfs_fallocate,
- .unlocked_ioctl = btrfs_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = btrfs_ioctl,
-#endif
-};
diff --git a/ANDROID_3.4.5/fs/btrfs/free-space-cache.c b/ANDROID_3.4.5/fs/btrfs/free-space-cache.c
deleted file mode 100644
index 202008ec..00000000
--- a/ANDROID_3.4.5/fs/btrfs/free-space-cache.c
+++ /dev/null
@@ -1,2943 +0,0 @@
-/*
- * Copyright (C) 2008 Red Hat. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- */
-
-#include <linux/pagemap.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/math64.h>
-#include <linux/ratelimit.h>
-#include "ctree.h"
-#include "free-space-cache.h"
-#include "transaction.h"
-#include "disk-io.h"
-#include "extent_io.h"
-#include "inode-map.h"
-
-#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
-#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
-
-static int link_free_space(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *info);
-
-static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
- struct btrfs_path *path,
- u64 offset)
-{
- struct btrfs_key key;
- struct btrfs_key location;
- struct btrfs_disk_key disk_key;
- struct btrfs_free_space_header *header;
- struct extent_buffer *leaf;
- struct inode *inode = NULL;
- int ret;
-
- key.objectid = BTRFS_FREE_SPACE_OBJECTID;
- key.offset = offset;
- key.type = 0;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0) {
- btrfs_release_path(path);
- return ERR_PTR(-ENOENT);
- }
-
- leaf = path->nodes[0];
- header = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_free_space_header);
- btrfs_free_space_key(leaf, header, &disk_key);
- btrfs_disk_key_to_cpu(&location, &disk_key);
- btrfs_release_path(path);
-
- inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
- if (!inode)
- return ERR_PTR(-ENOENT);
- if (IS_ERR(inode))
- return inode;
- if (is_bad_inode(inode)) {
- iput(inode);
- return ERR_PTR(-ENOENT);
- }
-
- inode->i_mapping->flags &= ~__GFP_FS;
-
- return inode;
-}
-
-struct inode *lookup_free_space_inode(struct btrfs_root *root,
- struct btrfs_block_group_cache
- *block_group, struct btrfs_path *path)
-{
- struct inode *inode = NULL;
- u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
-
- spin_lock(&block_group->lock);
- if (block_group->inode)
- inode = igrab(block_group->inode);
- spin_unlock(&block_group->lock);
- if (inode)
- return inode;
-
- inode = __lookup_free_space_inode(root, path,
- block_group->key.objectid);
- if (IS_ERR(inode))
- return inode;
-
- spin_lock(&block_group->lock);
- if (!((BTRFS_I(inode)->flags & flags) == flags)) {
- printk(KERN_INFO "Old style space inode found, converting.\n");
- BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
- BTRFS_INODE_NODATACOW;
- block_group->disk_cache_state = BTRFS_DC_CLEAR;
- }
-
- if (!block_group->iref) {
- block_group->inode = igrab(inode);
- block_group->iref = 1;
- }
- spin_unlock(&block_group->lock);
-
- return inode;
-}
-
-int __create_free_space_inode(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
- struct btrfs_path *path, u64 ino, u64 offset)
-{
- struct btrfs_key key;
- struct btrfs_disk_key disk_key;
- struct btrfs_free_space_header *header;
- struct btrfs_inode_item *inode_item;
- struct extent_buffer *leaf;
- u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
- int ret;
-
- ret = btrfs_insert_empty_inode(trans, root, path, ino);
- if (ret)
- return ret;
-
-	/* We inline crcs for the free disk space cache */
- if (ino != BTRFS_FREE_INO_OBJECTID)
- flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
-
- leaf = path->nodes[0];
- inode_item = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_inode_item);
- btrfs_item_key(leaf, &disk_key, path->slots[0]);
- memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
- sizeof(*inode_item));
- btrfs_set_inode_generation(leaf, inode_item, trans->transid);
- btrfs_set_inode_size(leaf, inode_item, 0);
- btrfs_set_inode_nbytes(leaf, inode_item, 0);
- btrfs_set_inode_uid(leaf, inode_item, 0);
- btrfs_set_inode_gid(leaf, inode_item, 0);
- btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
- btrfs_set_inode_flags(leaf, inode_item, flags);
- btrfs_set_inode_nlink(leaf, inode_item, 1);
- btrfs_set_inode_transid(leaf, inode_item, trans->transid);
- btrfs_set_inode_block_group(leaf, inode_item, offset);
- btrfs_mark_buffer_dirty(leaf);
- btrfs_release_path(path);
-
- key.objectid = BTRFS_FREE_SPACE_OBJECTID;
- key.offset = offset;
- key.type = 0;
-
- ret = btrfs_insert_empty_item(trans, root, path, &key,
- sizeof(struct btrfs_free_space_header));
- if (ret < 0) {
- btrfs_release_path(path);
- return ret;
- }
- leaf = path->nodes[0];
- header = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_free_space_header);
- memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
- btrfs_set_free_space_key(leaf, header, &disk_key);
- btrfs_mark_buffer_dirty(leaf);
- btrfs_release_path(path);
-
- return 0;
-}
-
-int create_free_space_inode(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
- struct btrfs_path *path)
-{
- int ret;
- u64 ino;
-
- ret = btrfs_find_free_objectid(root, &ino);
- if (ret < 0)
- return ret;
-
- return __create_free_space_inode(root, trans, path, ino,
- block_group->key.objectid);
-}
-
-int btrfs_truncate_free_space_cache(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct inode *inode)
-{
- struct btrfs_block_rsv *rsv;
- u64 needed_bytes;
- loff_t oldsize;
- int ret = 0;
-
- rsv = trans->block_rsv;
- trans->block_rsv = &root->fs_info->global_block_rsv;
-
- /* 1 for slack space, 1 for updating the inode */
- needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
- btrfs_calc_trans_metadata_size(root, 1);
-
- spin_lock(&trans->block_rsv->lock);
- if (trans->block_rsv->reserved < needed_bytes) {
- spin_unlock(&trans->block_rsv->lock);
- trans->block_rsv = rsv;
- return -ENOSPC;
- }
- spin_unlock(&trans->block_rsv->lock);
-
- oldsize = i_size_read(inode);
- btrfs_i_size_write(inode, 0);
- truncate_pagecache(inode, oldsize, 0);
-
- /*
- * We don't need an orphan item because truncating the free space cache
- * will never be split across transactions.
- */
- ret = btrfs_truncate_inode_items(trans, root, inode,
- 0, BTRFS_EXTENT_DATA_KEY);
-
- if (ret) {
- trans->block_rsv = rsv;
- btrfs_abort_transaction(trans, root, ret);
- return ret;
- }
-
- ret = btrfs_update_inode(trans, root, inode);
- if (ret)
- btrfs_abort_transaction(trans, root, ret);
- trans->block_rsv = rsv;
-
- return ret;
-}
-
-static int readahead_cache(struct inode *inode)
-{
- struct file_ra_state *ra;
- unsigned long last_index;
-
- ra = kzalloc(sizeof(*ra), GFP_NOFS);
- if (!ra)
- return -ENOMEM;
-
- file_ra_state_init(ra, inode->i_mapping);
- last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
-
- page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
-
- kfree(ra);
-
- return 0;
-}
-
-struct io_ctl {
- void *cur, *orig;
- struct page *page;
- struct page **pages;
- struct btrfs_root *root;
- unsigned long size;
- int index;
- int num_pages;
- unsigned check_crcs:1;
-};
-
-static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
- struct btrfs_root *root)
-{
- memset(io_ctl, 0, sizeof(struct io_ctl));
- io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
- io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
- GFP_NOFS);
- if (!io_ctl->pages)
- return -ENOMEM;
- io_ctl->root = root;
- if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
- io_ctl->check_crcs = 1;
- return 0;
-}
-
-static void io_ctl_free(struct io_ctl *io_ctl)
-{
- kfree(io_ctl->pages);
-}
-
-static void io_ctl_unmap_page(struct io_ctl *io_ctl)
-{
- if (io_ctl->cur) {
- kunmap(io_ctl->page);
- io_ctl->cur = NULL;
- io_ctl->orig = NULL;
- }
-}
-
-static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
-{
- WARN_ON(io_ctl->cur);
- BUG_ON(io_ctl->index >= io_ctl->num_pages);
- io_ctl->page = io_ctl->pages[io_ctl->index++];
- io_ctl->cur = kmap(io_ctl->page);
- io_ctl->orig = io_ctl->cur;
- io_ctl->size = PAGE_CACHE_SIZE;
- if (clear)
- memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
-}
-
-static void io_ctl_drop_pages(struct io_ctl *io_ctl)
-{
- int i;
-
- io_ctl_unmap_page(io_ctl);
-
- for (i = 0; i < io_ctl->num_pages; i++) {
- if (io_ctl->pages[i]) {
- ClearPageChecked(io_ctl->pages[i]);
- unlock_page(io_ctl->pages[i]);
- page_cache_release(io_ctl->pages[i]);
- }
- }
-}
-
-static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
- int uptodate)
-{
- struct page *page;
- gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
- int i;
-
- for (i = 0; i < io_ctl->num_pages; i++) {
- page = find_or_create_page(inode->i_mapping, i, mask);
- if (!page) {
- io_ctl_drop_pages(io_ctl);
- return -ENOMEM;
- }
- io_ctl->pages[i] = page;
- if (uptodate && !PageUptodate(page)) {
- btrfs_readpage(NULL, page);
- lock_page(page);
- if (!PageUptodate(page)) {
- printk(KERN_ERR "btrfs: error reading free "
- "space cache\n");
- io_ctl_drop_pages(io_ctl);
- return -EIO;
- }
- }
- }
-
- for (i = 0; i < io_ctl->num_pages; i++) {
- clear_page_dirty_for_io(io_ctl->pages[i]);
- set_page_extent_mapped(io_ctl->pages[i]);
- }
-
- return 0;
-}
-
-static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
-{
- u64 *val;
-
- io_ctl_map_page(io_ctl, 1);
-
- /*
- * Skip the csum areas. If we don't check crcs then we just have a
- * 64bit chunk at the front of the first page.
- */
- if (io_ctl->check_crcs) {
- io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
- io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
- } else {
- io_ctl->cur += sizeof(u64);
- io_ctl->size -= sizeof(u64) * 2;
- }
-
- val = io_ctl->cur;
- *val = cpu_to_le64(generation);
- io_ctl->cur += sizeof(u64);
-}
-
-static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
-{
- u64 *gen;
-
- /*
- * Skip the crc area. If we don't check crcs then we just have a 64bit
- * chunk at the front of the first page.
- */
- if (io_ctl->check_crcs) {
- io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
- io_ctl->size -= sizeof(u64) +
- (sizeof(u32) * io_ctl->num_pages);
- } else {
- io_ctl->cur += sizeof(u64);
- io_ctl->size -= sizeof(u64) * 2;
- }
-
- gen = io_ctl->cur;
- if (le64_to_cpu(*gen) != generation) {
- printk_ratelimited(KERN_ERR "btrfs: space cache generation "
- "(%Lu) does not match inode (%Lu)\n", *gen,
- generation);
- io_ctl_unmap_page(io_ctl);
- return -EIO;
- }
- io_ctl->cur += sizeof(u64);
- return 0;
-}
-
-static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
-{
- u32 *tmp;
- u32 crc = ~(u32)0;
- unsigned offset = 0;
-
- if (!io_ctl->check_crcs) {
- io_ctl_unmap_page(io_ctl);
- return;
- }
-
- if (index == 0)
- offset = sizeof(u32) * io_ctl->num_pages;
-
- crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
- PAGE_CACHE_SIZE - offset);
- btrfs_csum_final(crc, (char *)&crc);
- io_ctl_unmap_page(io_ctl);
- tmp = kmap(io_ctl->pages[0]);
- tmp += index;
- *tmp = crc;
- kunmap(io_ctl->pages[0]);
-}
-
-static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
-{
- u32 *tmp, val;
- u32 crc = ~(u32)0;
- unsigned offset = 0;
-
- if (!io_ctl->check_crcs) {
- io_ctl_map_page(io_ctl, 0);
- return 0;
- }
-
- if (index == 0)
- offset = sizeof(u32) * io_ctl->num_pages;
-
- tmp = kmap(io_ctl->pages[0]);
- tmp += index;
- val = *tmp;
- kunmap(io_ctl->pages[0]);
-
- io_ctl_map_page(io_ctl, 0);
- crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
- PAGE_CACHE_SIZE - offset);
- btrfs_csum_final(crc, (char *)&crc);
- if (val != crc) {
- printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
- "space cache\n");
- io_ctl_unmap_page(io_ctl);
- return -EIO;
- }
-
- return 0;
-}
-
-static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
- void *bitmap)
-{
- struct btrfs_free_space_entry *entry;
-
- if (!io_ctl->cur)
- return -ENOSPC;
-
- entry = io_ctl->cur;
- entry->offset = cpu_to_le64(offset);
- entry->bytes = cpu_to_le64(bytes);
- entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
- BTRFS_FREE_SPACE_EXTENT;
- io_ctl->cur += sizeof(struct btrfs_free_space_entry);
- io_ctl->size -= sizeof(struct btrfs_free_space_entry);
-
- if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
- return 0;
-
- io_ctl_set_crc(io_ctl, io_ctl->index - 1);
-
- /* No more pages to map */
- if (io_ctl->index >= io_ctl->num_pages)
- return 0;
-
- /* map the next page */
- io_ctl_map_page(io_ctl, 1);
- return 0;
-}
-
-static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
-{
- if (!io_ctl->cur)
- return -ENOSPC;
-
- /*
- * If we aren't at the start of the current page, unmap this one and
- * map the next one if there is any left.
- */
- if (io_ctl->cur != io_ctl->orig) {
- io_ctl_set_crc(io_ctl, io_ctl->index - 1);
- if (io_ctl->index >= io_ctl->num_pages)
- return -ENOSPC;
- io_ctl_map_page(io_ctl, 0);
- }
-
- memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
- io_ctl_set_crc(io_ctl, io_ctl->index - 1);
- if (io_ctl->index < io_ctl->num_pages)
- io_ctl_map_page(io_ctl, 0);
- return 0;
-}
-
-static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
-{
- /*
- * If we're not on the boundary we know we've modified the page and we
- * need to crc the page.
- */
- if (io_ctl->cur != io_ctl->orig)
- io_ctl_set_crc(io_ctl, io_ctl->index - 1);
- else
- io_ctl_unmap_page(io_ctl);
-
- while (io_ctl->index < io_ctl->num_pages) {
- io_ctl_map_page(io_ctl, 1);
- io_ctl_set_crc(io_ctl, io_ctl->index - 1);
- }
-}
-
-static int io_ctl_read_entry(struct io_ctl *io_ctl,
- struct btrfs_free_space *entry, u8 *type)
-{
- struct btrfs_free_space_entry *e;
- int ret;
-
- if (!io_ctl->cur) {
- ret = io_ctl_check_crc(io_ctl, io_ctl->index);
- if (ret)
- return ret;
- }
-
- e = io_ctl->cur;
- entry->offset = le64_to_cpu(e->offset);
- entry->bytes = le64_to_cpu(e->bytes);
- *type = e->type;
- io_ctl->cur += sizeof(struct btrfs_free_space_entry);
- io_ctl->size -= sizeof(struct btrfs_free_space_entry);
-
- if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
- return 0;
-
- io_ctl_unmap_page(io_ctl);
-
- return 0;
-}
-
-static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
- struct btrfs_free_space *entry)
-{
- int ret;
-
- ret = io_ctl_check_crc(io_ctl, io_ctl->index);
- if (ret)
- return ret;
-
- memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
- io_ctl_unmap_page(io_ctl);
-
- return 0;
-}
-
-int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
- struct btrfs_free_space_ctl *ctl,
- struct btrfs_path *path, u64 offset)
-{
- struct btrfs_free_space_header *header;
- struct extent_buffer *leaf;
- struct io_ctl io_ctl;
- struct btrfs_key key;
- struct btrfs_free_space *e, *n;
- struct list_head bitmaps;
- u64 num_entries;
- u64 num_bitmaps;
- u64 generation;
- u8 type;
- int ret = 0;
-
- INIT_LIST_HEAD(&bitmaps);
-
- /* Nothing in the space cache, goodbye */
- if (!i_size_read(inode))
- return 0;
-
- key.objectid = BTRFS_FREE_SPACE_OBJECTID;
- key.offset = offset;
- key.type = 0;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- return 0;
- else if (ret > 0) {
- btrfs_release_path(path);
- return 0;
- }
-
- ret = -1;
-
- leaf = path->nodes[0];
- header = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_free_space_header);
- num_entries = btrfs_free_space_entries(leaf, header);
- num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
- generation = btrfs_free_space_generation(leaf, header);
- btrfs_release_path(path);
-
- if (BTRFS_I(inode)->generation != generation) {
- printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
- " not match free space cache generation (%llu)\n",
- (unsigned long long)BTRFS_I(inode)->generation,
- (unsigned long long)generation);
- return 0;
- }
-
- if (!num_entries)
- return 0;
-
- ret = io_ctl_init(&io_ctl, inode, root);
- if (ret)
- return ret;
-
- ret = readahead_cache(inode);
- if (ret)
- goto out;
-
- ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
- if (ret)
- goto out;
-
- ret = io_ctl_check_crc(&io_ctl, 0);
- if (ret)
- goto free_cache;
-
- ret = io_ctl_check_generation(&io_ctl, generation);
- if (ret)
- goto free_cache;
-
- while (num_entries) {
- e = kmem_cache_zalloc(btrfs_free_space_cachep,
- GFP_NOFS);
- if (!e)
- goto free_cache;
-
- ret = io_ctl_read_entry(&io_ctl, e, &type);
- if (ret) {
- kmem_cache_free(btrfs_free_space_cachep, e);
- goto free_cache;
- }
-
- if (!e->bytes) {
- kmem_cache_free(btrfs_free_space_cachep, e);
- goto free_cache;
- }
-
- if (type == BTRFS_FREE_SPACE_EXTENT) {
- spin_lock(&ctl->tree_lock);
- ret = link_free_space(ctl, e);
- spin_unlock(&ctl->tree_lock);
- if (ret) {
- printk(KERN_ERR "Duplicate entries in "
- "free space cache, dumping\n");
- kmem_cache_free(btrfs_free_space_cachep, e);
- goto free_cache;
- }
- } else {
- BUG_ON(!num_bitmaps);
- num_bitmaps--;
- e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
- if (!e->bitmap) {
- kmem_cache_free(
- btrfs_free_space_cachep, e);
- goto free_cache;
- }
- spin_lock(&ctl->tree_lock);
- ret = link_free_space(ctl, e);
- ctl->total_bitmaps++;
- ctl->op->recalc_thresholds(ctl);
- spin_unlock(&ctl->tree_lock);
- if (ret) {
- printk(KERN_ERR "Duplicate entries in "
- "free space cache, dumping\n");
- kmem_cache_free(btrfs_free_space_cachep, e);
- goto free_cache;
- }
- list_add_tail(&e->list, &bitmaps);
- }
-
- num_entries--;
- }
-
- io_ctl_unmap_page(&io_ctl);
-
- /*
-	 * The bitmap pages are stored after all of the entries, so we read
-	 * them last to fill in the bitmaps for the entries linked above.
- */
- list_for_each_entry_safe(e, n, &bitmaps, list) {
- list_del_init(&e->list);
- ret = io_ctl_read_bitmap(&io_ctl, e);
- if (ret)
- goto free_cache;
- }
-
- io_ctl_drop_pages(&io_ctl);
- ret = 1;
-out:
- io_ctl_free(&io_ctl);
- return ret;
-free_cache:
- io_ctl_drop_pages(&io_ctl);
- __btrfs_remove_free_space_cache(ctl);
- goto out;
-}
-
-int load_free_space_cache(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *block_group)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_root *root = fs_info->tree_root;
- struct inode *inode;
- struct btrfs_path *path;
- int ret = 0;
- bool matched;
- u64 used = btrfs_block_group_used(&block_group->item);
-
- /*
- * If this block group has been marked to be cleared for one reason or
- * another then we can't trust the on disk cache, so just return.
- */
- spin_lock(&block_group->lock);
- if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
- spin_unlock(&block_group->lock);
- return 0;
- }
- spin_unlock(&block_group->lock);
-
- path = btrfs_alloc_path();
- if (!path)
- return 0;
- path->search_commit_root = 1;
- path->skip_locking = 1;
-
- inode = lookup_free_space_inode(root, block_group, path);
- if (IS_ERR(inode)) {
- btrfs_free_path(path);
- return 0;
- }
-
- /* We may have converted the inode and made the cache invalid. */
- spin_lock(&block_group->lock);
- if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
- spin_unlock(&block_group->lock);
- btrfs_free_path(path);
- goto out;
- }
- spin_unlock(&block_group->lock);
-
- ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
- path, block_group->key.objectid);
- btrfs_free_path(path);
- if (ret <= 0)
- goto out;
-
- spin_lock(&ctl->tree_lock);
- matched = (ctl->free_space == (block_group->key.offset - used -
- block_group->bytes_super));
- spin_unlock(&ctl->tree_lock);
-
- if (!matched) {
- __btrfs_remove_free_space_cache(ctl);
- printk(KERN_ERR "block group %llu has an wrong amount of free "
- "space\n", block_group->key.objectid);
- ret = -1;
- }
-out:
- if (ret < 0) {
- /* This cache is bogus, make sure it gets cleared */
- spin_lock(&block_group->lock);
- block_group->disk_cache_state = BTRFS_DC_CLEAR;
- spin_unlock(&block_group->lock);
- ret = 0;
-
- printk(KERN_ERR "btrfs: failed to load free space cache "
- "for block group %llu\n", block_group->key.objectid);
- }
-
- iput(inode);
- return ret;
-}
-
-/**
- * __btrfs_write_out_cache - write out cached info to an inode
- * @root - the root the inode belongs to
- * @ctl - the free space cache we are going to write out
- * @block_group - the block_group for this cache if it belongs to a block_group
- * @trans - the trans handle
- * @path - the path to use
- * @offset - the offset for the key we'll insert
- *
- * This function writes out a free space cache struct to disk for quick recovery
- * on mount. This will return 0 if it was successful in writing the cache out,
- * and -1 if it was not.
- */
-int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
- struct btrfs_free_space_ctl *ctl,
- struct btrfs_block_group_cache *block_group,
- struct btrfs_trans_handle *trans,
- struct btrfs_path *path, u64 offset)
-{
- struct btrfs_free_space_header *header;
- struct extent_buffer *leaf;
- struct rb_node *node;
- struct list_head *pos, *n;
- struct extent_state *cached_state = NULL;
- struct btrfs_free_cluster *cluster = NULL;
- struct extent_io_tree *unpin = NULL;
- struct io_ctl io_ctl;
- struct list_head bitmap_list;
- struct btrfs_key key;
- u64 start, extent_start, extent_end, len;
- int entries = 0;
- int bitmaps = 0;
- int ret;
- int err = -1;
-
- INIT_LIST_HEAD(&bitmap_list);
-
- if (!i_size_read(inode))
- return -1;
-
- ret = io_ctl_init(&io_ctl, inode, root);
- if (ret)
- return -1;
-
- /* Get the cluster for this block_group if it exists */
- if (block_group && !list_empty(&block_group->cluster_list))
- cluster = list_entry(block_group->cluster_list.next,
- struct btrfs_free_cluster,
- block_group_list);
-
- /* Lock all pages first so we can lock the extent safely. */
- io_ctl_prepare_pages(&io_ctl, inode, 0);
-
- lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
- 0, &cached_state);
-
- node = rb_first(&ctl->free_space_offset);
- if (!node && cluster) {
- node = rb_first(&cluster->root);
- cluster = NULL;
- }
-
- /* Make sure we can fit our crcs into the first page */
- if (io_ctl.check_crcs &&
- (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
- WARN_ON(1);
- goto out_nospc;
- }
-
- io_ctl_set_generation(&io_ctl, trans->transid);
-
- /* Write out the extent entries */
- while (node) {
- struct btrfs_free_space *e;
-
- e = rb_entry(node, struct btrfs_free_space, offset_index);
- entries++;
-
- ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
- e->bitmap);
- if (ret)
- goto out_nospc;
-
- if (e->bitmap) {
- list_add_tail(&e->list, &bitmap_list);
- bitmaps++;
- }
- node = rb_next(node);
- if (!node && cluster) {
- node = rb_first(&cluster->root);
- cluster = NULL;
- }
- }
-
- /*
- * We want to add any pinned extents to our free space cache
- * so we don't leak the space
- */
-
- /*
- * We shouldn't have switched the pinned extents yet so this is the
- * right one
- */
- unpin = root->fs_info->pinned_extents;
-
- if (block_group)
- start = block_group->key.objectid;
-
- while (block_group && (start < block_group->key.objectid +
- block_group->key.offset)) {
- ret = find_first_extent_bit(unpin, start,
- &extent_start, &extent_end,
- EXTENT_DIRTY);
- if (ret) {
- ret = 0;
- break;
- }
-
- /* This pinned extent is out of our range */
- if (extent_start >= block_group->key.objectid +
- block_group->key.offset)
- break;
-
- extent_start = max(extent_start, start);
- extent_end = min(block_group->key.objectid +
- block_group->key.offset, extent_end + 1);
- len = extent_end - extent_start;
-
- entries++;
- ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
- if (ret)
- goto out_nospc;
-
- start = extent_end;
- }
-
- /* Write out the bitmaps */
- list_for_each_safe(pos, n, &bitmap_list) {
- struct btrfs_free_space *entry =
- list_entry(pos, struct btrfs_free_space, list);
-
- ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
- if (ret)
- goto out_nospc;
- list_del_init(&entry->list);
- }
-
- /* Zero out the rest of the pages just to make sure */
- io_ctl_zero_remaining_pages(&io_ctl);
-
- ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
- 0, i_size_read(inode), &cached_state);
- io_ctl_drop_pages(&io_ctl);
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
- i_size_read(inode) - 1, &cached_state, GFP_NOFS);
-
- if (ret)
- goto out;
-
-
- ret = filemap_write_and_wait(inode->i_mapping);
- if (ret)
- goto out;
-
- key.objectid = BTRFS_FREE_SPACE_OBJECTID;
- key.offset = offset;
- key.type = 0;
-
- ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
- if (ret < 0) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
- EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
- GFP_NOFS);
- goto out;
- }
- leaf = path->nodes[0];
- if (ret > 0) {
- struct btrfs_key found_key;
- BUG_ON(!path->slots[0]);
- path->slots[0]--;
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
- found_key.offset != offset) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
- inode->i_size - 1,
- EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
- NULL, GFP_NOFS);
- btrfs_release_path(path);
- goto out;
- }
- }
-
- BTRFS_I(inode)->generation = trans->transid;
- header = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_free_space_header);
- btrfs_set_free_space_entries(leaf, header, entries);
- btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
- btrfs_set_free_space_generation(leaf, header, trans->transid);
- btrfs_mark_buffer_dirty(leaf);
- btrfs_release_path(path);
-
- err = 0;
-out:
- io_ctl_free(&io_ctl);
- if (err) {
- invalidate_inode_pages2(inode->i_mapping);
- BTRFS_I(inode)->generation = 0;
- }
- btrfs_update_inode(trans, root, inode);
- return err;
-
-out_nospc:
- list_for_each_safe(pos, n, &bitmap_list) {
- struct btrfs_free_space *entry =
- list_entry(pos, struct btrfs_free_space, list);
- list_del_init(&entry->list);
- }
- io_ctl_drop_pages(&io_ctl);
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
- i_size_read(inode) - 1, &cached_state, GFP_NOFS);
- goto out;
-}
-
-int btrfs_write_out_cache(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
- struct btrfs_path *path)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct inode *inode;
- int ret = 0;
-
- root = root->fs_info->tree_root;
-
- spin_lock(&block_group->lock);
- if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
- spin_unlock(&block_group->lock);
- return 0;
- }
- spin_unlock(&block_group->lock);
-
- inode = lookup_free_space_inode(root, block_group, path);
- if (IS_ERR(inode))
- return 0;
-
- ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
- path, block_group->key.objectid);
- if (ret) {
- spin_lock(&block_group->lock);
- block_group->disk_cache_state = BTRFS_DC_ERROR;
- spin_unlock(&block_group->lock);
- ret = 0;
-#ifdef DEBUG
- printk(KERN_ERR "btrfs: failed to write free space cache "
- "for block group %llu\n", block_group->key.objectid);
-#endif
- }
-
- iput(inode);
- return ret;
-}
-
-static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
- u64 offset)
-{
- BUG_ON(offset < bitmap_start);
- offset -= bitmap_start;
- return (unsigned long)(div_u64(offset, unit));
-}
-
-static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
-{
- return (unsigned long)(div_u64(bytes, unit));
-}
-
-static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
- u64 offset)
-{
- u64 bitmap_start;
- u64 bytes_per_bitmap;
-
- bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
- bitmap_start = offset - ctl->start;
- bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
- bitmap_start *= bytes_per_bitmap;
- bitmap_start += ctl->start;
-
- return bitmap_start;
-}
-
-static int tree_insert_offset(struct rb_root *root, u64 offset,
- struct rb_node *node, int bitmap)
-{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct btrfs_free_space *info;
-
- while (*p) {
- parent = *p;
- info = rb_entry(parent, struct btrfs_free_space, offset_index);
-
- if (offset < info->offset) {
- p = &(*p)->rb_left;
- } else if (offset > info->offset) {
- p = &(*p)->rb_right;
- } else {
- /*
- * we could have a bitmap entry and an extent entry
- * share the same offset. If this is the case, we want
- * the extent entry to always be found first if we do a
- * linear search through the tree, since we want to have
- * the quickest allocation time, and allocating from an
- * extent is faster than allocating from a bitmap. So
- * if we're inserting a bitmap and we find an entry at
- * this offset, we want to go right, or after this entry
- * logically. If we are inserting an extent and we've
- * found a bitmap, we want to go left, or before
- * logically.
- */
- if (bitmap) {
- if (info->bitmap) {
- WARN_ON_ONCE(1);
- return -EEXIST;
- }
- p = &(*p)->rb_right;
- } else {
- if (!info->bitmap) {
- WARN_ON_ONCE(1);
- return -EEXIST;
- }
- p = &(*p)->rb_left;
- }
- }
- }
-
- rb_link_node(node, parent, p);
- rb_insert_color(node, root);
-
- return 0;
-}
-
-/*
- * searches the tree for the given offset.
- *
- * fuzzy - If this is set, then we are trying to make an allocation, and we
- * just want a section that is at least 'bytes' in size and comes at or after
- * the given offset.
- */
-static struct btrfs_free_space *
-tree_search_offset(struct btrfs_free_space_ctl *ctl,
- u64 offset, int bitmap_only, int fuzzy)
-{
- struct rb_node *n = ctl->free_space_offset.rb_node;
- struct btrfs_free_space *entry, *prev = NULL;
-
- /* find entry that is closest to the 'offset' */
- while (1) {
- if (!n) {
- entry = NULL;
- break;
- }
-
- entry = rb_entry(n, struct btrfs_free_space, offset_index);
- prev = entry;
-
- if (offset < entry->offset)
- n = n->rb_left;
- else if (offset > entry->offset)
- n = n->rb_right;
- else
- break;
- }
-
- if (bitmap_only) {
- if (!entry)
- return NULL;
- if (entry->bitmap)
- return entry;
-
- /*
-		 * a bitmap entry and an extent entry may share the same offset;
-		 * in that case, the bitmap entry comes after the extent entry.
- */
- n = rb_next(n);
- if (!n)
- return NULL;
- entry = rb_entry(n, struct btrfs_free_space, offset_index);
- if (entry->offset != offset)
- return NULL;
-
- WARN_ON(!entry->bitmap);
- return entry;
- } else if (entry) {
- if (entry->bitmap) {
- /*
- * if previous extent entry covers the offset,
- * we should return it instead of the bitmap entry
- */
- n = &entry->offset_index;
- while (1) {
- n = rb_prev(n);
- if (!n)
- break;
- prev = rb_entry(n, struct btrfs_free_space,
- offset_index);
- if (!prev->bitmap) {
- if (prev->offset + prev->bytes > offset)
- entry = prev;
- break;
- }
- }
- }
- return entry;
- }
-
- if (!prev)
- return NULL;
-
- /* find last entry before the 'offset' */
- entry = prev;
- if (entry->offset > offset) {
- n = rb_prev(&entry->offset_index);
- if (n) {
- entry = rb_entry(n, struct btrfs_free_space,
- offset_index);
- BUG_ON(entry->offset > offset);
- } else {
- if (fuzzy)
- return entry;
- else
- return NULL;
- }
- }
-
- if (entry->bitmap) {
- n = &entry->offset_index;
- while (1) {
- n = rb_prev(n);
- if (!n)
- break;
- prev = rb_entry(n, struct btrfs_free_space,
- offset_index);
- if (!prev->bitmap) {
- if (prev->offset + prev->bytes > offset)
- return prev;
- break;
- }
- }
- if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
- return entry;
- } else if (entry->offset + entry->bytes > offset)
- return entry;
-
- if (!fuzzy)
- return NULL;
-
- while (1) {
- if (entry->bitmap) {
- if (entry->offset + BITS_PER_BITMAP *
- ctl->unit > offset)
- break;
- } else {
- if (entry->offset + entry->bytes > offset)
- break;
- }
-
- n = rb_next(&entry->offset_index);
- if (!n)
- return NULL;
- entry = rb_entry(n, struct btrfs_free_space, offset_index);
- }
- return entry;
-}
-
-static inline void
-__unlink_free_space(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *info)
-{
- rb_erase(&info->offset_index, &ctl->free_space_offset);
- ctl->free_extents--;
-}
-
-static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *info)
-{
- __unlink_free_space(ctl, info);
- ctl->free_space -= info->bytes;
-}
-
-static int link_free_space(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *info)
-{
- int ret = 0;
-
- BUG_ON(!info->bitmap && !info->bytes);
- ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
- &info->offset_index, (info->bitmap != NULL));
- if (ret)
- return ret;
-
- ctl->free_space += info->bytes;
- ctl->free_extents++;
- return ret;
-}
-
-static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
-{
- struct btrfs_block_group_cache *block_group = ctl->private;
- u64 max_bytes;
- u64 bitmap_bytes;
- u64 extent_bytes;
- u64 size = block_group->key.offset;
- u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
- int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
-
- BUG_ON(ctl->total_bitmaps > max_bitmaps);
-
- /*
-	 * The goal is to keep the total amount of memory used per 1GB of space
-	 * at or below 32KB, so we need to adjust how much memory we allow to be
-	 * used by extent-based free space tracking.
- */
- if (size < 1024 * 1024 * 1024)
- max_bytes = MAX_CACHE_BYTES_PER_GIG;
- else
- max_bytes = MAX_CACHE_BYTES_PER_GIG *
- div64_u64(size, 1024 * 1024 * 1024);
-
- /*
- * we want to account for 1 more bitmap than what we have so we can make
- * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
- * we add more bitmaps.
- */
- bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
-
- if (bitmap_bytes >= max_bytes) {
- ctl->extents_thresh = 0;
- return;
- }
-
- /*
-	 * we want the extent entry threshold to always be at most 1/2 the max
-	 * bytes we can have, or whatever is left after the bitmaps, whichever
-	 * is smaller.
- */
- extent_bytes = max_bytes - bitmap_bytes;
- extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
-
- ctl->extents_thresh =
- div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
-}
-
-static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *info,
- u64 offset, u64 bytes)
-{
- unsigned long start, count;
-
- start = offset_to_bit(info->offset, ctl->unit, offset);
- count = bytes_to_bits(bytes, ctl->unit);
- BUG_ON(start + count > BITS_PER_BITMAP);
-
- bitmap_clear(info->bitmap, start, count);
-
- info->bytes -= bytes;
-}
-
-static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *info, u64 offset,
- u64 bytes)
-{
- __bitmap_clear_bits(ctl, info, offset, bytes);
- ctl->free_space -= bytes;
-}
-
-static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *info, u64 offset,
- u64 bytes)
-{
- unsigned long start, count;
-
- start = offset_to_bit(info->offset, ctl->unit, offset);
- count = bytes_to_bits(bytes, ctl->unit);
- BUG_ON(start + count > BITS_PER_BITMAP);
-
- bitmap_set(info->bitmap, start, count);
-
- info->bytes += bytes;
- ctl->free_space += bytes;
-}
-
-static int search_bitmap(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *bitmap_info, u64 *offset,
- u64 *bytes)
-{
- unsigned long found_bits = 0;
- unsigned long bits, i;
- unsigned long next_zero;
-
- i = offset_to_bit(bitmap_info->offset, ctl->unit,
- max_t(u64, *offset, bitmap_info->offset));
- bits = bytes_to_bits(*bytes, ctl->unit);
-
- for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
- i < BITS_PER_BITMAP;
- i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
- next_zero = find_next_zero_bit(bitmap_info->bitmap,
- BITS_PER_BITMAP, i);
- if ((next_zero - i) >= bits) {
- found_bits = next_zero - i;
- break;
- }
- i = next_zero;
- }
-
- if (found_bits) {
- *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
- *bytes = (u64)(found_bits) * ctl->unit;
- return 0;
- }
-
- return -1;
-}
-
-static struct btrfs_free_space *
-find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
-{
- struct btrfs_free_space *entry;
- struct rb_node *node;
- int ret;
-
- if (!ctl->free_space_offset.rb_node)
- return NULL;
-
- entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
- if (!entry)
- return NULL;
-
- for (node = &entry->offset_index; node; node = rb_next(node)) {
- entry = rb_entry(node, struct btrfs_free_space, offset_index);
- if (entry->bytes < *bytes)
- continue;
-
- if (entry->bitmap) {
- ret = search_bitmap(ctl, entry, offset, bytes);
- if (!ret)
- return entry;
- continue;
- }
-
- *offset = entry->offset;
- *bytes = entry->bytes;
- return entry;
- }
-
- return NULL;
-}
-
-static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *info, u64 offset)
-{
- info->offset = offset_to_bitmap(ctl, offset);
- info->bytes = 0;
- INIT_LIST_HEAD(&info->list);
- link_free_space(ctl, info);
- ctl->total_bitmaps++;
-
- ctl->op->recalc_thresholds(ctl);
-}
-
-static void free_bitmap(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *bitmap_info)
-{
- unlink_free_space(ctl, bitmap_info);
- kfree(bitmap_info->bitmap);
- kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
- ctl->total_bitmaps--;
- ctl->op->recalc_thresholds(ctl);
-}
-
-static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *bitmap_info,
- u64 *offset, u64 *bytes)
-{
- u64 end;
- u64 search_start, search_bytes;
- int ret;
-
-again:
- end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
-
- /*
- * XXX - this can go away after a few releases.
- *
- * since the only user of btrfs_remove_free_space is the tree logging
- * stuff, and the only way to test that is under crash conditions, we
-	 * want to have this debug stuff here just in case something's not
-	 * working. Search the bitmap for the space we are trying to use to
-	 * make sure it's actually there. If it's not there then we need to stop
- * because something has gone wrong.
- */
- search_start = *offset;
- search_bytes = *bytes;
- search_bytes = min(search_bytes, end - search_start + 1);
- ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
- BUG_ON(ret < 0 || search_start != *offset);
-
- if (*offset > bitmap_info->offset && *offset + *bytes > end) {
- bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
- *bytes -= end - *offset + 1;
- *offset = end + 1;
- } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
- bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
- *bytes = 0;
- }
-
- if (*bytes) {
- struct rb_node *next = rb_next(&bitmap_info->offset_index);
- if (!bitmap_info->bytes)
- free_bitmap(ctl, bitmap_info);
-
- /*
- * no entry after this bitmap, but we still have bytes to
- * remove, so something has gone wrong.
- */
- if (!next)
- return -EINVAL;
-
- bitmap_info = rb_entry(next, struct btrfs_free_space,
- offset_index);
-
- /*
- * if the next entry isn't a bitmap we need to return to let the
- * extent stuff do its work.
- */
- if (!bitmap_info->bitmap)
- return -EAGAIN;
-
- /*
- * Ok the next item is a bitmap, but it may not actually hold
- * the information for the rest of this free space stuff, so
- * look for it, and if we don't find it return so we can try
- * everything over again.
- */
- search_start = *offset;
- search_bytes = *bytes;
- ret = search_bitmap(ctl, bitmap_info, &search_start,
- &search_bytes);
- if (ret < 0 || search_start != *offset)
- return -EAGAIN;
-
- goto again;
- } else if (!bitmap_info->bytes)
- free_bitmap(ctl, bitmap_info);
-
- return 0;
-}
-
-static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *info, u64 offset,
- u64 bytes)
-{
- u64 bytes_to_set = 0;
- u64 end;
-
- end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
-
- bytes_to_set = min(end - offset, bytes);
-
- bitmap_set_bits(ctl, info, offset, bytes_to_set);
-
- return bytes_to_set;
-
-}
-
-static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *info)
-{
- struct btrfs_block_group_cache *block_group = ctl->private;
-
- /*
- * If we are below the extents threshold then we can add this as an
- * extent, and don't have to deal with the bitmap
- */
- if (ctl->free_extents < ctl->extents_thresh) {
- /*
- * If this block group has some small extents we don't want to
- * use up all of our free slots in the cache with them, we want
-		 * to reserve them for larger extents; however, if we have plenty
-		 * of cache left then go ahead and add them, no sense in adding
- * the overhead of a bitmap if we don't have to.
- */
- if (info->bytes <= block_group->sectorsize * 4) {
- if (ctl->free_extents * 2 <= ctl->extents_thresh)
- return false;
- } else {
- return false;
- }
- }
-
- /*
- * some block groups are so tiny they can't be enveloped by a bitmap, so
- * don't even bother to create a bitmap for this
- */
- if (BITS_PER_BITMAP * block_group->sectorsize >
- block_group->key.offset)
- return false;
-
- return true;
-}
-
-static struct btrfs_free_space_op free_space_op = {
- .recalc_thresholds = recalculate_thresholds,
- .use_bitmap = use_bitmap,
-};
-
-static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *info)
-{
- struct btrfs_free_space *bitmap_info;
- struct btrfs_block_group_cache *block_group = NULL;
- int added = 0;
- u64 bytes, offset, bytes_added;
- int ret;
-
- bytes = info->bytes;
- offset = info->offset;
-
- if (!ctl->op->use_bitmap(ctl, info))
- return 0;
-
- if (ctl->op == &free_space_op)
- block_group = ctl->private;
-again:
- /*
- * Since we link bitmaps right into the cluster we need to see if we
- * have a cluster here, and if so and it has our bitmap we need to add
- * the free space to that bitmap.
- */
- if (block_group && !list_empty(&block_group->cluster_list)) {
- struct btrfs_free_cluster *cluster;
- struct rb_node *node;
- struct btrfs_free_space *entry;
-
- cluster = list_entry(block_group->cluster_list.next,
- struct btrfs_free_cluster,
- block_group_list);
- spin_lock(&cluster->lock);
- node = rb_first(&cluster->root);
- if (!node) {
- spin_unlock(&cluster->lock);
- goto no_cluster_bitmap;
- }
-
- entry = rb_entry(node, struct btrfs_free_space, offset_index);
- if (!entry->bitmap) {
- spin_unlock(&cluster->lock);
- goto no_cluster_bitmap;
- }
-
- if (entry->offset == offset_to_bitmap(ctl, offset)) {
- bytes_added = add_bytes_to_bitmap(ctl, entry,
- offset, bytes);
- bytes -= bytes_added;
- offset += bytes_added;
- }
- spin_unlock(&cluster->lock);
- if (!bytes) {
- ret = 1;
- goto out;
- }
- }
-
-no_cluster_bitmap:
- bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
- 1, 0);
- if (!bitmap_info) {
- BUG_ON(added);
- goto new_bitmap;
- }
-
- bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
- bytes -= bytes_added;
- offset += bytes_added;
- added = 0;
-
- if (!bytes) {
- ret = 1;
- goto out;
- } else
- goto again;
-
-new_bitmap:
- if (info && info->bitmap) {
- add_new_bitmap(ctl, info, offset);
- added = 1;
- info = NULL;
- goto again;
- } else {
- spin_unlock(&ctl->tree_lock);
-
- /* no pre-allocated info, allocate a new one */
- if (!info) {
- info = kmem_cache_zalloc(btrfs_free_space_cachep,
- GFP_NOFS);
- if (!info) {
- spin_lock(&ctl->tree_lock);
- ret = -ENOMEM;
- goto out;
- }
- }
-
- /* allocate the bitmap */
- info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
- spin_lock(&ctl->tree_lock);
- if (!info->bitmap) {
- ret = -ENOMEM;
- goto out;
- }
- goto again;
- }
-
-out:
- if (info) {
- if (info->bitmap)
- kfree(info->bitmap);
- kmem_cache_free(btrfs_free_space_cachep, info);
- }
-
- return ret;
-}
-
-static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *info, bool update_stat)
-{
- struct btrfs_free_space *left_info;
- struct btrfs_free_space *right_info;
- bool merged = false;
- u64 offset = info->offset;
- u64 bytes = info->bytes;
-
- /*
- * first we want to see if there is free space adjacent to the range we
- * are adding, if there is remove that struct and add a new one to
- * cover the entire range
- */
- right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
- if (right_info && rb_prev(&right_info->offset_index))
- left_info = rb_entry(rb_prev(&right_info->offset_index),
- struct btrfs_free_space, offset_index);
- else
- left_info = tree_search_offset(ctl, offset - 1, 0, 0);
-
- if (right_info && !right_info->bitmap) {
- if (update_stat)
- unlink_free_space(ctl, right_info);
- else
- __unlink_free_space(ctl, right_info);
- info->bytes += right_info->bytes;
- kmem_cache_free(btrfs_free_space_cachep, right_info);
- merged = true;
- }
-
- if (left_info && !left_info->bitmap &&
- left_info->offset + left_info->bytes == offset) {
- if (update_stat)
- unlink_free_space(ctl, left_info);
- else
- __unlink_free_space(ctl, left_info);
- info->offset = left_info->offset;
- info->bytes += left_info->bytes;
- kmem_cache_free(btrfs_free_space_cachep, left_info);
- merged = true;
- }
-
- return merged;
-}
-
-int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
- u64 offset, u64 bytes)
-{
- struct btrfs_free_space *info;
- int ret = 0;
-
- info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
- if (!info)
- return -ENOMEM;
-
- info->offset = offset;
- info->bytes = bytes;
-
- spin_lock(&ctl->tree_lock);
-
- if (try_merge_free_space(ctl, info, true))
- goto link;
-
- /*
-	 * If there was no extent directly to the left or right of this new
-	 * extent then we know we're going to have to allocate a new extent, so
-	 * before we do that, see if we need to drop this into a bitmap.
- */
- ret = insert_into_bitmap(ctl, info);
- if (ret < 0) {
- goto out;
- } else if (ret) {
- ret = 0;
- goto out;
- }
-link:
- ret = link_free_space(ctl, info);
- if (ret)
- kmem_cache_free(btrfs_free_space_cachep, info);
-out:
- spin_unlock(&ctl->tree_lock);
-
- if (ret) {
- printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
- BUG_ON(ret == -EEXIST);
- }
-
- return ret;
-}
-
-int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
- u64 offset, u64 bytes)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *info;
- struct btrfs_free_space *next_info = NULL;
- int ret = 0;
-
- spin_lock(&ctl->tree_lock);
-
-again:
- info = tree_search_offset(ctl, offset, 0, 0);
- if (!info) {
- /*
- * oops didn't find an extent that matched the space we wanted
- * to remove, look for a bitmap instead
- */
- info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
- 1, 0);
- if (!info) {
- /* the tree logging code might be calling us before we
- * have fully loaded the free space rbtree for this
- * block group. So it is possible the entry won't
- * be in the rbtree yet at all. The caching code
- * will make sure not to put it in the rbtree if
- * the logging code has pinned it.
- */
- goto out_lock;
- }
- }
-
- if (info->bytes < bytes && rb_next(&info->offset_index)) {
- u64 end;
- next_info = rb_entry(rb_next(&info->offset_index),
- struct btrfs_free_space,
- offset_index);
-
- if (next_info->bitmap)
- end = next_info->offset +
- BITS_PER_BITMAP * ctl->unit - 1;
- else
- end = next_info->offset + next_info->bytes;
-
- if (next_info->bytes < bytes ||
- next_info->offset > offset || offset > end) {
- printk(KERN_CRIT "Found free space at %llu, size %llu,"
- " trying to use %llu\n",
- (unsigned long long)info->offset,
- (unsigned long long)info->bytes,
- (unsigned long long)bytes);
- WARN_ON(1);
- ret = -EINVAL;
- goto out_lock;
- }
-
- info = next_info;
- }
-
- if (info->bytes == bytes) {
- unlink_free_space(ctl, info);
- if (info->bitmap) {
- kfree(info->bitmap);
- ctl->total_bitmaps--;
- }
- kmem_cache_free(btrfs_free_space_cachep, info);
- ret = 0;
- goto out_lock;
- }
-
- if (!info->bitmap && info->offset == offset) {
- unlink_free_space(ctl, info);
- info->offset += bytes;
- info->bytes -= bytes;
- ret = link_free_space(ctl, info);
- WARN_ON(ret);
- goto out_lock;
- }
-
- if (!info->bitmap && info->offset <= offset &&
- info->offset + info->bytes >= offset + bytes) {
- u64 old_start = info->offset;
- /*
- * we're freeing space in the middle of the info,
- * this can happen during tree log replay
- *
- * first unlink the old info and then
- * insert it again after the hole we're creating
- */
- unlink_free_space(ctl, info);
- if (offset + bytes < info->offset + info->bytes) {
- u64 old_end = info->offset + info->bytes;
-
- info->offset = offset + bytes;
- info->bytes = old_end - info->offset;
- ret = link_free_space(ctl, info);
- WARN_ON(ret);
- if (ret)
- goto out_lock;
- } else {
- /* the hole we're creating ends at the end
- * of the info struct, just free the info
- */
- kmem_cache_free(btrfs_free_space_cachep, info);
- }
- spin_unlock(&ctl->tree_lock);
-
- /* step two, insert a new info struct to cover
- * anything before the hole
- */
- ret = btrfs_add_free_space(block_group, old_start,
- offset - old_start);
- WARN_ON(ret); /* -ENOMEM */
- goto out;
- }
-
- ret = remove_from_bitmap(ctl, info, &offset, &bytes);
- if (ret == -EAGAIN)
- goto again;
- BUG_ON(ret); /* logic error */
-out_lock:
- spin_unlock(&ctl->tree_lock);
-out:
- return ret;
-}
-
-void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
- u64 bytes)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *info;
- struct rb_node *n;
- int count = 0;
-
- for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
- info = rb_entry(n, struct btrfs_free_space, offset_index);
- if (info->bytes >= bytes)
- count++;
- printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
- (unsigned long long)info->offset,
- (unsigned long long)info->bytes,
- (info->bitmap) ? "yes" : "no");
- }
- printk(KERN_INFO "block group has cluster?: %s\n",
- list_empty(&block_group->cluster_list) ? "no" : "yes");
- printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
- "\n", count);
-}
-
-void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
-
- spin_lock_init(&ctl->tree_lock);
- ctl->unit = block_group->sectorsize;
- ctl->start = block_group->key.objectid;
- ctl->private = block_group;
- ctl->op = &free_space_op;
-
- /*
- * we only want to have 32k of ram per block group for keeping
- * track of free space, and if we pass 1/2 of that we want to
- * start converting things over to using bitmaps
- */
- ctl->extents_thresh = ((1024 * 32) / 2) /
- sizeof(struct btrfs_free_space);
-}
-
-/*
- * for a given cluster, put all of its extents back into the free
- * space cache. If the block group passed doesn't match the block group
- * pointed to by the cluster, someone else raced in and freed the
- * cluster already. In that case, we just return without changing anything
- */
-static int
-__btrfs_return_cluster_to_free_space(
- struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *entry;
- struct rb_node *node;
-
- spin_lock(&cluster->lock);
- if (cluster->block_group != block_group)
- goto out;
-
- cluster->block_group = NULL;
- cluster->window_start = 0;
- list_del_init(&cluster->block_group_list);
-
- node = rb_first(&cluster->root);
- while (node) {
- bool bitmap;
-
- entry = rb_entry(node, struct btrfs_free_space, offset_index);
- node = rb_next(&entry->offset_index);
- rb_erase(&entry->offset_index, &cluster->root);
-
- bitmap = (entry->bitmap != NULL);
- if (!bitmap)
- try_merge_free_space(ctl, entry, false);
- tree_insert_offset(&ctl->free_space_offset,
- entry->offset, &entry->offset_index, bitmap);
- }
- cluster->root = RB_ROOT;
-
-out:
- spin_unlock(&cluster->lock);
- btrfs_put_block_group(block_group);
- return 0;
-}
-
-void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
-{
- struct btrfs_free_space *info;
- struct rb_node *node;
-
- while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
- info = rb_entry(node, struct btrfs_free_space, offset_index);
- if (!info->bitmap) {
- unlink_free_space(ctl, info);
- kmem_cache_free(btrfs_free_space_cachep, info);
- } else {
- free_bitmap(ctl, info);
- }
- if (need_resched()) {
- spin_unlock(&ctl->tree_lock);
- cond_resched();
- spin_lock(&ctl->tree_lock);
- }
- }
-}
-
-void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
-{
- spin_lock(&ctl->tree_lock);
- __btrfs_remove_free_space_cache_locked(ctl);
- spin_unlock(&ctl->tree_lock);
-}
-
-void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_cluster *cluster;
- struct list_head *head;
-
- spin_lock(&ctl->tree_lock);
- while ((head = block_group->cluster_list.next) !=
- &block_group->cluster_list) {
- cluster = list_entry(head, struct btrfs_free_cluster,
- block_group_list);
-
- WARN_ON(cluster->block_group != block_group);
- __btrfs_return_cluster_to_free_space(block_group, cluster);
- if (need_resched()) {
- spin_unlock(&ctl->tree_lock);
- cond_resched();
- spin_lock(&ctl->tree_lock);
- }
- }
- __btrfs_remove_free_space_cache_locked(ctl);
- spin_unlock(&ctl->tree_lock);
-
-}
-
-u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
- u64 offset, u64 bytes, u64 empty_size)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *entry = NULL;
- u64 bytes_search = bytes + empty_size;
- u64 ret = 0;
-
- spin_lock(&ctl->tree_lock);
- entry = find_free_space(ctl, &offset, &bytes_search);
- if (!entry)
- goto out;
-
- ret = offset;
- if (entry->bitmap) {
- bitmap_clear_bits(ctl, entry, offset, bytes);
- if (!entry->bytes)
- free_bitmap(ctl, entry);
- } else {
- unlink_free_space(ctl, entry);
- entry->offset += bytes;
- entry->bytes -= bytes;
- if (!entry->bytes)
- kmem_cache_free(btrfs_free_space_cachep, entry);
- else
- link_free_space(ctl, entry);
- }
-
-out:
- spin_unlock(&ctl->tree_lock);
-
- return ret;
-}
-
-/*
- * given a cluster, put all of its extents back into the free space
- * cache. If a block group is passed, this function will only free
- * a cluster that belongs to the passed block group.
- *
- * Otherwise, it'll get a reference on the block group pointed to by the
- * cluster and remove the cluster from it.
- */
-int btrfs_return_cluster_to_free_space(
- struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster)
-{
- struct btrfs_free_space_ctl *ctl;
- int ret;
-
- /* first, get a safe pointer to the block group */
- spin_lock(&cluster->lock);
- if (!block_group) {
- block_group = cluster->block_group;
- if (!block_group) {
- spin_unlock(&cluster->lock);
- return 0;
- }
- } else if (cluster->block_group != block_group) {
- /* someone else has already freed it don't redo their work */
- spin_unlock(&cluster->lock);
- return 0;
- }
- atomic_inc(&block_group->count);
- spin_unlock(&cluster->lock);
-
- ctl = block_group->free_space_ctl;
-
- /* now return any extents the cluster had on it */
- spin_lock(&ctl->tree_lock);
- ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
- spin_unlock(&ctl->tree_lock);
-
- /* finally drop our ref */
- btrfs_put_block_group(block_group);
- return ret;
-}
-
-static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster,
- struct btrfs_free_space *entry,
- u64 bytes, u64 min_start)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- int err;
- u64 search_start = cluster->window_start;
- u64 search_bytes = bytes;
- u64 ret = 0;
-
- search_start = min_start;
- search_bytes = bytes;
-
- err = search_bitmap(ctl, entry, &search_start, &search_bytes);
- if (err)
- return 0;
-
- ret = search_start;
- __bitmap_clear_bits(ctl, entry, ret, bytes);
-
- return ret;
-}
-
-/*
- * given a cluster, try to allocate 'bytes' from it, returns 0
- * if it couldn't find anything suitably large, or a logical disk offset
- * if things worked out
- */
-u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster, u64 bytes,
- u64 min_start)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *entry = NULL;
- struct rb_node *node;
- u64 ret = 0;
-
- spin_lock(&cluster->lock);
- if (bytes > cluster->max_size)
- goto out;
-
- if (cluster->block_group != block_group)
- goto out;
-
- node = rb_first(&cluster->root);
- if (!node)
- goto out;
-
- entry = rb_entry(node, struct btrfs_free_space, offset_index);
- while(1) {
- if (entry->bytes < bytes ||
- (!entry->bitmap && entry->offset < min_start)) {
- node = rb_next(&entry->offset_index);
- if (!node)
- break;
- entry = rb_entry(node, struct btrfs_free_space,
- offset_index);
- continue;
- }
-
- if (entry->bitmap) {
- ret = btrfs_alloc_from_bitmap(block_group,
- cluster, entry, bytes,
- cluster->window_start);
- if (ret == 0) {
- node = rb_next(&entry->offset_index);
- if (!node)
- break;
- entry = rb_entry(node, struct btrfs_free_space,
- offset_index);
- continue;
- }
- cluster->window_start += bytes;
- } else {
- ret = entry->offset;
-
- entry->offset += bytes;
- entry->bytes -= bytes;
- }
-
- if (entry->bytes == 0)
- rb_erase(&entry->offset_index, &cluster->root);
- break;
- }
-out:
- spin_unlock(&cluster->lock);
-
- if (!ret)
- return 0;
-
- spin_lock(&ctl->tree_lock);
-
- ctl->free_space -= bytes;
- if (entry->bytes == 0) {
- ctl->free_extents--;
- if (entry->bitmap) {
- kfree(entry->bitmap);
- ctl->total_bitmaps--;
- ctl->op->recalc_thresholds(ctl);
- }
- kmem_cache_free(btrfs_free_space_cachep, entry);
- }
-
- spin_unlock(&ctl->tree_lock);
-
- return ret;
-}
-
-static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
- struct btrfs_free_space *entry,
- struct btrfs_free_cluster *cluster,
- u64 offset, u64 bytes,
- u64 cont1_bytes, u64 min_bytes)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- unsigned long next_zero;
- unsigned long i;
- unsigned long want_bits;
- unsigned long min_bits;
- unsigned long found_bits;
- unsigned long start = 0;
- unsigned long total_found = 0;
- int ret;
-
- i = offset_to_bit(entry->offset, block_group->sectorsize,
- max_t(u64, offset, entry->offset));
- want_bits = bytes_to_bits(bytes, block_group->sectorsize);
- min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
-
-again:
- found_bits = 0;
- for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
- i < BITS_PER_BITMAP;
- i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
- next_zero = find_next_zero_bit(entry->bitmap,
- BITS_PER_BITMAP, i);
- if (next_zero - i >= min_bits) {
- found_bits = next_zero - i;
- break;
- }
- i = next_zero;
- }
-
- if (!found_bits)
- return -ENOSPC;
-
- if (!total_found) {
- start = i;
- cluster->max_size = 0;
- }
-
- total_found += found_bits;
-
- if (cluster->max_size < found_bits * block_group->sectorsize)
- cluster->max_size = found_bits * block_group->sectorsize;
-
- if (total_found < want_bits || cluster->max_size < cont1_bytes) {
- i = next_zero + 1;
- goto again;
- }
-
- cluster->window_start = start * block_group->sectorsize +
- entry->offset;
- rb_erase(&entry->offset_index, &ctl->free_space_offset);
- ret = tree_insert_offset(&cluster->root, entry->offset,
- &entry->offset_index, 1);
- BUG_ON(ret); /* -EEXIST; Logic error */
-
- trace_btrfs_setup_cluster(block_group, cluster,
- total_found * block_group->sectorsize, 1);
- return 0;
-}
-
-/*
- * This searches the block group for just extents to fill the cluster with.
- * Try to find a cluster with at least 'bytes' total bytes, at least one
- * extent of cont1_bytes, and other extents of at least min_bytes.
- */
-static noinline int
-setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster,
- struct list_head *bitmaps, u64 offset, u64 bytes,
- u64 cont1_bytes, u64 min_bytes)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *first = NULL;
- struct btrfs_free_space *entry = NULL;
- struct btrfs_free_space *last;
- struct rb_node *node;
- u64 window_start;
- u64 window_free;
- u64 max_extent;
- u64 total_size = 0;
-
- entry = tree_search_offset(ctl, offset, 0, 1);
- if (!entry)
- return -ENOSPC;
-
- /*
- * We don't want bitmaps, so just move along until we find a normal
- * extent entry.
- */
- while (entry->bitmap || entry->bytes < min_bytes) {
- if (entry->bitmap && list_empty(&entry->list))
- list_add_tail(&entry->list, bitmaps);
- node = rb_next(&entry->offset_index);
- if (!node)
- return -ENOSPC;
- entry = rb_entry(node, struct btrfs_free_space, offset_index);
- }
-
- window_start = entry->offset;
- window_free = entry->bytes;
- max_extent = entry->bytes;
- first = entry;
- last = entry;
-
- for (node = rb_next(&entry->offset_index); node;
- node = rb_next(&entry->offset_index)) {
- entry = rb_entry(node, struct btrfs_free_space, offset_index);
-
- if (entry->bitmap) {
- if (list_empty(&entry->list))
- list_add_tail(&entry->list, bitmaps);
- continue;
- }
-
- if (entry->bytes < min_bytes)
- continue;
-
- last = entry;
- window_free += entry->bytes;
- if (entry->bytes > max_extent)
- max_extent = entry->bytes;
- }
-
- if (window_free < bytes || max_extent < cont1_bytes)
- return -ENOSPC;
-
- cluster->window_start = first->offset;
-
- node = &first->offset_index;
-
- /*
- * now we've found our entries, pull them out of the free space
- * cache and put them into the cluster rbtree
- */
- do {
- int ret;
-
- entry = rb_entry(node, struct btrfs_free_space, offset_index);
- node = rb_next(&entry->offset_index);
- if (entry->bitmap || entry->bytes < min_bytes)
- continue;
-
- rb_erase(&entry->offset_index, &ctl->free_space_offset);
- ret = tree_insert_offset(&cluster->root, entry->offset,
- &entry->offset_index, 0);
- total_size += entry->bytes;
- BUG_ON(ret); /* -EEXIST; Logic error */
- } while (node && entry != last);
-
- cluster->max_size = max_extent;
- trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
- return 0;
-}
-
-/*
- * This specifically looks for bitmaps that may work in the cluster; we assume
- * that we have already failed to find extents that will work.
- */
-static noinline int
-setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster,
- struct list_head *bitmaps, u64 offset, u64 bytes,
- u64 cont1_bytes, u64 min_bytes)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *entry;
- int ret = -ENOSPC;
- u64 bitmap_offset = offset_to_bitmap(ctl, offset);
-
- if (ctl->total_bitmaps == 0)
- return -ENOSPC;
-
- /*
- * The bitmap that covers offset won't be in the list unless offset
- * is just its start offset.
- */
- entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
- if (entry->offset != bitmap_offset) {
- entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
- if (entry && list_empty(&entry->list))
- list_add(&entry->list, bitmaps);
- }
-
- list_for_each_entry(entry, bitmaps, list) {
- if (entry->bytes < bytes)
- continue;
- ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
- bytes, cont1_bytes, min_bytes);
- if (!ret)
- return 0;
- }
-
- /*
- * The bitmaps list has all the bitmaps that record free space
- * starting after offset, so no more search is required.
- */
- return -ENOSPC;
-}
-
-/*
- * here we try to find a cluster of blocks in a block group. The goal
- * is to find at least bytes+empty_size.
- * We might not find them all in one contiguous area.
- *
- * returns zero and sets up cluster if things worked out, otherwise
- * it returns -ENOSPC.
- */
-int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster,
- u64 offset, u64 bytes, u64 empty_size)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *entry, *tmp;
- LIST_HEAD(bitmaps);
- u64 min_bytes;
- u64 cont1_bytes;
- int ret;
-
- /*
- * Choose the minimum extent size we'll require for this
- * cluster. For SSD_SPREAD, don't allow any fragmentation.
-	 * For metadata, allow allocations with smaller extents. For
- * data, keep it dense.
- */
- if (btrfs_test_opt(root, SSD_SPREAD)) {
- cont1_bytes = min_bytes = bytes + empty_size;
- } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
- cont1_bytes = bytes;
- min_bytes = block_group->sectorsize;
- } else {
- cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
- min_bytes = block_group->sectorsize;
- }
-
- spin_lock(&ctl->tree_lock);
-
- /*
- * If we know we don't have enough space to make a cluster don't even
- * bother doing all the work to try and find one.
- */
- if (ctl->free_space < bytes) {
- spin_unlock(&ctl->tree_lock);
- return -ENOSPC;
- }
-
- spin_lock(&cluster->lock);
-
- /* someone already found a cluster, hooray */
- if (cluster->block_group) {
- ret = 0;
- goto out;
- }
-
- trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
- min_bytes);
-
- INIT_LIST_HEAD(&bitmaps);
- ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
- bytes + empty_size,
- cont1_bytes, min_bytes);
- if (ret)
- ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
- offset, bytes + empty_size,
- cont1_bytes, min_bytes);
-
- /* Clear our temporary list */
- list_for_each_entry_safe(entry, tmp, &bitmaps, list)
- list_del_init(&entry->list);
-
- if (!ret) {
- atomic_inc(&block_group->count);
- list_add_tail(&cluster->block_group_list,
- &block_group->cluster_list);
- cluster->block_group = block_group;
- } else {
- trace_btrfs_failed_cluster_setup(block_group);
- }
-out:
- spin_unlock(&cluster->lock);
- spin_unlock(&ctl->tree_lock);
-
- return ret;
-}
-
-/*
- * simple code to zero out a cluster
- */
-void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
-{
- spin_lock_init(&cluster->lock);
- spin_lock_init(&cluster->refill_lock);
- cluster->root = RB_ROOT;
- cluster->max_size = 0;
- INIT_LIST_HEAD(&cluster->block_group_list);
- cluster->block_group = NULL;
-}
-
-static int do_trimming(struct btrfs_block_group_cache *block_group,
- u64 *total_trimmed, u64 start, u64 bytes,
- u64 reserved_start, u64 reserved_bytes)
-{
- struct btrfs_space_info *space_info = block_group->space_info;
- struct btrfs_fs_info *fs_info = block_group->fs_info;
- int ret;
- int update = 0;
- u64 trimmed = 0;
-
- spin_lock(&space_info->lock);
- spin_lock(&block_group->lock);
- if (!block_group->ro) {
- block_group->reserved += reserved_bytes;
- space_info->bytes_reserved += reserved_bytes;
- update = 1;
- }
- spin_unlock(&block_group->lock);
- spin_unlock(&space_info->lock);
-
- ret = btrfs_error_discard_extent(fs_info->extent_root,
- start, bytes, &trimmed);
- if (!ret)
- *total_trimmed += trimmed;
-
- btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
-
- if (update) {
- spin_lock(&space_info->lock);
- spin_lock(&block_group->lock);
- if (block_group->ro)
- space_info->bytes_readonly += reserved_bytes;
- block_group->reserved -= reserved_bytes;
- space_info->bytes_reserved -= reserved_bytes;
- spin_unlock(&space_info->lock);
- spin_unlock(&block_group->lock);
- }
-
- return ret;
-}
-
-static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
- u64 *total_trimmed, u64 start, u64 end, u64 minlen)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *entry;
- struct rb_node *node;
- int ret = 0;
- u64 extent_start;
- u64 extent_bytes;
- u64 bytes;
-
- while (start < end) {
- spin_lock(&ctl->tree_lock);
-
- if (ctl->free_space < minlen) {
- spin_unlock(&ctl->tree_lock);
- break;
- }
-
- entry = tree_search_offset(ctl, start, 0, 1);
- if (!entry) {
- spin_unlock(&ctl->tree_lock);
- break;
- }
-
- /* skip bitmaps */
- while (entry->bitmap) {
- node = rb_next(&entry->offset_index);
- if (!node) {
- spin_unlock(&ctl->tree_lock);
- goto out;
- }
- entry = rb_entry(node, struct btrfs_free_space,
- offset_index);
- }
-
- if (entry->offset >= end) {
- spin_unlock(&ctl->tree_lock);
- break;
- }
-
- extent_start = entry->offset;
- extent_bytes = entry->bytes;
- start = max(start, extent_start);
- bytes = min(extent_start + extent_bytes, end) - start;
- if (bytes < minlen) {
- spin_unlock(&ctl->tree_lock);
- goto next;
- }
-
- unlink_free_space(ctl, entry);
- kmem_cache_free(btrfs_free_space_cachep, entry);
-
- spin_unlock(&ctl->tree_lock);
-
- ret = do_trimming(block_group, total_trimmed, start, bytes,
- extent_start, extent_bytes);
- if (ret)
- break;
-next:
- start += bytes;
-
- if (fatal_signal_pending(current)) {
- ret = -ERESTARTSYS;
- break;
- }
-
- cond_resched();
- }
-out:
- return ret;
-}
-
-static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
- u64 *total_trimmed, u64 start, u64 end, u64 minlen)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *entry;
- int ret = 0;
- int ret2;
- u64 bytes;
- u64 offset = offset_to_bitmap(ctl, start);
-
- while (offset < end) {
- bool next_bitmap = false;
-
- spin_lock(&ctl->tree_lock);
-
- if (ctl->free_space < minlen) {
- spin_unlock(&ctl->tree_lock);
- break;
- }
-
- entry = tree_search_offset(ctl, offset, 1, 0);
- if (!entry) {
- spin_unlock(&ctl->tree_lock);
- next_bitmap = true;
- goto next;
- }
-
- bytes = minlen;
- ret2 = search_bitmap(ctl, entry, &start, &bytes);
- if (ret2 || start >= end) {
- spin_unlock(&ctl->tree_lock);
- next_bitmap = true;
- goto next;
- }
-
- bytes = min(bytes, end - start);
- if (bytes < minlen) {
- spin_unlock(&ctl->tree_lock);
- goto next;
- }
-
- bitmap_clear_bits(ctl, entry, start, bytes);
- if (entry->bytes == 0)
- free_bitmap(ctl, entry);
-
- spin_unlock(&ctl->tree_lock);
-
- ret = do_trimming(block_group, total_trimmed, start, bytes,
- start, bytes);
- if (ret)
- break;
-next:
- if (next_bitmap) {
- offset += BITS_PER_BITMAP * ctl->unit;
- } else {
- start += bytes;
- if (start >= offset + BITS_PER_BITMAP * ctl->unit)
- offset += BITS_PER_BITMAP * ctl->unit;
- }
-
- if (fatal_signal_pending(current)) {
- ret = -ERESTARTSYS;
- break;
- }
-
- cond_resched();
- }
-
- return ret;
-}
-
-int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
- u64 *trimmed, u64 start, u64 end, u64 minlen)
-{
- int ret;
-
- *trimmed = 0;
-
- ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
- if (ret)
- return ret;
-
- ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
-
- return ret;
-}
-
-/*
- * Find the left-most item in the cache tree, and then return the
- * smallest inode number in the item.
- *
- * Note: the returned inode number may not be the smallest one in
- * the tree, if the left-most item is a bitmap.
- */
-u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
-{
- struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
- struct btrfs_free_space *entry = NULL;
- u64 ino = 0;
-
- spin_lock(&ctl->tree_lock);
-
- if (RB_EMPTY_ROOT(&ctl->free_space_offset))
- goto out;
-
- entry = rb_entry(rb_first(&ctl->free_space_offset),
- struct btrfs_free_space, offset_index);
-
- if (!entry->bitmap) {
- ino = entry->offset;
-
- unlink_free_space(ctl, entry);
- entry->offset++;
- entry->bytes--;
- if (!entry->bytes)
- kmem_cache_free(btrfs_free_space_cachep, entry);
- else
- link_free_space(ctl, entry);
- } else {
- u64 offset = 0;
- u64 count = 1;
- int ret;
-
- ret = search_bitmap(ctl, entry, &offset, &count);
- /* Logic error; should be empty if it can't find anything */
- BUG_ON(ret);
-
- ino = offset;
- bitmap_clear_bits(ctl, entry, offset, 1);
- if (entry->bytes == 0)
- free_bitmap(ctl, entry);
- }
-out:
- spin_unlock(&ctl->tree_lock);
-
- return ino;
-}
-
-struct inode *lookup_free_ino_inode(struct btrfs_root *root,
- struct btrfs_path *path)
-{
- struct inode *inode = NULL;
-
- spin_lock(&root->cache_lock);
- if (root->cache_inode)
- inode = igrab(root->cache_inode);
- spin_unlock(&root->cache_lock);
- if (inode)
- return inode;
-
- inode = __lookup_free_space_inode(root, path, 0);
- if (IS_ERR(inode))
- return inode;
-
- spin_lock(&root->cache_lock);
- if (!btrfs_fs_closing(root->fs_info))
- root->cache_inode = igrab(inode);
- spin_unlock(&root->cache_lock);
-
- return inode;
-}
-
-int create_free_ino_inode(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
- struct btrfs_path *path)
-{
- return __create_free_space_inode(root, trans, path,
- BTRFS_FREE_INO_OBJECTID, 0);
-}
-
-int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
-{
- struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
- struct btrfs_path *path;
- struct inode *inode;
- int ret = 0;
- u64 root_gen = btrfs_root_generation(&root->root_item);
-
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
- return 0;
-
- /*
- * If we're unmounting then just return, since this does a search on the
- * normal root and not the commit root and we could deadlock.
- */
- if (btrfs_fs_closing(fs_info))
- return 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return 0;
-
- inode = lookup_free_ino_inode(root, path);
- if (IS_ERR(inode))
- goto out;
-
- if (root_gen != BTRFS_I(inode)->generation)
- goto out_put;
-
- ret = __load_free_space_cache(root, inode, ctl, path, 0);
-
- if (ret < 0)
- printk(KERN_ERR "btrfs: failed to load free ino cache for "
- "root %llu\n", root->root_key.objectid);
-out_put:
- iput(inode);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_write_out_ino_cache(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
- struct btrfs_path *path)
-{
- struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
- struct inode *inode;
- int ret;
-
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
- return 0;
-
- inode = lookup_free_ino_inode(root, path);
- if (IS_ERR(inode))
- return 0;
-
- ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
- if (ret) {
- btrfs_delalloc_release_metadata(inode, inode->i_size);
-#ifdef DEBUG
- printk(KERN_ERR "btrfs: failed to write free ino cache "
- "for root %llu\n", root->root_key.objectid);
-#endif
- }
-
- iput(inode);
- return ret;
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/free-space-cache.h b/ANDROID_3.4.5/fs/btrfs/free-space-cache.h
deleted file mode 100644
index 8f2613f7..00000000
--- a/ANDROID_3.4.5/fs/btrfs/free-space-cache.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (C) 2009 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __BTRFS_FREE_SPACE_CACHE
-#define __BTRFS_FREE_SPACE_CACHE
-
-struct btrfs_free_space {
- struct rb_node offset_index;
- u64 offset;
- u64 bytes;
- unsigned long *bitmap;
- struct list_head list;
-};
-
-struct btrfs_free_space_ctl {
- spinlock_t tree_lock;
- struct rb_root free_space_offset;
- u64 free_space;
- int extents_thresh;
- int free_extents;
- int total_bitmaps;
- int unit;
- u64 start;
- struct btrfs_free_space_op *op;
- void *private;
-};
-
-struct btrfs_free_space_op {
- void (*recalc_thresholds)(struct btrfs_free_space_ctl *ctl);
- bool (*use_bitmap)(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *info);
-};
-
-struct inode *lookup_free_space_inode(struct btrfs_root *root,
- struct btrfs_block_group_cache
- *block_group, struct btrfs_path *path);
-int create_free_space_inode(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
- struct btrfs_path *path);
-
-int btrfs_truncate_free_space_cache(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct inode *inode);
-int load_free_space_cache(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *block_group);
-int btrfs_write_out_cache(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
- struct btrfs_path *path);
-
-struct inode *lookup_free_ino_inode(struct btrfs_root *root,
- struct btrfs_path *path);
-int create_free_ino_inode(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
- struct btrfs_path *path);
-int load_free_ino_cache(struct btrfs_fs_info *fs_info,
- struct btrfs_root *root);
-int btrfs_write_out_ino_cache(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
- struct btrfs_path *path);
-
-void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group);
-int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
- u64 bytenr, u64 size);
-static inline int
-btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
- u64 bytenr, u64 size)
-{
- return __btrfs_add_free_space(block_group->free_space_ctl,
- bytenr, size);
-}
-int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
- u64 bytenr, u64 size);
-void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
-void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
- *block_group);
-u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
- u64 offset, u64 bytes, u64 empty_size);
-u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
-void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
- u64 bytes);
-int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster,
- u64 offset, u64 bytes, u64 empty_size);
-void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
-u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster, u64 bytes,
- u64 min_start);
-int btrfs_return_cluster_to_free_space(
- struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster);
-int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
- u64 *trimmed, u64 start, u64 end, u64 minlen);
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/hash.h b/ANDROID_3.4.5/fs/btrfs/hash.h
deleted file mode 100644
index db2ff977..00000000
--- a/ANDROID_3.4.5/fs/btrfs/hash.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __HASH__
-#define __HASH__
-
-#include <linux/crc32c.h>
-static inline u64 btrfs_name_hash(const char *name, int len)
-{
- return crc32c((u32)~1, name, len);
-}
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/inode-item.c b/ANDROID_3.4.5/fs/btrfs/inode-item.c
deleted file mode 100644
index a13cf1a9..00000000
--- a/ANDROID_3.4.5/fs/btrfs/inode-item.c
+++ /dev/null
@@ -1,236 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include "ctree.h"
-#include "disk-io.h"
-#include "transaction.h"
-#include "print-tree.h"
-
-static int find_name_in_backref(struct btrfs_path *path, const char *name,
- int name_len, struct btrfs_inode_ref **ref_ret)
-{
- struct extent_buffer *leaf;
- struct btrfs_inode_ref *ref;
- unsigned long ptr;
- unsigned long name_ptr;
- u32 item_size;
- u32 cur_offset = 0;
- int len;
-
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
- ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
- while (cur_offset < item_size) {
- ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
- len = btrfs_inode_ref_name_len(leaf, ref);
- name_ptr = (unsigned long)(ref + 1);
- cur_offset += len + sizeof(*ref);
- if (len != name_len)
- continue;
- if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) {
- *ref_ret = ref;
- return 1;
- }
- }
- return 0;
-}
-
-struct btrfs_inode_ref *
-btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- const char *name, int name_len,
- u64 inode_objectid, u64 ref_objectid, int mod)
-{
- struct btrfs_key key;
- struct btrfs_inode_ref *ref;
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
- int ret;
-
- key.objectid = inode_objectid;
- key.type = BTRFS_INODE_REF_KEY;
- key.offset = ref_objectid;
-
- ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
- return NULL;
- if (!find_name_in_backref(path, name, name_len, &ref))
- return NULL;
- return ref;
-}
-
-int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- const char *name, int name_len,
- u64 inode_objectid, u64 ref_objectid, u64 *index)
-{
- struct btrfs_path *path;
- struct btrfs_key key;
- struct btrfs_inode_ref *ref;
- struct extent_buffer *leaf;
- unsigned long ptr;
- unsigned long item_start;
- u32 item_size;
- u32 sub_item_len;
- int ret;
- int del_len = name_len + sizeof(*ref);
-
- key.objectid = inode_objectid;
- key.offset = ref_objectid;
- btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- path->leave_spinning = 1;
-
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret > 0) {
- ret = -ENOENT;
- goto out;
- } else if (ret < 0) {
- goto out;
- }
- if (!find_name_in_backref(path, name, name_len, &ref)) {
- ret = -ENOENT;
- goto out;
- }
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-
- if (index)
- *index = btrfs_inode_ref_index(leaf, ref);
-
- if (del_len == item_size) {
- ret = btrfs_del_item(trans, root, path);
- goto out;
- }
- ptr = (unsigned long)ref;
- sub_item_len = name_len + sizeof(*ref);
- item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
- memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
- item_size - (ptr + sub_item_len - item_start));
- btrfs_truncate_item(trans, root, path,
- item_size - sub_item_len, 1);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-/* Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path */
-int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- const char *name, int name_len,
- u64 inode_objectid, u64 ref_objectid, u64 index)
-{
- struct btrfs_path *path;
- struct btrfs_key key;
- struct btrfs_inode_ref *ref;
- unsigned long ptr;
- int ret;
- int ins_len = name_len + sizeof(*ref);
-
- key.objectid = inode_objectid;
- key.offset = ref_objectid;
- btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- path->leave_spinning = 1;
- ret = btrfs_insert_empty_item(trans, root, path, &key,
- ins_len);
- if (ret == -EEXIST) {
- u32 old_size;
-
- if (find_name_in_backref(path, name, name_len, &ref))
- goto out;
-
- old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
- btrfs_extend_item(trans, root, path, ins_len);
- ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_inode_ref);
- ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
- btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
- btrfs_set_inode_ref_index(path->nodes[0], ref, index);
- ptr = (unsigned long)(ref + 1);
- ret = 0;
- } else if (ret < 0) {
- if (ret == -EOVERFLOW)
- ret = -EMLINK;
- goto out;
- } else {
- ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_inode_ref);
- btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
- btrfs_set_inode_ref_index(path->nodes[0], ref, index);
- ptr = (unsigned long)(ref + 1);
- }
- write_extent_buffer(path->nodes[0], name, ptr, name_len);
- btrfs_mark_buffer_dirty(path->nodes[0]);
-
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, u64 objectid)
-{
- struct btrfs_key key;
- int ret;
- key.objectid = objectid;
- btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
- key.offset = 0;
-
- ret = btrfs_insert_empty_item(trans, root, path, &key,
- sizeof(struct btrfs_inode_item));
- return ret;
-}
-
-int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path,
- struct btrfs_key *location, int mod)
-{
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
- int ret;
- int slot;
- struct extent_buffer *leaf;
- struct btrfs_key found_key;
-
- ret = btrfs_search_slot(trans, root, location, path, ins_len, cow);
- if (ret > 0 && btrfs_key_type(location) == BTRFS_ROOT_ITEM_KEY &&
- location->offset == (u64)-1 && path->slots[0] != 0) {
- slot = path->slots[0] - 1;
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, slot);
- if (found_key.objectid == location->objectid &&
- btrfs_key_type(&found_key) == btrfs_key_type(location)) {
- path->slots[0]--;
- return 0;
- }
- }
- return ret;
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/inode-map.c b/ANDROID_3.4.5/fs/btrfs/inode-map.c
deleted file mode 100644
index b1a1c929..00000000
--- a/ANDROID_3.4.5/fs/btrfs/inode-map.c
+++ /dev/null
@@ -1,576 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/delay.h>
-#include <linux/kthread.h>
-#include <linux/pagemap.h>
-
-#include "ctree.h"
-#include "disk-io.h"
-#include "free-space-cache.h"
-#include "inode-map.h"
-#include "transaction.h"
-
-static int caching_kthread(void *data)
-{
- struct btrfs_root *root = data;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
- struct btrfs_key key;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- u64 last = (u64)-1;
- int slot;
- int ret;
-
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
- return 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- /* Since the commit root is read-only, we can safely skip locking. */
- path->skip_locking = 1;
- path->search_commit_root = 1;
- path->reada = 2;
-
- key.objectid = BTRFS_FIRST_FREE_OBJECTID;
- key.offset = 0;
- key.type = BTRFS_INODE_ITEM_KEY;
-again:
- /* need to make sure the commit_root doesn't disappear */
- mutex_lock(&root->fs_commit_mutex);
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
-
- while (1) {
- if (btrfs_fs_closing(fs_info))
- goto out;
-
- leaf = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- goto out;
- else if (ret > 0)
- break;
-
- if (need_resched() ||
- btrfs_transaction_in_commit(fs_info)) {
- leaf = path->nodes[0];
-
- if (btrfs_header_nritems(leaf) == 0) {
- WARN_ON(1);
- break;
- }
-
- /*
- * Save the key so we can advance forward
- * in the next search.
- */
- btrfs_item_key_to_cpu(leaf, &key, 0);
- btrfs_release_path(path);
- root->cache_progress = last;
- mutex_unlock(&root->fs_commit_mutex);
- schedule_timeout(1);
- goto again;
- } else
- continue;
- }
-
- btrfs_item_key_to_cpu(leaf, &key, slot);
-
- if (key.type != BTRFS_INODE_ITEM_KEY)
- goto next;
-
- if (key.objectid >= root->highest_objectid)
- break;
-
- if (last != (u64)-1 && last + 1 != key.objectid) {
- __btrfs_add_free_space(ctl, last + 1,
- key.objectid - last - 1);
- wake_up(&root->cache_wait);
- }
-
- last = key.objectid;
-next:
- path->slots[0]++;
- }
-
- if (last < root->highest_objectid - 1) {
- __btrfs_add_free_space(ctl, last + 1,
- root->highest_objectid - last - 1);
- }
-
- spin_lock(&root->cache_lock);
- root->cached = BTRFS_CACHE_FINISHED;
- spin_unlock(&root->cache_lock);
-
- root->cache_progress = (u64)-1;
- btrfs_unpin_free_ino(root);
-out:
- wake_up(&root->cache_wait);
- mutex_unlock(&root->fs_commit_mutex);
-
- btrfs_free_path(path);
-
- return ret;
-}
-
-static void start_caching(struct btrfs_root *root)
-{
- struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
- struct task_struct *tsk;
- int ret;
- u64 objectid;
-
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
- return;
-
- spin_lock(&root->cache_lock);
- if (root->cached != BTRFS_CACHE_NO) {
- spin_unlock(&root->cache_lock);
- return;
- }
-
- root->cached = BTRFS_CACHE_STARTED;
- spin_unlock(&root->cache_lock);
-
- ret = load_free_ino_cache(root->fs_info, root);
- if (ret == 1) {
- spin_lock(&root->cache_lock);
- root->cached = BTRFS_CACHE_FINISHED;
- spin_unlock(&root->cache_lock);
- return;
- }
-
- /*
- * It can be quite time-consuming to fill the cache by searching
- * through the extent tree, and this can keep the ino allocation path
- * waiting. Therefore at start we quickly find out the highest
- * inode number and we know we can use inode numbers which fall in
- * [highest_ino + 1, BTRFS_LAST_FREE_OBJECTID].
- */
- ret = btrfs_find_free_objectid(root, &objectid);
- if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) {
- __btrfs_add_free_space(ctl, objectid,
- BTRFS_LAST_FREE_OBJECTID - objectid + 1);
- }
-
- tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n",
- root->root_key.objectid);
- BUG_ON(IS_ERR(tsk)); /* -ENOMEM */
-}
-
-int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
-{
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
- return btrfs_find_free_objectid(root, objectid);
-
-again:
- *objectid = btrfs_find_ino_for_alloc(root);
-
- if (*objectid != 0)
- return 0;
-
- start_caching(root);
-
- wait_event(root->cache_wait,
- root->cached == BTRFS_CACHE_FINISHED ||
- root->free_ino_ctl->free_space > 0);
-
- if (root->cached == BTRFS_CACHE_FINISHED &&
- root->free_ino_ctl->free_space == 0)
- return -ENOSPC;
- else
- goto again;
-}
-
-void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
-{
- struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
- struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
-
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
- return;
-
-again:
- if (root->cached == BTRFS_CACHE_FINISHED) {
- __btrfs_add_free_space(ctl, objectid, 1);
- } else {
- /*
- * If we are in the process of caching free ino chunks,
- * to avoid adding the same inode number to the free_ino
- * tree twice across transactions, we'll leave it
- * in the pinned tree until a transaction is committed
- * or the caching work is done.
- */
-
- mutex_lock(&root->fs_commit_mutex);
- spin_lock(&root->cache_lock);
- if (root->cached == BTRFS_CACHE_FINISHED) {
- spin_unlock(&root->cache_lock);
- mutex_unlock(&root->fs_commit_mutex);
- goto again;
- }
- spin_unlock(&root->cache_lock);
-
- start_caching(root);
-
- if (objectid <= root->cache_progress ||
- objectid > root->highest_objectid)
- __btrfs_add_free_space(ctl, objectid, 1);
- else
- __btrfs_add_free_space(pinned, objectid, 1);
-
- mutex_unlock(&root->fs_commit_mutex);
- }
-}
-
-/*
- * When a transaction is committed, we'll move those inode numbers which
- * are smaller than root->cache_progress from pinned tree to free_ino tree,
- * and others will just be dropped, because the commit root we were
- * searching has changed.
- *
- * Must be called with root->fs_commit_mutex held
- */
-void btrfs_unpin_free_ino(struct btrfs_root *root)
-{
- struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
- struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
- struct btrfs_free_space *info;
- struct rb_node *n;
- u64 count;
-
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
- return;
-
- while (1) {
- n = rb_first(rbroot);
- if (!n)
- break;
-
- info = rb_entry(n, struct btrfs_free_space, offset_index);
- BUG_ON(info->bitmap); /* Logic error */
-
- if (info->offset > root->cache_progress)
- goto free;
- else if (info->offset + info->bytes > root->cache_progress)
- count = root->cache_progress - info->offset + 1;
- else
- count = info->bytes;
-
- __btrfs_add_free_space(ctl, info->offset, count);
-free:
- rb_erase(&info->offset_index, rbroot);
- kfree(info);
- }
-}
-
-#define INIT_THRESHOLD (((1024 * 32) / 2) / sizeof(struct btrfs_free_space))
-#define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8)
-
-/*
- * The goal is to keep the memory used by the free_ino tree from
- * exceeding the memory we would use if we used bitmaps only.
- */
-static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
-{
- struct btrfs_free_space *info;
- struct rb_node *n;
- int max_ino;
- int max_bitmaps;
-
- n = rb_last(&ctl->free_space_offset);
- if (!n) {
- ctl->extents_thresh = INIT_THRESHOLD;
- return;
- }
- info = rb_entry(n, struct btrfs_free_space, offset_index);
-
- /*
- * Find the maximum inode number in the filesystem. Note we
- * ignore the fact that this can be a bitmap, because we are
- * not doing a precise calculation.
- */
- max_ino = info->bytes - 1;
-
- max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP;
- if (max_bitmaps <= ctl->total_bitmaps) {
- ctl->extents_thresh = 0;
- return;
- }
-
- ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) *
- PAGE_CACHE_SIZE / sizeof(*info);
-}
-
-/*
- * We don't fall back to a bitmap if we are below the extents threshold
- * or this chunk of inode numbers is a big one.
- */
-static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *info)
-{
- if (ctl->free_extents < ctl->extents_thresh ||
- info->bytes > INODES_PER_BITMAP / 10)
- return false;
-
- return true;
-}
-
-static struct btrfs_free_space_op free_ino_op = {
- .recalc_thresholds = recalculate_thresholds,
- .use_bitmap = use_bitmap,
-};
-
-static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl)
-{
-}
-
-static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *info)
-{
- /*
- * We always use extents for two reasons:
- *
- * - The pinned tree is only used while the caching work
- * is in progress.
- * - Make code simpler. See btrfs_unpin_free_ino().
- */
- return false;
-}
-
-static struct btrfs_free_space_op pinned_free_ino_op = {
- .recalc_thresholds = pinned_recalc_thresholds,
- .use_bitmap = pinned_use_bitmap,
-};
-
-void btrfs_init_free_ino_ctl(struct btrfs_root *root)
-{
- struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
- struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
-
- spin_lock_init(&ctl->tree_lock);
- ctl->unit = 1;
- ctl->start = 0;
- ctl->private = NULL;
- ctl->op = &free_ino_op;
-
- /*
- * Initially we allow 16K of ram to be used to cache chunks of
- * inode numbers before we resort to bitmaps. This is somewhat
- * arbitrary, but it will be adjusted at runtime.
- */
- ctl->extents_thresh = INIT_THRESHOLD;
-
- spin_lock_init(&pinned->tree_lock);
- pinned->unit = 1;
- pinned->start = 0;
- pinned->private = NULL;
- pinned->extents_thresh = 0;
- pinned->op = &pinned_free_ino_op;
-}
-
-int btrfs_save_ino_cache(struct btrfs_root *root,
- struct btrfs_trans_handle *trans)
-{
- struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
- struct btrfs_path *path;
- struct inode *inode;
- struct btrfs_block_rsv *rsv;
- u64 num_bytes;
- u64 alloc_hint = 0;
- int ret;
- int prealloc;
- bool retry = false;
-
- /* only the fs tree and subvol/snap roots need the ino cache */
- if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID &&
- (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
- root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID))
- return 0;
-
- /* Don't save inode cache if we are deleting this root */
- if (btrfs_root_refs(&root->root_item) == 0 &&
- root != root->fs_info->tree_root)
- return 0;
-
- if (!btrfs_test_opt(root, INODE_MAP_CACHE))
- return 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- rsv = trans->block_rsv;
- trans->block_rsv = &root->fs_info->trans_block_rsv;
-
- num_bytes = trans->bytes_reserved;
- /*
- * 1 item for inode item insertion if needed
- * 3 items for inode item update (in the worst case)
- * 1 item for free space object
- * 3 items for pre-allocation
- */
- trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8);
- ret = btrfs_block_rsv_add_noflush(root, trans->block_rsv,
- trans->bytes_reserved);
- if (ret)
- goto out;
- trace_btrfs_space_reservation(root->fs_info, "ino_cache",
- trans->transid, trans->bytes_reserved, 1);
-again:
- inode = lookup_free_ino_inode(root, path);
- if (IS_ERR(inode) && (PTR_ERR(inode) != -ENOENT || retry)) {
- ret = PTR_ERR(inode);
- goto out_release;
- }
-
- if (IS_ERR(inode)) {
- BUG_ON(retry); /* Logic error */
- retry = true;
-
- ret = create_free_ino_inode(root, trans, path);
- if (ret)
- goto out_release;
- goto again;
- }
-
- BTRFS_I(inode)->generation = 0;
- ret = btrfs_update_inode(trans, root, inode);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto out_put;
- }
-
- if (i_size_read(inode) > 0) {
- ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto out_put;
- }
- }
-
- spin_lock(&root->cache_lock);
- if (root->cached != BTRFS_CACHE_FINISHED) {
- ret = -1;
- spin_unlock(&root->cache_lock);
- goto out_put;
- }
- spin_unlock(&root->cache_lock);
-
- spin_lock(&ctl->tree_lock);
- prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
- prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE);
- prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE;
- spin_unlock(&ctl->tree_lock);
-
- /* Just to make sure we have enough space */
- prealloc += 8 * PAGE_CACHE_SIZE;
-
- ret = btrfs_delalloc_reserve_space(inode, prealloc);
- if (ret)
- goto out_put;
-
- ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
- prealloc, prealloc, &alloc_hint);
- if (ret) {
- btrfs_delalloc_release_space(inode, prealloc);
- goto out_put;
- }
- btrfs_free_reserved_data_space(inode, prealloc);
-
- ret = btrfs_write_out_ino_cache(root, trans, path);
-out_put:
- iput(inode);
-out_release:
- trace_btrfs_space_reservation(root->fs_info, "ino_cache",
- trans->transid, trans->bytes_reserved, 0);
- btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
-out:
- trans->block_rsv = rsv;
- trans->bytes_reserved = num_bytes;
-
- btrfs_free_path(path);
- return ret;
-}
-
-static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
-{
- struct btrfs_path *path;
- int ret;
- struct extent_buffer *l;
- struct btrfs_key search_key;
- struct btrfs_key found_key;
- int slot;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
- search_key.type = -1;
- search_key.offset = (u64)-1;
- ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
- if (ret < 0)
- goto error;
- BUG_ON(ret == 0); /* Corruption */
- if (path->slots[0] > 0) {
- slot = path->slots[0] - 1;
- l = path->nodes[0];
- btrfs_item_key_to_cpu(l, &found_key, slot);
- *objectid = max_t(u64, found_key.objectid,
- BTRFS_FIRST_FREE_OBJECTID - 1);
- } else {
- *objectid = BTRFS_FIRST_FREE_OBJECTID - 1;
- }
- ret = 0;
-error:
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
-{
- int ret;
- mutex_lock(&root->objectid_mutex);
-
- if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
- ret = btrfs_find_highest_objectid(root,
- &root->highest_objectid);
- if (ret)
- goto out;
- }
-
- if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
- ret = -ENOSPC;
- goto out;
- }
-
- *objectid = ++root->highest_objectid;
- ret = 0;
-out:
- mutex_unlock(&root->objectid_mutex);
- return ret;
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/inode-map.h b/ANDROID_3.4.5/fs/btrfs/inode-map.h
deleted file mode 100644
index ddb347bf..00000000
--- a/ANDROID_3.4.5/fs/btrfs/inode-map.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __BTRFS_INODE_MAP
-#define __BTRFS_INODE_MAP
-
-void btrfs_init_free_ino_ctl(struct btrfs_root *root);
-void btrfs_unpin_free_ino(struct btrfs_root *root);
-void btrfs_return_ino(struct btrfs_root *root, u64 objectid);
-int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid);
-int btrfs_save_ino_cache(struct btrfs_root *root,
- struct btrfs_trans_handle *trans);
-
-int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
-
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/inode.c b/ANDROID_3.4.5/fs/btrfs/inode.c
deleted file mode 100644
index 0df0d1fd..00000000
--- a/ANDROID_3.4.5/fs/btrfs/inode.c
+++ /dev/null
@@ -1,7681 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/bio.h>
-#include <linux/buffer_head.h>
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-#include <linux/highmem.h>
-#include <linux/time.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/backing-dev.h>
-#include <linux/mpage.h>
-#include <linux/swap.h>
-#include <linux/writeback.h>
-#include <linux/statfs.h>
-#include <linux/compat.h>
-#include <linux/bit_spinlock.h>
-#include <linux/xattr.h>
-#include <linux/posix_acl.h>
-#include <linux/falloc.h>
-#include <linux/slab.h>
-#include <linux/ratelimit.h>
-#include <linux/mount.h>
-#include "compat.h"
-#include "ctree.h"
-#include "disk-io.h"
-#include "transaction.h"
-#include "btrfs_inode.h"
-#include "ioctl.h"
-#include "print-tree.h"
-#include "ordered-data.h"
-#include "xattr.h"
-#include "tree-log.h"
-#include "volumes.h"
-#include "compression.h"
-#include "locking.h"
-#include "free-space-cache.h"
-#include "inode-map.h"
-
-struct btrfs_iget_args {
- u64 ino;
- struct btrfs_root *root;
-};
-
-static const struct inode_operations btrfs_dir_inode_operations;
-static const struct inode_operations btrfs_symlink_inode_operations;
-static const struct inode_operations btrfs_dir_ro_inode_operations;
-static const struct inode_operations btrfs_special_inode_operations;
-static const struct inode_operations btrfs_file_inode_operations;
-static const struct address_space_operations btrfs_aops;
-static const struct address_space_operations btrfs_symlink_aops;
-static const struct file_operations btrfs_dir_file_operations;
-static struct extent_io_ops btrfs_extent_io_ops;
-
-static struct kmem_cache *btrfs_inode_cachep;
-struct kmem_cache *btrfs_trans_handle_cachep;
-struct kmem_cache *btrfs_transaction_cachep;
-struct kmem_cache *btrfs_path_cachep;
-struct kmem_cache *btrfs_free_space_cachep;
-
-#define S_SHIFT 12
-static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
- [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
- [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
- [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
- [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
- [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
- [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
- [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
-};
-
-static int btrfs_setsize(struct inode *inode, loff_t newsize);
-static int btrfs_truncate(struct inode *inode);
-static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end);
-static noinline int cow_file_range(struct inode *inode,
- struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written, int unlock);
-static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode);
-
-static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir,
- const struct qstr *qstr)
-{
- int err;
-
- err = btrfs_init_acl(trans, inode, dir);
- if (!err)
- err = btrfs_xattr_security_init(trans, inode, dir, qstr);
- return err;
-}
-
-/*
- * this does all the hard work for inserting an inline extent into
- * the btree. The caller should have done a btrfs_drop_extents so that
- * no overlapping inline items exist in the btree
- */
-static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- u64 start, size_t size, size_t compressed_size,
- int compress_type,
- struct page **compressed_pages)
-{
- struct btrfs_key key;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct page *page = NULL;
- char *kaddr;
- unsigned long ptr;
- struct btrfs_file_extent_item *ei;
- int err = 0;
- int ret;
- size_t cur_size = size;
- size_t datasize;
- unsigned long offset;
-
- if (compressed_size && compressed_pages)
- cur_size = compressed_size;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- path->leave_spinning = 1;
-
- key.objectid = btrfs_ino(inode);
- key.offset = start;
- btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
- datasize = btrfs_file_extent_calc_inline_size(cur_size);
-
- inode_add_bytes(inode, size);
- ret = btrfs_insert_empty_item(trans, root, path, &key,
- datasize);
- if (ret) {
- err = ret;
- goto fail;
- }
- leaf = path->nodes[0];
- ei = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_generation(leaf, ei, trans->transid);
- btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
- btrfs_set_file_extent_encryption(leaf, ei, 0);
- btrfs_set_file_extent_other_encoding(leaf, ei, 0);
- btrfs_set_file_extent_ram_bytes(leaf, ei, size);
- ptr = btrfs_file_extent_inline_start(ei);
-
- if (compress_type != BTRFS_COMPRESS_NONE) {
- struct page *cpage;
- int i = 0;
- while (compressed_size > 0) {
- cpage = compressed_pages[i];
- cur_size = min_t(unsigned long, compressed_size,
- PAGE_CACHE_SIZE);
-
- kaddr = kmap_atomic(cpage);
- write_extent_buffer(leaf, kaddr, ptr, cur_size);
- kunmap_atomic(kaddr);
-
- i++;
- ptr += cur_size;
- compressed_size -= cur_size;
- }
- btrfs_set_file_extent_compression(leaf, ei,
- compress_type);
- } else {
- page = find_get_page(inode->i_mapping,
- start >> PAGE_CACHE_SHIFT);
- btrfs_set_file_extent_compression(leaf, ei, 0);
- kaddr = kmap_atomic(page);
- offset = start & (PAGE_CACHE_SIZE - 1);
- write_extent_buffer(leaf, kaddr + offset, ptr, size);
- kunmap_atomic(kaddr);
- page_cache_release(page);
- }
- btrfs_mark_buffer_dirty(leaf);
- btrfs_free_path(path);
-
- /*
- * we're an inline extent, so nobody can
- * extend the file past i_size without locking
- * a page we already have locked.
- *
- * We must do any isize and inode updates
- * before we unlock the pages. Otherwise we
- * could end up racing with unlink.
- */
- BTRFS_I(inode)->disk_i_size = inode->i_size;
- ret = btrfs_update_inode(trans, root, inode);
-
- return ret;
-fail:
- btrfs_free_path(path);
- return err;
-}
-
-
-/*
- * conditionally insert an inline extent into the file. This
- * does the checks required to make sure the data is small enough
- * to fit as an inline extent.
- */
-static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode, u64 start, u64 end,
- size_t compressed_size, int compress_type,
- struct page **compressed_pages)
-{
- u64 isize = i_size_read(inode);
- u64 actual_end = min(end + 1, isize);
- u64 inline_len = actual_end - start;
- u64 aligned_end = (end + root->sectorsize - 1) &
- ~((u64)root->sectorsize - 1);
- u64 hint_byte;
- u64 data_len = inline_len;
- int ret;
-
- if (compressed_size)
- data_len = compressed_size;
-
- if (start > 0 ||
- actual_end >= PAGE_CACHE_SIZE ||
- data_len >= BTRFS_MAX_INLINE_DATA_SIZE(root) ||
- (!compressed_size &&
- (actual_end & (root->sectorsize - 1)) == 0) ||
- end + 1 < isize ||
- data_len > root->fs_info->max_inline) {
- return 1;
- }
-
- ret = btrfs_drop_extents(trans, inode, start, aligned_end,
- &hint_byte, 1);
- if (ret)
- return ret;
-
- if (isize > actual_end)
- inline_len = min_t(u64, isize, actual_end);
- ret = insert_inline_extent(trans, root, inode, start,
- inline_len, compressed_size,
- compress_type, compressed_pages);
- if (ret && ret != -ENOSPC) {
- btrfs_abort_transaction(trans, root, ret);
- return ret;
- } else if (ret == -ENOSPC) {
- return 1;
- }
-
- btrfs_delalloc_release_metadata(inode, end + 1 - start);
- btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
- return 0;
-}
-
-struct async_extent {
- u64 start;
- u64 ram_size;
- u64 compressed_size;
- struct page **pages;
- unsigned long nr_pages;
- int compress_type;
- struct list_head list;
-};
-
-struct async_cow {
- struct inode *inode;
- struct btrfs_root *root;
- struct page *locked_page;
- u64 start;
- u64 end;
- struct list_head extents;
- struct btrfs_work work;
-};
-
-static noinline int add_async_extent(struct async_cow *cow,
- u64 start, u64 ram_size,
- u64 compressed_size,
- struct page **pages,
- unsigned long nr_pages,
- int compress_type)
-{
- struct async_extent *async_extent;
-
- async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
- BUG_ON(!async_extent); /* -ENOMEM */
- async_extent->start = start;
- async_extent->ram_size = ram_size;
- async_extent->compressed_size = compressed_size;
- async_extent->pages = pages;
- async_extent->nr_pages = nr_pages;
- async_extent->compress_type = compress_type;
- list_add_tail(&async_extent->list, &cow->extents);
- return 0;
-}
-
-/*
- * we create compressed extents in two phases. The first
- * phase compresses a range of pages that have already been
- * locked (both pages and state bits are locked).
- *
- * This is done inside an ordered work queue, and the compression
- * is spread across many cpus. The actual IO submission is step
- * two, and the ordered work queue takes care of making sure that
- * happens in the same order things were put onto the queue by
- * writepages and friends.
- *
- * If this code finds it can't get good compression, it puts an
- * entry onto the work queue to write the uncompressed bytes. This
- * makes sure that both compressed inodes and uncompressed inodes
- * are written in the same order that pdflush sent them down.
- */
-static noinline int compress_file_range(struct inode *inode,
- struct page *locked_page,
- u64 start, u64 end,
- struct async_cow *async_cow,
- int *num_added)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
- u64 num_bytes;
- u64 blocksize = root->sectorsize;
- u64 actual_end;
- u64 isize = i_size_read(inode);
- int ret = 0;
- struct page **pages = NULL;
- unsigned long nr_pages;
- unsigned long nr_pages_ret = 0;
- unsigned long total_compressed = 0;
- unsigned long total_in = 0;
- unsigned long max_compressed = 128 * 1024;
- unsigned long max_uncompressed = 128 * 1024;
- int i;
- int will_compress;
- int compress_type = root->fs_info->compress_type;
-
- /* if this is a small write inside eof, kick off a defrag */
- if ((end - start + 1) < 16 * 1024 &&
- (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
- btrfs_add_inode_defrag(NULL, inode);
-
- actual_end = min_t(u64, isize, end + 1);
-again:
- will_compress = 0;
- nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1;
- nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE);
-
- /*
- * we don't want to send crud past the end of i_size through
- * compression, that's just a waste of CPU time. So, if the
- * end of the file is before the start of our current
- * requested range of bytes, we bail out to the uncompressed
- * cleanup code that can deal with all of this.
- *
- * It isn't really the fastest way to fix things, but this is a
- * very uncommon corner.
- */
- if (actual_end <= start)
- goto cleanup_and_bail_uncompressed;
-
- total_compressed = actual_end - start;
-
- /* we want to make sure that the amount of ram required to uncompress
- * an extent is reasonable, so we limit the total size in ram
- * of a compressed extent to 128k. This is a crucial number
- * because it also controls how easily we can spread reads across
- * cpus for decompression.
- *
- * We also want to make sure the amount of IO required to do
- * a random read is reasonably small, so we limit the size of
- * a compressed extent to 128k.
- */
- total_compressed = min(total_compressed, max_uncompressed);
- num_bytes = (end - start + blocksize) & ~(blocksize - 1);
- num_bytes = max(blocksize, num_bytes);
- total_in = 0;
- ret = 0;
-
- /*
- * we do compression for mount -o compress and when the
- * inode has not been flagged as nocompress. This flag can
- * change at any time if we discover bad compression ratios.
- */
- if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) &&
- (btrfs_test_opt(root, COMPRESS) ||
- (BTRFS_I(inode)->force_compress) ||
- (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) {
- WARN_ON(pages);
- pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
- if (!pages) {
- /* just bail out to the uncompressed code */
- goto cont;
- }
-
- if (BTRFS_I(inode)->force_compress)
- compress_type = BTRFS_I(inode)->force_compress;
-
- ret = btrfs_compress_pages(compress_type,
- inode->i_mapping, start,
- total_compressed, pages,
- nr_pages, &nr_pages_ret,
- &total_in,
- &total_compressed,
- max_compressed);
-
- if (!ret) {
- unsigned long offset = total_compressed &
- (PAGE_CACHE_SIZE - 1);
- struct page *page = pages[nr_pages_ret - 1];
- char *kaddr;
-
- /* zero the tail end of the last page, we might be
- * sending it down to disk
- */
- if (offset) {
- kaddr = kmap_atomic(page);
- memset(kaddr + offset, 0,
- PAGE_CACHE_SIZE - offset);
- kunmap_atomic(kaddr);
- }
- will_compress = 1;
- }
- }
-cont:
- if (start == 0) {
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- trans = NULL;
- goto cleanup_and_out;
- }
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-
- /* let's try to make an inline extent */
- if (ret || total_in < (actual_end - start)) {
- /* we didn't compress the entire range, try
- * to make an uncompressed inline extent.
- */
- ret = cow_file_range_inline(trans, root, inode,
- start, end, 0, 0, NULL);
- } else {
- /* try making a compressed inline extent */
- ret = cow_file_range_inline(trans, root, inode,
- start, end,
- total_compressed,
- compress_type, pages);
- }
- if (ret <= 0) {
- /*
- * inline extent creation worked or returned error,
- * we don't need to create any more async work items.
- * Unlock and free up our temp pages.
- */
- extent_clear_unlock_delalloc(inode,
- &BTRFS_I(inode)->io_tree,
- start, end, NULL,
- EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
-
- btrfs_end_transaction(trans, root);
- goto free_pages_out;
- }
- btrfs_end_transaction(trans, root);
- }
-
- if (will_compress) {
- /*
- * we aren't doing an inline extent, so round the compressed size
- * up to a block size boundary so that the allocator does sane
- * things
- */
- total_compressed = (total_compressed + blocksize - 1) &
- ~(blocksize - 1);
-
- /*
- * one last check to make sure the compression is really a
- * win, compare the page count read with the blocks on disk
- */
- total_in = (total_in + PAGE_CACHE_SIZE - 1) &
- ~(PAGE_CACHE_SIZE - 1);
- if (total_compressed >= total_in) {
- will_compress = 0;
- } else {
- num_bytes = total_in;
- }
- }
- if (!will_compress && pages) {
- /*
- * the compression code ran but failed to make things smaller,
- * free any pages it allocated and our page pointer array
- */
- for (i = 0; i < nr_pages_ret; i++) {
- WARN_ON(pages[i]->mapping);
- page_cache_release(pages[i]);
- }
- kfree(pages);
- pages = NULL;
- total_compressed = 0;
- nr_pages_ret = 0;
-
- /* flag the file so we don't compress in the future */
- if (!btrfs_test_opt(root, FORCE_COMPRESS) &&
- !(BTRFS_I(inode)->force_compress)) {
- BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
- }
- }
- if (will_compress) {
- *num_added += 1;
-
- /* the async work queues will take care of doing actual
- * allocation on disk for these compressed pages,
- * and will submit them to the elevator.
- */
- add_async_extent(async_cow, start, num_bytes,
- total_compressed, pages, nr_pages_ret,
- compress_type);
-
- if (start + num_bytes < end) {
- start += num_bytes;
- pages = NULL;
- cond_resched();
- goto again;
- }
- } else {
-cleanup_and_bail_uncompressed:
- /*
- * No compression, but we still need to write the pages in
- * the file we've been given so far. Redirty the locked
- * page if it corresponds to our extent and set things up
- * for the async work queue to run cow_file_range to do
- * the normal delalloc dance
- */
- if (page_offset(locked_page) >= start &&
- page_offset(locked_page) <= end) {
- __set_page_dirty_nobuffers(locked_page);
- /* unlocked later on in the async handlers */
- }
- add_async_extent(async_cow, start, end - start + 1,
- 0, NULL, 0, BTRFS_COMPRESS_NONE);
- *num_added += 1;
- }
-
-out:
- return ret;
-
-free_pages_out:
- for (i = 0; i < nr_pages_ret; i++) {
- WARN_ON(pages[i]->mapping);
- page_cache_release(pages[i]);
- }
- kfree(pages);
-
- goto out;
-
-cleanup_and_out:
- extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
- start, end, NULL,
- EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_DIRTY |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_SET_WRITEBACK |
- EXTENT_END_WRITEBACK);
- if (!trans || IS_ERR(trans))
- btrfs_error(root->fs_info, ret, "Failed to join transaction");
- else
- btrfs_abort_transaction(trans, root, ret);
- goto free_pages_out;
-}
-
-/*
- * phase two of compressed writeback. This is the ordered portion
- * of the code, which only gets called in the order the work was
- * queued. We walk all the async extents created by compress_file_range
- * and send them down to the disk.
- */
-static noinline int submit_compressed_extents(struct inode *inode,
- struct async_cow *async_cow)
-{
- struct async_extent *async_extent;
- u64 alloc_hint = 0;
- struct btrfs_trans_handle *trans;
- struct btrfs_key ins;
- struct extent_map *em;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- struct extent_io_tree *io_tree;
- int ret = 0;
-
- if (list_empty(&async_cow->extents))
- return 0;
-
-
- while (!list_empty(&async_cow->extents)) {
- async_extent = list_entry(async_cow->extents.next,
- struct async_extent, list);
- list_del(&async_extent->list);
-
- io_tree = &BTRFS_I(inode)->io_tree;
-
-retry:
- /* did the compression code fall back to uncompressed IO? */
- if (!async_extent->pages) {
- int page_started = 0;
- unsigned long nr_written = 0;
-
- lock_extent(io_tree, async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1);
-
- /* allocate blocks */
- ret = cow_file_range(inode, async_cow->locked_page,
- async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1,
- &page_started, &nr_written, 0);
-
- /* JDM XXX */
-
- /*
- * if page_started, cow_file_range inserted an
- * inline extent and took care of all the unlocking
- * and IO for us. Otherwise, we need to submit
- * all those pages down to the drive.
- */
- if (!page_started && !ret)
- extent_write_locked_range(io_tree,
- inode, async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1,
- btrfs_get_extent,
- WB_SYNC_ALL);
- kfree(async_extent);
- cond_resched();
- continue;
- }
-
- lock_extent(io_tree, async_extent->start,
- async_extent->start + async_extent->ram_size - 1);
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- } else {
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- ret = btrfs_reserve_extent(trans, root,
- async_extent->compressed_size,
- async_extent->compressed_size,
- 0, alloc_hint, &ins, 1);
- if (ret)
- btrfs_abort_transaction(trans, root, ret);
- btrfs_end_transaction(trans, root);
- }
-
- if (ret) {
- int i;
- for (i = 0; i < async_extent->nr_pages; i++) {
- WARN_ON(async_extent->pages[i]->mapping);
- page_cache_release(async_extent->pages[i]);
- }
- kfree(async_extent->pages);
- async_extent->nr_pages = 0;
- async_extent->pages = NULL;
- unlock_extent(io_tree, async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1);
- if (ret == -ENOSPC)
- goto retry;
- goto out_free; /* JDM: Requeue? */
- }
-
- /*
- * here we're doing allocation and writeback of the
- * compressed pages
- */
- btrfs_drop_extent_cache(inode, async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1, 0);
-
- em = alloc_extent_map();
- BUG_ON(!em); /* -ENOMEM */
- em->start = async_extent->start;
- em->len = async_extent->ram_size;
- em->orig_start = em->start;
-
- em->block_start = ins.objectid;
- em->block_len = ins.offset;
- em->bdev = root->fs_info->fs_devices->latest_bdev;
- em->compress_type = async_extent->compress_type;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
- set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
-
- while (1) {
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- write_unlock(&em_tree->lock);
- if (ret != -EEXIST) {
- free_extent_map(em);
- break;
- }
- btrfs_drop_extent_cache(inode, async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1, 0);
- }
-
- ret = btrfs_add_ordered_extent_compress(inode,
- async_extent->start,
- ins.objectid,
- async_extent->ram_size,
- ins.offset,
- BTRFS_ORDERED_COMPRESSED,
- async_extent->compress_type);
- BUG_ON(ret); /* -ENOMEM */
-
- /*
- * clear dirty, set writeback and unlock the pages.
- */
- extent_clear_unlock_delalloc(inode,
- &BTRFS_I(inode)->io_tree,
- async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1,
- NULL, EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_UNLOCK |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK);
-
- ret = btrfs_submit_compressed_write(inode,
- async_extent->start,
- async_extent->ram_size,
- ins.objectid,
- ins.offset, async_extent->pages,
- async_extent->nr_pages);
-
- BUG_ON(ret); /* -ENOMEM */
- alloc_hint = ins.objectid + ins.offset;
- kfree(async_extent);
- cond_resched();
- }
- ret = 0;
-out:
- return ret;
-out_free:
- kfree(async_extent);
- goto out;
-}
-
-static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
- u64 num_bytes)
-{
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- struct extent_map *em;
- u64 alloc_hint = 0;
-
- read_lock(&em_tree->lock);
- em = search_extent_mapping(em_tree, start, num_bytes);
- if (em) {
- /*
- * if block start isn't an actual block number then find the
- * first block in this inode and use that as a hint. If that
- * block is also bogus then just don't worry about it.
- */
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- free_extent_map(em);
- em = search_extent_mapping(em_tree, 0, 0);
- if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
- alloc_hint = em->block_start;
- if (em)
- free_extent_map(em);
- } else {
- alloc_hint = em->block_start;
- free_extent_map(em);
- }
- }
- read_unlock(&em_tree->lock);
-
- return alloc_hint;
-}
-
-/*
- * when extent_io.c finds a delayed allocation range in the file,
- * the callbacks end up in this code. The basic idea is to
- * allocate extents on disk for the range, and create ordered data structs
- * in ram to track those extents.
- *
- * locked_page is the page that writepage had locked already. We use
- * it to make sure we don't do extra locks or unlocks.
- *
- * *page_started is set to one if we unlock locked_page and do everything
- * required to start IO on it. It may be clean and already done with
- * IO when we return.
- */
-static noinline int cow_file_range(struct inode *inode,
- struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written,
- int unlock)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
- u64 alloc_hint = 0;
- u64 num_bytes;
- unsigned long ram_size;
- u64 disk_num_bytes;
- u64 cur_alloc_size;
- u64 blocksize = root->sectorsize;
- struct btrfs_key ins;
- struct extent_map *em;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- int ret = 0;
-
- BUG_ON(btrfs_is_free_space_inode(root, inode));
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- extent_clear_unlock_delalloc(inode,
- &BTRFS_I(inode)->io_tree,
- start, end, NULL,
- EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_UNLOCK |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_DIRTY |
- EXTENT_SET_WRITEBACK |
- EXTENT_END_WRITEBACK);
- return PTR_ERR(trans);
- }
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-
- num_bytes = (end - start + blocksize) & ~(blocksize - 1);
- num_bytes = max(blocksize, num_bytes);
- disk_num_bytes = num_bytes;
- ret = 0;
-
- /* if this is a small write inside eof, kick off defrag */
- if (num_bytes < 64 * 1024 &&
- (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
- btrfs_add_inode_defrag(trans, inode);
-
- if (start == 0) {
- /* lets try to make an inline extent */
- ret = cow_file_range_inline(trans, root, inode,
- start, end, 0, 0, NULL);
- if (ret == 0) {
- extent_clear_unlock_delalloc(inode,
- &BTRFS_I(inode)->io_tree,
- start, end, NULL,
- EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_UNLOCK |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_DIRTY |
- EXTENT_SET_WRITEBACK |
- EXTENT_END_WRITEBACK);
-
- *nr_written = *nr_written +
- (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
- *page_started = 1;
- goto out;
- } else if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
- goto out_unlock;
- }
- }
-
- BUG_ON(disk_num_bytes >
- btrfs_super_total_bytes(root->fs_info->super_copy));
-
- alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
- btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
-
- while (disk_num_bytes > 0) {
- unsigned long op;
-
- cur_alloc_size = disk_num_bytes;
- ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
- root->sectorsize, 0, alloc_hint,
- &ins, 1);
- if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
- goto out_unlock;
- }
-
- em = alloc_extent_map();
- BUG_ON(!em); /* -ENOMEM */
- em->start = start;
- em->orig_start = em->start;
- ram_size = ins.offset;
- em->len = ins.offset;
-
- em->block_start = ins.objectid;
- em->block_len = ins.offset;
- em->bdev = root->fs_info->fs_devices->latest_bdev;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
-
- while (1) {
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- write_unlock(&em_tree->lock);
- if (ret != -EEXIST) {
- free_extent_map(em);
- break;
- }
- btrfs_drop_extent_cache(inode, start,
- start + ram_size - 1, 0);
- }
-
- cur_alloc_size = ins.offset;
- ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
- ram_size, cur_alloc_size, 0);
- BUG_ON(ret); /* -ENOMEM */
-
- if (root->root_key.objectid ==
- BTRFS_DATA_RELOC_TREE_OBJECTID) {
- ret = btrfs_reloc_clone_csums(inode, start,
- cur_alloc_size);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto out_unlock;
- }
- }
-
- if (disk_num_bytes < cur_alloc_size)
- break;
-
- /* we're not doing compressed IO, don't unlock the first
- * page (which the caller expects to stay locked), don't
- * clear any dirty bits and don't set any writeback bits
- *
- * Do set the Private2 bit so we know this page was properly
- * setup for writepage
- */
- op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0;
- op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC |
- EXTENT_SET_PRIVATE2;
-
- extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
- start, start + ram_size - 1,
- locked_page, op);
- disk_num_bytes -= cur_alloc_size;
- num_bytes -= cur_alloc_size;
- alloc_hint = ins.objectid + ins.offset;
- start += cur_alloc_size;
- }
- ret = 0;
-out:
- btrfs_end_transaction(trans, root);
-
- return ret;
-out_unlock:
- extent_clear_unlock_delalloc(inode,
- &BTRFS_I(inode)->io_tree,
- start, end, NULL,
- EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_UNLOCK |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_DIRTY |
- EXTENT_SET_WRITEBACK |
- EXTENT_END_WRITEBACK);
-
- goto out;
-}
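
cow_file_range sizes its allocation with (end - start + blocksize) & ~(blocksize - 1), i.e. the length of the inclusive range [start, end] rounded up to a whole number of blocks. A small stand-alone check of that arithmetic (power-of-two blocksize assumed, as in the kernel; the function name here is illustrative only):

#include <assert.h>
#include <stdint.h>

/* Round the length of the inclusive byte range [start, end] up to a
 * multiple of blocksize, mirroring the masking used above. */
static uint64_t delalloc_len(uint64_t start, uint64_t end, uint64_t blocksize)
{
	uint64_t len = (end - start + blocksize) & ~(blocksize - 1);
	return len < blocksize ? blocksize : len;
}

int main(void)
{
	assert(delalloc_len(0, 4095, 4096) == 4096);     /* exactly one block */
	assert(delalloc_len(0, 4096, 4096) == 8192);     /* one byte into the next block */
	assert(delalloc_len(8192, 8192, 4096) == 4096);  /* single-byte range */
	return 0;
}
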
-
-/*
- * work queue call back to start compression on a file and its pages
- */
-static noinline void async_cow_start(struct btrfs_work *work)
-{
- struct async_cow *async_cow;
- int num_added = 0;
- async_cow = container_of(work, struct async_cow, work);
-
- compress_file_range(async_cow->inode, async_cow->locked_page,
- async_cow->start, async_cow->end, async_cow,
- &num_added);
- if (num_added == 0)
- async_cow->inode = NULL;
-}
-
-/*
- * work queue call back to submit previously compressed pages
- */
-static noinline void async_cow_submit(struct btrfs_work *work)
-{
- struct async_cow *async_cow;
- struct btrfs_root *root;
- unsigned long nr_pages;
-
- async_cow = container_of(work, struct async_cow, work);
-
- root = async_cow->root;
- nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
- PAGE_CACHE_SHIFT;
-
- atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
-
- if (atomic_read(&root->fs_info->async_delalloc_pages) <
-	    5 * 1024 * 1024 &&
- waitqueue_active(&root->fs_info->async_submit_wait))
- wake_up(&root->fs_info->async_submit_wait);
-
- if (async_cow->inode)
- submit_compressed_extents(async_cow->inode, async_cow);
-}
-
-static noinline void async_cow_free(struct btrfs_work *work)
-{
- struct async_cow *async_cow;
- async_cow = container_of(work, struct async_cow, work);
- kfree(async_cow);
-}
-
-static int cow_file_range_async(struct inode *inode, struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written)
-{
- struct async_cow *async_cow;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- unsigned long nr_pages;
- u64 cur_end;
-	int limit = 10 * 1024 * 1024;
-
- clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
- 1, 0, NULL, GFP_NOFS);
- while (start < end) {
- async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
- BUG_ON(!async_cow); /* -ENOMEM */
- async_cow->inode = inode;
- async_cow->root = root;
- async_cow->locked_page = locked_page;
- async_cow->start = start;
-
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
- cur_end = end;
- else
- cur_end = min(end, start + 512 * 1024 - 1);
-
- async_cow->end = cur_end;
- INIT_LIST_HEAD(&async_cow->extents);
-
- async_cow->work.func = async_cow_start;
- async_cow->work.ordered_func = async_cow_submit;
- async_cow->work.ordered_free = async_cow_free;
- async_cow->work.flags = 0;
-
- nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
- PAGE_CACHE_SHIFT;
- atomic_add(nr_pages, &root->fs_info->async_delalloc_pages);
-
- btrfs_queue_worker(&root->fs_info->delalloc_workers,
- &async_cow->work);
-
- if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) {
- wait_event(root->fs_info->async_submit_wait,
- (atomic_read(&root->fs_info->async_delalloc_pages) <
- limit));
- }
-
- while (atomic_read(&root->fs_info->async_submit_draining) &&
- atomic_read(&root->fs_info->async_delalloc_pages)) {
- wait_event(root->fs_info->async_submit_wait,
- (atomic_read(&root->fs_info->async_delalloc_pages) ==
- 0));
- }
-
- *nr_written += nr_pages;
- start = cur_end + 1;
- }
- *page_started = 1;
- return 0;
-}
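
cow_file_range_async caps each queued work item at 512 KiB of file range (unless compression is disabled for the inode, in which case the whole range goes out in one piece). The chunking loop, reduced to a stand-alone walk with a hypothetical callback in place of btrfs_queue_worker:

#include <stdint.h>
#include <stdio.h>

#define CHUNK_BYTES (512 * 1024)	/* mirrors the 512K cap used above */

/* Walk the inclusive range [start, end], emitting the same sub-ranges that
 * the async path hands to its worker threads. */
static void for_each_async_chunk(uint64_t start, uint64_t end,
				 void (*fn)(uint64_t cur_start, uint64_t cur_end))
{
	while (start < end) {
		uint64_t cur_end = end;

		if (cur_end > start + CHUNK_BYTES - 1)
			cur_end = start + CHUNK_BYTES - 1;
		fn(start, cur_end);
		start = cur_end + 1;
	}
}

static void print_chunk(uint64_t s, uint64_t e)
{
	printf("chunk [%llu, %llu]\n",
	       (unsigned long long)s, (unsigned long long)e);
}

int main(void)
{
	for_each_async_chunk(0, 3 * CHUNK_BYTES / 2, print_chunk);
	return 0;
}
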
-
-static noinline int csum_exist_in_range(struct btrfs_root *root,
- u64 bytenr, u64 num_bytes)
-{
- int ret;
- struct btrfs_ordered_sum *sums;
- LIST_HEAD(list);
-
- ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr,
- bytenr + num_bytes - 1, &list, 0);
- if (ret == 0 && list_empty(&list))
- return 0;
-
- while (!list_empty(&list)) {
- sums = list_entry(list.next, struct btrfs_ordered_sum, list);
- list_del(&sums->list);
- kfree(sums);
- }
- return 1;
-}
-
-/*
- * callback for nocow writeback. This checks for snapshots or COW copies
- * of the extents that exist in the file, and COWs the file as required.
- *
- * If no cow copies or snapshots exist, we write directly to the existing
- * blocks on disk
- */
-static noinline int run_delalloc_nocow(struct inode *inode,
- struct page *locked_page,
- u64 start, u64 end, int *page_started, int force,
- unsigned long *nr_written)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
- struct extent_buffer *leaf;
- struct btrfs_path *path;
- struct btrfs_file_extent_item *fi;
- struct btrfs_key found_key;
- u64 cow_start;
- u64 cur_offset;
- u64 extent_end;
- u64 extent_offset;
- u64 disk_bytenr;
- u64 num_bytes;
- int extent_type;
- int ret, err;
- int type;
- int nocow;
- int check_prev = 1;
- bool nolock;
- u64 ino = btrfs_ino(inode);
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- nolock = btrfs_is_free_space_inode(root, inode);
-
- if (nolock)
- trans = btrfs_join_transaction_nolock(root);
- else
- trans = btrfs_join_transaction(root);
-
- if (IS_ERR(trans)) {
- btrfs_free_path(path);
- return PTR_ERR(trans);
- }
-
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-
- cow_start = (u64)-1;
- cur_offset = start;
- while (1) {
- ret = btrfs_lookup_file_extent(trans, root, path, ino,
- cur_offset, 0);
- if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
- goto error;
- }
- if (ret > 0 && path->slots[0] > 0 && check_prev) {
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key,
- path->slots[0] - 1);
- if (found_key.objectid == ino &&
- found_key.type == BTRFS_EXTENT_DATA_KEY)
- path->slots[0]--;
- }
- check_prev = 0;
-next_slot:
- leaf = path->nodes[0];
- if (path->slots[0] >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
- goto error;
- }
- if (ret > 0)
- break;
- leaf = path->nodes[0];
- }
-
- nocow = 0;
- disk_bytenr = 0;
- num_bytes = 0;
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
- if (found_key.objectid > ino ||
- found_key.type > BTRFS_EXTENT_DATA_KEY ||
- found_key.offset > end)
- break;
-
- if (found_key.offset > cur_offset) {
- extent_end = found_key.offset;
- extent_type = 0;
- goto out_check;
- }
-
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- extent_type = btrfs_file_extent_type(leaf, fi);
-
- if (extent_type == BTRFS_FILE_EXTENT_REG ||
- extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
- disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
- extent_offset = btrfs_file_extent_offset(leaf, fi);
- extent_end = found_key.offset +
- btrfs_file_extent_num_bytes(leaf, fi);
- if (extent_end <= start) {
- path->slots[0]++;
- goto next_slot;
- }
- if (disk_bytenr == 0)
- goto out_check;
- if (btrfs_file_extent_compression(leaf, fi) ||
- btrfs_file_extent_encryption(leaf, fi) ||
- btrfs_file_extent_other_encoding(leaf, fi))
- goto out_check;
- if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
- goto out_check;
- if (btrfs_extent_readonly(root, disk_bytenr))
- goto out_check;
- if (btrfs_cross_ref_exist(trans, root, ino,
- found_key.offset -
- extent_offset, disk_bytenr))
- goto out_check;
- disk_bytenr += extent_offset;
- disk_bytenr += cur_offset - found_key.offset;
- num_bytes = min(end + 1, extent_end) - cur_offset;
- /*
- * force cow if csum exists in the range.
- * this ensures that csums for a given extent are
- * either valid or do not exist.
- */
- if (csum_exist_in_range(root, disk_bytenr, num_bytes))
- goto out_check;
- nocow = 1;
- } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
- extent_end = found_key.offset +
- btrfs_file_extent_inline_len(leaf, fi);
- extent_end = ALIGN(extent_end, root->sectorsize);
- } else {
- BUG_ON(1);
- }
-out_check:
- if (extent_end <= start) {
- path->slots[0]++;
- goto next_slot;
- }
- if (!nocow) {
- if (cow_start == (u64)-1)
- cow_start = cur_offset;
- cur_offset = extent_end;
- if (cur_offset > end)
- break;
- path->slots[0]++;
- goto next_slot;
- }
-
- btrfs_release_path(path);
- if (cow_start != (u64)-1) {
- ret = cow_file_range(inode, locked_page, cow_start,
- found_key.offset - 1, page_started,
- nr_written, 1);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto error;
- }
- cow_start = (u64)-1;
- }
-
- if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
- struct extent_map *em;
- struct extent_map_tree *em_tree;
- em_tree = &BTRFS_I(inode)->extent_tree;
- em = alloc_extent_map();
- BUG_ON(!em); /* -ENOMEM */
- em->start = cur_offset;
- em->orig_start = em->start;
- em->len = num_bytes;
- em->block_len = num_bytes;
- em->block_start = disk_bytenr;
- em->bdev = root->fs_info->fs_devices->latest_bdev;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
- while (1) {
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- write_unlock(&em_tree->lock);
- if (ret != -EEXIST) {
- free_extent_map(em);
- break;
- }
- btrfs_drop_extent_cache(inode, em->start,
- em->start + em->len - 1, 0);
- }
- type = BTRFS_ORDERED_PREALLOC;
- } else {
- type = BTRFS_ORDERED_NOCOW;
- }
-
- ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
- num_bytes, num_bytes, type);
- BUG_ON(ret); /* -ENOMEM */
-
- if (root->root_key.objectid ==
- BTRFS_DATA_RELOC_TREE_OBJECTID) {
- ret = btrfs_reloc_clone_csums(inode, cur_offset,
- num_bytes);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto error;
- }
- }
-
- extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
- cur_offset, cur_offset + num_bytes - 1,
- locked_page, EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC |
- EXTENT_SET_PRIVATE2);
- cur_offset = extent_end;
- if (cur_offset > end)
- break;
- }
- btrfs_release_path(path);
-
- if (cur_offset <= end && cow_start == (u64)-1)
- cow_start = cur_offset;
- if (cow_start != (u64)-1) {
- ret = cow_file_range(inode, locked_page, cow_start, end,
- page_started, nr_written, 1);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto error;
- }
- }
-
-error:
- if (nolock) {
- err = btrfs_end_transaction_nolock(trans, root);
- } else {
- err = btrfs_end_transaction(trans, root);
- }
- if (!ret)
- ret = err;
-
- btrfs_free_path(path);
- return ret;
-}
-
-/*
- * extent_io.c call back to do delayed allocation processing
- */
-static int run_delalloc_range(struct inode *inode, struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written)
-{
- int ret;
- struct btrfs_root *root = BTRFS_I(inode)->root;
-
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)
- ret = run_delalloc_nocow(inode, locked_page, start, end,
- page_started, 1, nr_written);
- else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)
- ret = run_delalloc_nocow(inode, locked_page, start, end,
- page_started, 0, nr_written);
- else if (!btrfs_test_opt(root, COMPRESS) &&
- !(BTRFS_I(inode)->force_compress) &&
- !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))
- ret = cow_file_range(inode, locked_page, start, end,
- page_started, nr_written, 1);
- else
- ret = cow_file_range_async(inode, locked_page, start, end,
- page_started, nr_written);
- return ret;
-}
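
run_delalloc_range picks one of three paths in a fixed order: nocow (forced for NODATACOW, checked for PREALLOC), plain COW when nothing asks for compression, and the async compressed path otherwise. The same precedence, written as a small decision function over booleans (the struct and enum names here are illustrative, not kernel types):

#include <stdbool.h>

/* Hypothetical stand-ins for the inode/mount state consulted above. */
struct delalloc_policy {
	bool inode_nodatacow;	/* BTRFS_INODE_NODATACOW          */
	bool inode_prealloc;	/* BTRFS_INODE_PREALLOC           */
	bool mount_compress;	/* mounted with -o compress       */
	bool force_compress;	/* per-inode forced compression   */
	bool inode_compress;	/* BTRFS_INODE_COMPRESS           */
};

enum delalloc_path {
	PATH_NOCOW_FORCED,	/* run_delalloc_nocow, force = 1 */
	PATH_NOCOW_CHECKED,	/* run_delalloc_nocow, force = 0 */
	PATH_PLAIN_COW,		/* cow_file_range                */
	PATH_ASYNC_COMPRESS,	/* cow_file_range_async          */
};

/* Same precedence as run_delalloc_range: NODATACOW wins, then PREALLOC,
 * then plain COW when no compression is requested, else async compression. */
static enum delalloc_path pick_delalloc_path(const struct delalloc_policy *p)
{
	if (p->inode_nodatacow)
		return PATH_NOCOW_FORCED;
	if (p->inode_prealloc)
		return PATH_NOCOW_CHECKED;
	if (!p->mount_compress && !p->force_compress && !p->inode_compress)
		return PATH_PLAIN_COW;
	return PATH_ASYNC_COMPRESS;
}
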
-
-static void btrfs_split_extent_hook(struct inode *inode,
- struct extent_state *orig, u64 split)
-{
- /* not delalloc, ignore it */
- if (!(orig->state & EXTENT_DELALLOC))
- return;
-
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents++;
- spin_unlock(&BTRFS_I(inode)->lock);
-}
-
-/*
- * extent_io.c merge_extent_hook, used to track merged delayed allocation
- * extents so we can keep track of new extents that are just merged onto old
- * extents, such as when we are doing sequential writes, so we can properly
- * account for the metadata space we'll need.
- */
-static void btrfs_merge_extent_hook(struct inode *inode,
- struct extent_state *new,
- struct extent_state *other)
-{
- /* not delalloc, ignore it */
- if (!(other->state & EXTENT_DELALLOC))
- return;
-
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents--;
- spin_unlock(&BTRFS_I(inode)->lock);
-}
-
-/*
- * extent_io.c set_bit_hook, used to track delayed allocation
- * bytes in this file, and to maintain the list of inodes that
- * have pending delalloc work to be done.
- */
-static void btrfs_set_bit_hook(struct inode *inode,
- struct extent_state *state, int *bits)
-{
-
- /*
- * set_bit and clear bit hooks normally require _irqsave/restore
- * but in this case, we are only testing for the DELALLOC
- * bit, which is only set or cleared with irqs on
- */
- if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
- struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 len = state->end + 1 - state->start;
- bool do_list = !btrfs_is_free_space_inode(root, inode);
-
- if (*bits & EXTENT_FIRST_DELALLOC) {
- *bits &= ~EXTENT_FIRST_DELALLOC;
- } else {
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents++;
- spin_unlock(&BTRFS_I(inode)->lock);
- }
-
- spin_lock(&root->fs_info->delalloc_lock);
- BTRFS_I(inode)->delalloc_bytes += len;
- root->fs_info->delalloc_bytes += len;
- if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
- list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
- &root->fs_info->delalloc_inodes);
- }
- spin_unlock(&root->fs_info->delalloc_lock);
- }
-}
-
-/*
- * extent_io.c clear_bit_hook, see set_bit_hook for why
- */
-static void btrfs_clear_bit_hook(struct inode *inode,
- struct extent_state *state, int *bits)
-{
- /*
- * set_bit and clear bit hooks normally require _irqsave/restore
- * but in this case, we are only testing for the DELALLOC
- * bit, which is only set or cleared with irqs on
- */
- if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
- struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 len = state->end + 1 - state->start;
- bool do_list = !btrfs_is_free_space_inode(root, inode);
-
- if (*bits & EXTENT_FIRST_DELALLOC) {
- *bits &= ~EXTENT_FIRST_DELALLOC;
- } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents--;
- spin_unlock(&BTRFS_I(inode)->lock);
- }
-
- if (*bits & EXTENT_DO_ACCOUNTING)
- btrfs_delalloc_release_metadata(inode, len);
-
- if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
- && do_list)
- btrfs_free_reserved_data_space(inode, len);
-
- spin_lock(&root->fs_info->delalloc_lock);
- root->fs_info->delalloc_bytes -= len;
- BTRFS_I(inode)->delalloc_bytes -= len;
-
- if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
- !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
- list_del_init(&BTRFS_I(inode)->delalloc_inodes);
- }
- spin_unlock(&root->fs_info->delalloc_lock);
- }
-}
-
-/*
- * extent_io.c merge_bio_hook, this must check the chunk tree to make sure
- * we don't create bios that span stripes or chunks
- */
-int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
- size_t size, struct bio *bio,
- unsigned long bio_flags)
-{
- struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
- struct btrfs_mapping_tree *map_tree;
- u64 logical = (u64)bio->bi_sector << 9;
- u64 length = 0;
- u64 map_length;
- int ret;
-
- if (bio_flags & EXTENT_BIO_COMPRESSED)
- return 0;
-
- length = bio->bi_size;
- map_tree = &root->fs_info->mapping_tree;
- map_length = length;
- ret = btrfs_map_block(map_tree, READ, logical,
- &map_length, NULL, 0);
- /* Will always return 0 or 1 with map_multi == NULL */
- BUG_ON(ret < 0);
- if (map_length < length + size)
- return 1;
- return 0;
-}
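
The merge check asks the chunk layer how many contiguous bytes it can serve starting at the bio's logical address (map_length) and refuses the merge when the existing bio plus the new page would run past that. Reduced to the bare comparison it performs (helper name is illustrative):

#include <stdbool.h>
#include <stdint.h>

/* map_length: contiguous bytes the chunk layer maps from the bio's start.
 * Returns true when appending add_size more bytes would cross into the
 * next stripe/chunk, i.e. when the caller should start a new bio.
 * Mirrors the "map_length < length + size" test above. */
static bool would_cross_stripe(uint64_t bio_len, uint64_t add_size,
			       uint64_t map_length)
{
	return map_length < bio_len + add_size;
}
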
-
-/*
- * in order to insert checksums into the metadata in large chunks,
- * we wait until bio submission time. All the pages in the bio are
- * checksummed and sums are attached onto the ordered extent record.
- *
- * At IO completion time the csums attached to the ordered extent record
- * are inserted into the btree
- */
-static int __btrfs_submit_bio_start(struct inode *inode, int rw,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags,
- u64 bio_offset)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret = 0;
-
- ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
- BUG_ON(ret); /* -ENOMEM */
- return 0;
-}
-
-/*
- * in order to insert checksums into the metadata in large chunks,
- * we wait until bio submission time. All the pages in the bio are
- * checksummed and sums are attached onto the ordered extent record.
- *
- * At IO completion time the csums attached to the ordered extent record
- * are inserted into the btree
- */
-static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- return btrfs_map_bio(root, rw, bio, mirror_num, 1);
-}
-
-/*
- * extent_io.c submission hook. This does the right thing for csum calculation
- * on write, or reading the csums from the tree before a read
- */
-static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
- int mirror_num, unsigned long bio_flags,
- u64 bio_offset)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret = 0;
- int skip_sum;
- int metadata = 0;
-
- skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
-
- if (btrfs_is_free_space_inode(root, inode))
- metadata = 2;
-
- ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
- if (ret)
- return ret;
-
- if (!(rw & REQ_WRITE)) {
- if (bio_flags & EXTENT_BIO_COMPRESSED) {
- return btrfs_submit_compressed_read(inode, bio,
- mirror_num, bio_flags);
- } else if (!skip_sum) {
- ret = btrfs_lookup_bio_sums(root, inode, bio, NULL);
- if (ret)
- return ret;
- }
- goto mapit;
- } else if (!skip_sum) {
- /* csum items have already been cloned */
- if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
- goto mapit;
- /* we're doing a write, do the async checksumming */
- return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
- inode, rw, bio, mirror_num,
- bio_flags, bio_offset,
- __btrfs_submit_bio_start,
- __btrfs_submit_bio_done);
- }
-
-mapit:
- return btrfs_map_bio(root, rw, bio, mirror_num, 0);
-}
-
-/*
- * given a list of ordered sums, record them in the inode. This happens
- * at IO completion time based on sums calculated at bio submission time.
- */
-static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
- struct inode *inode, u64 file_offset,
- struct list_head *list)
-{
- struct btrfs_ordered_sum *sum;
-
- list_for_each_entry(sum, list, list) {
- btrfs_csum_file_blocks(trans,
- BTRFS_I(inode)->root->fs_info->csum_root, sum);
- }
- return 0;
-}
-
-int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
- struct extent_state **cached_state)
-{
- if ((end & (PAGE_CACHE_SIZE - 1)) == 0)
- WARN_ON(1);
- return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
- cached_state, GFP_NOFS);
-}
-
-/* see btrfs_writepage_start_hook for details on why this is required */
-struct btrfs_writepage_fixup {
- struct page *page;
- struct btrfs_work work;
-};
-
-static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
-{
- struct btrfs_writepage_fixup *fixup;
- struct btrfs_ordered_extent *ordered;
- struct extent_state *cached_state = NULL;
- struct page *page;
- struct inode *inode;
- u64 page_start;
- u64 page_end;
- int ret;
-
- fixup = container_of(work, struct btrfs_writepage_fixup, work);
- page = fixup->page;
-again:
- lock_page(page);
- if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
- ClearPageChecked(page);
- goto out_page;
- }
-
- inode = page->mapping->host;
- page_start = page_offset(page);
- page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
-
- lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0,
- &cached_state);
-
- /* already ordered? We're done */
- if (PagePrivate2(page))
- goto out;
-
- ordered = btrfs_lookup_ordered_extent(inode, page_start);
- if (ordered) {
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
- page_end, &cached_state, GFP_NOFS);
- unlock_page(page);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- goto again;
- }
-
- ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
- if (ret) {
- mapping_set_error(page->mapping, ret);
- end_extent_writepage(page, ret, page_start, page_end);
- ClearPageChecked(page);
- goto out;
- }
-
- btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
- ClearPageChecked(page);
- set_page_dirty(page);
-out:
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
- &cached_state, GFP_NOFS);
-out_page:
- unlock_page(page);
- page_cache_release(page);
- kfree(fixup);
-}
-
-/*
- * There are a few paths in the higher layers of the kernel that directly
- * set the page dirty bit without asking the filesystem if it is a
- * good idea. This causes problems because we want to make sure COW
- * properly happens and the data=ordered rules are followed.
- *
- * In our case any range that doesn't have the ORDERED bit set
- * hasn't been properly setup for IO. We kick off an async process
- * to fix it up. The async helper will wait for ordered extents, set
- * the delalloc bit and make it safe to write the page.
- */
-static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
-{
- struct inode *inode = page->mapping->host;
- struct btrfs_writepage_fixup *fixup;
- struct btrfs_root *root = BTRFS_I(inode)->root;
-
- /* this page is properly in the ordered list */
- if (TestClearPagePrivate2(page))
- return 0;
-
- if (PageChecked(page))
- return -EAGAIN;
-
- fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
- if (!fixup)
- return -EAGAIN;
-
- SetPageChecked(page);
- page_cache_get(page);
- fixup->work.func = btrfs_writepage_fixup_worker;
- fixup->page = page;
- btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work);
- return -EBUSY;
-}
-
-static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
- struct inode *inode, u64 file_pos,
- u64 disk_bytenr, u64 disk_num_bytes,
- u64 num_bytes, u64 ram_bytes,
- u8 compression, u8 encryption,
- u16 other_encoding, int extent_type)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_file_extent_item *fi;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_key ins;
- u64 hint;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- path->leave_spinning = 1;
-
- /*
- * we may be replacing one extent in the tree with another.
- * The new extent is pinned in the extent map, and we don't want
- * to drop it from the cache until it is completely in the btree.
- *
- * So, tell btrfs_drop_extents to leave this extent in the cache.
- * the caller is expected to unpin it and allow it to be merged
- * with the others.
- */
- ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
- &hint, 0);
- if (ret)
- goto out;
-
- ins.objectid = btrfs_ino(inode);
- ins.offset = file_pos;
- ins.type = BTRFS_EXTENT_DATA_KEY;
- ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi));
- if (ret)
- goto out;
- leaf = path->nodes[0];
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_generation(leaf, fi, trans->transid);
- btrfs_set_file_extent_type(leaf, fi, extent_type);
- btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
- btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes);
- btrfs_set_file_extent_offset(leaf, fi, 0);
- btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
- btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
- btrfs_set_file_extent_compression(leaf, fi, compression);
- btrfs_set_file_extent_encryption(leaf, fi, encryption);
- btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
-
- btrfs_unlock_up_safe(path, 1);
- btrfs_set_lock_blocking(leaf);
-
- btrfs_mark_buffer_dirty(leaf);
-
- inode_add_bytes(inode, num_bytes);
-
- ins.objectid = disk_bytenr;
- ins.offset = disk_num_bytes;
- ins.type = BTRFS_EXTENT_ITEM_KEY;
- ret = btrfs_alloc_reserved_file_extent(trans, root,
- root->root_key.objectid,
- btrfs_ino(inode), file_pos, &ins);
-out:
- btrfs_free_path(path);
-
- return ret;
-}
-
-/*
- * As ordered data IO finishes, this gets called so we can finish
- * an ordered extent if the range of bytes in the file it covers has
- * been fully written.
- */
-static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans = NULL;
- struct btrfs_ordered_extent *ordered_extent = NULL;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- struct extent_state *cached_state = NULL;
- int compress_type = 0;
- int ret;
- bool nolock;
-
- ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
- end - start + 1);
- if (!ret)
- return 0;
- BUG_ON(!ordered_extent); /* Logic error */
-
- nolock = btrfs_is_free_space_inode(root, inode);
-
- if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
- BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
- ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
- if (!ret) {
- if (nolock)
- trans = btrfs_join_transaction_nolock(root);
- else
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- ret = btrfs_update_inode_fallback(trans, root, inode);
- if (ret) /* -ENOMEM or corruption */
- btrfs_abort_transaction(trans, root, ret);
- }
- goto out;
- }
-
- lock_extent_bits(io_tree, ordered_extent->file_offset,
- ordered_extent->file_offset + ordered_extent->len - 1,
- 0, &cached_state);
-
- if (nolock)
- trans = btrfs_join_transaction_nolock(root);
- else
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- trans = NULL;
- goto out_unlock;
- }
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-
- if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
- compress_type = ordered_extent->compress_type;
- if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
- BUG_ON(compress_type);
- ret = btrfs_mark_extent_written(trans, inode,
- ordered_extent->file_offset,
- ordered_extent->file_offset +
- ordered_extent->len);
- } else {
- BUG_ON(root == root->fs_info->tree_root);
- ret = insert_reserved_file_extent(trans, inode,
- ordered_extent->file_offset,
- ordered_extent->start,
- ordered_extent->disk_len,
- ordered_extent->len,
- ordered_extent->len,
- compress_type, 0, 0,
- BTRFS_FILE_EXTENT_REG);
- unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
- ordered_extent->file_offset,
- ordered_extent->len);
- }
- unlock_extent_cached(io_tree, ordered_extent->file_offset,
- ordered_extent->file_offset +
- ordered_extent->len - 1, &cached_state, GFP_NOFS);
- if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
- goto out;
- }
-
- add_pending_csums(trans, inode, ordered_extent->file_offset,
- &ordered_extent->list);
-
- ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
- if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
- ret = btrfs_update_inode_fallback(trans, root, inode);
- if (ret) { /* -ENOMEM or corruption */
- btrfs_abort_transaction(trans, root, ret);
- goto out;
- }
- }
- ret = 0;
-out:
- if (root != root->fs_info->tree_root)
- btrfs_delalloc_release_metadata(inode, ordered_extent->len);
- if (trans) {
- if (nolock)
- btrfs_end_transaction_nolock(trans, root);
- else
- btrfs_end_transaction(trans, root);
- }
-
- /* once for us */
- btrfs_put_ordered_extent(ordered_extent);
- /* once for the tree */
- btrfs_put_ordered_extent(ordered_extent);
-
- return 0;
-out_unlock:
- unlock_extent_cached(io_tree, ordered_extent->file_offset,
- ordered_extent->file_offset +
- ordered_extent->len - 1, &cached_state, GFP_NOFS);
- goto out;
-}
-
-static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
- struct extent_state *state, int uptodate)
-{
- trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
-
- ClearPagePrivate2(page);
- return btrfs_finish_ordered_io(page->mapping->host, start, end);
-}
-
-/*
- * when reads are done, we need to check csums to verify the data is correct.
- * If there's a match, we allow the bio to finish. If not, the code in
- * extent_io.c will try to find good copies for us.
- */
-static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
- struct extent_state *state, int mirror)
-{
- size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
- struct inode *inode = page->mapping->host;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- char *kaddr;
- u64 private = ~(u32)0;
- int ret;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- u32 csum = ~(u32)0;
-
- if (PageChecked(page)) {
- ClearPageChecked(page);
- goto good;
- }
-
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
- goto good;
-
- if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
- test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
- clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM,
- GFP_NOFS);
- return 0;
- }
-
- if (state && state->start == start) {
- private = state->private;
- ret = 0;
- } else {
- ret = get_state_private(io_tree, start, &private);
- }
- kaddr = kmap_atomic(page);
- if (ret)
- goto zeroit;
-
- csum = btrfs_csum_data(root, kaddr + offset, csum, end - start + 1);
- btrfs_csum_final(csum, (char *)&csum);
- if (csum != private)
- goto zeroit;
-
- kunmap_atomic(kaddr);
-good:
- return 0;
-
-zeroit:
- printk_ratelimited(KERN_INFO "btrfs csum failed ino %llu off %llu csum %u "
- "private %llu\n",
- (unsigned long long)btrfs_ino(page->mapping->host),
- (unsigned long long)start, csum,
- (unsigned long long)private);
- memset(kaddr + offset, 1, end - start + 1);
- flush_dcache_page(page);
- kunmap_atomic(kaddr);
- if (private == 0)
- return 0;
- return -EIO;
-}
-
-struct delayed_iput {
- struct list_head list;
- struct inode *inode;
-};
-
-/* JDM: If this is fs-wide, why can't we add a pointer to
- * btrfs_inode instead and avoid the allocation? */
-void btrfs_add_delayed_iput(struct inode *inode)
-{
- struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
- struct delayed_iput *delayed;
-
- if (atomic_add_unless(&inode->i_count, -1, 1))
- return;
-
- delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
- delayed->inode = inode;
-
- spin_lock(&fs_info->delayed_iput_lock);
- list_add_tail(&delayed->list, &fs_info->delayed_iputs);
- spin_unlock(&fs_info->delayed_iput_lock);
-}
-
-void btrfs_run_delayed_iputs(struct btrfs_root *root)
-{
- LIST_HEAD(list);
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct delayed_iput *delayed;
- int empty;
-
- spin_lock(&fs_info->delayed_iput_lock);
- empty = list_empty(&fs_info->delayed_iputs);
- spin_unlock(&fs_info->delayed_iput_lock);
- if (empty)
- return;
-
- down_read(&root->fs_info->cleanup_work_sem);
- spin_lock(&fs_info->delayed_iput_lock);
- list_splice_init(&fs_info->delayed_iputs, &list);
- spin_unlock(&fs_info->delayed_iput_lock);
-
- while (!list_empty(&list)) {
- delayed = list_entry(list.next, struct delayed_iput, list);
- list_del(&delayed->list);
- iput(delayed->inode);
- kfree(delayed);
- }
- up_read(&root->fs_info->cleanup_work_sem);
-}
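
The delayed-iput machinery is a general defer-then-drain pattern: contexts that must not perform the final release queue the object on a locked list, and a later, safe context splices the list off and drains it. A user-space sketch of that pattern with pthreads and a generic payload (names are illustrative; the kernel version also short-circuits when the reference being dropped is not the last one, and uses a __GFP_NOFAIL allocation instead of the inline fallback shown here):

#include <pthread.h>
#include <stdlib.h>

struct deferred {
	struct deferred *next;
	void (*release)(void *obj);
	void *obj;
};

static pthread_mutex_t deferred_lock = PTHREAD_MUTEX_INITIALIZER;
static struct deferred *deferred_head;

/* Queue the final release instead of doing it in the current context. */
static void defer_release(void *obj, void (*release)(void *))
{
	struct deferred *d = malloc(sizeof(*d));

	if (!d) {
		release(obj);	/* fallback: release inline (sketch only) */
		return;
	}
	d->obj = obj;
	d->release = release;
	pthread_mutex_lock(&deferred_lock);
	d->next = deferred_head;
	deferred_head = d;
	pthread_mutex_unlock(&deferred_lock);
}

/* Called later from a context where the releases are safe to run. */
static void drain_deferred(void)
{
	struct deferred *list;

	pthread_mutex_lock(&deferred_lock);
	list = deferred_head;
	deferred_head = NULL;
	pthread_mutex_unlock(&deferred_lock);

	while (list) {
		struct deferred *d = list;

		list = d->next;
		d->release(d->obj);
		free(d);
	}
}
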
-
-enum btrfs_orphan_cleanup_state {
- ORPHAN_CLEANUP_STARTED = 1,
- ORPHAN_CLEANUP_DONE = 2,
-};
-
-/*
- * This is called at transaction commit time. If there are no orphan
- * files in the subvolume, it removes orphan item and frees block_rsv
- * structure.
- */
-void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_block_rsv *block_rsv;
- int ret;
-
- if (!list_empty(&root->orphan_list) ||
- root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
- return;
-
- spin_lock(&root->orphan_lock);
- if (!list_empty(&root->orphan_list)) {
- spin_unlock(&root->orphan_lock);
- return;
- }
-
- if (root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) {
- spin_unlock(&root->orphan_lock);
- return;
- }
-
- block_rsv = root->orphan_block_rsv;
- root->orphan_block_rsv = NULL;
- spin_unlock(&root->orphan_lock);
-
- if (root->orphan_item_inserted &&
- btrfs_root_refs(&root->root_item) > 0) {
- ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
- root->root_key.objectid);
- BUG_ON(ret);
- root->orphan_item_inserted = 0;
- }
-
- if (block_rsv) {
- WARN_ON(block_rsv->size > 0);
- btrfs_free_block_rsv(root, block_rsv);
- }
-}
-
-/*
- * This creates an orphan entry for the given inode in case something goes
- * wrong in the middle of an unlink/truncate.
- *
- * NOTE: caller of this function should reserve 5 units of metadata for
- * this function.
- */
-int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_block_rsv *block_rsv = NULL;
- int reserve = 0;
- int insert = 0;
- int ret;
-
- if (!root->orphan_block_rsv) {
- block_rsv = btrfs_alloc_block_rsv(root);
- if (!block_rsv)
- return -ENOMEM;
- }
-
- spin_lock(&root->orphan_lock);
- if (!root->orphan_block_rsv) {
- root->orphan_block_rsv = block_rsv;
- } else if (block_rsv) {
- btrfs_free_block_rsv(root, block_rsv);
- block_rsv = NULL;
- }
-
- if (list_empty(&BTRFS_I(inode)->i_orphan)) {
- list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
-#if 0
- /*
- * For proper ENOSPC handling, we should do orphan
- * cleanup when mounting. But this introduces a backward
- * compatibility issue.
- */
- if (!xchg(&root->orphan_item_inserted, 1))
- insert = 2;
- else
- insert = 1;
-#endif
- insert = 1;
- }
-
- if (!BTRFS_I(inode)->orphan_meta_reserved) {
- BTRFS_I(inode)->orphan_meta_reserved = 1;
- reserve = 1;
- }
- spin_unlock(&root->orphan_lock);
-
- /* grab metadata reservation from transaction handle */
- if (reserve) {
- ret = btrfs_orphan_reserve_metadata(trans, inode);
- BUG_ON(ret); /* -ENOSPC in reservation; Logic error? JDM */
- }
-
- /* insert an orphan item to track this unlinked/truncated file */
- if (insert >= 1) {
- ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
- if (ret && ret != -EEXIST) {
- btrfs_abort_transaction(trans, root, ret);
- return ret;
- }
- ret = 0;
- }
-
-	/* insert an orphan item to track that the subvolume contains orphan files */
- if (insert >= 2) {
- ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
- root->root_key.objectid);
- if (ret && ret != -EEXIST) {
- btrfs_abort_transaction(trans, root, ret);
- return ret;
- }
- }
- return 0;
-}
-
-/*
- * We have done the truncate/delete so we can go ahead and remove the orphan
- * item for this particular inode.
- */
-int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- int delete_item = 0;
- int release_rsv = 0;
- int ret = 0;
-
- spin_lock(&root->orphan_lock);
- if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
- list_del_init(&BTRFS_I(inode)->i_orphan);
- delete_item = 1;
- }
-
- if (BTRFS_I(inode)->orphan_meta_reserved) {
- BTRFS_I(inode)->orphan_meta_reserved = 0;
- release_rsv = 1;
- }
- spin_unlock(&root->orphan_lock);
-
- if (trans && delete_item) {
- ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
- BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
- }
-
- if (release_rsv)
- btrfs_orphan_release_metadata(inode);
-
- return 0;
-}
-
-/*
- * this cleans up any orphans that may be left on the list from the last use
- * of this root.
- */
-int btrfs_orphan_cleanup(struct btrfs_root *root)
-{
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_key key, found_key;
- struct btrfs_trans_handle *trans;
- struct inode *inode;
- u64 last_objectid = 0;
- int ret = 0, nr_unlink = 0, nr_truncate = 0;
-
- if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
- return 0;
-
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
- path->reada = -1;
-
- key.objectid = BTRFS_ORPHAN_OBJECTID;
- btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
- key.offset = (u64)-1;
-
- while (1) {
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
-
- /*
- * if ret == 0 means we found what we were searching for, which
- * is weird, but possible, so only screw with path if we didn't
- * find the key and see if we have stuff that matches
- */
- if (ret > 0) {
- ret = 0;
- if (path->slots[0] == 0)
- break;
- path->slots[0]--;
- }
-
- /* pull out the item */
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
- /* make sure the item matches what we want */
- if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
- break;
- if (btrfs_key_type(&found_key) != BTRFS_ORPHAN_ITEM_KEY)
- break;
-
- /* release the path since we're done with it */
- btrfs_release_path(path);
-
- /*
- * this is where we are basically btrfs_lookup, without the
- * crossing root thing. we store the inode number in the
- * offset of the orphan item.
- */
-
- if (found_key.offset == last_objectid) {
- printk(KERN_ERR "btrfs: Error removing orphan entry, "
- "stopping orphan cleanup\n");
- ret = -EINVAL;
- goto out;
- }
-
- last_objectid = found_key.offset;
-
- found_key.objectid = found_key.offset;
- found_key.type = BTRFS_INODE_ITEM_KEY;
- found_key.offset = 0;
- inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
- ret = PTR_RET(inode);
- if (ret && ret != -ESTALE)
- goto out;
-
- if (ret == -ESTALE && root == root->fs_info->tree_root) {
- struct btrfs_root *dead_root;
- struct btrfs_fs_info *fs_info = root->fs_info;
- int is_dead_root = 0;
-
- /*
- * this is an orphan in the tree root. Currently these
- * could come from 2 sources:
- * a) a snapshot deletion in progress
- * b) a free space cache inode
- * We need to distinguish those two, as the snapshot
- * orphan must not get deleted.
- * find_dead_roots already ran before us, so if this
- * is a snapshot deletion, we should find the root
- * in the dead_roots list
- */
- spin_lock(&fs_info->trans_lock);
- list_for_each_entry(dead_root, &fs_info->dead_roots,
- root_list) {
- if (dead_root->root_key.objectid ==
- found_key.objectid) {
- is_dead_root = 1;
- break;
- }
- }
- spin_unlock(&fs_info->trans_lock);
- if (is_dead_root) {
- /* prevent this orphan from being found again */
- key.offset = found_key.objectid - 1;
- continue;
- }
- }
- /*
- * Inode is already gone but the orphan item is still there,
- * kill the orphan item.
- */
- if (ret == -ESTALE) {
- trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto out;
- }
- ret = btrfs_del_orphan_item(trans, root,
- found_key.objectid);
- BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
- btrfs_end_transaction(trans, root);
- continue;
- }
-
- /*
- * add this inode to the orphan list so btrfs_orphan_del does
- * the proper thing when we hit it
- */
- spin_lock(&root->orphan_lock);
- list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
- spin_unlock(&root->orphan_lock);
-
- /* if we have links, this was a truncate, lets do that */
- if (inode->i_nlink) {
- if (!S_ISREG(inode->i_mode)) {
- WARN_ON(1);
- iput(inode);
- continue;
- }
- nr_truncate++;
- ret = btrfs_truncate(inode);
- } else {
- nr_unlink++;
- }
-
- /* this will do delete_inode and everything for us */
- iput(inode);
- if (ret)
- goto out;
- }
- /* release the path since we're done with it */
- btrfs_release_path(path);
-
- root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
-
- if (root->orphan_block_rsv)
- btrfs_block_rsv_release(root, root->orphan_block_rsv,
- (u64)-1);
-
- if (root->orphan_block_rsv || root->orphan_item_inserted) {
- trans = btrfs_join_transaction(root);
- if (!IS_ERR(trans))
- btrfs_end_transaction(trans, root);
- }
-
- if (nr_unlink)
- printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
- if (nr_truncate)
- printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
-
-out:
- if (ret)
- printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret);
- btrfs_free_path(path);
- return ret;
-}
-
-/*
- * very simple check to peek ahead in the leaf looking for xattrs. If we
- * don't find any xattrs, we know there can't be any acls.
- *
- * slot is the slot the inode is in, objectid is the objectid of the inode
- */
-static noinline int acls_after_inode_item(struct extent_buffer *leaf,
- int slot, u64 objectid)
-{
- u32 nritems = btrfs_header_nritems(leaf);
- struct btrfs_key found_key;
- int scanned = 0;
-
- slot++;
- while (slot < nritems) {
- btrfs_item_key_to_cpu(leaf, &found_key, slot);
-
- /* we found a different objectid, there must not be acls */
- if (found_key.objectid != objectid)
- return 0;
-
- /* we found an xattr, assume we've got an acl */
- if (found_key.type == BTRFS_XATTR_ITEM_KEY)
- return 1;
-
- /*
- * we found a key greater than an xattr key, there can't
- * be any acls later on
- */
- if (found_key.type > BTRFS_XATTR_ITEM_KEY)
- return 0;
-
- slot++;
- scanned++;
-
- /*
- * it goes inode, inode backrefs, xattrs, extents,
- * so if there are a ton of hard links to an inode there can
- * be a lot of backrefs. Don't waste time searching too hard,
- * this is just an optimization
- */
- if (scanned >= 8)
- break;
- }
- /* we hit the end of the leaf before we found an xattr or
- * something larger than an xattr. We have to assume the inode
- * has acls
- */
- return 1;
-}
-
-/*
- * read an inode from the btree into the in-memory inode
- */
-static void btrfs_read_locked_inode(struct inode *inode)
-{
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_inode_item *inode_item;
- struct btrfs_timespec *tspec;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_key location;
- int maybe_acls;
- u32 rdev;
- int ret;
- bool filled = false;
-
- ret = btrfs_fill_inode(inode, &rdev);
- if (!ret)
- filled = true;
-
- path = btrfs_alloc_path();
- if (!path)
- goto make_bad;
-
- path->leave_spinning = 1;
- memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
-
- ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
- if (ret)
- goto make_bad;
-
- leaf = path->nodes[0];
-
- if (filled)
- goto cache_acl;
-
- inode_item = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_inode_item);
- inode->i_mode = btrfs_inode_mode(leaf, inode_item);
- set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
- inode->i_uid = btrfs_inode_uid(leaf, inode_item);
- inode->i_gid = btrfs_inode_gid(leaf, inode_item);
- btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
-
- tspec = btrfs_inode_atime(inode_item);
- inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
- inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
-
- tspec = btrfs_inode_mtime(inode_item);
- inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
- inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
-
- tspec = btrfs_inode_ctime(inode_item);
- inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
- inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
-
- inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
- BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
- BTRFS_I(inode)->sequence = btrfs_inode_sequence(leaf, inode_item);
- inode->i_generation = BTRFS_I(inode)->generation;
- inode->i_rdev = 0;
- rdev = btrfs_inode_rdev(leaf, inode_item);
-
- BTRFS_I(inode)->index_cnt = (u64)-1;
- BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
-cache_acl:
- /*
- * try to precache a NULL acl entry for files that don't have
- * any xattrs or acls
- */
- maybe_acls = acls_after_inode_item(leaf, path->slots[0],
- btrfs_ino(inode));
- if (!maybe_acls)
- cache_no_acl(inode);
-
- btrfs_free_path(path);
-
- switch (inode->i_mode & S_IFMT) {
- case S_IFREG:
- inode->i_mapping->a_ops = &btrfs_aops;
- inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
- BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
- inode->i_fop = &btrfs_file_operations;
- inode->i_op = &btrfs_file_inode_operations;
- break;
- case S_IFDIR:
- inode->i_fop = &btrfs_dir_file_operations;
- if (root == root->fs_info->tree_root)
- inode->i_op = &btrfs_dir_ro_inode_operations;
- else
- inode->i_op = &btrfs_dir_inode_operations;
- break;
- case S_IFLNK:
- inode->i_op = &btrfs_symlink_inode_operations;
- inode->i_mapping->a_ops = &btrfs_symlink_aops;
- inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
- break;
- default:
- inode->i_op = &btrfs_special_inode_operations;
- init_special_inode(inode, inode->i_mode, rdev);
- break;
- }
-
- btrfs_update_iflags(inode);
- return;
-
-make_bad:
- btrfs_free_path(path);
- make_bad_inode(inode);
-}
-
-/*
- * given a leaf and an inode, copy the inode fields into the leaf
- */
-static void fill_inode_item(struct btrfs_trans_handle *trans,
- struct extent_buffer *leaf,
- struct btrfs_inode_item *item,
- struct inode *inode)
-{
- btrfs_set_inode_uid(leaf, item, inode->i_uid);
- btrfs_set_inode_gid(leaf, item, inode->i_gid);
- btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
- btrfs_set_inode_mode(leaf, item, inode->i_mode);
- btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
-
- btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
- inode->i_atime.tv_sec);
- btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
- inode->i_atime.tv_nsec);
-
- btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
- inode->i_mtime.tv_sec);
- btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
- inode->i_mtime.tv_nsec);
-
- btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
- inode->i_ctime.tv_sec);
- btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
- inode->i_ctime.tv_nsec);
-
- btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode));
- btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
- btrfs_set_inode_sequence(leaf, item, BTRFS_I(inode)->sequence);
- btrfs_set_inode_transid(leaf, item, trans->transid);
- btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
- btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
- btrfs_set_inode_block_group(leaf, item, 0);
-}
-
-/*
- * copy everything in the in-memory inode into the btree.
- */
-static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode)
-{
- struct btrfs_inode_item *inode_item;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- path->leave_spinning = 1;
- ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
- 1);
- if (ret) {
- if (ret > 0)
- ret = -ENOENT;
- goto failed;
- }
-
- btrfs_unlock_up_safe(path, 1);
- leaf = path->nodes[0];
- inode_item = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_inode_item);
-
- fill_inode_item(trans, leaf, inode_item, inode);
- btrfs_mark_buffer_dirty(leaf);
- btrfs_set_inode_last_trans(trans, inode);
- ret = 0;
-failed:
- btrfs_free_path(path);
- return ret;
-}
-
-/*
- * copy everything in the in-memory inode into the btree.
- */
-noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode)
-{
- int ret;
-
- /*
- * If the inode is a free space inode, we can deadlock during commit
- * if we put it into the delayed code.
- *
- * The data relocation inode should also be directly updated
- * without delay
- */
- if (!btrfs_is_free_space_inode(root, inode)
- && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
- ret = btrfs_delayed_update_inode(trans, root, inode);
- if (!ret)
- btrfs_set_inode_last_trans(trans, inode);
- return ret;
- }
-
- return btrfs_update_inode_item(trans, root, inode);
-}
-
-static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode)
-{
- int ret;
-
- ret = btrfs_update_inode(trans, root, inode);
- if (ret == -ENOSPC)
- return btrfs_update_inode_item(trans, root, inode);
- return ret;
-}
-
-/*
- * unlink helper that gets used here in inode.c and in the tree logging
- * recovery code. It removes a link in a directory with a given name, and
- * also drops the back refs in the inode to the directory
- */
-static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *dir, struct inode *inode,
- const char *name, int name_len)
-{
- struct btrfs_path *path;
- int ret = 0;
- struct extent_buffer *leaf;
- struct btrfs_dir_item *di;
- struct btrfs_key key;
- u64 index;
- u64 ino = btrfs_ino(inode);
- u64 dir_ino = btrfs_ino(dir);
-
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
- path->leave_spinning = 1;
- di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
- name, name_len, -1);
- if (IS_ERR(di)) {
- ret = PTR_ERR(di);
- goto err;
- }
- if (!di) {
- ret = -ENOENT;
- goto err;
- }
- leaf = path->nodes[0];
- btrfs_dir_item_key_to_cpu(leaf, di, &key);
- ret = btrfs_delete_one_dir_name(trans, root, path, di);
- if (ret)
- goto err;
- btrfs_release_path(path);
-
- ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
- dir_ino, &index);
- if (ret) {
- printk(KERN_INFO "btrfs failed to delete reference to %.*s, "
- "inode %llu parent %llu\n", name_len, name,
- (unsigned long long)ino, (unsigned long long)dir_ino);
- btrfs_abort_transaction(trans, root, ret);
- goto err;
- }
-
- ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto err;
- }
-
- ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
- inode, dir_ino);
- if (ret != 0 && ret != -ENOENT) {
- btrfs_abort_transaction(trans, root, ret);
- goto err;
- }
-
- ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
- dir, index);
- if (ret == -ENOENT)
- ret = 0;
-err:
- btrfs_free_path(path);
- if (ret)
- goto out;
-
- btrfs_i_size_write(dir, dir->i_size - name_len * 2);
- inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
- btrfs_update_inode(trans, root, dir);
-out:
- return ret;
-}
-
-int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *dir, struct inode *inode,
- const char *name, int name_len)
-{
- int ret;
- ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
- if (!ret) {
- btrfs_drop_nlink(inode);
- ret = btrfs_update_inode(trans, root, inode);
- }
- return ret;
-}
-
-
-/* helper to check if there is any shared block in the path */
-static int check_path_shared(struct btrfs_root *root,
- struct btrfs_path *path)
-{
- struct extent_buffer *eb;
- int level;
- u64 refs = 1;
-
- for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
- int ret;
-
- if (!path->nodes[level])
- break;
- eb = path->nodes[level];
- if (!btrfs_block_can_be_shared(root, eb))
- continue;
- ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len,
- &refs, NULL);
- if (refs > 1)
- return 1;
- }
- return 0;
-}
-
-/*
- * helper to start transaction for unlink and rmdir.
- *
- * unlink and rmdir are special in btrfs: they do not always free space,
- * so in the enospc case we should make sure they will free space before
- * allowing them to use the global metadata reservation.
- */
-static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
- struct dentry *dentry)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root = BTRFS_I(dir)->root;
- struct btrfs_path *path;
- struct btrfs_inode_ref *ref;
- struct btrfs_dir_item *di;
- struct inode *inode = dentry->d_inode;
- u64 index;
- int check_link = 1;
- int err = -ENOSPC;
- int ret;
- u64 ino = btrfs_ino(inode);
- u64 dir_ino = btrfs_ino(dir);
-
- /*
- * 1 for the possible orphan item
- * 1 for the dir item
- * 1 for the dir index
- * 1 for the inode ref
- * 1 for the inode ref in the tree log
- * 2 for the dir entries in the log
- * 1 for the inode
- */
- trans = btrfs_start_transaction(root, 8);
- if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
- return trans;
-
- if (ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
- return ERR_PTR(-ENOSPC);
-
-	/* check if someone else holds a reference */
- if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1)
- return ERR_PTR(-ENOSPC);
-
- if (atomic_read(&inode->i_count) > 2)
- return ERR_PTR(-ENOSPC);
-
- if (xchg(&root->fs_info->enospc_unlink, 1))
- return ERR_PTR(-ENOSPC);
-
- path = btrfs_alloc_path();
- if (!path) {
- root->fs_info->enospc_unlink = 0;
- return ERR_PTR(-ENOMEM);
- }
-
- /* 1 for the orphan item */
- trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- btrfs_free_path(path);
- root->fs_info->enospc_unlink = 0;
- return trans;
- }
-
- path->skip_locking = 1;
- path->search_commit_root = 1;
-
- ret = btrfs_lookup_inode(trans, root, path,
- &BTRFS_I(dir)->location, 0);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- if (ret == 0) {
- if (check_path_shared(root, path))
- goto out;
- } else {
- check_link = 0;
- }
- btrfs_release_path(path);
-
- ret = btrfs_lookup_inode(trans, root, path,
- &BTRFS_I(inode)->location, 0);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- if (ret == 0) {
- if (check_path_shared(root, path))
- goto out;
- } else {
- check_link = 0;
- }
- btrfs_release_path(path);
-
- if (ret == 0 && S_ISREG(inode->i_mode)) {
- ret = btrfs_lookup_file_extent(trans, root, path,
- ino, (u64)-1, 0);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- BUG_ON(ret == 0); /* Corruption */
- if (check_path_shared(root, path))
- goto out;
- btrfs_release_path(path);
- }
-
- if (!check_link) {
- err = 0;
- goto out;
- }
-
- di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
- dentry->d_name.name, dentry->d_name.len, 0);
- if (IS_ERR(di)) {
- err = PTR_ERR(di);
- goto out;
- }
- if (di) {
- if (check_path_shared(root, path))
- goto out;
- } else {
- err = 0;
- goto out;
- }
- btrfs_release_path(path);
-
- ref = btrfs_lookup_inode_ref(trans, root, path,
- dentry->d_name.name, dentry->d_name.len,
- ino, dir_ino, 0);
- if (IS_ERR(ref)) {
- err = PTR_ERR(ref);
- goto out;
- }
- BUG_ON(!ref); /* Logic error */
- if (check_path_shared(root, path))
- goto out;
- index = btrfs_inode_ref_index(path->nodes[0], ref);
- btrfs_release_path(path);
-
- /*
-	 * This is a commit root search; if we can look up the inode item and
-	 * other related items in the commit root, it means the transaction
-	 * that created the dir/file has been committed, and the dir index item
-	 * whose insertion we delayed has also been inserted into the commit
-	 * root. So we needn't worry about the delayed insertion of the dir
-	 * index item here.
- */
- di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index,
- dentry->d_name.name, dentry->d_name.len, 0);
- if (IS_ERR(di)) {
- err = PTR_ERR(di);
- goto out;
- }
- BUG_ON(ret == -ENOENT);
- if (check_path_shared(root, path))
- goto out;
-
- err = 0;
-out:
- btrfs_free_path(path);
- /* Migrate the orphan reservation over */
- if (!err)
- err = btrfs_block_rsv_migrate(trans->block_rsv,
- &root->fs_info->global_block_rsv,
- trans->bytes_reserved);
-
- if (err) {
- btrfs_end_transaction(trans, root);
- root->fs_info->enospc_unlink = 0;
- return ERR_PTR(err);
- }
-
- trans->block_rsv = &root->fs_info->global_block_rsv;
- return trans;
-}
-
-static void __unlink_end_trans(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- if (trans->block_rsv == &root->fs_info->global_block_rsv) {
- btrfs_block_rsv_release(root, trans->block_rsv,
- trans->bytes_reserved);
- trans->block_rsv = &root->fs_info->trans_block_rsv;
- BUG_ON(!root->fs_info->enospc_unlink);
- root->fs_info->enospc_unlink = 0;
- }
- btrfs_end_transaction(trans, root);
-}
-
-static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
-{
- struct btrfs_root *root = BTRFS_I(dir)->root;
- struct btrfs_trans_handle *trans;
- struct inode *inode = dentry->d_inode;
- int ret;
- unsigned long nr = 0;
-
- trans = __unlink_start_trans(dir, dentry);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
-
- ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
- dentry->d_name.name, dentry->d_name.len);
- if (ret)
- goto out;
-
- if (inode->i_nlink == 0) {
- ret = btrfs_orphan_add(trans, inode);
- if (ret)
- goto out;
- }
-
-out:
- nr = trans->blocks_used;
- __unlink_end_trans(trans, root);
- btrfs_btree_balance_dirty(root, nr);
- return ret;
-}
-
-int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *dir, u64 objectid,
- const char *name, int name_len)
-{
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_dir_item *di;
- struct btrfs_key key;
- u64 index;
- int ret;
- u64 dir_ino = btrfs_ino(dir);
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
- name, name_len, -1);
- if (IS_ERR_OR_NULL(di)) {
- if (!di)
- ret = -ENOENT;
- else
- ret = PTR_ERR(di);
- goto out;
- }
-
- leaf = path->nodes[0];
- btrfs_dir_item_key_to_cpu(leaf, di, &key);
- WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
- ret = btrfs_delete_one_dir_name(trans, root, path, di);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto out;
- }
- btrfs_release_path(path);
-
- ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
- objectid, root->root_key.objectid,
- dir_ino, &index, name, name_len);
- if (ret < 0) {
- if (ret != -ENOENT) {
- btrfs_abort_transaction(trans, root, ret);
- goto out;
- }
- di = btrfs_search_dir_index_item(root, path, dir_ino,
- name, name_len);
- if (IS_ERR_OR_NULL(di)) {
- if (!di)
- ret = -ENOENT;
- else
- ret = PTR_ERR(di);
- btrfs_abort_transaction(trans, root, ret);
- goto out;
- }
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- btrfs_release_path(path);
- index = key.offset;
- }
- btrfs_release_path(path);
-
- ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto out;
- }
-
- btrfs_i_size_write(dir, dir->i_size - name_len * 2);
- dir->i_mtime = dir->i_ctime = CURRENT_TIME;
- ret = btrfs_update_inode(trans, root, dir);
- if (ret)
- btrfs_abort_transaction(trans, root, ret);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
-{
- struct inode *inode = dentry->d_inode;
- int err = 0;
- struct btrfs_root *root = BTRFS_I(dir)->root;
- struct btrfs_trans_handle *trans;
- unsigned long nr = 0;
-
- if (inode->i_size > BTRFS_EMPTY_DIR_SIZE ||
- btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
- return -ENOTEMPTY;
-
- trans = __unlink_start_trans(dir, dentry);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
- err = btrfs_unlink_subvol(trans, root, dir,
- BTRFS_I(inode)->location.objectid,
- dentry->d_name.name,
- dentry->d_name.len);
- goto out;
- }
-
- err = btrfs_orphan_add(trans, inode);
- if (err)
- goto out;
-
- /* now the directory is empty */
- err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
- dentry->d_name.name, dentry->d_name.len);
- if (!err)
- btrfs_i_size_write(inode, 0);
-out:
- nr = trans->blocks_used;
- __unlink_end_trans(trans, root);
- btrfs_btree_balance_dirty(root, nr);
-
- return err;
-}
-
-/*
- * this can truncate away extent items, csum items and directory items.
- * It starts at a high offset and removes keys until it can't find
- * any higher than new_size
- *
- * csum items that cross the new i_size are truncated to the new size
- * as well.
- *
- * min_type is the minimum key type to truncate down to. If set to 0, this
- * will kill all the items on this inode, including the INODE_ITEM_KEY.
- */
-int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode,
- u64 new_size, u32 min_type)
-{
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_file_extent_item *fi;
- struct btrfs_key key;
- struct btrfs_key found_key;
- u64 extent_start = 0;
- u64 extent_num_bytes = 0;
- u64 extent_offset = 0;
- u64 item_end = 0;
- u64 mask = root->sectorsize - 1;
- u32 found_type = (u8)-1;
- int found_extent;
- int del_item;
- int pending_del_nr = 0;
- int pending_del_slot = 0;
- int extent_type = -1;
- int ret;
- int err = 0;
- u64 ino = btrfs_ino(inode);
-
- BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->reada = -1;
-
- if (root->ref_cows || root == root->fs_info->tree_root)
- btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
-
- /*
- * This function is also used to drop the items in the log tree before
- * we relog the inode, so if root != BTRFS_I(inode)->root, it means
- * it is used to drop the logged items. So we shouldn't kill the delayed
- * items.
- */
- if (min_type == 0 && root == BTRFS_I(inode)->root)
- btrfs_kill_delayed_inode_items(inode);
-
- key.objectid = ino;
- key.offset = (u64)-1;
- key.type = (u8)-1;
-
-search_again:
- path->leave_spinning = 1;
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0) {
- err = ret;
- goto out;
- }
-
- if (ret > 0) {
- /* there are no items in the tree for us to truncate, we're
- * done
- */
- if (path->slots[0] == 0)
- goto out;
- path->slots[0]--;
- }
-
- while (1) {
- fi = NULL;
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- found_type = btrfs_key_type(&found_key);
-
- if (found_key.objectid != ino)
- break;
-
- if (found_type < min_type)
- break;
-
- item_end = found_key.offset;
- if (found_type == BTRFS_EXTENT_DATA_KEY) {
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- extent_type = btrfs_file_extent_type(leaf, fi);
- if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
- item_end +=
- btrfs_file_extent_num_bytes(leaf, fi);
- } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
- item_end += btrfs_file_extent_inline_len(leaf,
- fi);
- }
- item_end--;
- }
- if (found_type > min_type) {
- del_item = 1;
- } else {
- if (item_end < new_size)
- break;
- if (found_key.offset >= new_size)
- del_item = 1;
- else
- del_item = 0;
- }
- found_extent = 0;
- /* FIXME, shrink the extent if the ref count is only 1 */
- if (found_type != BTRFS_EXTENT_DATA_KEY)
- goto delete;
-
- if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
- u64 num_dec;
- extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
- if (!del_item) {
- u64 orig_num_bytes =
- btrfs_file_extent_num_bytes(leaf, fi);
- extent_num_bytes = new_size -
- found_key.offset + root->sectorsize - 1;
- extent_num_bytes = extent_num_bytes &
- ~((u64)root->sectorsize - 1);
- btrfs_set_file_extent_num_bytes(leaf, fi,
- extent_num_bytes);
- num_dec = (orig_num_bytes -
- extent_num_bytes);
- if (root->ref_cows && extent_start != 0)
- inode_sub_bytes(inode, num_dec);
- btrfs_mark_buffer_dirty(leaf);
- } else {
- extent_num_bytes =
- btrfs_file_extent_disk_num_bytes(leaf,
- fi);
- extent_offset = found_key.offset -
- btrfs_file_extent_offset(leaf, fi);
-
- /* FIXME blocksize != 4096 */
- num_dec = btrfs_file_extent_num_bytes(leaf, fi);
- if (extent_start != 0) {
- found_extent = 1;
- if (root->ref_cows)
- inode_sub_bytes(inode, num_dec);
- }
- }
- } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
- /*
- * we can't truncate inline items that have had
- * special encodings
- */
- if (!del_item &&
- btrfs_file_extent_compression(leaf, fi) == 0 &&
- btrfs_file_extent_encryption(leaf, fi) == 0 &&
- btrfs_file_extent_other_encoding(leaf, fi) == 0) {
- u32 size = new_size - found_key.offset;
-
- if (root->ref_cows) {
- inode_sub_bytes(inode, item_end + 1 -
- new_size);
- }
- size =
- btrfs_file_extent_calc_inline_size(size);
- btrfs_truncate_item(trans, root, path,
- size, 1);
- } else if (root->ref_cows) {
- inode_sub_bytes(inode, item_end + 1 -
- found_key.offset);
- }
- }
-delete:
- if (del_item) {
- if (!pending_del_nr) {
- /* no pending yet, add ourselves */
- pending_del_slot = path->slots[0];
- pending_del_nr = 1;
- } else if (pending_del_nr &&
- path->slots[0] + 1 == pending_del_slot) {
- /* hop on the pending chunk */
- pending_del_nr++;
- pending_del_slot = path->slots[0];
- } else {
- BUG();
- }
- } else {
- break;
- }
- if (found_extent && (root->ref_cows ||
- root == root->fs_info->tree_root)) {
- btrfs_set_path_blocking(path);
- ret = btrfs_free_extent(trans, root, extent_start,
- extent_num_bytes, 0,
- btrfs_header_owner(leaf),
- ino, extent_offset, 0);
- BUG_ON(ret);
- }
-
- if (found_type == BTRFS_INODE_ITEM_KEY)
- break;
-
- if (path->slots[0] == 0 ||
- path->slots[0] != pending_del_slot) {
- if (root->ref_cows &&
- BTRFS_I(inode)->location.objectid !=
- BTRFS_FREE_INO_OBJECTID) {
- err = -EAGAIN;
- goto out;
- }
- if (pending_del_nr) {
- ret = btrfs_del_items(trans, root, path,
- pending_del_slot,
- pending_del_nr);
- if (ret) {
- btrfs_abort_transaction(trans,
- root, ret);
- goto error;
- }
- pending_del_nr = 0;
- }
- btrfs_release_path(path);
- goto search_again;
- } else {
- path->slots[0]--;
- }
- }
-out:
- if (pending_del_nr) {
- ret = btrfs_del_items(trans, root, path, pending_del_slot,
- pending_del_nr);
- if (ret)
- btrfs_abort_transaction(trans, root, ret);
- }
-error:
- btrfs_free_path(path);
- return err;
-}
-
-/*
- * taken from block_truncate_page, but does COW as it zeros out
- * any bytes left in the last page in the file.
- */
-static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
-{
- struct inode *inode = mapping->host;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- struct btrfs_ordered_extent *ordered;
- struct extent_state *cached_state = NULL;
- char *kaddr;
- u32 blocksize = root->sectorsize;
- pgoff_t index = from >> PAGE_CACHE_SHIFT;
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
- struct page *page;
- gfp_t mask = btrfs_alloc_write_mask(mapping);
- int ret = 0;
- u64 page_start;
- u64 page_end;
-
- if ((offset & (blocksize - 1)) == 0)
- goto out;
- ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
- if (ret)
- goto out;
-
- ret = -ENOMEM;
-again:
- page = find_or_create_page(mapping, index, mask);
- if (!page) {
- btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
- goto out;
- }
-
- page_start = page_offset(page);
- page_end = page_start + PAGE_CACHE_SIZE - 1;
-
- if (!PageUptodate(page)) {
- ret = btrfs_readpage(NULL, page);
- lock_page(page);
- if (page->mapping != mapping) {
- unlock_page(page);
- page_cache_release(page);
- goto again;
- }
- if (!PageUptodate(page)) {
- ret = -EIO;
- goto out_unlock;
- }
- }
- wait_on_page_writeback(page);
-
- lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
- set_page_extent_mapped(page);
-
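-	/*
-	 * If an ordered extent is still outstanding over this page, wait for
-	 * it to complete and retry, so we don't race with in-flight ordered I/O.
-	 */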
- ordered = btrfs_lookup_ordered_extent(inode, page_start);
- if (ordered) {
- unlock_extent_cached(io_tree, page_start, page_end,
- &cached_state, GFP_NOFS);
- unlock_page(page);
- page_cache_release(page);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- goto again;
- }
-
- clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
- EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
- 0, 0, &cached_state, GFP_NOFS);
-
- ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
- &cached_state);
- if (ret) {
- unlock_extent_cached(io_tree, page_start, page_end,
- &cached_state, GFP_NOFS);
- goto out_unlock;
- }
-
- ret = 0;
- if (offset != PAGE_CACHE_SIZE) {
- kaddr = kmap(page);
- memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
- flush_dcache_page(page);
- kunmap(page);
- }
- ClearPageChecked(page);
- set_page_dirty(page);
- unlock_extent_cached(io_tree, page_start, page_end, &cached_state,
- GFP_NOFS);
-
-out_unlock:
- if (ret)
- btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
- unlock_page(page);
- page_cache_release(page);
-out:
- return ret;
-}
-
-/*
- * This function puts in dummy file extents for the area we're creating a hole
- * for. So if we are truncating this file to a larger size we need to insert
- * these file extents so that btrfs_get_extent will return an EXTENT_MAP_HOLE
- * for the range between oldsize and size.
- */
-int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- struct extent_map *em = NULL;
- struct extent_state *cached_state = NULL;
- u64 mask = root->sectorsize - 1;
- u64 hole_start = (oldsize + mask) & ~mask;
- u64 block_end = (size + mask) & ~mask;
- u64 last_byte;
- u64 cur_offset;
- u64 hole_size;
- int err = 0;
-
- if (size <= hole_start)
- return 0;
-
- while (1) {
- struct btrfs_ordered_extent *ordered;
- btrfs_wait_ordered_range(inode, hole_start,
- block_end - hole_start);
- lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
- &cached_state);
- ordered = btrfs_lookup_ordered_extent(inode, hole_start);
- if (!ordered)
- break;
- unlock_extent_cached(io_tree, hole_start, block_end - 1,
- &cached_state, GFP_NOFS);
- btrfs_put_ordered_extent(ordered);
- }
-
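-	/*
-	 * Walk the range and insert disk_bytenr == 0 file extents for every
-	 * part of it that is not already preallocated.
-	 */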
- cur_offset = hole_start;
- while (1) {
- em = btrfs_get_extent(inode, NULL, 0, cur_offset,
- block_end - cur_offset, 0);
- if (IS_ERR(em)) {
- err = PTR_ERR(em);
- break;
- }
- last_byte = min(extent_map_end(em), block_end);
- last_byte = (last_byte + mask) & ~mask;
- if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
- u64 hint_byte = 0;
- hole_size = last_byte - cur_offset;
-
- trans = btrfs_start_transaction(root, 3);
- if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
- break;
- }
-
- err = btrfs_drop_extents(trans, inode, cur_offset,
- cur_offset + hole_size,
- &hint_byte, 1);
- if (err) {
- btrfs_abort_transaction(trans, root, err);
- btrfs_end_transaction(trans, root);
- break;
- }
-
- err = btrfs_insert_file_extent(trans, root,
- btrfs_ino(inode), cur_offset, 0,
- 0, hole_size, 0, hole_size,
- 0, 0, 0);
- if (err) {
- btrfs_abort_transaction(trans, root, err);
- btrfs_end_transaction(trans, root);
- break;
- }
-
- btrfs_drop_extent_cache(inode, hole_start,
- last_byte - 1, 0);
-
- btrfs_update_inode(trans, root, inode);
- btrfs_end_transaction(trans, root);
- }
- free_extent_map(em);
- em = NULL;
- cur_offset = last_byte;
- if (cur_offset >= block_end)
- break;
- }
-
- free_extent_map(em);
- unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
- GFP_NOFS);
- return err;
-}
-
-static int btrfs_setsize(struct inode *inode, loff_t newsize)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
- loff_t oldsize = i_size_read(inode);
- int ret;
-
- if (newsize == oldsize)
- return 0;
-
- if (newsize > oldsize) {
- truncate_pagecache(inode, oldsize, newsize);
- ret = btrfs_cont_expand(inode, oldsize, newsize);
- if (ret)
- return ret;
-
- trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- i_size_write(inode, newsize);
- btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
- ret = btrfs_update_inode(trans, root, inode);
- btrfs_end_transaction(trans, root);
- } else {
-
- /*
- * We're truncating a file that used to have good data down to
- * zero. Make sure it gets into the ordered flush list so that
- * any new writes get down to disk quickly.
- */
- if (newsize == 0)
- BTRFS_I(inode)->ordered_data_close = 1;
-
- /* we don't support swapfiles, so vmtruncate shouldn't fail */
- truncate_setsize(inode, newsize);
- ret = btrfs_truncate(inode);
- }
-
- return ret;
-}
-
-static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
-{
- struct inode *inode = dentry->d_inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- int err;
-
- if (btrfs_root_readonly(root))
- return -EROFS;
-
- err = inode_change_ok(inode, attr);
- if (err)
- return err;
-
- if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
- err = btrfs_setsize(inode, attr->ia_size);
- if (err)
- return err;
- }
-
- if (attr->ia_valid) {
- setattr_copy(inode, attr);
- err = btrfs_dirty_inode(inode);
-
- if (!err && attr->ia_valid & ATTR_MODE)
- err = btrfs_acl_chmod(inode);
- }
-
- return err;
-}
-
-void btrfs_evict_inode(struct inode *inode)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_block_rsv *rsv, *global_rsv;
- u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
- unsigned long nr;
- int ret;
-
- trace_btrfs_inode_evict(inode);
-
- truncate_inode_pages(&inode->i_data, 0);
- if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
- btrfs_is_free_space_inode(root, inode)))
- goto no_delete;
-
- if (is_bad_inode(inode)) {
- btrfs_orphan_del(NULL, inode);
- goto no_delete;
- }
- /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
- btrfs_wait_ordered_range(inode, 0, (u64)-1);
-
- if (root->fs_info->log_root_recovering) {
- BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
- goto no_delete;
- }
-
- if (inode->i_nlink > 0) {
- BUG_ON(btrfs_root_refs(&root->root_item) != 0);
- goto no_delete;
- }
-
- rsv = btrfs_alloc_block_rsv(root);
- if (!rsv) {
- btrfs_orphan_del(NULL, inode);
- goto no_delete;
- }
- rsv->size = min_size;
- global_rsv = &root->fs_info->global_block_rsv;
-
- btrfs_i_size_write(inode, 0);
-
- /*
- * This is a bit simpler than btrfs_truncate since
- *
- * 1) We've already reserved our space for our orphan item in the
- * unlink.
- * 2) We're going to delete the inode item, so we don't need to update
- * it at all.
- *
- * So we just need to reserve some slack space in case we add bytes when
- * doing the truncate.
- */
- while (1) {
- ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size);
-
- /*
-		 * Try to steal from the global reserve, since we will
-		 * likely not use this space anyway; we want to try as
-		 * hard as possible to get this to work.
- */
- if (ret)
- ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size);
-
- if (ret) {
- printk(KERN_WARNING "Could not get space for a "
- "delete, will truncate on mount %d\n", ret);
- btrfs_orphan_del(NULL, inode);
- btrfs_free_block_rsv(root, rsv);
- goto no_delete;
- }
-
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- btrfs_orphan_del(NULL, inode);
- btrfs_free_block_rsv(root, rsv);
- goto no_delete;
- }
-
- trans->block_rsv = rsv;
-
- ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
- if (ret != -EAGAIN)
- break;
-
- nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
- trans = NULL;
- btrfs_btree_balance_dirty(root, nr);
- }
-
- btrfs_free_block_rsv(root, rsv);
-
- if (ret == 0) {
- trans->block_rsv = root->orphan_block_rsv;
- ret = btrfs_orphan_del(trans, inode);
- BUG_ON(ret);
- }
-
- trans->block_rsv = &root->fs_info->trans_block_rsv;
- if (!(root == root->fs_info->tree_root ||
- root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
- btrfs_return_ino(root, btrfs_ino(inode));
-
- nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
-no_delete:
- end_writeback(inode);
- return;
-}
-
-/*
- * this returns the key found in the dir entry in the location pointer.
- * If no dir entries were found, location->objectid is 0.
- */
-static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
- struct btrfs_key *location)
-{
- const char *name = dentry->d_name.name;
- int namelen = dentry->d_name.len;
- struct btrfs_dir_item *di;
- struct btrfs_path *path;
- struct btrfs_root *root = BTRFS_I(dir)->root;
- int ret = 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
- namelen, 0);
- if (IS_ERR(di))
- ret = PTR_ERR(di);
-
- if (IS_ERR_OR_NULL(di))
- goto out_err;
-
- btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
-out:
- btrfs_free_path(path);
- return ret;
-out_err:
- location->objectid = 0;
- goto out;
-}
-
-/*
- * when we hit a tree root in a directory, the btrfs part of the inode
- * needs to be changed to reflect the root directory of the tree root. This
- * is kind of like crossing a mount point.
- */
-static int fixup_tree_root_location(struct btrfs_root *root,
- struct inode *dir,
- struct dentry *dentry,
- struct btrfs_key *location,
- struct btrfs_root **sub_root)
-{
- struct btrfs_path *path;
- struct btrfs_root *new_root;
- struct btrfs_root_ref *ref;
- struct extent_buffer *leaf;
- int ret;
- int err = 0;
-
- path = btrfs_alloc_path();
- if (!path) {
- err = -ENOMEM;
- goto out;
- }
-
- err = -ENOENT;
- ret = btrfs_find_root_ref(root->fs_info->tree_root, path,
- BTRFS_I(dir)->root->root_key.objectid,
- location->objectid);
- if (ret) {
- if (ret < 0)
- err = ret;
- goto out;
- }
-
- leaf = path->nodes[0];
- ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
- if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) ||
- btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
- goto out;
-
- ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
- (unsigned long)(ref + 1),
- dentry->d_name.len);
- if (ret)
- goto out;
-
- btrfs_release_path(path);
-
- new_root = btrfs_read_fs_root_no_name(root->fs_info, location);
- if (IS_ERR(new_root)) {
- err = PTR_ERR(new_root);
- goto out;
- }
-
- if (btrfs_root_refs(&new_root->root_item) == 0) {
- err = -ENOENT;
- goto out;
- }
-
- *sub_root = new_root;
- location->objectid = btrfs_root_dirid(&new_root->root_item);
- location->type = BTRFS_INODE_ITEM_KEY;
- location->offset = 0;
- err = 0;
-out:
- btrfs_free_path(path);
- return err;
-}
-
-static void inode_tree_add(struct inode *inode)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_inode *entry;
- struct rb_node **p;
- struct rb_node *parent;
- u64 ino = btrfs_ino(inode);
-again:
- p = &root->inode_tree.rb_node;
- parent = NULL;
-
- if (inode_unhashed(inode))
- return;
-
- spin_lock(&root->inode_lock);
- while (*p) {
- parent = *p;
- entry = rb_entry(parent, struct btrfs_inode, rb_node);
-
- if (ino < btrfs_ino(&entry->vfs_inode))
- p = &parent->rb_left;
- else if (ino > btrfs_ino(&entry->vfs_inode))
- p = &parent->rb_right;
- else {
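-			/* a stale inode with this number is being freed; drop it and retry */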
- WARN_ON(!(entry->vfs_inode.i_state &
- (I_WILL_FREE | I_FREEING)));
- rb_erase(parent, &root->inode_tree);
- RB_CLEAR_NODE(parent);
- spin_unlock(&root->inode_lock);
- goto again;
- }
- }
- rb_link_node(&BTRFS_I(inode)->rb_node, parent, p);
- rb_insert_color(&BTRFS_I(inode)->rb_node, &root->inode_tree);
- spin_unlock(&root->inode_lock);
-}
-
-static void inode_tree_del(struct inode *inode)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- int empty = 0;
-
- spin_lock(&root->inode_lock);
- if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
- rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
- RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
- empty = RB_EMPTY_ROOT(&root->inode_tree);
- }
- spin_unlock(&root->inode_lock);
-
- /*
-	 * The free space cache keeps its inodes in the tree root, but the tree
-	 * root has a root_refs of 0, so this could end up dropping the tree
-	 * root as a snapshot; we need the extra root != root->fs_info->tree_root
-	 * check to make sure we don't drop it.
- */
- if (empty && btrfs_root_refs(&root->root_item) == 0 &&
- root != root->fs_info->tree_root) {
- synchronize_srcu(&root->fs_info->subvol_srcu);
- spin_lock(&root->inode_lock);
- empty = RB_EMPTY_ROOT(&root->inode_tree);
- spin_unlock(&root->inode_lock);
- if (empty)
- btrfs_add_dead_root(root);
- }
-}
-
-void btrfs_invalidate_inodes(struct btrfs_root *root)
-{
- struct rb_node *node;
- struct rb_node *prev;
- struct btrfs_inode *entry;
- struct inode *inode;
- u64 objectid = 0;
-
- WARN_ON(btrfs_root_refs(&root->root_item) != 0);
-
- spin_lock(&root->inode_lock);
-again:
- node = root->inode_tree.rb_node;
- prev = NULL;
- while (node) {
- prev = node;
- entry = rb_entry(node, struct btrfs_inode, rb_node);
-
- if (objectid < btrfs_ino(&entry->vfs_inode))
- node = node->rb_left;
- else if (objectid > btrfs_ino(&entry->vfs_inode))
- node = node->rb_right;
- else
- break;
- }
- if (!node) {
- while (prev) {
- entry = rb_entry(prev, struct btrfs_inode, rb_node);
- if (objectid <= btrfs_ino(&entry->vfs_inode)) {
- node = prev;
- break;
- }
- prev = rb_next(prev);
- }
- }
- while (node) {
- entry = rb_entry(node, struct btrfs_inode, rb_node);
- objectid = btrfs_ino(&entry->vfs_inode) + 1;
- inode = igrab(&entry->vfs_inode);
- if (inode) {
- spin_unlock(&root->inode_lock);
- if (atomic_read(&inode->i_count) > 1)
- d_prune_aliases(inode);
- /*
- * btrfs_drop_inode will have it removed from
- * the inode cache when its usage count
- * hits zero.
- */
- iput(inode);
- cond_resched();
- spin_lock(&root->inode_lock);
- goto again;
- }
-
- if (cond_resched_lock(&root->inode_lock))
- goto again;
-
- node = rb_next(node);
- }
- spin_unlock(&root->inode_lock);
-}
-
-static int btrfs_init_locked_inode(struct inode *inode, void *p)
-{
- struct btrfs_iget_args *args = p;
- inode->i_ino = args->ino;
- BTRFS_I(inode)->root = args->root;
- btrfs_set_inode_space_info(args->root, inode);
- return 0;
-}
-
-static int btrfs_find_actor(struct inode *inode, void *opaque)
-{
- struct btrfs_iget_args *args = opaque;
- return args->ino == btrfs_ino(inode) &&
- args->root == BTRFS_I(inode)->root;
-}
-
-static struct inode *btrfs_iget_locked(struct super_block *s,
- u64 objectid,
- struct btrfs_root *root)
-{
- struct inode *inode;
- struct btrfs_iget_args args;
- args.ino = objectid;
- args.root = root;
-
- inode = iget5_locked(s, objectid, btrfs_find_actor,
- btrfs_init_locked_inode,
- (void *)&args);
- return inode;
-}
-
-/* Get an inode object given its location and corresponding root.
- * Returns in *new whether the inode was read from disk.
- */
-struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
- struct btrfs_root *root, int *new)
-{
- struct inode *inode;
-
- inode = btrfs_iget_locked(s, location->objectid, root);
- if (!inode)
- return ERR_PTR(-ENOMEM);
-
- if (inode->i_state & I_NEW) {
- BTRFS_I(inode)->root = root;
- memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
- btrfs_read_locked_inode(inode);
- if (!is_bad_inode(inode)) {
- inode_tree_add(inode);
- unlock_new_inode(inode);
- if (new)
- *new = 1;
- } else {
- unlock_new_inode(inode);
- iput(inode);
- inode = ERR_PTR(-ESTALE);
- }
- }
-
- return inode;
-}
-
-static struct inode *new_simple_dir(struct super_block *s,
- struct btrfs_key *key,
- struct btrfs_root *root)
-{
- struct inode *inode = new_inode(s);
-
- if (!inode)
- return ERR_PTR(-ENOMEM);
-
- BTRFS_I(inode)->root = root;
- memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
- BTRFS_I(inode)->dummy_inode = 1;
-
- inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
- inode->i_op = &btrfs_dir_ro_inode_operations;
- inode->i_fop = &simple_dir_operations;
- inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-
- return inode;
-}
-
-struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
-{
- struct inode *inode;
- struct btrfs_root *root = BTRFS_I(dir)->root;
- struct btrfs_root *sub_root = root;
- struct btrfs_key location;
- int index;
- int ret = 0;
-
- if (dentry->d_name.len > BTRFS_NAME_LEN)
- return ERR_PTR(-ENAMETOOLONG);
-
- if (unlikely(d_need_lookup(dentry))) {
- memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
- kfree(dentry->d_fsdata);
- dentry->d_fsdata = NULL;
- /* This thing is hashed, drop it for now */
- d_drop(dentry);
- } else {
- ret = btrfs_inode_by_name(dir, dentry, &location);
- }
-
- if (ret < 0)
- return ERR_PTR(ret);
-
- if (location.objectid == 0)
- return NULL;
-
- if (location.type == BTRFS_INODE_ITEM_KEY) {
- inode = btrfs_iget(dir->i_sb, &location, root, NULL);
- return inode;
- }
-
- BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY);
-
- index = srcu_read_lock(&root->fs_info->subvol_srcu);
- ret = fixup_tree_root_location(root, dir, dentry,
- &location, &sub_root);
- if (ret < 0) {
- if (ret != -ENOENT)
- inode = ERR_PTR(ret);
- else
- inode = new_simple_dir(dir->i_sb, &location, sub_root);
- } else {
- inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL);
- }
- srcu_read_unlock(&root->fs_info->subvol_srcu, index);
-
- if (!IS_ERR(inode) && root != sub_root) {
- down_read(&root->fs_info->cleanup_work_sem);
- if (!(inode->i_sb->s_flags & MS_RDONLY))
- ret = btrfs_orphan_cleanup(sub_root);
- up_read(&root->fs_info->cleanup_work_sem);
- if (ret)
- inode = ERR_PTR(ret);
- }
-
- return inode;
-}
-
-static int btrfs_dentry_delete(const struct dentry *dentry)
-{
- struct btrfs_root *root;
- struct inode *inode = dentry->d_inode;
-
- if (!inode && !IS_ROOT(dentry))
- inode = dentry->d_parent->d_inode;
-
- if (inode) {
- root = BTRFS_I(inode)->root;
- if (btrfs_root_refs(&root->root_item) == 0)
- return 1;
-
- if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
- return 1;
- }
- return 0;
-}
-
-static void btrfs_dentry_release(struct dentry *dentry)
-{
- if (dentry->d_fsdata)
- kfree(dentry->d_fsdata);
-}
-
-static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
- struct nameidata *nd)
-{
- struct dentry *ret;
-
- ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
- if (unlikely(d_need_lookup(dentry))) {
- spin_lock(&dentry->d_lock);
- dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
- spin_unlock(&dentry->d_lock);
- }
- return ret;
-}
-
-unsigned char btrfs_filetype_table[] = {
- DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
-};
-
-static int btrfs_real_readdir(struct file *filp, void *dirent,
- filldir_t filldir)
-{
- struct inode *inode = filp->f_dentry->d_inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_item *item;
- struct btrfs_dir_item *di;
- struct btrfs_key key;
- struct btrfs_key found_key;
- struct btrfs_path *path;
- struct list_head ins_list;
- struct list_head del_list;
- int ret;
- struct extent_buffer *leaf;
- int slot;
- unsigned char d_type;
- int over = 0;
- u32 di_cur;
- u32 di_total;
- u32 di_len;
- int key_type = BTRFS_DIR_INDEX_KEY;
- char tmp_name[32];
- char *name_ptr;
- int name_len;
- int is_curr = 0; /* filp->f_pos points to the current index? */
-
- /* FIXME, use a real flag for deciding about the key type */
- if (root->fs_info->tree_root == root)
- key_type = BTRFS_DIR_ITEM_KEY;
-
- /* special case for "." */
- if (filp->f_pos == 0) {
- over = filldir(dirent, ".", 1,
- filp->f_pos, btrfs_ino(inode), DT_DIR);
- if (over)
- return 0;
- filp->f_pos = 1;
- }
- /* special case for .., just use the back ref */
- if (filp->f_pos == 1) {
- u64 pino = parent_ino(filp->f_path.dentry);
- over = filldir(dirent, "..", 2,
- filp->f_pos, pino, DT_DIR);
- if (over)
- return 0;
- filp->f_pos = 2;
- }
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- path->reada = 1;
-
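-	/*
-	 * Grab the delayed dir index insertions and deletions so readdir
-	 * reflects items that haven't been committed to the tree yet.
-	 */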
- if (key_type == BTRFS_DIR_INDEX_KEY) {
- INIT_LIST_HEAD(&ins_list);
- INIT_LIST_HEAD(&del_list);
- btrfs_get_delayed_items(inode, &ins_list, &del_list);
- }
-
- btrfs_set_key_type(&key, key_type);
- key.offset = filp->f_pos;
- key.objectid = btrfs_ino(inode);
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto err;
-
- while (1) {
- leaf = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- goto err;
- else if (ret > 0)
- break;
- continue;
- }
-
- item = btrfs_item_nr(leaf, slot);
- btrfs_item_key_to_cpu(leaf, &found_key, slot);
-
- if (found_key.objectid != key.objectid)
- break;
- if (btrfs_key_type(&found_key) != key_type)
- break;
- if (found_key.offset < filp->f_pos)
- goto next;
- if (key_type == BTRFS_DIR_INDEX_KEY &&
- btrfs_should_delete_dir_index(&del_list,
- found_key.offset))
- goto next;
-
- filp->f_pos = found_key.offset;
- is_curr = 1;
-
- di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
- di_cur = 0;
- di_total = btrfs_item_size(leaf, item);
-
- while (di_cur < di_total) {
- struct btrfs_key location;
-
- if (verify_dir_item(root, leaf, di))
- break;
-
- name_len = btrfs_dir_name_len(leaf, di);
- if (name_len <= sizeof(tmp_name)) {
- name_ptr = tmp_name;
- } else {
- name_ptr = kmalloc(name_len, GFP_NOFS);
- if (!name_ptr) {
- ret = -ENOMEM;
- goto err;
- }
- }
- read_extent_buffer(leaf, name_ptr,
- (unsigned long)(di + 1), name_len);
-
- d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
- btrfs_dir_item_key_to_cpu(leaf, di, &location);
-
-
- /* is this a reference to our own snapshot? If so
- * skip it.
- *
- * In contrast to old kernels, we insert the snapshot's
- * dir item and dir index after it has been created, so
- * we won't find a reference to our own snapshot. We
- * still keep the following code for backward
- * compatibility.
- */
- if (location.type == BTRFS_ROOT_ITEM_KEY &&
- location.objectid == root->root_key.objectid) {
- over = 0;
- goto skip;
- }
- over = filldir(dirent, name_ptr, name_len,
- found_key.offset, location.objectid,
- d_type);
-
-skip:
- if (name_ptr != tmp_name)
- kfree(name_ptr);
-
- if (over)
- goto nopos;
- di_len = btrfs_dir_name_len(leaf, di) +
- btrfs_dir_data_len(leaf, di) + sizeof(*di);
- di_cur += di_len;
- di = (struct btrfs_dir_item *)((char *)di + di_len);
- }
-next:
- path->slots[0]++;
- }
-
- if (key_type == BTRFS_DIR_INDEX_KEY) {
- if (is_curr)
- filp->f_pos++;
- ret = btrfs_readdir_delayed_dir_index(filp, dirent, filldir,
- &ins_list);
- if (ret)
- goto nopos;
- }
-
- /* Reached end of directory/root. Bump pos past the last item. */
- if (key_type == BTRFS_DIR_INDEX_KEY)
- /*
- * 32-bit glibc will use getdents64, but then strtol -
- * so the last number we can serve is this.
- */
- filp->f_pos = 0x7fffffff;
- else
- filp->f_pos++;
-nopos:
- ret = 0;
-err:
- if (key_type == BTRFS_DIR_INDEX_KEY)
- btrfs_put_delayed_items(&ins_list, &del_list);
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
- int ret = 0;
- bool nolock = false;
-
- if (BTRFS_I(inode)->dummy_inode)
- return 0;
-
- if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
- nolock = true;
-
- if (wbc->sync_mode == WB_SYNC_ALL) {
- if (nolock)
- trans = btrfs_join_transaction_nolock(root);
- else
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- if (nolock)
- ret = btrfs_end_transaction_nolock(trans, root);
- else
- ret = btrfs_commit_transaction(trans, root);
- }
- return ret;
-}
-
-/*
- * This is somewhat expensive, updating the tree every time the
- * inode changes. But it is most likely to find the inode in cache.
- * FIXME: needs more benchmarking; there are no reasons other than performance
- * to keep or drop this code.
- */
-int btrfs_dirty_inode(struct inode *inode)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
- int ret;
-
- if (BTRFS_I(inode)->dummy_inode)
- return 0;
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- ret = btrfs_update_inode(trans, root, inode);
- if (ret && ret == -ENOSPC) {
- /* whoops, lets try again with the full transaction */
- btrfs_end_transaction(trans, root);
- trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- ret = btrfs_update_inode(trans, root, inode);
- }
- btrfs_end_transaction(trans, root);
- if (BTRFS_I(inode)->delayed_node)
- btrfs_balance_delayed_items(root);
-
- return ret;
-}
-
-/*
- * This is a copy of file_update_time. We need it so we can return an error
- * on ENOSPC when updating the inode for file writes and mmap writes.
- */
-int btrfs_update_time(struct file *file)
-{
- struct inode *inode = file->f_path.dentry->d_inode;
- struct timespec now;
- int ret;
- enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
-
- /* First try to exhaust all avenues to not sync */
- if (IS_NOCMTIME(inode))
- return 0;
-
- now = current_fs_time(inode->i_sb);
- if (!timespec_equal(&inode->i_mtime, &now))
- sync_it = S_MTIME;
-
- if (!timespec_equal(&inode->i_ctime, &now))
- sync_it |= S_CTIME;
-
- if (IS_I_VERSION(inode))
- sync_it |= S_VERSION;
-
- if (!sync_it)
- return 0;
-
- /* Finally allowed to write? Takes lock. */
- if (mnt_want_write_file(file))
- return 0;
-
- /* Only change inode inside the lock region */
- if (sync_it & S_VERSION)
- inode_inc_iversion(inode);
- if (sync_it & S_CTIME)
- inode->i_ctime = now;
- if (sync_it & S_MTIME)
- inode->i_mtime = now;
- ret = btrfs_dirty_inode(inode);
- if (!ret)
- mark_inode_dirty_sync(inode);
- mnt_drop_write(file->f_path.mnt);
- return ret;
-}
-
-/*
- * find the highest existing sequence number in a directory
- * and then set the in-memory index_cnt variable to the next
- * free sequence number
- */
-static int btrfs_set_inode_index_count(struct inode *inode)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_key key, found_key;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- int ret;
-
- key.objectid = btrfs_ino(inode);
- btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
- key.offset = (u64)-1;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- /* FIXME: we should be able to handle this */
- if (ret == 0)
- goto out;
- ret = 0;
-
- /*
- * MAGIC NUMBER EXPLANATION:
-	 * since we search a directory based on f_pos, and '.' and '..' have
-	 * f_pos of 0 and 1 respectively, everybody else has to start at 2
- */
- if (path->slots[0] == 0) {
- BTRFS_I(inode)->index_cnt = 2;
- goto out;
- }
-
- path->slots[0]--;
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
- if (found_key.objectid != btrfs_ino(inode) ||
- btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) {
- BTRFS_I(inode)->index_cnt = 2;
- goto out;
- }
-
- BTRFS_I(inode)->index_cnt = found_key.offset + 1;
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-/*
- * helper to find a free sequence number in a given directory. The current
- * code is very simple; later versions will do smarter things in the btree
- */
-int btrfs_set_inode_index(struct inode *dir, u64 *index)
-{
- int ret = 0;
-
- if (BTRFS_I(dir)->index_cnt == (u64)-1) {
- ret = btrfs_inode_delayed_dir_index_count(dir);
- if (ret) {
- ret = btrfs_set_inode_index_count(dir);
- if (ret)
- return ret;
- }
- }
-
- *index = BTRFS_I(dir)->index_cnt;
- BTRFS_I(dir)->index_cnt++;
-
- return ret;
-}
-
-static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *dir,
- const char *name, int name_len,
- u64 ref_objectid, u64 objectid,
- umode_t mode, u64 *index)
-{
- struct inode *inode;
- struct btrfs_inode_item *inode_item;
- struct btrfs_key *location;
- struct btrfs_path *path;
- struct btrfs_inode_ref *ref;
- struct btrfs_key key[2];
- u32 sizes[2];
- unsigned long ptr;
- int ret;
- int owner;
-
- path = btrfs_alloc_path();
- if (!path)
- return ERR_PTR(-ENOMEM);
-
- inode = new_inode(root->fs_info->sb);
- if (!inode) {
- btrfs_free_path(path);
- return ERR_PTR(-ENOMEM);
- }
-
- /*
- * we have to initialize this early, so we can reclaim the inode
- * number if we fail afterwards in this function.
- */
- inode->i_ino = objectid;
-
- if (dir) {
- trace_btrfs_inode_request(dir);
-
- ret = btrfs_set_inode_index(dir, index);
- if (ret) {
- btrfs_free_path(path);
- iput(inode);
- return ERR_PTR(ret);
- }
- }
- /*
- * index_cnt is ignored for everything but a dir,
-	 * btrfs_set_inode_index_count has an explanation for the magic
- * number
- */
- BTRFS_I(inode)->index_cnt = 2;
- BTRFS_I(inode)->root = root;
- BTRFS_I(inode)->generation = trans->transid;
- inode->i_generation = BTRFS_I(inode)->generation;
- btrfs_set_inode_space_info(root, inode);
-
- if (S_ISDIR(mode))
- owner = 0;
- else
- owner = 1;
-
- key[0].objectid = objectid;
- btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
- key[0].offset = 0;
-
- key[1].objectid = objectid;
- btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
- key[1].offset = ref_objectid;
-
- sizes[0] = sizeof(struct btrfs_inode_item);
- sizes[1] = name_len + sizeof(*ref);
-
- path->leave_spinning = 1;
- ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
- if (ret != 0)
- goto fail;
-
- inode_init_owner(inode, dir, mode);
- inode_set_bytes(inode, 0);
- inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
- inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_inode_item);
- fill_inode_item(trans, path->nodes[0], inode_item, inode);
-
- ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
- struct btrfs_inode_ref);
- btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
- btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
- ptr = (unsigned long)(ref + 1);
- write_extent_buffer(path->nodes[0], name, ptr, name_len);
-
- btrfs_mark_buffer_dirty(path->nodes[0]);
- btrfs_free_path(path);
-
- location = &BTRFS_I(inode)->location;
- location->objectid = objectid;
- location->offset = 0;
- btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
-
- btrfs_inherit_iflags(inode, dir);
-
- if (S_ISREG(mode)) {
- if (btrfs_test_opt(root, NODATASUM))
- BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
- if (btrfs_test_opt(root, NODATACOW) ||
- (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW))
- BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
- }
-
- insert_inode_hash(inode);
- inode_tree_add(inode);
-
- trace_btrfs_inode_new(inode);
- btrfs_set_inode_last_trans(trans, inode);
-
- return inode;
-fail:
- if (dir)
- BTRFS_I(dir)->index_cnt--;
- btrfs_free_path(path);
- iput(inode);
- return ERR_PTR(ret);
-}
-
-static inline u8 btrfs_inode_type(struct inode *inode)
-{
- return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
-}
-
-/*
- * utility function to add 'inode' into 'parent_inode' with
- * a given name and a given sequence number.
- * if 'add_backref' is true, also insert a backref from the
- * inode to the parent directory.
- */
-int btrfs_add_link(struct btrfs_trans_handle *trans,
- struct inode *parent_inode, struct inode *inode,
- const char *name, int name_len, int add_backref, u64 index)
-{
- int ret = 0;
- struct btrfs_key key;
- struct btrfs_root *root = BTRFS_I(parent_inode)->root;
- u64 ino = btrfs_ino(inode);
- u64 parent_ino = btrfs_ino(parent_inode);
-
- if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
- memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
- } else {
- key.objectid = ino;
- btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
- key.offset = 0;
- }
-
- if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
- ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
- key.objectid, root->root_key.objectid,
- parent_ino, index, name, name_len);
- } else if (add_backref) {
- ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
- parent_ino, index);
- }
-
- /* Nothing to clean up yet */
- if (ret)
- return ret;
-
- ret = btrfs_insert_dir_item(trans, root, name, name_len,
- parent_inode, &key,
- btrfs_inode_type(inode), index);
- if (ret == -EEXIST)
- goto fail_dir_item;
- else if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- return ret;
- }
-
- btrfs_i_size_write(parent_inode, parent_inode->i_size +
- name_len * 2);
- parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
- ret = btrfs_update_inode(trans, root, parent_inode);
- if (ret)
- btrfs_abort_transaction(trans, root, ret);
- return ret;
-
-fail_dir_item:
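-	/* best-effort undo of the ref inserted above; the original error is returned */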
- if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
- u64 local_index;
- int err;
- err = btrfs_del_root_ref(trans, root->fs_info->tree_root,
- key.objectid, root->root_key.objectid,
- parent_ino, &local_index, name, name_len);
-
- } else if (add_backref) {
- u64 local_index;
- int err;
-
- err = btrfs_del_inode_ref(trans, root, name, name_len,
- ino, parent_ino, &local_index);
- }
- return ret;
-}
-
-static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
- struct inode *dir, struct dentry *dentry,
- struct inode *inode, int backref, u64 index)
-{
- int err = btrfs_add_link(trans, dir, inode,
- dentry->d_name.name, dentry->d_name.len,
- backref, index);
- if (err > 0)
- err = -EEXIST;
- return err;
-}
-
-static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
- umode_t mode, dev_t rdev)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root = BTRFS_I(dir)->root;
- struct inode *inode = NULL;
- int err;
- int drop_inode = 0;
- u64 objectid;
- unsigned long nr = 0;
- u64 index = 0;
-
- if (!new_valid_dev(rdev))
- return -EINVAL;
-
- /*
- * 2 for inode item and ref
- * 2 for dir items
- * 1 for xattr if selinux is on
- */
- trans = btrfs_start_transaction(root, 5);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- err = btrfs_find_free_ino(root, &objectid);
- if (err)
- goto out_unlock;
-
- inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(dir), objectid,
- mode, &index);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- goto out_unlock;
- }
-
- err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
- if (err) {
- drop_inode = 1;
- goto out_unlock;
- }
-
- /*
- * If the active LSM wants to access the inode during
- * d_instantiate it needs these. Smack checks to see
- * if the filesystem supports xattrs by looking at the
- * ops vector.
- */
-
- inode->i_op = &btrfs_special_inode_operations;
- err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
- if (err)
- drop_inode = 1;
- else {
- init_special_inode(inode, inode->i_mode, rdev);
- btrfs_update_inode(trans, root, inode);
- d_instantiate(dentry, inode);
- }
-out_unlock:
- nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
- if (drop_inode) {
- inode_dec_link_count(inode);
- iput(inode);
- }
- return err;
-}
-
-static int btrfs_create(struct inode *dir, struct dentry *dentry,
- umode_t mode, struct nameidata *nd)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root = BTRFS_I(dir)->root;
- struct inode *inode = NULL;
- int drop_inode = 0;
- int err;
- unsigned long nr = 0;
- u64 objectid;
- u64 index = 0;
-
- /*
- * 2 for inode item and ref
- * 2 for dir items
- * 1 for xattr if selinux is on
- */
- trans = btrfs_start_transaction(root, 5);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- err = btrfs_find_free_ino(root, &objectid);
- if (err)
- goto out_unlock;
-
- inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(dir), objectid,
- mode, &index);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- goto out_unlock;
- }
-
- err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
- if (err) {
- drop_inode = 1;
- goto out_unlock;
- }
-
- /*
- * If the active LSM wants to access the inode during
- * d_instantiate it needs these. Smack checks to see
- * if the filesystem supports xattrs by looking at the
- * ops vector.
- */
- inode->i_fop = &btrfs_file_operations;
- inode->i_op = &btrfs_file_inode_operations;
-
- err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
- if (err)
- drop_inode = 1;
- else {
- inode->i_mapping->a_ops = &btrfs_aops;
- inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
- BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
- d_instantiate(dentry, inode);
- }
-out_unlock:
- nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
- if (drop_inode) {
- inode_dec_link_count(inode);
- iput(inode);
- }
- btrfs_btree_balance_dirty(root, nr);
- return err;
-}
-
-static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
- struct dentry *dentry)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root = BTRFS_I(dir)->root;
- struct inode *inode = old_dentry->d_inode;
- u64 index;
- unsigned long nr = 0;
- int err;
- int drop_inode = 0;
-
-	/* do not allow sys_link() across subvolumes of the same device */
- if (root->objectid != BTRFS_I(inode)->root->objectid)
- return -EXDEV;
-
- if (inode->i_nlink == ~0U)
- return -EMLINK;
-
- err = btrfs_set_inode_index(dir, &index);
- if (err)
- goto fail;
-
- /*
- * 2 items for inode and inode ref
- * 2 items for dir items
- * 1 item for parent inode
- */
- trans = btrfs_start_transaction(root, 5);
- if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
- goto fail;
- }
-
- btrfs_inc_nlink(inode);
- inode->i_ctime = CURRENT_TIME;
- ihold(inode);
-
- err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
-
- if (err) {
- drop_inode = 1;
- } else {
- struct dentry *parent = dentry->d_parent;
- err = btrfs_update_inode(trans, root, inode);
- if (err)
- goto fail;
- d_instantiate(dentry, inode);
- btrfs_log_new_name(trans, inode, NULL, parent);
- }
-
- nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
-fail:
- if (drop_inode) {
- inode_dec_link_count(inode);
- iput(inode);
- }
- btrfs_btree_balance_dirty(root, nr);
- return err;
-}
-
-static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
-{
- struct inode *inode = NULL;
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root = BTRFS_I(dir)->root;
- int err = 0;
- int drop_on_err = 0;
- u64 objectid = 0;
- u64 index = 0;
- unsigned long nr = 1;
-
- /*
- * 2 items for inode and ref
- * 2 items for dir items
- * 1 for xattr if selinux is on
- */
- trans = btrfs_start_transaction(root, 5);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- err = btrfs_find_free_ino(root, &objectid);
- if (err)
- goto out_fail;
-
- inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(dir), objectid,
- S_IFDIR | mode, &index);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- goto out_fail;
- }
-
- drop_on_err = 1;
-
- err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
- if (err)
- goto out_fail;
-
- inode->i_op = &btrfs_dir_inode_operations;
- inode->i_fop = &btrfs_dir_file_operations;
-
- btrfs_i_size_write(inode, 0);
- err = btrfs_update_inode(trans, root, inode);
- if (err)
- goto out_fail;
-
- err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
- dentry->d_name.len, 0, index);
- if (err)
- goto out_fail;
-
- d_instantiate(dentry, inode);
- drop_on_err = 0;
-
-out_fail:
- nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
- if (drop_on_err)
- iput(inode);
- btrfs_btree_balance_dirty(root, nr);
- return err;
-}
-
-/* helper for btrfs_get_extent. Given an existing extent in the tree,
- * and an extent that you want to insert, deal with overlap and insert
- * the new extent into the tree.
- */
-static int merge_extent_mapping(struct extent_map_tree *em_tree,
- struct extent_map *existing,
- struct extent_map *em,
- u64 map_start, u64 map_len)
-{
- u64 start_diff;
-
- BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
- start_diff = map_start - em->start;
- em->start = map_start;
- em->len = map_len;
- if (em->block_start < EXTENT_MAP_LAST_BYTE &&
- !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
- em->block_start += start_diff;
- em->block_len -= start_diff;
- }
- return add_extent_mapping(em_tree, em);
-}
-
-static noinline int uncompress_inline(struct btrfs_path *path,
- struct inode *inode, struct page *page,
- size_t pg_offset, u64 extent_offset,
- struct btrfs_file_extent_item *item)
-{
- int ret;
- struct extent_buffer *leaf = path->nodes[0];
- char *tmp;
- size_t max_size;
- unsigned long inline_size;
- unsigned long ptr;
- int compress_type;
-
- WARN_ON(pg_offset != 0);
- compress_type = btrfs_file_extent_compression(leaf, item);
- max_size = btrfs_file_extent_ram_bytes(leaf, item);
- inline_size = btrfs_file_extent_inline_item_len(leaf,
- btrfs_item_nr(leaf, path->slots[0]));
- tmp = kmalloc(inline_size, GFP_NOFS);
- if (!tmp)
- return -ENOMEM;
- ptr = btrfs_file_extent_inline_start(item);
-
- read_extent_buffer(leaf, tmp, ptr, inline_size);
-
- max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);
- ret = btrfs_decompress(compress_type, tmp, page,
- extent_offset, inline_size, max_size);
- if (ret) {
- char *kaddr = kmap_atomic(page);
- unsigned long copy_size = min_t(u64,
- PAGE_CACHE_SIZE - pg_offset,
- max_size - extent_offset);
- memset(kaddr + pg_offset, 0, copy_size);
- kunmap_atomic(kaddr);
- }
- kfree(tmp);
- return 0;
-}
-
-/*
- * a bit scary, this does extent mapping from logical file offset to the disk.
- * the ugly parts come from merging extents from the disk with the in-ram
- * representation. This gets more complex because of the data=ordered code,
- * where the in-ram extents might be locked pending data=ordered completion.
- *
- * This also copies inline extents directly into the page.
- */
-
-struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
- size_t pg_offset, u64 start, u64 len,
- int create)
-{
- int ret;
- int err = 0;
- u64 bytenr;
- u64 extent_start = 0;
- u64 extent_end = 0;
- u64 objectid = btrfs_ino(inode);
- u32 found_type;
- struct btrfs_path *path = NULL;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_file_extent_item *item;
- struct extent_buffer *leaf;
- struct btrfs_key found_key;
- struct extent_map *em = NULL;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- struct btrfs_trans_handle *trans = NULL;
- int compress_type;
-
-again:
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, start, len);
- if (em)
- em->bdev = root->fs_info->fs_devices->latest_bdev;
- read_unlock(&em_tree->lock);
-
- if (em) {
- if (em->start > start || em->start + em->len <= start)
- free_extent_map(em);
- else if (em->block_start == EXTENT_MAP_INLINE && page)
- free_extent_map(em);
- else
- goto out;
- }
- em = alloc_extent_map();
- if (!em) {
- err = -ENOMEM;
- goto out;
- }
- em->bdev = root->fs_info->fs_devices->latest_bdev;
- em->start = EXTENT_MAP_HOLE;
- em->orig_start = EXTENT_MAP_HOLE;
- em->len = (u64)-1;
- em->block_len = (u64)-1;
-
- if (!path) {
- path = btrfs_alloc_path();
- if (!path) {
- err = -ENOMEM;
- goto out;
- }
- /*
- * Chances are we'll be called again, so go ahead and do
- * readahead
- */
- path->reada = 1;
- }
-
- ret = btrfs_lookup_file_extent(trans, root, path,
- objectid, start, trans != NULL);
- if (ret < 0) {
- err = ret;
- goto out;
- }
-
- if (ret != 0) {
- if (path->slots[0] == 0)
- goto not_found;
- path->slots[0]--;
- }
-
- leaf = path->nodes[0];
- item = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- /* are we inside the extent that was found? */
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- found_type = btrfs_key_type(&found_key);
- if (found_key.objectid != objectid ||
- found_type != BTRFS_EXTENT_DATA_KEY) {
- goto not_found;
- }
-
- found_type = btrfs_file_extent_type(leaf, item);
- extent_start = found_key.offset;
- compress_type = btrfs_file_extent_compression(leaf, item);
- if (found_type == BTRFS_FILE_EXTENT_REG ||
- found_type == BTRFS_FILE_EXTENT_PREALLOC) {
- extent_end = extent_start +
- btrfs_file_extent_num_bytes(leaf, item);
- } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
- size_t size;
- size = btrfs_file_extent_inline_len(leaf, item);
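-		/* inline extents are rounded up to the next sector boundary */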
- extent_end = (extent_start + size + root->sectorsize - 1) &
- ~((u64)root->sectorsize - 1);
- }
-
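-	/*
-	 * the extent we found ends before the start we were asked for, so
-	 * peek at the next item to figure out how big a hole to report
-	 */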
- if (start >= extent_end) {
- path->slots[0]++;
- if (path->slots[0] >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- if (ret > 0)
- goto not_found;
- leaf = path->nodes[0];
- }
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- if (found_key.objectid != objectid ||
- found_key.type != BTRFS_EXTENT_DATA_KEY)
- goto not_found;
- if (start + len <= found_key.offset)
- goto not_found;
- em->start = start;
- em->len = found_key.offset - start;
- goto not_found_em;
- }
-
- if (found_type == BTRFS_FILE_EXTENT_REG ||
- found_type == BTRFS_FILE_EXTENT_PREALLOC) {
- em->start = extent_start;
- em->len = extent_end - extent_start;
- em->orig_start = extent_start -
- btrfs_file_extent_offset(leaf, item);
- bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
- if (bytenr == 0) {
- em->block_start = EXTENT_MAP_HOLE;
- goto insert;
- }
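-		/*
-		 * compressed extents reference the whole on-disk extent,
-		 * uncompressed ones point at an offset within it
-		 */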
- if (compress_type != BTRFS_COMPRESS_NONE) {
- set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
- em->compress_type = compress_type;
- em->block_start = bytenr;
- em->block_len = btrfs_file_extent_disk_num_bytes(leaf,
- item);
- } else {
- bytenr += btrfs_file_extent_offset(leaf, item);
- em->block_start = bytenr;
- em->block_len = em->len;
- if (found_type == BTRFS_FILE_EXTENT_PREALLOC)
- set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
- }
- goto insert;
- } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
- unsigned long ptr;
- char *map;
- size_t size;
- size_t extent_offset;
- size_t copy_size;
-
- em->block_start = EXTENT_MAP_INLINE;
- if (!page || create) {
- em->start = extent_start;
- em->len = extent_end - extent_start;
- goto out;
- }
-
- size = btrfs_file_extent_inline_len(leaf, item);
- extent_offset = page_offset(page) + pg_offset - extent_start;
- copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
- size - extent_offset);
- em->start = extent_start + extent_offset;
- em->len = (copy_size + root->sectorsize - 1) &
- ~((u64)root->sectorsize - 1);
- em->orig_start = EXTENT_MAP_INLINE;
- if (compress_type) {
- set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
- em->compress_type = compress_type;
- }
- ptr = btrfs_file_extent_inline_start(item) + extent_offset;
- if (create == 0 && !PageUptodate(page)) {
- if (btrfs_file_extent_compression(leaf, item) !=
- BTRFS_COMPRESS_NONE) {
- ret = uncompress_inline(path, inode, page,
- pg_offset,
- extent_offset, item);
- BUG_ON(ret); /* -ENOMEM */
- } else {
- map = kmap(page);
- read_extent_buffer(leaf, map + pg_offset, ptr,
- copy_size);
- if (pg_offset + copy_size < PAGE_CACHE_SIZE) {
- memset(map + pg_offset + copy_size, 0,
- PAGE_CACHE_SIZE - pg_offset -
- copy_size);
- }
- kunmap(page);
- }
- flush_dcache_page(page);
- } else if (create && PageUptodate(page)) {
- BUG();
- if (!trans) {
- kunmap(page);
- free_extent_map(em);
- em = NULL;
-
- btrfs_release_path(path);
- trans = btrfs_join_transaction(root);
-
- if (IS_ERR(trans))
- return ERR_CAST(trans);
- goto again;
- }
- map = kmap(page);
- write_extent_buffer(leaf, map + pg_offset, ptr,
- copy_size);
- kunmap(page);
- btrfs_mark_buffer_dirty(leaf);
- }
- set_extent_uptodate(io_tree, em->start,
- extent_map_end(em) - 1, NULL, GFP_NOFS);
- goto insert;
- } else {
- printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
- WARN_ON(1);
- }
-not_found:
- em->start = start;
- em->len = len;
-not_found_em:
- em->block_start = EXTENT_MAP_HOLE;
- set_bit(EXTENT_FLAG_VACANCY, &em->flags);
-insert:
- btrfs_release_path(path);
- if (em->start > start || extent_map_end(em) <= start) {
- printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed "
- "[%llu %llu]\n", (unsigned long long)em->start,
- (unsigned long long)em->len,
- (unsigned long long)start,
- (unsigned long long)len);
- err = -EIO;
- goto out;
- }
-
- err = 0;
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- /* it is possible that someone inserted the extent into the tree
- * while we had the lock dropped. It is also possible that
- * an overlapping map exists in the tree
- */
- if (ret == -EEXIST) {
- struct extent_map *existing;
-
- ret = 0;
-
- existing = lookup_extent_mapping(em_tree, start, len);
- if (existing && (existing->start > start ||
- existing->start + existing->len <= start)) {
- free_extent_map(existing);
- existing = NULL;
- }
- if (!existing) {
- existing = lookup_extent_mapping(em_tree, em->start,
- em->len);
- if (existing) {
- err = merge_extent_mapping(em_tree, existing,
- em, start,
- root->sectorsize);
- free_extent_map(existing);
- if (err) {
- free_extent_map(em);
- em = NULL;
- }
- } else {
- err = -EIO;
- free_extent_map(em);
- em = NULL;
- }
- } else {
- free_extent_map(em);
- em = existing;
- err = 0;
- }
- }
- write_unlock(&em_tree->lock);
-out:
-
- trace_btrfs_get_extent(root, em);
-
- if (path)
- btrfs_free_path(path);
- if (trans) {
- ret = btrfs_end_transaction(trans, root);
- if (!err)
- err = ret;
- }
- if (err) {
- free_extent_map(em);
- return ERR_PTR(err);
- }
- BUG_ON(!em); /* Error is always set */
- return em;
-}
-
-struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
- size_t pg_offset, u64 start, u64 len,
- int create)
-{
- struct extent_map *em;
- struct extent_map *hole_em = NULL;
- u64 range_start = start;
- u64 end;
- u64 found;
- u64 found_end;
- int err = 0;
-
- em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
- if (IS_ERR(em))
- return em;
- if (em) {
- /*
- * if our em maps to a hole, there might
- * actually be delalloc bytes behind it
- */
- if (em->block_start != EXTENT_MAP_HOLE)
- return em;
- else
- hole_em = em;
- }
-
- /* check to see if we've wrapped (len == -1 or similar) */
- end = start + len;
- if (end < start)
- end = (u64)-1;
- else
- end -= 1;
-
- em = NULL;
-
-	/* ok, we didn't find anything, let's look for delalloc */
- found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
- end, len, EXTENT_DELALLOC, 1);
- found_end = range_start + found;
- if (found_end < range_start)
- found_end = (u64)-1;
-
- /*
- * we didn't find anything useful, return
- * the original results from get_extent()
- */
- if (range_start > end || found_end <= start) {
- em = hole_em;
- hole_em = NULL;
- goto out;
- }
-
- /* adjust the range_start to make sure it doesn't
- * go backwards from the start they passed in
- */
-	range_start = max(start, range_start);
- found = found_end - range_start;
-
- if (found > 0) {
- u64 hole_start = start;
- u64 hole_len = len;
-
- em = alloc_extent_map();
- if (!em) {
- err = -ENOMEM;
- goto out;
- }
- /*
- * when btrfs_get_extent can't find anything it
- * returns one huge hole
- *
- * make sure what it found really fits our range, and
- * adjust to make sure it is based on the start from
- * the caller
- */
- if (hole_em) {
- u64 calc_end = extent_map_end(hole_em);
-
- if (calc_end <= start || (hole_em->start > end)) {
- free_extent_map(hole_em);
- hole_em = NULL;
- } else {
- hole_start = max(hole_em->start, start);
- hole_len = calc_end - hole_start;
- }
- }
- em->bdev = NULL;
- if (hole_em && range_start > hole_start) {
- /* our hole starts before our delalloc, so we
- * have to return just the parts of the hole
- * that go until the delalloc starts
- */
- em->len = min(hole_len,
- range_start - hole_start);
- em->start = hole_start;
- em->orig_start = hole_start;
- /*
- * don't adjust block start at all,
- * it is fixed at EXTENT_MAP_HOLE
- */
- em->block_start = hole_em->block_start;
- em->block_len = hole_len;
- } else {
- em->start = range_start;
- em->len = found;
- em->orig_start = range_start;
- em->block_start = EXTENT_MAP_DELALLOC;
- em->block_len = found;
- }
- } else if (hole_em) {
- return hole_em;
- }
-out:
-
- free_extent_map(hole_em);
- if (err) {
- free_extent_map(em);
- return ERR_PTR(err);
- }
- return em;
-}
-
-static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
- struct extent_map *em,
- u64 start, u64 len)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- struct btrfs_key ins;
- u64 alloc_hint;
- int ret;
- bool insert = false;
-
- /*
-	 * Ok, if the extent map we looked up is a hole and is for the exact
-	 * range we want, there is no reason to allocate a new one. However, if
-	 * it is not right then we need to free this one and drop the cache for
- * our range.
- */
- if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
- em->len != len) {
- free_extent_map(em);
- em = NULL;
- insert = true;
- btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
- }
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return ERR_CAST(trans);
-
- if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024)
- btrfs_add_inode_defrag(trans, inode);
-
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-
- alloc_hint = get_extent_allocation_hint(inode, start, len);
- ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
- alloc_hint, &ins, 1);
- if (ret) {
- em = ERR_PTR(ret);
- goto out;
- }
-
- if (!em) {
- em = alloc_extent_map();
- if (!em) {
- em = ERR_PTR(-ENOMEM);
- goto out;
- }
- }
-
- em->start = start;
- em->orig_start = em->start;
- em->len = ins.offset;
-
- em->block_start = ins.objectid;
- em->block_len = ins.offset;
- em->bdev = root->fs_info->fs_devices->latest_bdev;
-
- /*
- * We need to do this because if we're using the original em we searched
- * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
- */
- em->flags = 0;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
-
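-	/*
-	 * if we dropped the original mapping above, keep evicting any
-	 * cached overlap until our new extent map inserts cleanly
-	 */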
- while (insert) {
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- write_unlock(&em_tree->lock);
- if (ret != -EEXIST)
- break;
- btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0);
- }
-
- ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
- ins.offset, ins.offset, 0);
- if (ret) {
- btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
- em = ERR_PTR(ret);
- }
-out:
- btrfs_end_transaction(trans, root);
- return em;
-}
-
-/*
- * returns 1 when the nocow is safe, < 0 on error, 0 if the
- * block must be cow'd
- */
-static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
- struct inode *inode, u64 offset, u64 len)
-{
- struct btrfs_path *path;
- int ret;
- struct extent_buffer *leaf;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_file_extent_item *fi;
- struct btrfs_key key;
- u64 disk_bytenr;
- u64 backref_offset;
- u64 extent_end;
- u64 num_bytes;
- int slot;
- int found_type;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
- offset, 0);
- if (ret < 0)
- goto out;
-
- slot = path->slots[0];
- if (ret == 1) {
- if (slot == 0) {
- /* can't find the item, must cow */
- ret = 0;
- goto out;
- }
- slot--;
- }
- ret = 0;
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, slot);
- if (key.objectid != btrfs_ino(inode) ||
- key.type != BTRFS_EXTENT_DATA_KEY) {
- /* not our file or wrong item type, must cow */
- goto out;
- }
-
- if (key.offset > offset) {
- /* Wrong offset, must cow */
- goto out;
- }
-
- fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
- found_type = btrfs_file_extent_type(leaf, fi);
- if (found_type != BTRFS_FILE_EXTENT_REG &&
- found_type != BTRFS_FILE_EXTENT_PREALLOC) {
- /* not a regular extent, must cow */
- goto out;
- }
- disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
- backref_offset = btrfs_file_extent_offset(leaf, fi);
-
- extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
- if (extent_end < offset + len) {
- /* extent doesn't include our full range, must cow */
- goto out;
- }
-
- if (btrfs_extent_readonly(root, disk_bytenr))
- goto out;
-
- /*
- * look for other files referencing this extent, if we
- * find any we must cow
- */
- if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
- key.offset - backref_offset, disk_bytenr))
- goto out;
-
- /*
- * adjust disk_bytenr and num_bytes to cover just the bytes
- * in this extent we are about to write. If there
- * are any csums in that range we have to cow in order
- * to keep the csums correct
- */
- disk_bytenr += backref_offset;
- disk_bytenr += offset - key.offset;
- num_bytes = min(offset + len, extent_end) - offset;
- if (csum_exist_in_range(root, disk_bytenr, num_bytes))
- goto out;
- /*
- * all of the above have passed, it is safe to overwrite this extent
- * without cow
- */
- ret = 1;
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- struct extent_map *em;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 start = iblock << inode->i_blkbits;
- u64 len = bh_result->b_size;
- struct btrfs_trans_handle *trans;
-
- em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
- if (IS_ERR(em))
- return PTR_ERR(em);
-
- /*
-	 * Ok, for INLINE and COMPRESSED extents we need to fall back on buffered
-	 * io. INLINE is special, and we could probably kludge it in here, but
-	 * it's still buffered so for safety let's just fall back to the generic
-	 * buffered path.
-	 *
-	 * For COMPRESSED we _have_ to read the entire extent in so we can
-	 * decompress it, so there will be buffering required no matter what we
-	 * do, so go ahead and fall back to buffered.
-	 *
-	 * We return -ENOTBLK because that's what makes DIO go ahead and go back
-	 * to buffered IO. Don't blame me, this is the price we pay for using
- * the generic code.
- */
- if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
- em->block_start == EXTENT_MAP_INLINE) {
- free_extent_map(em);
- return -ENOTBLK;
- }
-
- /* Just a good old fashioned hole, return */
- if (!create && (em->block_start == EXTENT_MAP_HOLE ||
- test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
- free_extent_map(em);
- /* DIO will do one hole at a time, so just unlock a sector */
- unlock_extent(&BTRFS_I(inode)->io_tree, start,
- start + root->sectorsize - 1);
- return 0;
- }
-
- /*
- * We don't allocate a new extent in the following cases
- *
- * 1) The inode is marked as NODATACOW. In this case we'll just use the
- * existing extent.
- * 2) The extent is marked as PREALLOC. We're good to go here and can
- * just use the extent.
- *
- */
- if (!create) {
- len = em->len - (start - em->start);
- goto map;
- }
-
- if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
- ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
- em->block_start != EXTENT_MAP_HOLE)) {
- int type;
- int ret;
- u64 block_start;
-
- if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
- type = BTRFS_ORDERED_PREALLOC;
- else
- type = BTRFS_ORDERED_NOCOW;
- len = min(len, em->len - (start - em->start));
- block_start = em->block_start + (start - em->start);
-
- /*
- * we're not going to log anything, but we do need
- * to make sure the current transaction stays open
- * while we look for nocow cross refs
- */
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- goto must_cow;
-
- if (can_nocow_odirect(trans, inode, start, len) == 1) {
- ret = btrfs_add_ordered_extent_dio(inode, start,
- block_start, len, len, type);
- btrfs_end_transaction(trans, root);
- if (ret) {
- free_extent_map(em);
- return ret;
- }
- goto unlock;
- }
- btrfs_end_transaction(trans, root);
- }
-must_cow:
- /*
- * this will cow the extent, reset the len in case we changed
- * it above
- */
- len = bh_result->b_size;
- em = btrfs_new_extent_direct(inode, em, start, len);
- if (IS_ERR(em))
- return PTR_ERR(em);
- len = min(len, em->len - (start - em->start));
-unlock:
- clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
- EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
- 0, NULL, GFP_NOFS);
-map:
- bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
- inode->i_blkbits;
- bh_result->b_size = len;
- bh_result->b_bdev = em->bdev;
- set_buffer_mapped(bh_result);
- if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
- set_buffer_new(bh_result);
-
- free_extent_map(em);
-
- return 0;
-}
-
-struct btrfs_dio_private {
- struct inode *inode;
- u64 logical_offset;
- u64 disk_bytenr;
- u64 bytes;
- u32 *csums;
- void *private;
-
- /* number of bios pending for this dio */
- atomic_t pending_bios;
-
- /* IO errors */
- int errors;
-
- struct bio *orig_bio;
-};
-
-static void btrfs_endio_direct_read(struct bio *bio, int err)
-{
- struct btrfs_dio_private *dip = bio->bi_private;
- struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct bio_vec *bvec = bio->bi_io_vec;
- struct inode *inode = dip->inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 start;
- u32 *private = dip->csums;
-
- start = dip->logical_offset;
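-	/* verify each bvec against the csums looked up at submit time */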
- do {
- if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
- struct page *page = bvec->bv_page;
- char *kaddr;
- u32 csum = ~(u32)0;
- unsigned long flags;
-
- local_irq_save(flags);
- kaddr = kmap_atomic(page);
- csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
- csum, bvec->bv_len);
- btrfs_csum_final(csum, (char *)&csum);
- kunmap_atomic(kaddr);
- local_irq_restore(flags);
-
- flush_dcache_page(bvec->bv_page);
- if (csum != *private) {
- printk(KERN_ERR "btrfs csum failed ino %llu off"
- " %llu csum %u private %u\n",
- (unsigned long long)btrfs_ino(inode),
- (unsigned long long)start,
- csum, *private);
- err = -EIO;
- }
- }
-
- start += bvec->bv_len;
- private++;
- bvec++;
- } while (bvec <= bvec_end);
-
- unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
- dip->logical_offset + dip->bytes - 1);
- bio->bi_private = dip->private;
-
- kfree(dip->csums);
- kfree(dip);
-
- /* If we had a csum failure make sure to clear the uptodate flag */
- if (err)
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
- dio_end_io(bio, err);
-}
-
-static void btrfs_endio_direct_write(struct bio *bio, int err)
-{
- struct btrfs_dio_private *dip = bio->bi_private;
- struct inode *inode = dip->inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
- struct btrfs_ordered_extent *ordered = NULL;
- struct extent_state *cached_state = NULL;
- u64 ordered_offset = dip->logical_offset;
- u64 ordered_bytes = dip->bytes;
- int ret;
-
- if (err)
- goto out_done;
-again:
- ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
- &ordered_offset,
- ordered_bytes);
- if (!ret)
- goto out_test;
-
- BUG_ON(!ordered);
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- err = -ENOMEM;
- goto out;
- }
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-
- if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
- ret = btrfs_ordered_update_i_size(inode, 0, ordered);
- if (!ret)
- err = btrfs_update_inode_fallback(trans, root, inode);
- goto out;
- }
-
- lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset,
- ordered->file_offset + ordered->len - 1, 0,
- &cached_state);
-
- if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
- ret = btrfs_mark_extent_written(trans, inode,
- ordered->file_offset,
- ordered->file_offset +
- ordered->len);
- if (ret) {
- err = ret;
- goto out_unlock;
- }
- } else {
- ret = insert_reserved_file_extent(trans, inode,
- ordered->file_offset,
- ordered->start,
- ordered->disk_len,
- ordered->len,
- ordered->len,
- 0, 0, 0,
- BTRFS_FILE_EXTENT_REG);
- unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
- ordered->file_offset, ordered->len);
- if (ret) {
- err = ret;
- WARN_ON(1);
- goto out_unlock;
- }
- }
-
- add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
- ret = btrfs_ordered_update_i_size(inode, 0, ordered);
- if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags))
- btrfs_update_inode_fallback(trans, root, inode);
- ret = 0;
-out_unlock:
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
- ordered->file_offset + ordered->len - 1,
- &cached_state, GFP_NOFS);
-out:
- btrfs_delalloc_release_metadata(inode, ordered->len);
- btrfs_end_transaction(trans, root);
- ordered_offset = ordered->file_offset + ordered->len;
- btrfs_put_ordered_extent(ordered);
- btrfs_put_ordered_extent(ordered);
-
-out_test:
- /*
- * our bio might span multiple ordered extents. If we haven't
- * completed the accounting for the whole dio, go back and try again
- */
- if (ordered_offset < dip->logical_offset + dip->bytes) {
- ordered_bytes = dip->logical_offset + dip->bytes -
- ordered_offset;
- goto again;
- }
-out_done:
- bio->bi_private = dip->private;
-
- kfree(dip->csums);
- kfree(dip);
-
- /* If we had an error make sure to clear the uptodate flag */
- if (err)
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
- dio_end_io(bio, err);
-}
-
-static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags, u64 offset)
-{
- int ret;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- ret = btrfs_csum_one_bio(root, inode, bio, offset, 1);
- BUG_ON(ret); /* -ENOMEM */
- return 0;
-}
-
-static void btrfs_end_dio_bio(struct bio *bio, int err)
-{
- struct btrfs_dio_private *dip = bio->bi_private;
-
- if (err) {
- printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
- "sector %#Lx len %u err no %d\n",
- (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw,
- (unsigned long long)bio->bi_sector, bio->bi_size, err);
- dip->errors = 1;
-
- /*
-		 * before the atomic variable goes to zero, we must make sure
- * dip->errors is perceived to be set.
- */
- smp_mb__before_atomic_dec();
- }
-
- /* if there are more bios still pending for this dio, just exit */
- if (!atomic_dec_and_test(&dip->pending_bios))
- goto out;
-
- if (dip->errors)
- bio_io_error(dip->orig_bio);
- else {
- set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
- bio_endio(dip->orig_bio, 0);
- }
-out:
- bio_put(bio);
-}
-
-static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
- u64 first_sector, gfp_t gfp_flags)
-{
- int nr_vecs = bio_get_nr_vecs(bdev);
- return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
-}
-
-static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
- int rw, u64 file_offset, int skip_sum,
- u32 *csums, int async_submit)
-{
- int write = rw & REQ_WRITE;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret;
-
- bio_get(bio);
- ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
- if (ret)
- goto err;
-
- if (skip_sum)
- goto map;
-
- if (write && async_submit) {
- ret = btrfs_wq_submit_bio(root->fs_info,
- inode, rw, bio, 0, 0,
- file_offset,
- __btrfs_submit_bio_start_direct_io,
- __btrfs_submit_bio_done);
- goto err;
- } else if (write) {
- /*
- * If we aren't doing async submit, calculate the csum of the
- * bio now.
- */
- ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
- if (ret)
- goto err;
- } else if (!skip_sum) {
- ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
- file_offset, csums);
- if (ret)
- goto err;
- }
-
-map:
- ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
-err:
- bio_put(bio);
- return ret;
-}
-
-static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
- int skip_sum)
-{
- struct inode *inode = dip->inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
- struct bio *bio;
- struct bio *orig_bio = dip->orig_bio;
- struct bio_vec *bvec = orig_bio->bi_io_vec;
- u64 start_sector = orig_bio->bi_sector;
- u64 file_offset = dip->logical_offset;
- u64 submit_len = 0;
- u64 map_length;
- int nr_pages = 0;
- u32 *csums = dip->csums;
- int ret = 0;
- int async_submit = 0;
- int write = rw & REQ_WRITE;
-
- map_length = orig_bio->bi_size;
- ret = btrfs_map_block(map_tree, READ, start_sector << 9,
- &map_length, NULL, 0);
- if (ret) {
- bio_put(orig_bio);
- return -EIO;
- }
-
- if (map_length >= orig_bio->bi_size) {
- bio = orig_bio;
- goto submit;
- }
-
- async_submit = 1;
- bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
- if (!bio)
- return -ENOMEM;
- bio->bi_private = dip;
- bio->bi_end_io = btrfs_end_dio_bio;
- atomic_inc(&dip->pending_bios);
-
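-	/*
-	 * walk the pages of the original bio, starting a new bio whenever
-	 * we would cross the mapping length btrfs_map_block gave us
-	 */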
- while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
- if (unlikely(map_length < submit_len + bvec->bv_len ||
- bio_add_page(bio, bvec->bv_page, bvec->bv_len,
- bvec->bv_offset) < bvec->bv_len)) {
- /*
-			 * inc the count before we submit the bio so
-			 * we know the end IO handler won't run before
-			 * we've done the inc. Otherwise, the dip might get freed
-			 * before we're done setting it up.
- */
- atomic_inc(&dip->pending_bios);
- ret = __btrfs_submit_dio_bio(bio, inode, rw,
- file_offset, skip_sum,
- csums, async_submit);
- if (ret) {
- bio_put(bio);
- atomic_dec(&dip->pending_bios);
- goto out_err;
- }
-
-			/* Writes use the ordered csums */
- if (!write && !skip_sum)
- csums = csums + nr_pages;
- start_sector += submit_len >> 9;
- file_offset += submit_len;
-
- submit_len = 0;
- nr_pages = 0;
-
- bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
- start_sector, GFP_NOFS);
- if (!bio)
- goto out_err;
- bio->bi_private = dip;
- bio->bi_end_io = btrfs_end_dio_bio;
-
- map_length = orig_bio->bi_size;
- ret = btrfs_map_block(map_tree, READ, start_sector << 9,
- &map_length, NULL, 0);
- if (ret) {
- bio_put(bio);
- goto out_err;
- }
- } else {
- submit_len += bvec->bv_len;
-			nr_pages++;
- bvec++;
- }
- }
-
-submit:
- ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
- csums, async_submit);
- if (!ret)
- return 0;
-
- bio_put(bio);
-out_err:
- dip->errors = 1;
- /*
-	 * before the atomic variable goes to zero, we must
- * make sure dip->errors is perceived to be set.
- */
- smp_mb__before_atomic_dec();
- if (atomic_dec_and_test(&dip->pending_bios))
- bio_io_error(dip->orig_bio);
-
- /* bio_end_io() will handle error, so we needn't return it */
- return 0;
-}
-
-static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
- loff_t file_offset)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_dio_private *dip;
- struct bio_vec *bvec = bio->bi_io_vec;
- int skip_sum;
- int write = rw & REQ_WRITE;
- int ret = 0;
-
- skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
-
- dip = kmalloc(sizeof(*dip), GFP_NOFS);
- if (!dip) {
- ret = -ENOMEM;
- goto free_ordered;
- }
- dip->csums = NULL;
-
-	/* Writes use the ordered csum stuff, so we don't need dip->csums */
- if (!write && !skip_sum) {
- dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
- if (!dip->csums) {
- kfree(dip);
- ret = -ENOMEM;
- goto free_ordered;
- }
- }
-
- dip->private = bio->bi_private;
- dip->inode = inode;
- dip->logical_offset = file_offset;
-
- dip->bytes = 0;
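-	/* add up the bvec lengths so the endio handlers know the dio size */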
- do {
- dip->bytes += bvec->bv_len;
- bvec++;
- } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1));
-
- dip->disk_bytenr = (u64)bio->bi_sector << 9;
- bio->bi_private = dip;
- dip->errors = 0;
- dip->orig_bio = bio;
- atomic_set(&dip->pending_bios, 0);
-
- if (write)
- bio->bi_end_io = btrfs_endio_direct_write;
- else
- bio->bi_end_io = btrfs_endio_direct_read;
-
- ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
- if (!ret)
- return;
-free_ordered:
- /*
- * If this is a write, we need to clean up the reserved space and kill
- * the ordered extent.
- */
- if (write) {
- struct btrfs_ordered_extent *ordered;
- ordered = btrfs_lookup_ordered_extent(inode, file_offset);
- if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
- !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
- btrfs_free_reserved_extent(root, ordered->start,
- ordered->disk_len);
- btrfs_put_ordered_extent(ordered);
- btrfs_put_ordered_extent(ordered);
- }
- bio_endio(bio, ret);
-}
-
-static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
- const struct iovec *iov, loff_t offset,
- unsigned long nr_segs)
-{
- int seg;
- int i;
- size_t size;
- unsigned long addr;
- unsigned blocksize_mask = root->sectorsize - 1;
- ssize_t retval = -EINVAL;
- loff_t end = offset;
-
- if (offset & blocksize_mask)
- goto out;
-
- /* Check the memory alignment. Blocks cannot straddle pages */
- for (seg = 0; seg < nr_segs; seg++) {
- addr = (unsigned long)iov[seg].iov_base;
- size = iov[seg].iov_len;
- end += size;
- if ((addr & blocksize_mask) || (size & blocksize_mask))
- goto out;
-
- /* If this is a write we don't need to check anymore */
- if (rw & WRITE)
- continue;
-
- /*
- * Check to make sure we don't have duplicate iov_base's in this
- * iovec, if so return EINVAL, otherwise we'll get csum errors
- * when reading back.
- */
- for (i = seg + 1; i < nr_segs; i++) {
- if (iov[seg].iov_base == iov[i].iov_base)
- goto out;
- }
- }
- retval = 0;
-out:
- return retval;
-}
-static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
- const struct iovec *iov, loff_t offset,
- unsigned long nr_segs)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- struct btrfs_ordered_extent *ordered;
- struct extent_state *cached_state = NULL;
- u64 lockstart, lockend;
- ssize_t ret;
- int writing = rw & WRITE;
- int write_bits = 0;
- size_t count = iov_length(iov, nr_segs);
-
- if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
- offset, nr_segs)) {
- return 0;
- }
-
- lockstart = offset;
- lockend = offset + count - 1;
-
- if (writing) {
- ret = btrfs_delalloc_reserve_space(inode, count);
- if (ret)
- goto out;
- }
-
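-	/*
-	 * lock the range; if any ordered extents overlap it, drop the lock,
-	 * wait for them to finish and try again
-	 */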
- while (1) {
- lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- 0, &cached_state);
- /*
- * We're concerned with the entire range that we're going to be
-		 * doing DIO to, so we need to make sure there are no ordered
- * extents in this range.
- */
- ordered = btrfs_lookup_ordered_range(inode, lockstart,
- lockend - lockstart + 1);
- if (!ordered)
- break;
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- &cached_state, GFP_NOFS);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- cond_resched();
- }
-
- /*
- * we don't use btrfs_set_extent_delalloc because we don't want
- * the dirty or uptodate bits
- */
- if (writing) {
- write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
- ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- EXTENT_DELALLOC, NULL, &cached_state,
- GFP_NOFS);
- if (ret) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
- lockend, EXTENT_LOCKED | write_bits,
- 1, 0, &cached_state, GFP_NOFS);
- goto out;
- }
- }
-
- free_extent_state(cached_state);
- cached_state = NULL;
-
- ret = __blockdev_direct_IO(rw, iocb, inode,
- BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
- iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
- btrfs_submit_direct, 0);
-
- if (ret < 0 && ret != -EIOCBQUEUED) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
- offset + iov_length(iov, nr_segs) - 1,
- EXTENT_LOCKED | write_bits, 1, 0,
- &cached_state, GFP_NOFS);
- } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
- /*
- * We're falling back to buffered, unlock the section we didn't
- * do IO on.
- */
- clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
- offset + iov_length(iov, nr_segs) - 1,
- EXTENT_LOCKED | write_bits, 1, 0,
- &cached_state, GFP_NOFS);
- }
-out:
- free_extent_state(cached_state);
- return ret;
-}
-
-static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
- __u64 start, __u64 len)
-{
- return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
-}
-
-int btrfs_readpage(struct file *file, struct page *page)
-{
- struct extent_io_tree *tree;
- tree = &BTRFS_I(page->mapping->host)->io_tree;
- return extent_read_full_page(tree, page, btrfs_get_extent, 0);
-}
-
-static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
-{
- struct extent_io_tree *tree;
-
- if (current->flags & PF_MEMALLOC) {
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
- }
- tree = &BTRFS_I(page->mapping->host)->io_tree;
- return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
-}
-
-int btrfs_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
-{
- struct extent_io_tree *tree;
-
- tree = &BTRFS_I(mapping->host)->io_tree;
- return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
-}
-
-static int
-btrfs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
-{
- struct extent_io_tree *tree;
- tree = &BTRFS_I(mapping->host)->io_tree;
- return extent_readpages(tree, mapping, pages, nr_pages,
- btrfs_get_extent);
-}
-static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
-{
- struct extent_io_tree *tree;
- struct extent_map_tree *map;
- int ret;
-
- tree = &BTRFS_I(page->mapping->host)->io_tree;
- map = &BTRFS_I(page->mapping->host)->extent_tree;
- ret = try_release_extent_mapping(map, tree, page, gfp_flags);
- if (ret == 1) {
- ClearPagePrivate(page);
- set_page_private(page, 0);
- page_cache_release(page);
- }
- return ret;
-}
-
-static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
-{
- if (PageWriteback(page) || PageDirty(page))
- return 0;
- return __btrfs_releasepage(page, gfp_flags & GFP_NOFS);
-}
-
-static void btrfs_invalidatepage(struct page *page, unsigned long offset)
-{
- struct extent_io_tree *tree;
- struct btrfs_ordered_extent *ordered;
- struct extent_state *cached_state = NULL;
- u64 page_start = page_offset(page);
- u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
-
- /*
- * we have the page locked, so new writeback can't start,
- * and the dirty bit won't be cleared while we are here.
- *
- * Wait for IO on this page so that we can safely clear
- * the PagePrivate2 bit and do ordered accounting
- */
- wait_on_page_writeback(page);
-
- tree = &BTRFS_I(page->mapping->host)->io_tree;
- if (offset) {
- btrfs_releasepage(page, GFP_NOFS);
- return;
- }
- lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
- ordered = btrfs_lookup_ordered_extent(page->mapping->host,
- page_offset(page));
- if (ordered) {
- /*
- * IO on this page will never be started, so we need
- * to account for any ordered extents now
- */
- clear_extent_bit(tree, page_start, page_end,
- EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0,
- &cached_state, GFP_NOFS);
- /*
- * whoever cleared the private bit is responsible
- * for the finish_ordered_io
- */
- if (TestClearPagePrivate2(page)) {
- btrfs_finish_ordered_io(page->mapping->host,
- page_start, page_end);
- }
- btrfs_put_ordered_extent(ordered);
- cached_state = NULL;
- lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
- }
- clear_extent_bit(tree, page_start, page_end,
- EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING, 1, 1, &cached_state, GFP_NOFS);
- __btrfs_releasepage(page, GFP_NOFS);
-
- ClearPageChecked(page);
- if (PagePrivate(page)) {
- ClearPagePrivate(page);
- set_page_private(page, 0);
- page_cache_release(page);
- }
-}
-
-/*
- * btrfs_page_mkwrite() is not allowed to change the file size as it gets
- * called from a page fault handler when a page is first dirtied. Hence we must
- * be careful to check for EOF conditions here. We set the page up correctly
- * for a written page which means we get ENOSPC checking when writing into
- * holes and correct delalloc and unwritten extent mapping on filesystems that
- * support these features.
- *
- * We are not allowed to take the i_mutex here so we have to play games to
- * protect against truncate races as the page could now be beyond EOF. Because
- * vmtruncate() writes the inode size before removing pages, once we have the
- * page lock we can determine safely if the page is beyond EOF. If it is not
- * beyond EOF, then the page is guaranteed safe against truncation until we
- * unlock the page.
- */
-int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct page *page = vmf->page;
- struct inode *inode = fdentry(vma->vm_file)->d_inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- struct btrfs_ordered_extent *ordered;
- struct extent_state *cached_state = NULL;
- char *kaddr;
- unsigned long zero_start;
- loff_t size;
- int ret;
- int reserved = 0;
- u64 page_start;
- u64 page_end;
-
- ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
- if (!ret) {
- ret = btrfs_update_time(vma->vm_file);
- reserved = 1;
- }
- if (ret) {
- if (ret == -ENOMEM)
- ret = VM_FAULT_OOM;
- else /* -ENOSPC, -EIO, etc */
- ret = VM_FAULT_SIGBUS;
- if (reserved)
- goto out;
- goto out_noreserve;
- }
-
- ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
-again:
- lock_page(page);
- size = i_size_read(inode);
- page_start = page_offset(page);
- page_end = page_start + PAGE_CACHE_SIZE - 1;
-
- if ((page->mapping != inode->i_mapping) ||
- (page_start >= size)) {
- /* page got truncated out from underneath us */
- goto out_unlock;
- }
- wait_on_page_writeback(page);
-
- lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
- set_page_extent_mapped(page);
-
- /*
- * we can't set the delalloc bits if there are pending ordered
- * extents. Drop our locks and wait for them to finish
- */
- ordered = btrfs_lookup_ordered_extent(inode, page_start);
- if (ordered) {
- unlock_extent_cached(io_tree, page_start, page_end,
- &cached_state, GFP_NOFS);
- unlock_page(page);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- goto again;
- }
-
- /*
- * XXX - page_mkwrite gets called every time the page is dirtied, even
- * if it was already dirty, so for space accounting reasons we need to
- * clear any delalloc bits for the range we are fixing to save. There
- * is probably a better way to do this, but for now keep consistent with
- * prepare_pages in the normal write path.
- */
- clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
- EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
- 0, 0, &cached_state, GFP_NOFS);
-
- ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
- &cached_state);
- if (ret) {
- unlock_extent_cached(io_tree, page_start, page_end,
- &cached_state, GFP_NOFS);
- ret = VM_FAULT_SIGBUS;
- goto out_unlock;
- }
- ret = 0;
-
- /* page is wholly or partially inside EOF */
- if (page_start + PAGE_CACHE_SIZE > size)
- zero_start = size & ~PAGE_CACHE_MASK;
- else
- zero_start = PAGE_CACHE_SIZE;
-
- if (zero_start != PAGE_CACHE_SIZE) {
- kaddr = kmap(page);
- memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
- flush_dcache_page(page);
- kunmap(page);
- }
- ClearPageChecked(page);
- set_page_dirty(page);
- SetPageUptodate(page);
-
- BTRFS_I(inode)->last_trans = root->fs_info->generation;
- BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
-
- unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
-
-out_unlock:
- if (!ret)
- return VM_FAULT_LOCKED;
- unlock_page(page);
-out:
- btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
-out_noreserve:
- return ret;
-}
-
-static int btrfs_truncate(struct inode *inode)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_block_rsv *rsv;
- int ret;
- int err = 0;
- struct btrfs_trans_handle *trans;
- unsigned long nr;
- u64 mask = root->sectorsize - 1;
- u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
-
- ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
- if (ret)
- return ret;
-
- btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
- btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
-
- /*
-	 * Yes, ladies and gentlemen, this is indeed ugly. The fact is we have
- * 3 things going on here
- *
- * 1) We need to reserve space for our orphan item and the space to
- * delete our orphan item. Lord knows we don't want to have a dangling
- * orphan item because we didn't reserve space to remove it.
- *
- * 2) We need to reserve space to update our inode.
- *
- * 3) We need to have something to cache all the space that is going to
-	 * be freed up by the truncate operation, but also have some slack
- * space reserved in case it uses space during the truncate (thank you
- * very much snapshotting).
- *
-	 * And we need these to all be separate. The fact is we can use a lot of
-	 * space doing the truncate, and we have no earthly idea how much space
-	 * we will use, so we need the truncate reservation to be separate so it
-	 * doesn't end up using space reserved for updating the inode or
-	 * removing the orphan item. We also need to be able to stop the
-	 * transaction and start a new one, which means we need to be able to
-	 * update the inode several times, and we have no way of knowing how
-	 * many times that will be, so we can't just reserve 1 item for the
-	 * entirety of the operation, so that has to be done separately as well.
- * Then there is the orphan item, which does indeed need to be held on
- * to for the whole operation, and we need nobody to touch this reserved
- * space except the orphan code.
- *
- * So that leaves us with
- *
- * 1) root->orphan_block_rsv - for the orphan deletion.
- * 2) rsv - for the truncate reservation, which we will steal from the
- * transaction reservation.
-	 * 3) fs_info->trans_block_rsv - this will have 1 item's worth left for
- * updating the inode.
- */
- rsv = btrfs_alloc_block_rsv(root);
- if (!rsv)
- return -ENOMEM;
- rsv->size = min_size;
-
- /*
- * 1 for the truncate slack space
- * 1 for the orphan item we're going to add
- * 1 for the orphan item deletion
- * 1 for updating the inode.
- */
- trans = btrfs_start_transaction(root, 4);
- if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
- goto out;
- }
-
- /* Migrate the slack space for the truncate to our reserve */
- ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
- min_size);
- BUG_ON(ret);
-
- ret = btrfs_orphan_add(trans, inode);
- if (ret) {
- btrfs_end_transaction(trans, root);
- goto out;
- }
-
- /*
- * setattr is responsible for setting the ordered_data_close flag,
- * but that is only tested during the last file release. That
- * could happen well after the next commit, leaving a great big
- * window where new writes may get lost if someone chooses to write
- * to this file after truncating to zero
- *
- * The inode doesn't have any dirty data here, and so if we commit
- * this is a noop. If someone immediately starts writing to the inode
- * it is very likely we'll catch some of their writes in this
- * transaction, and the commit will find this file on the ordered
- * data list with good things to send down.
- *
- * This is a best effort solution, there is still a window where
- * using truncate to replace the contents of the file will
- * end up with a zero length file after a crash.
- */
- if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
- btrfs_add_ordered_operation(trans, root, inode);
-
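-	/*
-	 * truncate items a chunk at a time; btrfs_truncate_inode_items
-	 * returns -EAGAIN when we should end this transaction and retry
-	 * with a fresh one
-	 */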
- while (1) {
- ret = btrfs_block_rsv_refill(root, rsv, min_size);
- if (ret) {
- /*
- * This can only happen with the original transaction we
-			 * started above; every other time we shouldn't have a
- * transaction started yet.
- */
- if (ret == -EAGAIN)
- goto end_trans;
- err = ret;
- break;
- }
-
- if (!trans) {
- /* Just need the 1 for updating the inode */
- trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- ret = err = PTR_ERR(trans);
- trans = NULL;
- break;
- }
- }
-
- trans->block_rsv = rsv;
-
- ret = btrfs_truncate_inode_items(trans, root, inode,
- inode->i_size,
- BTRFS_EXTENT_DATA_KEY);
- if (ret != -EAGAIN) {
- err = ret;
- break;
- }
-
- trans->block_rsv = &root->fs_info->trans_block_rsv;
- ret = btrfs_update_inode(trans, root, inode);
- if (ret) {
- err = ret;
- break;
- }
-end_trans:
- nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
- trans = NULL;
- btrfs_btree_balance_dirty(root, nr);
- }
-
- if (ret == 0 && inode->i_nlink > 0) {
- trans->block_rsv = root->orphan_block_rsv;
- ret = btrfs_orphan_del(trans, inode);
- if (ret)
- err = ret;
- } else if (ret && inode->i_nlink > 0) {
- /*
- * Failed to do the truncate, remove us from the in memory
- * orphan list.
- */
- ret = btrfs_orphan_del(NULL, inode);
- }
-
- if (trans) {
- trans->block_rsv = &root->fs_info->trans_block_rsv;
- ret = btrfs_update_inode(trans, root, inode);
- if (ret && !err)
- err = ret;
-
- nr = trans->blocks_used;
- ret = btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
- }
-
-out:
- btrfs_free_block_rsv(root, rsv);
-
- if (ret && !err)
- err = ret;
-
- return err;
-}
-
-/*
- * create a new subvolume directory/inode (helper for the ioctl).
- */
-int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *new_root, u64 new_dirid)
-{
- struct inode *inode;
- int err;
- u64 index = 0;
-
- inode = btrfs_new_inode(trans, new_root, NULL, "..", 2,
- new_dirid, new_dirid,
- S_IFDIR | (~current_umask() & S_IRWXUGO),
- &index);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
- inode->i_op = &btrfs_dir_inode_operations;
- inode->i_fop = &btrfs_dir_file_operations;
-
- set_nlink(inode, 1);
- btrfs_i_size_write(inode, 0);
-
- err = btrfs_update_inode(trans, new_root, inode);
-
- iput(inode);
- return err;
-}
-
-struct inode *btrfs_alloc_inode(struct super_block *sb)
-{
- struct btrfs_inode *ei;
- struct inode *inode;
-
- ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
- if (!ei)
- return NULL;
-
- ei->root = NULL;
- ei->space_info = NULL;
- ei->generation = 0;
- ei->sequence = 0;
- ei->last_trans = 0;
- ei->last_sub_trans = 0;
- ei->logged_trans = 0;
- ei->delalloc_bytes = 0;
- ei->disk_i_size = 0;
- ei->flags = 0;
- ei->csum_bytes = 0;
- ei->index_cnt = (u64)-1;
- ei->last_unlink_trans = 0;
-
- spin_lock_init(&ei->lock);
- ei->outstanding_extents = 0;
- ei->reserved_extents = 0;
-
- ei->ordered_data_close = 0;
- ei->orphan_meta_reserved = 0;
- ei->dummy_inode = 0;
- ei->in_defrag = 0;
- ei->delalloc_meta_reserved = 0;
- ei->force_compress = BTRFS_COMPRESS_NONE;
-
- ei->delayed_node = NULL;
-
- inode = &ei->vfs_inode;
- extent_map_tree_init(&ei->extent_tree);
- extent_io_tree_init(&ei->io_tree, &inode->i_data);
- extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
- ei->io_tree.track_uptodate = 1;
- ei->io_failure_tree.track_uptodate = 1;
- mutex_init(&ei->log_mutex);
- mutex_init(&ei->delalloc_mutex);
- btrfs_ordered_inode_tree_init(&ei->ordered_tree);
- INIT_LIST_HEAD(&ei->i_orphan);
- INIT_LIST_HEAD(&ei->delalloc_inodes);
- INIT_LIST_HEAD(&ei->ordered_operations);
- RB_CLEAR_NODE(&ei->rb_node);
-
- return inode;
-}
-
-static void btrfs_i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
-}
-
-void btrfs_destroy_inode(struct inode *inode)
-{
- struct btrfs_ordered_extent *ordered;
- struct btrfs_root *root = BTRFS_I(inode)->root;
-
- WARN_ON(!list_empty(&inode->i_dentry));
- WARN_ON(inode->i_data.nrpages);
- WARN_ON(BTRFS_I(inode)->outstanding_extents);
- WARN_ON(BTRFS_I(inode)->reserved_extents);
- WARN_ON(BTRFS_I(inode)->delalloc_bytes);
- WARN_ON(BTRFS_I(inode)->csum_bytes);
-
- /*
-	 * This can happen when we create an inode, but somebody else also
- * created the same inode and we need to destroy the one we already
- * created.
- */
- if (!root)
- goto free;
-
- /*
- * Make sure we're properly removed from the ordered operation
- * lists.
- */
- smp_mb();
- if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
- spin_lock(&root->fs_info->ordered_extent_lock);
- list_del_init(&BTRFS_I(inode)->ordered_operations);
- spin_unlock(&root->fs_info->ordered_extent_lock);
- }
-
- spin_lock(&root->orphan_lock);
- if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
- printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
- (unsigned long long)btrfs_ino(inode));
- list_del_init(&BTRFS_I(inode)->i_orphan);
- }
- spin_unlock(&root->orphan_lock);
-
- while (1) {
- ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
- if (!ordered)
- break;
- else {
- printk(KERN_ERR "btrfs found ordered "
- "extent %llu %llu on inode cleanup\n",
- (unsigned long long)ordered->file_offset,
- (unsigned long long)ordered->len);
- btrfs_remove_ordered_extent(inode, ordered);
- btrfs_put_ordered_extent(ordered);
- btrfs_put_ordered_extent(ordered);
- }
- }
- inode_tree_del(inode);
- btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
-free:
- btrfs_remove_delayed_node(inode);
- call_rcu(&inode->i_rcu, btrfs_i_callback);
-}
-
-int btrfs_drop_inode(struct inode *inode)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
-
- if (btrfs_root_refs(&root->root_item) == 0 &&
- !btrfs_is_free_space_inode(root, inode))
- return 1;
- else
- return generic_drop_inode(inode);
-}
-
-static void init_once(void *foo)
-{
- struct btrfs_inode *ei = (struct btrfs_inode *) foo;
-
- inode_init_once(&ei->vfs_inode);
-}
-
-void btrfs_destroy_cachep(void)
-{
- if (btrfs_inode_cachep)
- kmem_cache_destroy(btrfs_inode_cachep);
- if (btrfs_trans_handle_cachep)
- kmem_cache_destroy(btrfs_trans_handle_cachep);
- if (btrfs_transaction_cachep)
- kmem_cache_destroy(btrfs_transaction_cachep);
- if (btrfs_path_cachep)
- kmem_cache_destroy(btrfs_path_cachep);
- if (btrfs_free_space_cachep)
- kmem_cache_destroy(btrfs_free_space_cachep);
-}
-
-int btrfs_init_cachep(void)
-{
- btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
- sizeof(struct btrfs_inode), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once);
- if (!btrfs_inode_cachep)
- goto fail;
-
- btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
- sizeof(struct btrfs_trans_handle), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
- if (!btrfs_trans_handle_cachep)
- goto fail;
-
- btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
- sizeof(struct btrfs_transaction), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
- if (!btrfs_transaction_cachep)
- goto fail;
-
- btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
- sizeof(struct btrfs_path), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
- if (!btrfs_path_cachep)
- goto fail;
-
- btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache",
- sizeof(struct btrfs_free_space), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
- if (!btrfs_free_space_cachep)
- goto fail;
-
- return 0;
-fail:
- btrfs_destroy_cachep();
- return -ENOMEM;
-}
-
-static int btrfs_getattr(struct vfsmount *mnt,
- struct dentry *dentry, struct kstat *stat)
-{
- struct inode *inode = dentry->d_inode;
- u32 blocksize = inode->i_sb->s_blocksize;
-
- generic_fillattr(inode, stat);
- stat->dev = BTRFS_I(inode)->root->anon_dev;
- stat->blksize = PAGE_CACHE_SIZE;
- stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
- ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9;
- return 0;
-}
-
-/*
- * If a file is moved, it will inherit the cow and compression flags of the new
- * directory.
- */
-static void fixup_inode_flags(struct inode *dir, struct inode *inode)
-{
- struct btrfs_inode *b_dir = BTRFS_I(dir);
- struct btrfs_inode *b_inode = BTRFS_I(inode);
-
- if (b_dir->flags & BTRFS_INODE_NODATACOW)
- b_inode->flags |= BTRFS_INODE_NODATACOW;
- else
- b_inode->flags &= ~BTRFS_INODE_NODATACOW;
-
- if (b_dir->flags & BTRFS_INODE_COMPRESS)
- b_inode->flags |= BTRFS_INODE_COMPRESS;
- else
- b_inode->flags &= ~BTRFS_INODE_COMPRESS;
-}
-
-static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root = BTRFS_I(old_dir)->root;
- struct btrfs_root *dest = BTRFS_I(new_dir)->root;
- struct inode *new_inode = new_dentry->d_inode;
- struct inode *old_inode = old_dentry->d_inode;
- struct timespec ctime = CURRENT_TIME;
- u64 index = 0;
- u64 root_objectid;
- int ret;
- u64 old_ino = btrfs_ino(old_inode);
-
- if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
- return -EPERM;
-
- /* we only allow rename subvolume link between subvolumes */
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
- return -EXDEV;
-
- if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
- (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID))
- return -ENOTEMPTY;
-
- if (S_ISDIR(old_inode->i_mode) && new_inode &&
- new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
- return -ENOTEMPTY;
- /*
-	 * we're using rename to replace one file with another,
-	 * and the replacement file is large. Start IO on it now so
- * we don't add too much work to the end of the transaction
- */
- if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size &&
- old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
- filemap_flush(old_inode->i_mapping);
-
- /* close the racy window with snapshot create/destroy ioctl */
- if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
- down_read(&root->fs_info->subvol_sem);
- /*
- * We want to reserve the absolute worst case amount of items. So if
- * both inodes are subvols and we need to unlink them then that would
- * require 4 item modifications, but if they are both normal inodes it
-	 * would require 5 item modifications, so we'll assume they're normal
- * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
- * should cover the worst case number of items we'll modify.
- */
- trans = btrfs_start_transaction(root, 20);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto out_notrans;
- }
-
- if (dest != root)
- btrfs_record_root_in_trans(trans, dest);
-
- ret = btrfs_set_inode_index(new_dir, &index);
- if (ret)
- goto out_fail;
-
- if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
- /* force full log commit if subvolume involved. */
- root->fs_info->last_trans_log_full_commit = trans->transid;
- } else {
- ret = btrfs_insert_inode_ref(trans, dest,
- new_dentry->d_name.name,
- new_dentry->d_name.len,
- old_ino,
- btrfs_ino(new_dir), index);
- if (ret)
- goto out_fail;
- /*
- * this is an ugly little race, but the rename is required
- * to make sure that if we crash, the inode is either at the
-		 * old name or the new one. Pinning the log transaction lets
- * us make sure we don't allow a log commit to come in after
- * we unlink the name but before we add the new name back in.
- */
- btrfs_pin_log_trans(root);
- }
- /*
- * make sure the inode gets flushed if it is replacing
- * something.
- */
- if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
- btrfs_add_ordered_operation(trans, root, old_inode);
-
- old_dir->i_ctime = old_dir->i_mtime = ctime;
- new_dir->i_ctime = new_dir->i_mtime = ctime;
- old_inode->i_ctime = ctime;
-
- if (old_dentry->d_parent != new_dentry->d_parent)
- btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
-
- if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
- root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
- ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
- old_dentry->d_name.name,
- old_dentry->d_name.len);
- } else {
- ret = __btrfs_unlink_inode(trans, root, old_dir,
- old_dentry->d_inode,
- old_dentry->d_name.name,
- old_dentry->d_name.len);
- if (!ret)
- ret = btrfs_update_inode(trans, root, old_inode);
- }
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto out_fail;
- }
-
- if (new_inode) {
- new_inode->i_ctime = CURRENT_TIME;
- if (unlikely(btrfs_ino(new_inode) ==
- BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
- root_objectid = BTRFS_I(new_inode)->location.objectid;
- ret = btrfs_unlink_subvol(trans, dest, new_dir,
- root_objectid,
- new_dentry->d_name.name,
- new_dentry->d_name.len);
- BUG_ON(new_inode->i_nlink == 0);
- } else {
- ret = btrfs_unlink_inode(trans, dest, new_dir,
- new_dentry->d_inode,
- new_dentry->d_name.name,
- new_dentry->d_name.len);
- }
- if (!ret && new_inode->i_nlink == 0) {
- ret = btrfs_orphan_add(trans, new_dentry->d_inode);
- BUG_ON(ret);
- }
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto out_fail;
- }
- }
-
- fixup_inode_flags(new_dir, old_inode);
-
- ret = btrfs_add_link(trans, new_dir, old_inode,
- new_dentry->d_name.name,
- new_dentry->d_name.len, 0, index);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto out_fail;
- }
-
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
- struct dentry *parent = new_dentry->d_parent;
- btrfs_log_new_name(trans, old_inode, old_dir, parent);
- btrfs_end_log_trans(root);
- }
-out_fail:
- btrfs_end_transaction(trans, root);
-out_notrans:
- if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
- up_read(&root->fs_info->subvol_sem);
-
- return ret;
-}
-
-/*
- * some fairly slow code that needs optimization. This walks the list
- * of all the inodes with pending delalloc and forces them to disk.
- */
-int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
-{
- struct list_head *head = &root->fs_info->delalloc_inodes;
- struct btrfs_inode *binode;
- struct inode *inode;
-
- if (root->fs_info->sb->s_flags & MS_RDONLY)
- return -EROFS;
-
- spin_lock(&root->fs_info->delalloc_lock);
- while (!list_empty(head)) {
- binode = list_entry(head->next, struct btrfs_inode,
- delalloc_inodes);
- inode = igrab(&binode->vfs_inode);
- if (!inode)
- list_del_init(&binode->delalloc_inodes);
- spin_unlock(&root->fs_info->delalloc_lock);
- if (inode) {
- filemap_flush(inode->i_mapping);
- if (delay_iput)
- btrfs_add_delayed_iput(inode);
- else
- iput(inode);
- }
- cond_resched();
- spin_lock(&root->fs_info->delalloc_lock);
- }
- spin_unlock(&root->fs_info->delalloc_lock);
-
- /* the filemap_flush will queue IO into the worker threads, but
- * we have to make sure the IO is actually started and that
- * ordered extents get created before we return
- */
- atomic_inc(&root->fs_info->async_submit_draining);
- while (atomic_read(&root->fs_info->nr_async_submits) ||
- atomic_read(&root->fs_info->async_delalloc_pages)) {
- wait_event(root->fs_info->async_submit_wait,
- (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
- atomic_read(&root->fs_info->async_delalloc_pages) == 0));
- }
- atomic_dec(&root->fs_info->async_submit_draining);
- return 0;
-}
-
-static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
- const char *symname)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root = BTRFS_I(dir)->root;
- struct btrfs_path *path;
- struct btrfs_key key;
- struct inode *inode = NULL;
- int err;
- int drop_inode = 0;
- u64 objectid;
- u64 index = 0 ;
- int name_len;
- int datasize;
- unsigned long ptr;
- struct btrfs_file_extent_item *ei;
- struct extent_buffer *leaf;
- unsigned long nr = 0;
-
- name_len = strlen(symname) + 1;
- if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
- return -ENAMETOOLONG;
-
- /*
- * 2 items for inode item and ref
- * 2 items for dir items
- * 1 item for xattr if selinux is on
- */
- trans = btrfs_start_transaction(root, 5);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- err = btrfs_find_free_ino(root, &objectid);
- if (err)
- goto out_unlock;
-
- inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
- dentry->d_name.len, btrfs_ino(dir), objectid,
- S_IFLNK|S_IRWXUGO, &index);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- goto out_unlock;
- }
-
- err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
- if (err) {
- drop_inode = 1;
- goto out_unlock;
- }
-
- /*
- * If the active LSM wants to access the inode during
- * d_instantiate it needs these. Smack checks to see
- * if the filesystem supports xattrs by looking at the
- * ops vector.
- */
- inode->i_fop = &btrfs_file_operations;
- inode->i_op = &btrfs_file_inode_operations;
-
- err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
- if (err)
- drop_inode = 1;
- else {
- inode->i_mapping->a_ops = &btrfs_aops;
- inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
- BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
- }
- if (drop_inode)
- goto out_unlock;
-
- path = btrfs_alloc_path();
- if (!path) {
- err = -ENOMEM;
- drop_inode = 1;
- goto out_unlock;
- }
- key.objectid = btrfs_ino(inode);
- key.offset = 0;
- btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
- datasize = btrfs_file_extent_calc_inline_size(name_len);
- err = btrfs_insert_empty_item(trans, root, path, &key,
- datasize);
- if (err) {
- drop_inode = 1;
- btrfs_free_path(path);
- goto out_unlock;
- }
- leaf = path->nodes[0];
- ei = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_generation(leaf, ei, trans->transid);
- btrfs_set_file_extent_type(leaf, ei,
- BTRFS_FILE_EXTENT_INLINE);
- btrfs_set_file_extent_encryption(leaf, ei, 0);
- btrfs_set_file_extent_compression(leaf, ei, 0);
- btrfs_set_file_extent_other_encoding(leaf, ei, 0);
- btrfs_set_file_extent_ram_bytes(leaf, ei, name_len);
-
- ptr = btrfs_file_extent_inline_start(ei);
- write_extent_buffer(leaf, symname, ptr, name_len);
- btrfs_mark_buffer_dirty(leaf);
- btrfs_free_path(path);
-
- inode->i_op = &btrfs_symlink_inode_operations;
- inode->i_mapping->a_ops = &btrfs_symlink_aops;
- inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
- inode_set_bytes(inode, name_len);
- btrfs_i_size_write(inode, name_len - 1);
- err = btrfs_update_inode(trans, root, inode);
- if (err)
- drop_inode = 1;
-
-out_unlock:
- if (!err)
- d_instantiate(dentry, inode);
- nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
- if (drop_inode) {
- inode_dec_link_count(inode);
- iput(inode);
- }
- btrfs_btree_balance_dirty(root, nr);
- return err;
-}
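
The symlink target above is written as a single inline file extent, so its length is bounded by BTRFS_MAX_INLINE_DATA_SIZE (which depends on the leaf size) rather than only by PATH_MAX. A minimal userspace sketch of probing that limit, assuming a hypothetical mount at /mnt/btrfs and the default 4K leaf size (with larger leaves the VFS PATH_MAX limit is hit first):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        char target[4001];

        /* ~4000 bytes: over the ~3.9K inline limit of a 4K leaf, still under PATH_MAX */
        memset(target, 'x', sizeof(target) - 1);
        target[sizeof(target) - 1] = '\0';

        /* /mnt/btrfs/longlink is a hypothetical path on a btrfs filesystem */
        if (symlink(target, "/mnt/btrfs/longlink") == -1 && errno == ENAMETOOLONG)
                printf("target exceeds the inline data limit\n");
        return 0;
}
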
-
-static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
- u64 start, u64 num_bytes, u64 min_size,
- loff_t actual_len, u64 *alloc_hint,
- struct btrfs_trans_handle *trans)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_key ins;
- u64 cur_offset = start;
- u64 i_size;
- int ret = 0;
- bool own_trans = true;
-
- if (trans)
- own_trans = false;
- while (num_bytes > 0) {
- if (own_trans) {
- trans = btrfs_start_transaction(root, 3);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- break;
- }
- }
-
- ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
- 0, *alloc_hint, &ins, 1);
- if (ret) {
- if (own_trans)
- btrfs_end_transaction(trans, root);
- break;
- }
-
- ret = insert_reserved_file_extent(trans, inode,
- cur_offset, ins.objectid,
- ins.offset, ins.offset,
- ins.offset, 0, 0, 0,
- BTRFS_FILE_EXTENT_PREALLOC);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- if (own_trans)
- btrfs_end_transaction(trans, root);
- break;
- }
- btrfs_drop_extent_cache(inode, cur_offset,
- cur_offset + ins.offset -1, 0);
-
- num_bytes -= ins.offset;
- cur_offset += ins.offset;
- *alloc_hint = ins.objectid + ins.offset;
-
- inode->i_ctime = CURRENT_TIME;
- BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
- if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- (actual_len > inode->i_size) &&
- (cur_offset > inode->i_size)) {
- if (cur_offset > actual_len)
- i_size = actual_len;
- else
- i_size = cur_offset;
- i_size_write(inode, i_size);
- btrfs_ordered_update_i_size(inode, i_size, NULL);
- }
-
- ret = btrfs_update_inode(trans, root, inode);
-
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- if (own_trans)
- btrfs_end_transaction(trans, root);
- break;
- }
-
- if (own_trans)
- btrfs_end_transaction(trans, root);
- }
- return ret;
-}
-
-int btrfs_prealloc_file_range(struct inode *inode, int mode,
- u64 start, u64 num_bytes, u64 min_size,
- loff_t actual_len, u64 *alloc_hint)
-{
- return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
- min_size, actual_len, alloc_hint,
- NULL);
-}
-
-int btrfs_prealloc_file_range_trans(struct inode *inode,
- struct btrfs_trans_handle *trans, int mode,
- u64 start, u64 num_bytes, u64 min_size,
- loff_t actual_len, u64 *alloc_hint)
-{
- return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
- min_size, actual_len, alloc_hint, trans);
-}
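
These two wrappers are the preallocation back end that userspace reaches through fallocate(); the FALLOC_FL_KEEP_SIZE test in __btrfs_prealloc_file_range corresponds to the mode flag passed in. A minimal sketch, assuming a hypothetical file path on a btrfs mount:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* hypothetical file on a btrfs mount */
        int fd = open("/mnt/btrfs/data", O_RDWR | O_CREAT, 0644);

        if (fd < 0)
                return 1;
        /* reserve 16 MiB without moving i_size; maps to the FALLOC_FL_KEEP_SIZE case above */
        if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 16 << 20))
                perror("fallocate");
        close(fd);
        return 0;
}
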
-
-static int btrfs_set_page_dirty(struct page *page)
-{
- return __set_page_dirty_nobuffers(page);
-}
-
-static int btrfs_permission(struct inode *inode, int mask)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- umode_t mode = inode->i_mode;
-
- if (mask & MAY_WRITE &&
- (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
- if (btrfs_root_readonly(root))
- return -EROFS;
- if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
- return -EACCES;
- }
- return generic_permission(inode, mask);
-}
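
btrfs_permission layers two refusals on top of generic_permission: writes into a read-only subvolume fail with EROFS even though the mount itself is read-write, and inodes flagged BTRFS_INODE_READONLY fail with EACCES. A small sketch of the first case, with a hypothetical read-only snapshot path:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* hypothetical file inside a read-only snapshot on a read-write mount */
        int fd = open("/mnt/btrfs/ro-snap/file", O_WRONLY);

        if (fd < 0 && errno == EROFS)
                printf("write refused: subvolume is read-only\n");
        if (fd >= 0)
                close(fd);
        return 0;
}
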
-
-static const struct inode_operations btrfs_dir_inode_operations = {
- .getattr = btrfs_getattr,
- .lookup = btrfs_lookup,
- .create = btrfs_create,
- .unlink = btrfs_unlink,
- .link = btrfs_link,
- .mkdir = btrfs_mkdir,
- .rmdir = btrfs_rmdir,
- .rename = btrfs_rename,
- .symlink = btrfs_symlink,
- .setattr = btrfs_setattr,
- .mknod = btrfs_mknod,
- .setxattr = btrfs_setxattr,
- .getxattr = btrfs_getxattr,
- .listxattr = btrfs_listxattr,
- .removexattr = btrfs_removexattr,
- .permission = btrfs_permission,
- .get_acl = btrfs_get_acl,
-};
-static const struct inode_operations btrfs_dir_ro_inode_operations = {
- .lookup = btrfs_lookup,
- .permission = btrfs_permission,
- .get_acl = btrfs_get_acl,
-};
-
-static const struct file_operations btrfs_dir_file_operations = {
- .llseek = generic_file_llseek,
- .read = generic_read_dir,
- .readdir = btrfs_real_readdir,
- .unlocked_ioctl = btrfs_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = btrfs_ioctl,
-#endif
- .release = btrfs_release_file,
- .fsync = btrfs_sync_file,
-};
-
-static struct extent_io_ops btrfs_extent_io_ops = {
- .fill_delalloc = run_delalloc_range,
- .submit_bio_hook = btrfs_submit_bio_hook,
- .merge_bio_hook = btrfs_merge_bio_hook,
- .readpage_end_io_hook = btrfs_readpage_end_io_hook,
- .writepage_end_io_hook = btrfs_writepage_end_io_hook,
- .writepage_start_hook = btrfs_writepage_start_hook,
- .set_bit_hook = btrfs_set_bit_hook,
- .clear_bit_hook = btrfs_clear_bit_hook,
- .merge_extent_hook = btrfs_merge_extent_hook,
- .split_extent_hook = btrfs_split_extent_hook,
-};
-
-/*
- * btrfs doesn't support the bmap operation because swapfiles
- * use bmap to make a mapping of extents in the file. They assume
- * these extents won't change over the life of the file and they
- * use the bmap result to do IO directly to the drive.
- *
- * the btrfs bmap call would return logical addresses that aren't
- * suitable for IO and they also will change frequently as COW
- * operations happen. So, swapfile + btrfs == corruption.
- *
- * For now we're avoiding this by dropping bmap.
- */
-static const struct address_space_operations btrfs_aops = {
- .readpage = btrfs_readpage,
- .writepage = btrfs_writepage,
- .writepages = btrfs_writepages,
- .readpages = btrfs_readpages,
- .direct_IO = btrfs_direct_IO,
- .invalidatepage = btrfs_invalidatepage,
- .releasepage = btrfs_releasepage,
- .set_page_dirty = btrfs_set_page_dirty,
- .error_remove_page = generic_error_remove_page,
-};
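
As the comment above notes, swap files are not supported here; on this kernel generation swapon() on a btrfs-backed file is expected to fail, because the swap setup path relies on the missing bmap operation. A hedged sketch (hypothetical path, needs root):

#include <errno.h>
#include <stdio.h>
#include <sys/swap.h>

int main(void)
{
        /* hypothetical pre-sized file on a btrfs mount */
        if (swapon("/mnt/btrfs/swapfile", 0) == -1)
                fprintf(stderr, "swapon refused (errno %d), as the comment above predicts\n", errno);
        return 0;
}
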
-
-static const struct address_space_operations btrfs_symlink_aops = {
- .readpage = btrfs_readpage,
- .writepage = btrfs_writepage,
- .invalidatepage = btrfs_invalidatepage,
- .releasepage = btrfs_releasepage,
-};
-
-static const struct inode_operations btrfs_file_inode_operations = {
- .getattr = btrfs_getattr,
- .setattr = btrfs_setattr,
- .setxattr = btrfs_setxattr,
- .getxattr = btrfs_getxattr,
- .listxattr = btrfs_listxattr,
- .removexattr = btrfs_removexattr,
- .permission = btrfs_permission,
- .fiemap = btrfs_fiemap,
- .get_acl = btrfs_get_acl,
-};
-static const struct inode_operations btrfs_special_inode_operations = {
- .getattr = btrfs_getattr,
- .setattr = btrfs_setattr,
- .permission = btrfs_permission,
- .setxattr = btrfs_setxattr,
- .getxattr = btrfs_getxattr,
- .listxattr = btrfs_listxattr,
- .removexattr = btrfs_removexattr,
- .get_acl = btrfs_get_acl,
-};
-static const struct inode_operations btrfs_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = page_follow_link_light,
- .put_link = page_put_link,
- .getattr = btrfs_getattr,
- .setattr = btrfs_setattr,
- .permission = btrfs_permission,
- .setxattr = btrfs_setxattr,
- .getxattr = btrfs_getxattr,
- .listxattr = btrfs_listxattr,
- .removexattr = btrfs_removexattr,
- .get_acl = btrfs_get_acl,
-};
-
-const struct dentry_operations btrfs_dentry_operations = {
- .d_delete = btrfs_dentry_delete,
- .d_release = btrfs_dentry_release,
-};
diff --git a/ANDROID_3.4.5/fs/btrfs/ioctl.c b/ANDROID_3.4.5/fs/btrfs/ioctl.c
deleted file mode 100644
index 14f8e1fa..00000000
--- a/ANDROID_3.4.5/fs/btrfs/ioctl.c
+++ /dev/null
@@ -1,3430 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/bio.h>
-#include <linux/buffer_head.h>
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/fsnotify.h>
-#include <linux/pagemap.h>
-#include <linux/highmem.h>
-#include <linux/time.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/backing-dev.h>
-#include <linux/mount.h>
-#include <linux/mpage.h>
-#include <linux/namei.h>
-#include <linux/swap.h>
-#include <linux/writeback.h>
-#include <linux/statfs.h>
-#include <linux/compat.h>
-#include <linux/bit_spinlock.h>
-#include <linux/security.h>
-#include <linux/xattr.h>
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
-#include "compat.h"
-#include "ctree.h"
-#include "disk-io.h"
-#include "transaction.h"
-#include "btrfs_inode.h"
-#include "ioctl.h"
-#include "print-tree.h"
-#include "volumes.h"
-#include "locking.h"
-#include "inode-map.h"
-#include "backref.h"
-
-/* Mask out flags that are inappropriate for the given type of inode. */
-static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
-{
- if (S_ISDIR(mode))
- return flags;
- else if (S_ISREG(mode))
- return flags & ~FS_DIRSYNC_FL;
- else
- return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
-}
-
-/*
- * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl.
- */
-static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
-{
- unsigned int iflags = 0;
-
- if (flags & BTRFS_INODE_SYNC)
- iflags |= FS_SYNC_FL;
- if (flags & BTRFS_INODE_IMMUTABLE)
- iflags |= FS_IMMUTABLE_FL;
- if (flags & BTRFS_INODE_APPEND)
- iflags |= FS_APPEND_FL;
- if (flags & BTRFS_INODE_NODUMP)
- iflags |= FS_NODUMP_FL;
- if (flags & BTRFS_INODE_NOATIME)
- iflags |= FS_NOATIME_FL;
- if (flags & BTRFS_INODE_DIRSYNC)
- iflags |= FS_DIRSYNC_FL;
- if (flags & BTRFS_INODE_NODATACOW)
- iflags |= FS_NOCOW_FL;
-
- if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS))
- iflags |= FS_COMPR_FL;
- else if (flags & BTRFS_INODE_NOCOMPRESS)
- iflags |= FS_NOCOMP_FL;
-
- return iflags;
-}
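
The mapping above is what FS_IOC_GETFLAGS reports to userspace (the same interface chattr/lsattr use). A minimal sketch reading the flags of a hypothetical file on a btrfs mount:

#include <fcntl.h>
#include <linux/fs.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        /* hypothetical file on a btrfs mount */
        int fd = open("/mnt/btrfs/file", O_RDONLY);
        unsigned int flags = 0; /* the getflags handler copies out an unsigned int */

        if (fd < 0)
                return 1;
        if (ioctl(fd, FS_IOC_GETFLAGS, &flags) == 0) {
                if (flags & FS_NOCOW_FL)
                        printf("NODATACOW is set\n");
                if (flags & FS_COMPR_FL)
                        printf("compression is requested\n");
        }
        close(fd);
        return 0;
}
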
-
-/*
- * Update inode->i_flags based on the btrfs internal flags.
- */
-void btrfs_update_iflags(struct inode *inode)
-{
- struct btrfs_inode *ip = BTRFS_I(inode);
-
- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
-
- if (ip->flags & BTRFS_INODE_SYNC)
- inode->i_flags |= S_SYNC;
- if (ip->flags & BTRFS_INODE_IMMUTABLE)
- inode->i_flags |= S_IMMUTABLE;
- if (ip->flags & BTRFS_INODE_APPEND)
- inode->i_flags |= S_APPEND;
- if (ip->flags & BTRFS_INODE_NOATIME)
- inode->i_flags |= S_NOATIME;
- if (ip->flags & BTRFS_INODE_DIRSYNC)
- inode->i_flags |= S_DIRSYNC;
-}
-
-/*
- * Inherit flags from the parent inode.
- *
- * Currently only the compression flags and the cow flags are inherited.
- */
-void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
-{
- unsigned int flags;
-
- if (!dir)
- return;
-
- flags = BTRFS_I(dir)->flags;
-
- if (flags & BTRFS_INODE_NOCOMPRESS) {
- BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
- BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
- } else if (flags & BTRFS_INODE_COMPRESS) {
- BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
- BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
- }
-
- if (flags & BTRFS_INODE_NODATACOW)
- BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
-
- btrfs_update_iflags(inode);
-}
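
Only the compression and NODATACOW bits are inherited from the parent directory. A sketch of observing that from userspace, assuming a hypothetical directory on a btrfs mount (the equivalent of chattr +C on the directory, then creating a file inside it):

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/fs.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        /* hypothetical directory on a btrfs mount */
        int dirfd = open("/mnt/btrfs/dir", O_RDONLY | O_DIRECTORY);
        unsigned int flags = 0;

        if (dirfd < 0)
                return 1;
        ioctl(dirfd, FS_IOC_GETFLAGS, &flags);
        flags |= FS_NOCOW_FL;                   /* like chattr +C on the directory */
        ioctl(dirfd, FS_IOC_SETFLAGS, &flags);

        int fd = openat(dirfd, "newfile", O_CREAT | O_RDWR, 0644);
        if (fd < 0)
                return 1;
        flags = 0;
        ioctl(fd, FS_IOC_GETFLAGS, &flags);
        printf("new file NOCOW inherited: %s\n", (flags & FS_NOCOW_FL) ? "yes" : "no");
        close(fd);
        close(dirfd);
        return 0;
}
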
-
-static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
-{
- struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode);
- unsigned int flags = btrfs_flags_to_ioctl(ip->flags);
-
- if (copy_to_user(arg, &flags, sizeof(flags)))
- return -EFAULT;
- return 0;
-}
-
-static int check_flags(unsigned int flags)
-{
- if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
- FS_NOATIME_FL | FS_NODUMP_FL | \
- FS_SYNC_FL | FS_DIRSYNC_FL | \
- FS_NOCOMP_FL | FS_COMPR_FL |
- FS_NOCOW_FL))
- return -EOPNOTSUPP;
-
- if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
- return -EINVAL;
-
- return 0;
-}
-
-static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
-{
- struct inode *inode = file->f_path.dentry->d_inode;
- struct btrfs_inode *ip = BTRFS_I(inode);
- struct btrfs_root *root = ip->root;
- struct btrfs_trans_handle *trans;
- unsigned int flags, oldflags;
- int ret;
- u64 ip_oldflags;
- unsigned int i_oldflags;
-
- if (btrfs_root_readonly(root))
- return -EROFS;
-
- if (copy_from_user(&flags, arg, sizeof(flags)))
- return -EFAULT;
-
- ret = check_flags(flags);
- if (ret)
- return ret;
-
- if (!inode_owner_or_capable(inode))
- return -EACCES;
-
- mutex_lock(&inode->i_mutex);
-
- ip_oldflags = ip->flags;
- i_oldflags = inode->i_flags;
-
- flags = btrfs_mask_flags(inode->i_mode, flags);
- oldflags = btrfs_flags_to_ioctl(ip->flags);
- if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
- if (!capable(CAP_LINUX_IMMUTABLE)) {
- ret = -EPERM;
- goto out_unlock;
- }
- }
-
- ret = mnt_want_write_file(file);
- if (ret)
- goto out_unlock;
-
- if (flags & FS_SYNC_FL)
- ip->flags |= BTRFS_INODE_SYNC;
- else
- ip->flags &= ~BTRFS_INODE_SYNC;
- if (flags & FS_IMMUTABLE_FL)
- ip->flags |= BTRFS_INODE_IMMUTABLE;
- else
- ip->flags &= ~BTRFS_INODE_IMMUTABLE;
- if (flags & FS_APPEND_FL)
- ip->flags |= BTRFS_INODE_APPEND;
- else
- ip->flags &= ~BTRFS_INODE_APPEND;
- if (flags & FS_NODUMP_FL)
- ip->flags |= BTRFS_INODE_NODUMP;
- else
- ip->flags &= ~BTRFS_INODE_NODUMP;
- if (flags & FS_NOATIME_FL)
- ip->flags |= BTRFS_INODE_NOATIME;
- else
- ip->flags &= ~BTRFS_INODE_NOATIME;
- if (flags & FS_DIRSYNC_FL)
- ip->flags |= BTRFS_INODE_DIRSYNC;
- else
- ip->flags &= ~BTRFS_INODE_DIRSYNC;
- if (flags & FS_NOCOW_FL)
- ip->flags |= BTRFS_INODE_NODATACOW;
- else
- ip->flags &= ~BTRFS_INODE_NODATACOW;
-
- /*
- * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
- * flag may be changed automatically if compression code won't make
- * things smaller.
- */
- if (flags & FS_NOCOMP_FL) {
- ip->flags &= ~BTRFS_INODE_COMPRESS;
- ip->flags |= BTRFS_INODE_NOCOMPRESS;
- } else if (flags & FS_COMPR_FL) {
- ip->flags |= BTRFS_INODE_COMPRESS;
- ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
- } else {
- ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
- }
-
- trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto out_drop;
- }
-
- btrfs_update_iflags(inode);
- inode->i_ctime = CURRENT_TIME;
- ret = btrfs_update_inode(trans, root, inode);
-
- btrfs_end_transaction(trans, root);
- out_drop:
- if (ret) {
- ip->flags = ip_oldflags;
- inode->i_flags = i_oldflags;
- }
-
- mnt_drop_write_file(file);
- out_unlock:
- mutex_unlock(&inode->i_mutex);
- return ret;
-}
-
-static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
-{
- struct inode *inode = file->f_path.dentry->d_inode;
-
- return put_user(inode->i_generation, arg);
-}
-
-static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(fdentry(file)->d_sb);
- struct btrfs_device *device;
- struct request_queue *q;
- struct fstrim_range range;
- u64 minlen = ULLONG_MAX;
- u64 num_devices = 0;
- u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
- int ret;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- rcu_read_lock();
- list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
- dev_list) {
- if (!device->bdev)
- continue;
- q = bdev_get_queue(device->bdev);
- if (blk_queue_discard(q)) {
- num_devices++;
- minlen = min((u64)q->limits.discard_granularity,
- minlen);
- }
- }
- rcu_read_unlock();
-
- if (!num_devices)
- return -EOPNOTSUPP;
- if (copy_from_user(&range, arg, sizeof(range)))
- return -EFAULT;
- if (range.start > total_bytes)
- return -EINVAL;
-
- range.len = min(range.len, total_bytes - range.start);
- range.minlen = max(range.minlen, minlen);
- ret = btrfs_trim_fs(fs_info->tree_root, &range);
- if (ret < 0)
- return ret;
-
- if (copy_to_user(arg, &range, sizeof(range)))
- return -EFAULT;
-
- return 0;
-}
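
btrfs_ioctl_fitrim implements the generic FITRIM ioctl; it can be issued on any fd on the filesystem, and the kernel clamps the range and raises minlen to the device discard granularity as shown above before copying the range back. A minimal sketch with a hypothetical mount point (needs CAP_SYS_ADMIN):

#include <fcntl.h>
#include <linux/fs.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        /* hypothetical btrfs mount point */
        int fd = open("/mnt/btrfs", O_RDONLY);
        struct fstrim_range range;

        if (fd < 0)
                return 1;
        memset(&range, 0, sizeof(range));
        range.len = (__u64)-1;  /* trim the whole filesystem */
        range.minlen = 0;       /* the kernel raises this to the discard granularity */
        if (ioctl(fd, FITRIM, &range) == 0)
                printf("FITRIM done, range.len is now %llu\n",
                       (unsigned long long)range.len);
        close(fd);
        return 0;
}
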
-
-static noinline int create_subvol(struct btrfs_root *root,
- struct dentry *dentry,
- char *name, int namelen,
- u64 *async_transid)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_key key;
- struct btrfs_root_item root_item;
- struct btrfs_inode_item *inode_item;
- struct extent_buffer *leaf;
- struct btrfs_root *new_root;
- struct dentry *parent = dentry->d_parent;
- struct inode *dir;
- int ret;
- int err;
- u64 objectid;
- u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
- u64 index = 0;
-
- ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
- if (ret)
- return ret;
-
- dir = parent->d_inode;
-
- /*
- * 1 - inode item
- * 2 - refs
- * 1 - root item
- * 2 - dir items
- */
- trans = btrfs_start_transaction(root, 6);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
- 0, objectid, NULL, 0, 0, 0, 0);
- if (IS_ERR(leaf)) {
- ret = PTR_ERR(leaf);
- goto fail;
- }
-
- memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
- btrfs_set_header_bytenr(leaf, leaf->start);
- btrfs_set_header_generation(leaf, trans->transid);
- btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(leaf, objectid);
-
- write_extent_buffer(leaf, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(leaf),
- BTRFS_FSID_SIZE);
- write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
- BTRFS_UUID_SIZE);
- btrfs_mark_buffer_dirty(leaf);
-
- inode_item = &root_item.inode;
- memset(inode_item, 0, sizeof(*inode_item));
- inode_item->generation = cpu_to_le64(1);
- inode_item->size = cpu_to_le64(3);
- inode_item->nlink = cpu_to_le32(1);
- inode_item->nbytes = cpu_to_le64(root->leafsize);
- inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
-
- root_item.flags = 0;
- root_item.byte_limit = 0;
- inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT);
-
- btrfs_set_root_bytenr(&root_item, leaf->start);
- btrfs_set_root_generation(&root_item, trans->transid);
- btrfs_set_root_level(&root_item, 0);
- btrfs_set_root_refs(&root_item, 1);
- btrfs_set_root_used(&root_item, leaf->len);
- btrfs_set_root_last_snapshot(&root_item, 0);
-
- memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
- root_item.drop_level = 0;
-
- btrfs_tree_unlock(leaf);
- free_extent_buffer(leaf);
- leaf = NULL;
-
- btrfs_set_root_dirid(&root_item, new_dirid);
-
- key.objectid = objectid;
- key.offset = 0;
- btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
- ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
- &root_item);
- if (ret)
- goto fail;
-
- key.offset = (u64)-1;
- new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
- if (IS_ERR(new_root)) {
- btrfs_abort_transaction(trans, root, PTR_ERR(new_root));
- ret = PTR_ERR(new_root);
- goto fail;
- }
-
- btrfs_record_root_in_trans(trans, new_root);
-
- ret = btrfs_create_subvol_root(trans, new_root, new_dirid);
- if (ret) {
- /* We potentially lose an unused inode item here */
- btrfs_abort_transaction(trans, root, ret);
- goto fail;
- }
-
- /*
- * insert the directory item
- */
- ret = btrfs_set_inode_index(dir, &index);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto fail;
- }
-
- ret = btrfs_insert_dir_item(trans, root,
- name, namelen, dir, &key,
- BTRFS_FT_DIR, index);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- goto fail;
- }
-
- btrfs_i_size_write(dir, dir->i_size + namelen * 2);
- ret = btrfs_update_inode(trans, root, dir);
- BUG_ON(ret);
-
- ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
- objectid, root->root_key.objectid,
- btrfs_ino(dir), index, name, namelen);
-
- BUG_ON(ret);
-
- d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
-fail:
- if (async_transid) {
- *async_transid = trans->transid;
- err = btrfs_commit_transaction_async(trans, root, 1);
- } else {
- err = btrfs_commit_transaction(trans, root);
- }
- if (err && !ret)
- ret = err;
- return ret;
-}
-
-static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
- char *name, int namelen, u64 *async_transid,
- bool readonly)
-{
- struct inode *inode;
- struct btrfs_pending_snapshot *pending_snapshot;
- struct btrfs_trans_handle *trans;
- int ret;
-
- if (!root->ref_cows)
- return -EINVAL;
-
- pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
- if (!pending_snapshot)
- return -ENOMEM;
-
- btrfs_init_block_rsv(&pending_snapshot->block_rsv);
- pending_snapshot->dentry = dentry;
- pending_snapshot->root = root;
- pending_snapshot->readonly = readonly;
-
- trans = btrfs_start_transaction(root->fs_info->extent_root, 5);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto fail;
- }
-
- ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
- BUG_ON(ret);
-
- spin_lock(&root->fs_info->trans_lock);
- list_add(&pending_snapshot->list,
- &trans->transaction->pending_snapshots);
- spin_unlock(&root->fs_info->trans_lock);
- if (async_transid) {
- *async_transid = trans->transid;
- ret = btrfs_commit_transaction_async(trans,
- root->fs_info->extent_root, 1);
- } else {
- ret = btrfs_commit_transaction(trans,
- root->fs_info->extent_root);
- }
- BUG_ON(ret);
-
- ret = pending_snapshot->error;
- if (ret)
- goto fail;
-
- ret = btrfs_orphan_cleanup(pending_snapshot->snap);
- if (ret)
- goto fail;
-
- inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
- if (IS_ERR(inode)) {
- ret = PTR_ERR(inode);
- goto fail;
- }
- BUG_ON(!inode);
- d_instantiate(dentry, inode);
- ret = 0;
-fail:
- kfree(pending_snapshot);
- return ret;
-}
-
-/* copy of check_sticky in fs/namei.c()
-* It's inline, so penalty for filesystems that don't use sticky bit is
-* minimal.
-*/
-static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
-{
- uid_t fsuid = current_fsuid();
-
- if (!(dir->i_mode & S_ISVTX))
- return 0;
- if (inode->i_uid == fsuid)
- return 0;
- if (dir->i_uid == fsuid)
- return 0;
- return !capable(CAP_FOWNER);
-}
-
-/* copy of may_delete in fs/namei.c()
- * Check whether we can remove a link victim from directory dir, check
- * whether the type of victim is right.
- * 1. We can't do it if dir is read-only (done in permission())
- * 2. We should have write and exec permissions on dir
- * 3. We can't remove anything from append-only dir
- * 4. We can't do anything with immutable dir (done in permission())
- * 5. If the sticky bit on dir is set we should either
- * a. be owner of dir, or
- * b. be owner of victim, or
- * c. have CAP_FOWNER capability
- * 6. If the victim is append-only or immutable we can't do anything with
- * links pointing to it.
- * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
- * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
- * 9. We can't remove a root or mountpoint.
- * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
- * nfs_async_unlink().
- */
-
-static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
-{
- int error;
-
- if (!victim->d_inode)
- return -ENOENT;
-
- BUG_ON(victim->d_parent->d_inode != dir);
- audit_inode_child(victim, dir);
-
- error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
- if (error)
- return error;
- if (IS_APPEND(dir))
- return -EPERM;
- if (btrfs_check_sticky(dir, victim->d_inode)||
- IS_APPEND(victim->d_inode)||
- IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
- return -EPERM;
- if (isdir) {
- if (!S_ISDIR(victim->d_inode->i_mode))
- return -ENOTDIR;
- if (IS_ROOT(victim))
- return -EBUSY;
- } else if (S_ISDIR(victim->d_inode->i_mode))
- return -EISDIR;
- if (IS_DEADDIR(dir))
- return -ENOENT;
- if (victim->d_flags & DCACHE_NFSFS_RENAMED)
- return -EBUSY;
- return 0;
-}
-
-/* copy of may_create in fs/namei.c() */
-static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
-{
- if (child->d_inode)
- return -EEXIST;
- if (IS_DEADDIR(dir))
- return -ENOENT;
- return inode_permission(dir, MAY_WRITE | MAY_EXEC);
-}
-
-/*
- * Create a new subvolume below @parent. This is largely modeled after
- * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
- * inside this filesystem so it's quite a bit simpler.
- */
-static noinline int btrfs_mksubvol(struct path *parent,
- char *name, int namelen,
- struct btrfs_root *snap_src,
- u64 *async_transid, bool readonly)
-{
- struct inode *dir = parent->dentry->d_inode;
- struct dentry *dentry;
- int error;
-
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
-
- dentry = lookup_one_len(name, parent->dentry, namelen);
- error = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- goto out_unlock;
-
- error = -EEXIST;
- if (dentry->d_inode)
- goto out_dput;
-
- error = mnt_want_write(parent->mnt);
- if (error)
- goto out_dput;
-
- error = btrfs_may_create(dir, dentry);
- if (error)
- goto out_drop_write;
-
- down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
-
- if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
- goto out_up_read;
-
- if (snap_src) {
- error = create_snapshot(snap_src, dentry,
- name, namelen, async_transid, readonly);
- } else {
- error = create_subvol(BTRFS_I(dir)->root, dentry,
- name, namelen, async_transid);
- }
- if (!error)
- fsnotify_mkdir(dir, dentry);
-out_up_read:
- up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
-out_drop_write:
- mnt_drop_write(parent->mnt);
-out_dput:
- dput(dentry);
-out_unlock:
- mutex_unlock(&dir->i_mutex);
- return error;
-}
-
-/*
- * When we're defragging a range, we don't want to kick it off again
- * if it is really just waiting for delalloc to send it down.
- * If we find a nice big extent or delalloc range for the bytes in the
- * file you want to defrag, we return 0 to let you know to skip this
- * part of the file
- */
-static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh)
-{
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- struct extent_map *em = NULL;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- u64 end;
-
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
- read_unlock(&em_tree->lock);
-
- if (em) {
- end = extent_map_end(em);
- free_extent_map(em);
- if (end - offset > thresh)
- return 0;
- }
- /* if we already have a nice delalloc here, just stop */
- thresh /= 2;
- end = count_range_bits(io_tree, &offset, offset + thresh,
- thresh, EXTENT_DELALLOC, 1);
- if (end >= thresh)
- return 0;
- return 1;
-}
-
-/*
- * helper function to walk through a file and find extents
- * newer than a specific transid, and smaller than thresh.
- *
- * This is used by the defragging code to find new and small
- * extents
- */
-static int find_new_extents(struct btrfs_root *root,
- struct inode *inode, u64 newer_than,
- u64 *off, int thresh)
-{
- struct btrfs_path *path;
- struct btrfs_key min_key;
- struct btrfs_key max_key;
- struct extent_buffer *leaf;
- struct btrfs_file_extent_item *extent;
- int type;
- int ret;
- u64 ino = btrfs_ino(inode);
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- min_key.objectid = ino;
- min_key.type = BTRFS_EXTENT_DATA_KEY;
- min_key.offset = *off;
-
- max_key.objectid = ino;
- max_key.type = (u8)-1;
- max_key.offset = (u64)-1;
-
- path->keep_locks = 1;
-
- while(1) {
- ret = btrfs_search_forward(root, &min_key, &max_key,
- path, 0, newer_than);
- if (ret != 0)
- goto none;
- if (min_key.objectid != ino)
- goto none;
- if (min_key.type != BTRFS_EXTENT_DATA_KEY)
- goto none;
-
- leaf = path->nodes[0];
- extent = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
-
- type = btrfs_file_extent_type(leaf, extent);
- if (type == BTRFS_FILE_EXTENT_REG &&
- btrfs_file_extent_num_bytes(leaf, extent) < thresh &&
- check_defrag_in_cache(inode, min_key.offset, thresh)) {
- *off = min_key.offset;
- btrfs_free_path(path);
- return 0;
- }
-
- if (min_key.offset == (u64)-1)
- goto none;
-
- min_key.offset++;
- btrfs_release_path(path);
- }
-none:
- btrfs_free_path(path);
- return -ENOENT;
-}
-
-/*
- * Validity check of prev em and next em:
- * 1) no prev/next em
- * 2) prev/next em is a hole/inline extent
- */
-static int check_adjacent_extents(struct inode *inode, struct extent_map *em)
-{
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- struct extent_map *prev = NULL, *next = NULL;
- int ret = 0;
-
- read_lock(&em_tree->lock);
- prev = lookup_extent_mapping(em_tree, em->start - 1, (u64)-1);
- next = lookup_extent_mapping(em_tree, em->start + em->len, (u64)-1);
- read_unlock(&em_tree->lock);
-
- if ((!prev || prev->block_start >= EXTENT_MAP_LAST_BYTE) &&
- (!next || next->block_start >= EXTENT_MAP_LAST_BYTE))
- ret = 1;
- free_extent_map(prev);
- free_extent_map(next);
-
- return ret;
-}
-
-static int should_defrag_range(struct inode *inode, u64 start, u64 len,
- int thresh, u64 *last_len, u64 *skip,
- u64 *defrag_end)
-{
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- struct extent_map *em = NULL;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- int ret = 1;
-
- /*
- * make sure that once we start defragging an extent, we keep on
- * defragging it
- */
- if (start < *defrag_end)
- return 1;
-
- *skip = 0;
-
- /*
- * hopefully we have this extent in the tree already, try without
- * the full extent lock
- */
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, start, len);
- read_unlock(&em_tree->lock);
-
- if (!em) {
- /* get the big lock and read metadata off disk */
- lock_extent(io_tree, start, start + len - 1);
- em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
- unlock_extent(io_tree, start, start + len - 1);
-
- if (IS_ERR(em))
- return 0;
- }
-
- /* this will cover holes, and inline extents */
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- ret = 0;
- goto out;
- }
-
- /* If we have nothing to merge with us, just skip. */
- if (check_adjacent_extents(inode, em)) {
- ret = 0;
- goto out;
- }
-
- /*
- * we hit a real extent, if it is big don't bother defragging it again
- */
- if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh)
- ret = 0;
-
-out:
- /*
- * last_len ends up being a counter of how many bytes we've defragged.
- * every time we choose not to defrag an extent, we reset *last_len
- * so that the next tiny extent will force a defrag.
- *
- * The end result of this is that tiny extents before a single big
- * extent will force at least part of that big extent to be defragged.
- */
- if (ret) {
- *defrag_end = extent_map_end(em);
- } else {
- *last_len = 0;
- *skip = extent_map_end(em);
- *defrag_end = 0;
- }
-
- free_extent_map(em);
- return ret;
-}
-
-/*
- * it doesn't do much good to defrag one or two pages
- * at a time. This pulls in a nice chunk of pages
- * to COW and defrag.
- *
- * It also makes sure the delalloc code has enough
- * dirty data to avoid making new small extents as part
- * of the defrag
- *
- * It's a good idea to start RA on this range
- * before calling this.
- */
-static int cluster_pages_for_defrag(struct inode *inode,
- struct page **pages,
- unsigned long start_index,
- int num_pages)
-{
- unsigned long file_end;
- u64 isize = i_size_read(inode);
- u64 page_start;
- u64 page_end;
- u64 page_cnt;
- int ret;
- int i;
- int i_done;
- struct btrfs_ordered_extent *ordered;
- struct extent_state *cached_state = NULL;
- struct extent_io_tree *tree;
- gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
-
- file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
- if (!isize || start_index > file_end)
- return 0;
-
- page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
-
- ret = btrfs_delalloc_reserve_space(inode,
- page_cnt << PAGE_CACHE_SHIFT);
- if (ret)
- return ret;
- i_done = 0;
- tree = &BTRFS_I(inode)->io_tree;
-
- /* step one, lock all the pages */
- for (i = 0; i < page_cnt; i++) {
- struct page *page;
-again:
- page = find_or_create_page(inode->i_mapping,
- start_index + i, mask);
- if (!page)
- break;
-
- page_start = page_offset(page);
- page_end = page_start + PAGE_CACHE_SIZE - 1;
- while (1) {
- lock_extent(tree, page_start, page_end);
- ordered = btrfs_lookup_ordered_extent(inode,
- page_start);
- unlock_extent(tree, page_start, page_end);
- if (!ordered)
- break;
-
- unlock_page(page);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- lock_page(page);
- /*
- * we unlocked the page above, so we need to check if
- * it was released or not.
- */
- if (page->mapping != inode->i_mapping) {
- unlock_page(page);
- page_cache_release(page);
- goto again;
- }
- }
-
- if (!PageUptodate(page)) {
- btrfs_readpage(NULL, page);
- lock_page(page);
- if (!PageUptodate(page)) {
- unlock_page(page);
- page_cache_release(page);
- ret = -EIO;
- break;
- }
- }
-
- if (page->mapping != inode->i_mapping) {
- unlock_page(page);
- page_cache_release(page);
- goto again;
- }
-
- pages[i] = page;
- i_done++;
- }
- if (!i_done || ret)
- goto out;
-
- if (!(inode->i_sb->s_flags & MS_ACTIVE))
- goto out;
-
- /*
- * so now we have a nice long stream of locked
- * and up-to-date pages, let's wait on them
- */
- for (i = 0; i < i_done; i++)
- wait_on_page_writeback(pages[i]);
-
- page_start = page_offset(pages[0]);
- page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
-
- lock_extent_bits(&BTRFS_I(inode)->io_tree,
- page_start, page_end - 1, 0, &cached_state);
- clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
- page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
- GFP_NOFS);
-
- if (i_done != page_cnt) {
- spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents++;
- spin_unlock(&BTRFS_I(inode)->lock);
- btrfs_delalloc_release_space(inode,
- (page_cnt - i_done) << PAGE_CACHE_SHIFT);
- }
-
-
- btrfs_set_extent_delalloc(inode, page_start, page_end - 1,
- &cached_state);
-
- unlock_extent_cached(&BTRFS_I(inode)->io_tree,
- page_start, page_end - 1, &cached_state,
- GFP_NOFS);
-
- for (i = 0; i < i_done; i++) {
- clear_page_dirty_for_io(pages[i]);
- ClearPageChecked(pages[i]);
- set_page_extent_mapped(pages[i]);
- set_page_dirty(pages[i]);
- unlock_page(pages[i]);
- page_cache_release(pages[i]);
- }
- return i_done;
-out:
- for (i = 0; i < i_done; i++) {
- unlock_page(pages[i]);
- page_cache_release(pages[i]);
- }
- btrfs_delalloc_release_space(inode, page_cnt << PAGE_CACHE_SHIFT);
- return ret;
-
-}
-
-int btrfs_defrag_file(struct inode *inode, struct file *file,
- struct btrfs_ioctl_defrag_range_args *range,
- u64 newer_than, unsigned long max_to_defrag)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_super_block *disk_super;
- struct file_ra_state *ra = NULL;
- unsigned long last_index;
- u64 isize = i_size_read(inode);
- u64 features;
- u64 last_len = 0;
- u64 skip = 0;
- u64 defrag_end = 0;
- u64 newer_off = range->start;
- unsigned long i;
- unsigned long ra_index = 0;
- int ret;
- int defrag_count = 0;
- int compress_type = BTRFS_COMPRESS_ZLIB;
- int extent_thresh = range->extent_thresh;
- int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
- int cluster = max_cluster;
- u64 new_align = ~((u64)128 * 1024 - 1);
- struct page **pages = NULL;
-
- if (extent_thresh == 0)
- extent_thresh = 256 * 1024;
-
- if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
- if (range->compress_type > BTRFS_COMPRESS_TYPES)
- return -EINVAL;
- if (range->compress_type)
- compress_type = range->compress_type;
- }
-
- if (isize == 0)
- return 0;
-
- /*
- * if we were not given a file, allocate a readahead
- * context
- */
- if (!file) {
- ra = kzalloc(sizeof(*ra), GFP_NOFS);
- if (!ra)
- return -ENOMEM;
- file_ra_state_init(ra, inode->i_mapping);
- } else {
- ra = &file->f_ra;
- }
-
- pages = kmalloc(sizeof(struct page *) * max_cluster,
- GFP_NOFS);
- if (!pages) {
- ret = -ENOMEM;
- goto out_ra;
- }
-
- /* find the last page to defrag */
- if (range->start + range->len > range->start) {
- last_index = min_t(u64, isize - 1,
- range->start + range->len - 1) >> PAGE_CACHE_SHIFT;
- } else {
- last_index = (isize - 1) >> PAGE_CACHE_SHIFT;
- }
-
- if (newer_than) {
- ret = find_new_extents(root, inode, newer_than,
- &newer_off, 64 * 1024);
- if (!ret) {
- range->start = newer_off;
- /*
- * we always align our defrag to help keep
- * the extents in the file evenly spaced
- */
- i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
- } else
- goto out_ra;
- } else {
- i = range->start >> PAGE_CACHE_SHIFT;
- }
- if (!max_to_defrag)
- max_to_defrag = last_index + 1;
-
- /*
- * make writeback start from i, so the defrag range can be
- * written sequentially.
- */
- if (i < inode->i_mapping->writeback_index)
- inode->i_mapping->writeback_index = i;
-
- while (i <= last_index && defrag_count < max_to_defrag &&
- (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT)) {
- /*
- * make sure we stop running if someone unmounts
- * the FS
- */
- if (!(inode->i_sb->s_flags & MS_ACTIVE))
- break;
-
- if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
- PAGE_CACHE_SIZE, extent_thresh,
- &last_len, &skip, &defrag_end)) {
- unsigned long next;
- /*
- * the should_defrag function tells us how much to skip;
- * bump our counter by the suggested amount
- */
- next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- i = max(i + 1, next);
- continue;
- }
-
- if (!newer_than) {
- cluster = (PAGE_CACHE_ALIGN(defrag_end) >>
- PAGE_CACHE_SHIFT) - i;
- cluster = min(cluster, max_cluster);
- } else {
- cluster = max_cluster;
- }
-
- if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
- BTRFS_I(inode)->force_compress = compress_type;
-
- if (i + cluster > ra_index) {
- ra_index = max(i, ra_index);
- btrfs_force_ra(inode->i_mapping, ra, file, ra_index,
- cluster);
- ra_index += max_cluster;
- }
-
- mutex_lock(&inode->i_mutex);
- ret = cluster_pages_for_defrag(inode, pages, i, cluster);
- if (ret < 0) {
- mutex_unlock(&inode->i_mutex);
- goto out_ra;
- }
-
- defrag_count += ret;
- balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret);
- mutex_unlock(&inode->i_mutex);
-
- if (newer_than) {
- if (newer_off == (u64)-1)
- break;
-
- if (ret > 0)
- i += ret;
-
- newer_off = max(newer_off + 1,
- (u64)i << PAGE_CACHE_SHIFT);
-
- ret = find_new_extents(root, inode,
- newer_than, &newer_off,
- 64 * 1024);
- if (!ret) {
- range->start = newer_off;
- i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
- } else {
- break;
- }
- } else {
- if (ret > 0) {
- i += ret;
- last_len += ret << PAGE_CACHE_SHIFT;
- } else {
- i++;
- last_len = 0;
- }
- }
- }
-
- if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO))
- filemap_flush(inode->i_mapping);
-
- if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
- /* the filemap_flush will queue IO into the worker threads, but
- * we have to make sure the IO is actually started and that
- * ordered extents get created before we return
- */
- atomic_inc(&root->fs_info->async_submit_draining);
- while (atomic_read(&root->fs_info->nr_async_submits) ||
- atomic_read(&root->fs_info->async_delalloc_pages)) {
- wait_event(root->fs_info->async_submit_wait,
- (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
- atomic_read(&root->fs_info->async_delalloc_pages) == 0));
- }
- atomic_dec(&root->fs_info->async_submit_draining);
-
- mutex_lock(&inode->i_mutex);
- BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
- mutex_unlock(&inode->i_mutex);
- }
-
- disk_super = root->fs_info->super_copy;
- features = btrfs_super_incompat_flags(disk_super);
- if (range->compress_type == BTRFS_COMPRESS_LZO) {
- features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
- btrfs_set_super_incompat_flags(disk_super, features);
- }
-
- ret = defrag_count;
-
-out_ra:
- if (!file)
- kfree(ra);
- kfree(pages);
- return ret;
-}
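
btrfs_defrag_file is driven from userspace through BTRFS_IOC_DEFRAG_RANGE. A minimal sketch, assuming the modern <linux/btrfs.h> UAPI header (3.4-era tools shipped their own copy of these definitions) and a hypothetical file path:

#include <fcntl.h>
#include <linux/btrfs.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        /* hypothetical file on a btrfs mount */
        int fd = open("/mnt/btrfs/fragmented", O_RDWR);
        struct btrfs_ioctl_defrag_range_args args;

        if (fd < 0)
                return 1;
        memset(&args, 0, sizeof(args));
        args.len = (__u64)-1;                           /* whole file */
        args.extent_thresh = 256 * 1024;                /* same default the kernel code uses */
        args.flags = BTRFS_DEFRAG_RANGE_START_IO;       /* flush once the defrag is queued */
        if (ioctl(fd, BTRFS_IOC_DEFRAG_RANGE, &args))
                perror("BTRFS_IOC_DEFRAG_RANGE");
        close(fd);
        return 0;
}
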
-
-static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
- void __user *arg)
-{
- u64 new_size;
- u64 old_size;
- u64 devid = 1;
- struct btrfs_ioctl_vol_args *vol_args;
- struct btrfs_trans_handle *trans;
- struct btrfs_device *device = NULL;
- char *sizestr;
- char *devstr = NULL;
- int ret = 0;
- int mod = 0;
-
- if (root->fs_info->sb->s_flags & MS_RDONLY)
- return -EROFS;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- mutex_lock(&root->fs_info->volume_mutex);
- if (root->fs_info->balance_ctl) {
- printk(KERN_INFO "btrfs: balance in progress\n");
- ret = -EINVAL;
- goto out;
- }
-
- vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args)) {
- ret = PTR_ERR(vol_args);
- goto out;
- }
-
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-
- sizestr = vol_args->name;
- devstr = strchr(sizestr, ':');
- if (devstr) {
- char *end;
- sizestr = devstr + 1;
- *devstr = '\0';
- devstr = vol_args->name;
- devid = simple_strtoull(devstr, &end, 10);
- printk(KERN_INFO "btrfs: resizing devid %llu\n",
- (unsigned long long)devid);
- }
- device = btrfs_find_device(root, devid, NULL, NULL);
- if (!device) {
- printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
- (unsigned long long)devid);
- ret = -EINVAL;
- goto out_free;
- }
- if (!strcmp(sizestr, "max"))
- new_size = device->bdev->bd_inode->i_size;
- else {
- if (sizestr[0] == '-') {
- mod = -1;
- sizestr++;
- } else if (sizestr[0] == '+') {
- mod = 1;
- sizestr++;
- }
- new_size = memparse(sizestr, NULL);
- if (new_size == 0) {
- ret = -EINVAL;
- goto out_free;
- }
- }
-
- old_size = device->total_bytes;
-
- if (mod < 0) {
- if (new_size > old_size) {
- ret = -EINVAL;
- goto out_free;
- }
- new_size = old_size - new_size;
- } else if (mod > 0) {
- new_size = old_size + new_size;
- }
-
- if (new_size < 256 * 1024 * 1024) {
- ret = -EINVAL;
- goto out_free;
- }
- if (new_size > device->bdev->bd_inode->i_size) {
- ret = -EFBIG;
- goto out_free;
- }
-
- do_div(new_size, root->sectorsize);
- new_size *= root->sectorsize;
-
- printk(KERN_INFO "btrfs: new size for %s is %llu\n",
- device->name, (unsigned long long)new_size);
-
- if (new_size > old_size) {
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto out_free;
- }
- ret = btrfs_grow_device(trans, device, new_size);
- btrfs_commit_transaction(trans, root);
- } else if (new_size < old_size) {
- ret = btrfs_shrink_device(device, new_size);
- }
-
-out_free:
- kfree(vol_args);
-out:
- mutex_unlock(&root->fs_info->volume_mutex);
- return ret;
-}
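
The resize ioctl takes its argument as a string of the form "[devid:][+/-]size" or "max", which is what the sizestr/devstr parsing above handles. A minimal sketch using a hypothetical mount point and the modern <linux/btrfs.h> header (needs CAP_SYS_ADMIN):

#include <fcntl.h>
#include <linux/btrfs.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        /* hypothetical btrfs mount point */
        int fd = open("/mnt/btrfs", O_RDONLY);
        struct btrfs_ioctl_vol_args args;

        if (fd < 0)
                return 1;
        memset(&args, 0, sizeof(args));
        strcpy(args.name, "1:+1g");     /* grow devid 1 by 1 GiB; "max" uses the whole device */
        if (ioctl(fd, BTRFS_IOC_RESIZE, &args))
                perror("BTRFS_IOC_RESIZE");
        close(fd);
        return 0;
}
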
-
-static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
- char *name,
- unsigned long fd,
- int subvol,
- u64 *transid,
- bool readonly)
-{
- struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
- struct file *src_file;
- int namelen;
- int ret = 0;
-
- if (root->fs_info->sb->s_flags & MS_RDONLY)
- return -EROFS;
-
- namelen = strlen(name);
- if (strchr(name, '/')) {
- ret = -EINVAL;
- goto out;
- }
-
- if (name[0] == '.' &&
- (namelen == 1 || (name[1] == '.' && namelen == 2))) {
- ret = -EEXIST;
- goto out;
- }
-
- if (subvol) {
- ret = btrfs_mksubvol(&file->f_path, name, namelen,
- NULL, transid, readonly);
- } else {
- struct inode *src_inode;
- src_file = fget(fd);
- if (!src_file) {
- ret = -EINVAL;
- goto out;
- }
-
- src_inode = src_file->f_path.dentry->d_inode;
- if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) {
- printk(KERN_INFO "btrfs: Snapshot src from "
- "another FS\n");
- ret = -EINVAL;
- fput(src_file);
- goto out;
- }
- ret = btrfs_mksubvol(&file->f_path, name, namelen,
- BTRFS_I(src_inode)->root,
- transid, readonly);
- fput(src_file);
- }
-out:
- return ret;
-}
-
-static noinline int btrfs_ioctl_snap_create(struct file *file,
- void __user *arg, int subvol)
-{
- struct btrfs_ioctl_vol_args *vol_args;
- int ret;
-
- vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args))
- return PTR_ERR(vol_args);
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-
- ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
- vol_args->fd, subvol,
- NULL, false);
-
- kfree(vol_args);
- return ret;
-}
-
-static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
- void __user *arg, int subvol)
-{
- struct btrfs_ioctl_vol_args_v2 *vol_args;
- int ret;
- u64 transid = 0;
- u64 *ptr = NULL;
- bool readonly = false;
-
- vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args))
- return PTR_ERR(vol_args);
- vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
-
- if (vol_args->flags &
- ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) {
- ret = -EOPNOTSUPP;
- goto out;
- }
-
- if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC)
- ptr = &transid;
- if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
- readonly = true;
-
- ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
- vol_args->fd, subvol,
- ptr, readonly);
-
- if (ret == 0 && ptr &&
- copy_to_user(arg +
- offsetof(struct btrfs_ioctl_vol_args_v2,
- transid), ptr, sizeof(*ptr)))
- ret = -EFAULT;
-out:
- kfree(vol_args);
- return ret;
-}
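
Snapshot creation is requested on the destination directory's fd, with the source subvolume passed as args.fd; BTRFS_SUBVOL_RDONLY maps onto the readonly flag threaded through create_snapshot above. A minimal sketch with hypothetical paths and the modern <linux/btrfs.h> header:

#include <fcntl.h>
#include <linux/btrfs.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        /* hypothetical paths: snapshot /mnt/btrfs/subvol into /mnt/btrfs/snap */
        int destdir = open("/mnt/btrfs", O_RDONLY);
        int srcfd = open("/mnt/btrfs/subvol", O_RDONLY);
        struct btrfs_ioctl_vol_args_v2 args;

        if (destdir < 0 || srcfd < 0)
                return 1;
        memset(&args, 0, sizeof(args));
        args.fd = srcfd;                        /* subvolume to snapshot */
        args.flags = BTRFS_SUBVOL_RDONLY;       /* read-only snapshot */
        strcpy(args.name, "snap");
        if (ioctl(destdir, BTRFS_IOC_SNAP_CREATE_V2, &args))
                perror("BTRFS_IOC_SNAP_CREATE_V2");
        return 0;
}
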
-
-static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
- void __user *arg)
-{
- struct inode *inode = fdentry(file)->d_inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret = 0;
- u64 flags = 0;
-
- if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
- return -EINVAL;
-
- down_read(&root->fs_info->subvol_sem);
- if (btrfs_root_readonly(root))
- flags |= BTRFS_SUBVOL_RDONLY;
- up_read(&root->fs_info->subvol_sem);
-
- if (copy_to_user(arg, &flags, sizeof(flags)))
- ret = -EFAULT;
-
- return ret;
-}
-
-static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
- void __user *arg)
-{
- struct inode *inode = fdentry(file)->d_inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
- u64 root_flags;
- u64 flags;
- int ret = 0;
-
- if (root->fs_info->sb->s_flags & MS_RDONLY)
- return -EROFS;
-
- if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
- return -EINVAL;
-
- if (copy_from_user(&flags, arg, sizeof(flags)))
- return -EFAULT;
-
- if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
- return -EINVAL;
-
- if (flags & ~BTRFS_SUBVOL_RDONLY)
- return -EOPNOTSUPP;
-
- if (!inode_owner_or_capable(inode))
- return -EACCES;
-
- down_write(&root->fs_info->subvol_sem);
-
- /* nothing to do */
- if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
- goto out;
-
- root_flags = btrfs_root_flags(&root->root_item);
- if (flags & BTRFS_SUBVOL_RDONLY)
- btrfs_set_root_flags(&root->root_item,
- root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
- else
- btrfs_set_root_flags(&root->root_item,
- root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
-
- trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto out_reset;
- }
-
- ret = btrfs_update_root(trans, root->fs_info->tree_root,
- &root->root_key, &root->root_item);
-
- btrfs_commit_transaction(trans, root);
-out_reset:
- if (ret)
- btrfs_set_root_flags(&root->root_item, root_flags);
-out:
- up_write(&root->fs_info->subvol_sem);
- return ret;
-}
-
-/*
- * helper to check if the subvolume references other subvolumes
- */
-static noinline int may_destroy_subvol(struct btrfs_root *root)
-{
- struct btrfs_path *path;
- struct btrfs_key key;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = root->root_key.objectid;
- key.type = BTRFS_ROOT_REF_KEY;
- key.offset = (u64)-1;
-
- ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
- &key, path, 0, 0);
- if (ret < 0)
- goto out;
- BUG_ON(ret == 0);
-
- ret = 0;
- if (path->slots[0] > 0) {
- path->slots[0]--;
- btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
- if (key.objectid == root->root_key.objectid &&
- key.type == BTRFS_ROOT_REF_KEY)
- ret = -ENOTEMPTY;
- }
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-static noinline int key_in_sk(struct btrfs_key *key,
- struct btrfs_ioctl_search_key *sk)
-{
- struct btrfs_key test;
- int ret;
-
- test.objectid = sk->min_objectid;
- test.type = sk->min_type;
- test.offset = sk->min_offset;
-
- ret = btrfs_comp_cpu_keys(key, &test);
- if (ret < 0)
- return 0;
-
- test.objectid = sk->max_objectid;
- test.type = sk->max_type;
- test.offset = sk->max_offset;
-
- ret = btrfs_comp_cpu_keys(key, &test);
- if (ret > 0)
- return 0;
- return 1;
-}
-
-static noinline int copy_to_sk(struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *key,
- struct btrfs_ioctl_search_key *sk,
- char *buf,
- unsigned long *sk_offset,
- int *num_found)
-{
- u64 found_transid;
- struct extent_buffer *leaf;
- struct btrfs_ioctl_search_header sh;
- unsigned long item_off;
- unsigned long item_len;
- int nritems;
- int i;
- int slot;
- int ret = 0;
-
- leaf = path->nodes[0];
- slot = path->slots[0];
- nritems = btrfs_header_nritems(leaf);
-
- if (btrfs_header_generation(leaf) > sk->max_transid) {
- i = nritems;
- goto advance_key;
- }
- found_transid = btrfs_header_generation(leaf);
-
- for (i = slot; i < nritems; i++) {
- item_off = btrfs_item_ptr_offset(leaf, i);
- item_len = btrfs_item_size_nr(leaf, i);
-
- if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
- item_len = 0;
-
- if (sizeof(sh) + item_len + *sk_offset >
- BTRFS_SEARCH_ARGS_BUFSIZE) {
- ret = 1;
- goto overflow;
- }
-
- btrfs_item_key_to_cpu(leaf, key, i);
- if (!key_in_sk(key, sk))
- continue;
-
- sh.objectid = key->objectid;
- sh.offset = key->offset;
- sh.type = key->type;
- sh.len = item_len;
- sh.transid = found_transid;
-
- /* copy search result header */
- memcpy(buf + *sk_offset, &sh, sizeof(sh));
- *sk_offset += sizeof(sh);
-
- if (item_len) {
- char *p = buf + *sk_offset;
- /* copy the item */
- read_extent_buffer(leaf, p,
- item_off, item_len);
- *sk_offset += item_len;
- }
- (*num_found)++;
-
- if (*num_found >= sk->nr_items)
- break;
- }
-advance_key:
- ret = 0;
- if (key->offset < (u64)-1 && key->offset < sk->max_offset)
- key->offset++;
- else if (key->type < (u8)-1 && key->type < sk->max_type) {
- key->offset = 0;
- key->type++;
- } else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) {
- key->offset = 0;
- key->type = 0;
- key->objectid++;
- } else
- ret = 1;
-overflow:
- return ret;
-}
-
-static noinline int search_ioctl(struct inode *inode,
- struct btrfs_ioctl_search_args *args)
-{
- struct btrfs_root *root;
- struct btrfs_key key;
- struct btrfs_key max_key;
- struct btrfs_path *path;
- struct btrfs_ioctl_search_key *sk = &args->key;
- struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
- int ret;
- int num_found = 0;
- unsigned long sk_offset = 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- if (sk->tree_id == 0) {
- /* search the root of the inode that was passed */
- root = BTRFS_I(inode)->root;
- } else {
- key.objectid = sk->tree_id;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
- root = btrfs_read_fs_root_no_name(info, &key);
- if (IS_ERR(root)) {
- printk(KERN_ERR "could not find root %llu\n",
- sk->tree_id);
- btrfs_free_path(path);
- return -ENOENT;
- }
- }
-
- key.objectid = sk->min_objectid;
- key.type = sk->min_type;
- key.offset = sk->min_offset;
-
- max_key.objectid = sk->max_objectid;
- max_key.type = sk->max_type;
- max_key.offset = sk->max_offset;
-
- path->keep_locks = 1;
-
- while(1) {
- ret = btrfs_search_forward(root, &key, &max_key, path, 0,
- sk->min_transid);
- if (ret != 0) {
- if (ret > 0)
- ret = 0;
- goto err;
- }
- ret = copy_to_sk(root, path, &key, sk, args->buf,
- &sk_offset, &num_found);
- btrfs_release_path(path);
- if (ret || num_found >= sk->nr_items)
- break;
-
- }
- ret = 0;
-err:
- sk->nr_items = num_found;
- btrfs_free_path(path);
- return ret;
-}
-
-static noinline int btrfs_ioctl_tree_search(struct file *file,
- void __user *argp)
-{
- struct btrfs_ioctl_search_args *args;
- struct inode *inode;
- int ret;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- args = memdup_user(argp, sizeof(*args));
- if (IS_ERR(args))
- return PTR_ERR(args);
-
- inode = fdentry(file)->d_inode;
- ret = search_ioctl(inode, args);
- if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
- ret = -EFAULT;
- kfree(args);
- return ret;
-}
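
TREE_SEARCH is the raw window into the b-trees that btrfs-progs builds on. The sketch below lists ROOT_ITEMs in the tree of tree roots, assuming the modern <linux/btrfs.h> and <linux/btrfs_tree.h> UAPI headers and a hypothetical mount point (needs CAP_SYS_ADMIN):

#include <fcntl.h>
#include <linux/btrfs.h>
#include <linux/btrfs_tree.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
        /* hypothetical btrfs mount point */
        int fd = open("/mnt/btrfs", O_RDONLY);
        struct btrfs_ioctl_search_args args;
        struct btrfs_ioctl_search_key *sk = &args.key;

        if (fd < 0)
                return 1;
        memset(&args, 0, sizeof(args));
        sk->tree_id = BTRFS_ROOT_TREE_OBJECTID;         /* search the tree of tree roots */
        sk->min_type = sk->max_type = BTRFS_ROOT_ITEM_KEY;
        sk->max_objectid = (__u64)-1;
        sk->max_offset = (__u64)-1;
        sk->max_transid = (__u64)-1;
        sk->nr_items = 64;
        if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) == 0)
                printf("found %u root items\n", sk->nr_items);
        close(fd);
        return 0;
}
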
-
-/*
- * Search INODE_REFs to identify the path name of the 'dirid' directory
- * in a 'tree_id' tree, and store the path name in 'name'.
- */
-static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
- u64 tree_id, u64 dirid, char *name)
-{
- struct btrfs_root *root;
- struct btrfs_key key;
- char *ptr;
- int ret = -1;
- int slot;
- int len;
- int total_len = 0;
- struct btrfs_inode_ref *iref;
- struct extent_buffer *l;
- struct btrfs_path *path;
-
- if (dirid == BTRFS_FIRST_FREE_OBJECTID) {
-		name[0] = '\0';
- return 0;
- }
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX];
-
- key.objectid = tree_id;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
- root = btrfs_read_fs_root_no_name(info, &key);
- if (IS_ERR(root)) {
- printk(KERN_ERR "could not find root %llu\n", tree_id);
- ret = -ENOENT;
- goto out;
- }
-
- key.objectid = dirid;
- key.type = BTRFS_INODE_REF_KEY;
- key.offset = (u64)-1;
-
-	while (1) {
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
-
- l = path->nodes[0];
- slot = path->slots[0];
- if (ret > 0 && slot > 0)
- slot--;
- btrfs_item_key_to_cpu(l, &key, slot);
-
- if (ret > 0 && (key.objectid != dirid ||
- key.type != BTRFS_INODE_REF_KEY)) {
- ret = -ENOENT;
- goto out;
- }
-
- iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
- len = btrfs_inode_ref_name_len(l, iref);
- ptr -= len + 1;
- total_len += len + 1;
- if (ptr < name)
- goto out;
-
- *(ptr + len) = '/';
-		read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len);
-
- if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
- break;
-
- btrfs_release_path(path);
- key.objectid = key.offset;
- key.offset = (u64)-1;
- dirid = key.objectid;
- }
- if (ptr < name)
- goto out;
- memmove(name, ptr, total_len);
-	name[total_len] = '\0';
- ret = 0;
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-static noinline int btrfs_ioctl_ino_lookup(struct file *file,
- void __user *argp)
-{
- struct btrfs_ioctl_ino_lookup_args *args;
- struct inode *inode;
- int ret;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- args = memdup_user(argp, sizeof(*args));
- if (IS_ERR(args))
- return PTR_ERR(args);
-
- inode = fdentry(file)->d_inode;
-
- if (args->treeid == 0)
- args->treeid = BTRFS_I(inode)->root->root_key.objectid;
-
- ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
- args->treeid, args->objectid,
- args->name);
-
- if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
- ret = -EFAULT;
-
- kfree(args);
- return ret;
-}
-
-static noinline int btrfs_ioctl_snap_destroy(struct file *file,
- void __user *arg)
-{
- struct dentry *parent = fdentry(file);
- struct dentry *dentry;
- struct inode *dir = parent->d_inode;
- struct inode *inode;
- struct btrfs_root *root = BTRFS_I(dir)->root;
- struct btrfs_root *dest = NULL;
- struct btrfs_ioctl_vol_args *vol_args;
- struct btrfs_trans_handle *trans;
- int namelen;
- int ret;
- int err = 0;
-
- vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args))
- return PTR_ERR(vol_args);
-
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- namelen = strlen(vol_args->name);
- if (strchr(vol_args->name, '/') ||
- strncmp(vol_args->name, "..", namelen) == 0) {
- err = -EINVAL;
- goto out;
- }
-
- err = mnt_want_write_file(file);
- if (err)
- goto out;
-
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
- dentry = lookup_one_len(vol_args->name, parent, namelen);
- if (IS_ERR(dentry)) {
- err = PTR_ERR(dentry);
- goto out_unlock_dir;
- }
-
- if (!dentry->d_inode) {
- err = -ENOENT;
- goto out_dput;
- }
-
- inode = dentry->d_inode;
- dest = BTRFS_I(inode)->root;
-	if (!capable(CAP_SYS_ADMIN)) {
- /*
- * Regular user. Only allow this with a special mount
- * option, when the user has write+exec access to the
- * subvol root, and when rmdir(2) would have been
- * allowed.
- *
-		 * Note that this does _not_ check that the subvol is
- * empty or doesn't contain data that we wouldn't
- * otherwise be able to delete.
- *
- * Users who want to delete empty subvols should try
- * rmdir(2).
- */
- err = -EPERM;
- if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
- goto out_dput;
-
- /*
- * Do not allow deletion if the parent dir is the same
- * as the dir to be deleted. That means the ioctl
- * must be called on the dentry referencing the root
- * of the subvol, not a random directory contained
- * within it.
- */
- err = -EINVAL;
- if (root == dest)
- goto out_dput;
-
- err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
- if (err)
- goto out_dput;
-
- /* check if subvolume may be deleted by a non-root user */
- err = btrfs_may_delete(dir, dentry, 1);
- if (err)
- goto out_dput;
- }
-
- if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
- err = -EINVAL;
- goto out_dput;
- }
-
- mutex_lock(&inode->i_mutex);
- err = d_invalidate(dentry);
- if (err)
- goto out_unlock;
-
- down_write(&root->fs_info->subvol_sem);
-
- err = may_destroy_subvol(dest);
- if (err)
- goto out_up_write;
-
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
- goto out_up_write;
- }
- trans->block_rsv = &root->fs_info->global_block_rsv;
-
- ret = btrfs_unlink_subvol(trans, root, dir,
- dest->root_key.objectid,
- dentry->d_name.name,
- dentry->d_name.len);
- if (ret) {
- err = ret;
- btrfs_abort_transaction(trans, root, ret);
- goto out_end_trans;
- }
-
- btrfs_record_root_in_trans(trans, dest);
-
- memset(&dest->root_item.drop_progress, 0,
- sizeof(dest->root_item.drop_progress));
- dest->root_item.drop_level = 0;
- btrfs_set_root_refs(&dest->root_item, 0);
-
- if (!xchg(&dest->orphan_item_inserted, 1)) {
- ret = btrfs_insert_orphan_item(trans,
- root->fs_info->tree_root,
- dest->root_key.objectid);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- err = ret;
- goto out_end_trans;
- }
- }
-out_end_trans:
- ret = btrfs_end_transaction(trans, root);
- if (ret && !err)
- err = ret;
- inode->i_flags |= S_DEAD;
-out_up_write:
- up_write(&root->fs_info->subvol_sem);
-out_unlock:
- mutex_unlock(&inode->i_mutex);
- if (!err) {
- shrink_dcache_sb(root->fs_info->sb);
- btrfs_invalidate_inodes(dest);
- d_delete(dentry);
- }
-out_dput:
- dput(dentry);
-out_unlock_dir:
- mutex_unlock(&dir->i_mutex);
- mnt_drop_write_file(file);
-out:
- kfree(vol_args);
- return err;
-}
-
-static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
-{
- struct inode *inode = fdentry(file)->d_inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_ioctl_defrag_range_args *range;
- int ret;
-
- if (btrfs_root_readonly(root))
- return -EROFS;
-
- ret = mnt_want_write_file(file);
- if (ret)
- return ret;
-
- switch (inode->i_mode & S_IFMT) {
- case S_IFDIR:
- if (!capable(CAP_SYS_ADMIN)) {
- ret = -EPERM;
- goto out;
- }
- ret = btrfs_defrag_root(root, 0);
- if (ret)
- goto out;
- ret = btrfs_defrag_root(root->fs_info->extent_root, 0);
- break;
- case S_IFREG:
- if (!(file->f_mode & FMODE_WRITE)) {
- ret = -EINVAL;
- goto out;
- }
-
- range = kzalloc(sizeof(*range), GFP_KERNEL);
- if (!range) {
- ret = -ENOMEM;
- goto out;
- }
-
- if (argp) {
- if (copy_from_user(range, argp,
- sizeof(*range))) {
- ret = -EFAULT;
- kfree(range);
- goto out;
- }
- /* compression requires us to start the IO */
- if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
- range->flags |= BTRFS_DEFRAG_RANGE_START_IO;
- range->extent_thresh = (u32)-1;
- }
- } else {
- /* the rest are all set to zero by kzalloc */
- range->len = (u64)-1;
- }
- ret = btrfs_defrag_file(fdentry(file)->d_inode, file,
- range, 0, 0);
- if (ret > 0)
- ret = 0;
- kfree(range);
- break;
- default:
- ret = -EINVAL;
- }
-out:
- mnt_drop_write_file(file);
- return ret;
-}
-
-static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
-{
- struct btrfs_ioctl_vol_args *vol_args;
- int ret;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- mutex_lock(&root->fs_info->volume_mutex);
- if (root->fs_info->balance_ctl) {
- printk(KERN_INFO "btrfs: balance in progress\n");
- ret = -EINVAL;
- goto out;
- }
-
- vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args)) {
- ret = PTR_ERR(vol_args);
- goto out;
- }
-
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- ret = btrfs_init_new_device(root, vol_args->name);
-
- kfree(vol_args);
-out:
- mutex_unlock(&root->fs_info->volume_mutex);
- return ret;
-}
-
-static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
-{
- struct btrfs_ioctl_vol_args *vol_args;
- int ret;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (root->fs_info->sb->s_flags & MS_RDONLY)
- return -EROFS;
-
- mutex_lock(&root->fs_info->volume_mutex);
- if (root->fs_info->balance_ctl) {
- printk(KERN_INFO "btrfs: balance in progress\n");
- ret = -EINVAL;
- goto out;
- }
-
- vol_args = memdup_user(arg, sizeof(*vol_args));
- if (IS_ERR(vol_args)) {
- ret = PTR_ERR(vol_args);
- goto out;
- }
-
- vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- ret = btrfs_rm_device(root, vol_args->name);
-
- kfree(vol_args);
-out:
- mutex_unlock(&root->fs_info->volume_mutex);
- return ret;
-}
-
-static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
-{
- struct btrfs_ioctl_fs_info_args *fi_args;
- struct btrfs_device *device;
- struct btrfs_device *next;
- struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
- int ret = 0;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
- if (!fi_args)
- return -ENOMEM;
-
- fi_args->num_devices = fs_devices->num_devices;
- memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
-
- mutex_lock(&fs_devices->device_list_mutex);
- list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
- if (device->devid > fi_args->max_id)
- fi_args->max_id = device->devid;
- }
- mutex_unlock(&fs_devices->device_list_mutex);
-
- if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
- ret = -EFAULT;
-
- kfree(fi_args);
- return ret;
-}
-
-static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
-{
- struct btrfs_ioctl_dev_info_args *di_args;
- struct btrfs_device *dev;
- struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
- int ret = 0;
- char *s_uuid = NULL;
- char empty_uuid[BTRFS_UUID_SIZE] = {0};
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- di_args = memdup_user(arg, sizeof(*di_args));
- if (IS_ERR(di_args))
- return PTR_ERR(di_args);
-
- if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0)
- s_uuid = di_args->uuid;
-
- mutex_lock(&fs_devices->device_list_mutex);
- dev = btrfs_find_device(root, di_args->devid, s_uuid, NULL);
- mutex_unlock(&fs_devices->device_list_mutex);
-
- if (!dev) {
- ret = -ENODEV;
- goto out;
- }
-
- di_args->devid = dev->devid;
- di_args->bytes_used = dev->bytes_used;
- di_args->total_bytes = dev->total_bytes;
- memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
- if (dev->name)
- strncpy(di_args->path, dev->name, sizeof(di_args->path));
- else
- di_args->path[0] = '\0';
-
-out:
- if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
- ret = -EFAULT;
-
- kfree(di_args);
- return ret;
-}
-
-static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
- u64 off, u64 olen, u64 destoff)
-{
- struct inode *inode = fdentry(file)->d_inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct file *src_file;
- struct inode *src;
- struct btrfs_trans_handle *trans;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- char *buf;
- struct btrfs_key key;
- u32 nritems;
- int slot;
- int ret;
- u64 len = olen;
- u64 bs = root->fs_info->sb->s_blocksize;
- u64 hint_byte;
-
- /*
- * TODO:
- * - split compressed inline extents. annoying: we need to
- * decompress into destination's address_space (the file offset
- * may change, so source mapping won't do), then recompress (or
- * otherwise reinsert) a subrange.
- * - allow ranges within the same file to be cloned (provided
- * they don't overlap)?
- */
-
- /* the destination must be opened for writing */
- if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
- return -EINVAL;
-
- if (btrfs_root_readonly(root))
- return -EROFS;
-
- ret = mnt_want_write_file(file);
- if (ret)
- return ret;
-
- src_file = fget(srcfd);
- if (!src_file) {
- ret = -EBADF;
- goto out_drop_write;
- }
-
- src = src_file->f_dentry->d_inode;
-
- ret = -EINVAL;
- if (src == inode)
- goto out_fput;
-
- /* the src must be open for reading */
- if (!(src_file->f_mode & FMODE_READ))
- goto out_fput;
-
- /* don't make the dst file partly checksummed */
- if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
- (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
- goto out_fput;
-
- ret = -EISDIR;
- if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
- goto out_fput;
-
- ret = -EXDEV;
- if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root)
- goto out_fput;
-
- ret = -ENOMEM;
- buf = vmalloc(btrfs_level_size(root, 0));
- if (!buf)
- goto out_fput;
-
- path = btrfs_alloc_path();
- if (!path) {
- vfree(buf);
- goto out_fput;
- }
- path->reada = 2;
-
- if (inode < src) {
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
- } else {
- mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
- }
-
- /* determine range to clone */
- ret = -EINVAL;
- if (off + len > src->i_size || off + len < off)
- goto out_unlock;
- if (len == 0)
- olen = len = src->i_size - off;
- /* if we extend to eof, continue to block boundary */
- if (off + len == src->i_size)
- len = ALIGN(src->i_size, bs) - off;
-
- /* verify the end result is block aligned */
- if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
- !IS_ALIGNED(destoff, bs))
- goto out_unlock;
-
- if (destoff > inode->i_size) {
- ret = btrfs_cont_expand(inode, inode->i_size, destoff);
- if (ret)
- goto out_unlock;
- }
-
- /* truncate page cache pages from target inode range */
- truncate_inode_pages_range(&inode->i_data, destoff,
- PAGE_CACHE_ALIGN(destoff + len) - 1);
-
- /* do any pending delalloc/csum calc on src, one way or
- another, and lock file content */
- while (1) {
- struct btrfs_ordered_extent *ordered;
- lock_extent(&BTRFS_I(src)->io_tree, off, off+len);
- ordered = btrfs_lookup_first_ordered_extent(src, off+len);
- if (!ordered &&
- !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
- EXTENT_DELALLOC, 0, NULL))
- break;
- unlock_extent(&BTRFS_I(src)->io_tree, off, off+len);
- if (ordered)
- btrfs_put_ordered_extent(ordered);
- btrfs_wait_ordered_range(src, off, len);
- }
-
- /* clone data */
- key.objectid = btrfs_ino(src);
- key.type = BTRFS_EXTENT_DATA_KEY;
- key.offset = 0;
-
- while (1) {
- /*
- * note the key will change type as we walk through the
- * tree.
- */
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
-
- nritems = btrfs_header_nritems(path->nodes[0]);
- if (path->slots[0] >= nritems) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- goto out;
- if (ret > 0)
- break;
- nritems = btrfs_header_nritems(path->nodes[0]);
- }
- leaf = path->nodes[0];
- slot = path->slots[0];
-
- btrfs_item_key_to_cpu(leaf, &key, slot);
- if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
- key.objectid != btrfs_ino(src))
- break;
-
- if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
- struct btrfs_file_extent_item *extent;
- int type;
- u32 size;
- struct btrfs_key new_key;
- u64 disko = 0, diskl = 0;
- u64 datao = 0, datal = 0;
- u8 comp;
- u64 endoff;
-
- size = btrfs_item_size_nr(leaf, slot);
- read_extent_buffer(leaf, buf,
- btrfs_item_ptr_offset(leaf, slot),
- size);
-
- extent = btrfs_item_ptr(leaf, slot,
- struct btrfs_file_extent_item);
- comp = btrfs_file_extent_compression(leaf, extent);
- type = btrfs_file_extent_type(leaf, extent);
- if (type == BTRFS_FILE_EXTENT_REG ||
- type == BTRFS_FILE_EXTENT_PREALLOC) {
- disko = btrfs_file_extent_disk_bytenr(leaf,
- extent);
- diskl = btrfs_file_extent_disk_num_bytes(leaf,
- extent);
- datao = btrfs_file_extent_offset(leaf, extent);
- datal = btrfs_file_extent_num_bytes(leaf,
- extent);
- } else if (type == BTRFS_FILE_EXTENT_INLINE) {
- /* take upper bound, may be compressed */
- datal = btrfs_file_extent_ram_bytes(leaf,
- extent);
- }
- btrfs_release_path(path);
-
- if (key.offset + datal <= off ||
- key.offset >= off+len)
- goto next;
-
- memcpy(&new_key, &key, sizeof(new_key));
- new_key.objectid = btrfs_ino(inode);
- if (off <= key.offset)
- new_key.offset = key.offset + destoff - off;
- else
- new_key.offset = destoff;
-
- /*
- * 1 - adjusting old extent (we may have to split it)
- * 1 - add new extent
- * 1 - inode update
- */
- trans = btrfs_start_transaction(root, 3);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto out;
- }
-
- if (type == BTRFS_FILE_EXTENT_REG ||
- type == BTRFS_FILE_EXTENT_PREALLOC) {
- /*
- * a | --- range to clone ---| b
- * | ------------- extent ------------- |
- */
-
-				/* subtract range b */
- if (key.offset + datal > off + len)
- datal = off + len - key.offset;
-
-				/* subtract range a */
- if (off > key.offset) {
- datao += off - key.offset;
- datal -= off - key.offset;
- }
-
- ret = btrfs_drop_extents(trans, inode,
- new_key.offset,
- new_key.offset + datal,
- &hint_byte, 1);
- if (ret) {
- btrfs_abort_transaction(trans, root,
- ret);
- btrfs_end_transaction(trans, root);
- goto out;
- }
-
- ret = btrfs_insert_empty_item(trans, root, path,
- &new_key, size);
- if (ret) {
- btrfs_abort_transaction(trans, root,
- ret);
- btrfs_end_transaction(trans, root);
- goto out;
- }
-
- leaf = path->nodes[0];
- slot = path->slots[0];
- write_extent_buffer(leaf, buf,
- btrfs_item_ptr_offset(leaf, slot),
- size);
-
- extent = btrfs_item_ptr(leaf, slot,
- struct btrfs_file_extent_item);
-
- /* disko == 0 means it's a hole */
- if (!disko)
- datao = 0;
-
- btrfs_set_file_extent_offset(leaf, extent,
- datao);
- btrfs_set_file_extent_num_bytes(leaf, extent,
- datal);
- if (disko) {
- inode_add_bytes(inode, datal);
- ret = btrfs_inc_extent_ref(trans, root,
- disko, diskl, 0,
- root->root_key.objectid,
- btrfs_ino(inode),
- new_key.offset - datao,
- 0);
- if (ret) {
- btrfs_abort_transaction(trans,
- root,
- ret);
- btrfs_end_transaction(trans,
- root);
- goto out;
- }
- }
- } else if (type == BTRFS_FILE_EXTENT_INLINE) {
- u64 skip = 0;
- u64 trim = 0;
- if (off > key.offset) {
- skip = off - key.offset;
- new_key.offset += skip;
- }
-
- if (key.offset + datal > off+len)
- trim = key.offset + datal - (off+len);
-
- if (comp && (skip || trim)) {
- ret = -EINVAL;
- btrfs_end_transaction(trans, root);
- goto out;
- }
- size -= skip + trim;
- datal -= skip + trim;
-
- ret = btrfs_drop_extents(trans, inode,
- new_key.offset,
- new_key.offset + datal,
- &hint_byte, 1);
- if (ret) {
- btrfs_abort_transaction(trans, root,
- ret);
- btrfs_end_transaction(trans, root);
- goto out;
- }
-
- ret = btrfs_insert_empty_item(trans, root, path,
- &new_key, size);
- if (ret) {
- btrfs_abort_transaction(trans, root,
- ret);
- btrfs_end_transaction(trans, root);
- goto out;
- }
-
- if (skip) {
- u32 start =
- btrfs_file_extent_calc_inline_size(0);
- memmove(buf+start, buf+start+skip,
- datal);
- }
-
- leaf = path->nodes[0];
- slot = path->slots[0];
- write_extent_buffer(leaf, buf,
- btrfs_item_ptr_offset(leaf, slot),
- size);
- inode_add_bytes(inode, datal);
- }
-
- btrfs_mark_buffer_dirty(leaf);
- btrfs_release_path(path);
-
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-
- /*
- * we round up to the block size at eof when
- * determining which extents to clone above,
- * but shouldn't round up the file size
- */
- endoff = new_key.offset + datal;
- if (endoff > destoff+olen)
- endoff = destoff+olen;
- if (endoff > inode->i_size)
- btrfs_i_size_write(inode, endoff);
-
- ret = btrfs_update_inode(trans, root, inode);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- btrfs_end_transaction(trans, root);
- goto out;
- }
- ret = btrfs_end_transaction(trans, root);
- }
-next:
- btrfs_release_path(path);
- key.offset++;
- }
- ret = 0;
-out:
- btrfs_release_path(path);
- unlock_extent(&BTRFS_I(src)->io_tree, off, off+len);
-out_unlock:
- mutex_unlock(&src->i_mutex);
- mutex_unlock(&inode->i_mutex);
- vfree(buf);
- btrfs_free_path(path);
-out_fput:
- fput(src_file);
-out_drop_write:
- mnt_drop_write_file(file);
- return ret;
-}
-
-static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
-{
- struct btrfs_ioctl_clone_range_args args;
-
- if (copy_from_user(&args, argp, sizeof(args)))
- return -EFAULT;
- return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
- args.src_length, args.dest_offset);
-}
-
-/*
- * there are many ways the trans_start and trans_end ioctls can lead
- * to deadlocks. They should only be used by applications that
- * basically own the machine, and have a very in-depth understanding
- * of all the possible deadlocks and enospc problems.
- */
-static long btrfs_ioctl_trans_start(struct file *file)
-{
- struct inode *inode = fdentry(file)->d_inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
- int ret;
-
- ret = -EPERM;
- if (!capable(CAP_SYS_ADMIN))
- goto out;
-
- ret = -EINPROGRESS;
- if (file->private_data)
- goto out;
-
- ret = -EROFS;
- if (btrfs_root_readonly(root))
- goto out;
-
- ret = mnt_want_write_file(file);
- if (ret)
- goto out;
-
- atomic_inc(&root->fs_info->open_ioctl_trans);
-
- ret = -ENOMEM;
- trans = btrfs_start_ioctl_transaction(root);
- if (IS_ERR(trans))
- goto out_drop;
-
- file->private_data = trans;
- return 0;
-
-out_drop:
- atomic_dec(&root->fs_info->open_ioctl_trans);
- mnt_drop_write_file(file);
-out:
- return ret;
-}
-
-static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
-{
- struct inode *inode = fdentry(file)->d_inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_root *new_root;
- struct btrfs_dir_item *di;
- struct btrfs_trans_handle *trans;
- struct btrfs_path *path;
- struct btrfs_key location;
- struct btrfs_disk_key disk_key;
- struct btrfs_super_block *disk_super;
- u64 features;
- u64 objectid = 0;
- u64 dir_id;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (copy_from_user(&objectid, argp, sizeof(objectid)))
- return -EFAULT;
-
- if (!objectid)
- objectid = root->root_key.objectid;
-
- location.objectid = objectid;
- location.type = BTRFS_ROOT_ITEM_KEY;
- location.offset = (u64)-1;
-
- new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
- if (IS_ERR(new_root))
- return PTR_ERR(new_root);
-
- if (btrfs_root_refs(&new_root->root_item) == 0)
- return -ENOENT;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->leave_spinning = 1;
-
- trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- btrfs_free_path(path);
- return PTR_ERR(trans);
- }
-
- dir_id = btrfs_super_root_dir(root->fs_info->super_copy);
- di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
- dir_id, "default", 7, 1);
- if (IS_ERR_OR_NULL(di)) {
- btrfs_free_path(path);
- btrfs_end_transaction(trans, root);
- printk(KERN_ERR "Umm, you don't have the default dir item, "
- "this isn't going to work\n");
- return -ENOENT;
- }
-
- btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);
- btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
- btrfs_mark_buffer_dirty(path->nodes[0]);
- btrfs_free_path(path);
-
- disk_super = root->fs_info->super_copy;
- features = btrfs_super_incompat_flags(disk_super);
- if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) {
- features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL;
- btrfs_set_super_incompat_flags(disk_super, features);
- }
- btrfs_end_transaction(trans, root);
-
- return 0;
-}
-
-static void get_block_group_info(struct list_head *groups_list,
- struct btrfs_ioctl_space_info *space)
-{
- struct btrfs_block_group_cache *block_group;
-
- space->total_bytes = 0;
- space->used_bytes = 0;
- space->flags = 0;
- list_for_each_entry(block_group, groups_list, list) {
- space->flags = block_group->flags;
- space->total_bytes += block_group->key.offset;
- space->used_bytes +=
- btrfs_block_group_used(&block_group->item);
- }
-}
-
-long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
-{
- struct btrfs_ioctl_space_args space_args;
- struct btrfs_ioctl_space_info space;
- struct btrfs_ioctl_space_info *dest;
- struct btrfs_ioctl_space_info *dest_orig;
- struct btrfs_ioctl_space_info __user *user_dest;
- struct btrfs_space_info *info;
- u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
- BTRFS_BLOCK_GROUP_SYSTEM,
- BTRFS_BLOCK_GROUP_METADATA,
- BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
- int num_types = 4;
- int alloc_size;
- int ret = 0;
- u64 slot_count = 0;
- int i, c;
-
- if (copy_from_user(&space_args,
- (struct btrfs_ioctl_space_args __user *)arg,
- sizeof(space_args)))
- return -EFAULT;
-
- for (i = 0; i < num_types; i++) {
- struct btrfs_space_info *tmp;
-
- info = NULL;
- rcu_read_lock();
- list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
- list) {
- if (tmp->flags == types[i]) {
- info = tmp;
- break;
- }
- }
- rcu_read_unlock();
-
- if (!info)
- continue;
-
- down_read(&info->groups_sem);
- for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
- if (!list_empty(&info->block_groups[c]))
- slot_count++;
- }
- up_read(&info->groups_sem);
- }
-
- /* space_slots == 0 means they are asking for a count */
- if (space_args.space_slots == 0) {
- space_args.total_spaces = slot_count;
- goto out;
- }
-
- slot_count = min_t(u64, space_args.space_slots, slot_count);
-
- alloc_size = sizeof(*dest) * slot_count;
-
- /* we generally have at most 6 or so space infos, one for each raid
- * level. So, a whole page should be more than enough for everyone
- */
- if (alloc_size > PAGE_CACHE_SIZE)
- return -ENOMEM;
-
- space_args.total_spaces = 0;
- dest = kmalloc(alloc_size, GFP_NOFS);
- if (!dest)
- return -ENOMEM;
- dest_orig = dest;
-
- /* now we have a buffer to copy into */
- for (i = 0; i < num_types; i++) {
- struct btrfs_space_info *tmp;
-
- if (!slot_count)
- break;
-
- info = NULL;
- rcu_read_lock();
- list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
- list) {
- if (tmp->flags == types[i]) {
- info = tmp;
- break;
- }
- }
- rcu_read_unlock();
-
- if (!info)
- continue;
- down_read(&info->groups_sem);
- for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
- if (!list_empty(&info->block_groups[c])) {
- get_block_group_info(&info->block_groups[c],
- &space);
- memcpy(dest, &space, sizeof(space));
- dest++;
- space_args.total_spaces++;
- slot_count--;
- }
- if (!slot_count)
- break;
- }
- up_read(&info->groups_sem);
- }
-
-	user_dest = (struct btrfs_ioctl_space_info __user *)
-			(arg + sizeof(struct btrfs_ioctl_space_args));
-
- if (copy_to_user(user_dest, dest_orig, alloc_size))
- ret = -EFAULT;
-
- kfree(dest_orig);
-out:
- if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args)))
- ret = -EFAULT;
-
- return ret;
-}
-
-/*
- * there are many ways the trans_start and trans_end ioctls can lead
- * to deadlocks. They should only be used by applications that
- * basically own the machine, and have a very in-depth understanding
- * of all the possible deadlocks and enospc problems.
- */
-long btrfs_ioctl_trans_end(struct file *file)
-{
- struct inode *inode = fdentry(file)->d_inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
-
- trans = file->private_data;
- if (!trans)
- return -EINVAL;
- file->private_data = NULL;
-
- btrfs_end_transaction(trans, root);
-
- atomic_dec(&root->fs_info->open_ioctl_trans);
-
- mnt_drop_write_file(file);
- return 0;
-}
-
-static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
-{
- struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
- struct btrfs_trans_handle *trans;
- u64 transid;
- int ret;
-
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- transid = trans->transid;
- ret = btrfs_commit_transaction_async(trans, root, 0);
- if (ret) {
- btrfs_end_transaction(trans, root);
- return ret;
- }
-
- if (argp)
- if (copy_to_user(argp, &transid, sizeof(transid)))
- return -EFAULT;
- return 0;
-}
-
-static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
-{
- struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
- u64 transid;
-
- if (argp) {
- if (copy_from_user(&transid, argp, sizeof(transid)))
- return -EFAULT;
- } else {
- transid = 0; /* current trans */
- }
- return btrfs_wait_for_commit(root, transid);
-}
-
-static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg)
-{
- int ret;
- struct btrfs_ioctl_scrub_args *sa;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- sa = memdup_user(arg, sizeof(*sa));
- if (IS_ERR(sa))
- return PTR_ERR(sa);
-
- ret = btrfs_scrub_dev(root, sa->devid, sa->start, sa->end,
- &sa->progress, sa->flags & BTRFS_SCRUB_READONLY);
-
- if (copy_to_user(arg, sa, sizeof(*sa)))
- ret = -EFAULT;
-
- kfree(sa);
- return ret;
-}
-
-static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg)
-{
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- return btrfs_scrub_cancel(root);
-}
-
-static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
- void __user *arg)
-{
- struct btrfs_ioctl_scrub_args *sa;
- int ret;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- sa = memdup_user(arg, sizeof(*sa));
- if (IS_ERR(sa))
- return PTR_ERR(sa);
-
- ret = btrfs_scrub_progress(root, sa->devid, &sa->progress);
-
- if (copy_to_user(arg, sa, sizeof(*sa)))
- ret = -EFAULT;
-
- kfree(sa);
- return ret;
-}
-
-static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
-{
- int ret = 0;
- int i;
- u64 rel_ptr;
- int size;
- struct btrfs_ioctl_ino_path_args *ipa = NULL;
- struct inode_fs_paths *ipath = NULL;
- struct btrfs_path *path;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
- ipa = memdup_user(arg, sizeof(*ipa));
- if (IS_ERR(ipa)) {
- ret = PTR_ERR(ipa);
- ipa = NULL;
- goto out;
- }
-
- size = min_t(u32, ipa->size, 4096);
- ipath = init_ipath(size, root, path);
- if (IS_ERR(ipath)) {
- ret = PTR_ERR(ipath);
- ipath = NULL;
- goto out;
- }
-
- ret = paths_from_inode(ipa->inum, ipath);
- if (ret < 0)
- goto out;
-
- for (i = 0; i < ipath->fspath->elem_cnt; ++i) {
- rel_ptr = ipath->fspath->val[i] -
- (u64)(unsigned long)ipath->fspath->val;
- ipath->fspath->val[i] = rel_ptr;
- }
-
- ret = copy_to_user((void *)(unsigned long)ipa->fspath,
- (void *)(unsigned long)ipath->fspath, size);
- if (ret) {
- ret = -EFAULT;
- goto out;
- }
-
-out:
- btrfs_free_path(path);
- free_ipath(ipath);
- kfree(ipa);
-
- return ret;
-}
-
-static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
-{
- struct btrfs_data_container *inodes = ctx;
- const size_t c = 3 * sizeof(u64);
-
- if (inodes->bytes_left >= c) {
- inodes->bytes_left -= c;
- inodes->val[inodes->elem_cnt] = inum;
- inodes->val[inodes->elem_cnt + 1] = offset;
- inodes->val[inodes->elem_cnt + 2] = root;
- inodes->elem_cnt += 3;
- } else {
- inodes->bytes_missing += c - inodes->bytes_left;
- inodes->bytes_left = 0;
- inodes->elem_missed += 3;
- }
-
- return 0;
-}
-
-static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
- void __user *arg)
-{
- int ret = 0;
- int size;
- u64 extent_item_pos;
- struct btrfs_ioctl_logical_ino_args *loi;
- struct btrfs_data_container *inodes = NULL;
- struct btrfs_path *path = NULL;
- struct btrfs_key key;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- loi = memdup_user(arg, sizeof(*loi));
- if (IS_ERR(loi)) {
- ret = PTR_ERR(loi);
- loi = NULL;
- goto out;
- }
-
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
- size = min_t(u32, loi->size, 4096);
- inodes = init_data_container(size);
- if (IS_ERR(inodes)) {
- ret = PTR_ERR(inodes);
- inodes = NULL;
- goto out;
- }
-
- ret = extent_from_logical(root->fs_info, loi->logical, path, &key);
- btrfs_release_path(path);
-
- if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
- ret = -ENOENT;
- if (ret < 0)
- goto out;
-
- extent_item_pos = loi->logical - key.objectid;
- ret = iterate_extent_inodes(root->fs_info, key.objectid,
- extent_item_pos, 0, build_ino_list,
- inodes);
-
- if (ret < 0)
- goto out;
-
- ret = copy_to_user((void *)(unsigned long)loi->inodes,
- (void *)(unsigned long)inodes, size);
- if (ret)
- ret = -EFAULT;
-
-out:
- btrfs_free_path(path);
- kfree(inodes);
- kfree(loi);
-
- return ret;
-}
-
-void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
- struct btrfs_ioctl_balance_args *bargs)
-{
- struct btrfs_balance_control *bctl = fs_info->balance_ctl;
-
- bargs->flags = bctl->flags;
-
- if (atomic_read(&fs_info->balance_running))
- bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
- if (atomic_read(&fs_info->balance_pause_req))
- bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
- if (atomic_read(&fs_info->balance_cancel_req))
- bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ;
-
- memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
- memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
- memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
-
- if (lock) {
- spin_lock(&fs_info->balance_lock);
- memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
- spin_unlock(&fs_info->balance_lock);
- } else {
- memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
- }
-}
-
-static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_ioctl_balance_args *bargs;
- struct btrfs_balance_control *bctl;
- int ret;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (fs_info->sb->s_flags & MS_RDONLY)
- return -EROFS;
-
- mutex_lock(&fs_info->volume_mutex);
- mutex_lock(&fs_info->balance_mutex);
-
- if (arg) {
- bargs = memdup_user(arg, sizeof(*bargs));
- if (IS_ERR(bargs)) {
- ret = PTR_ERR(bargs);
- goto out;
- }
-
- if (bargs->flags & BTRFS_BALANCE_RESUME) {
- if (!fs_info->balance_ctl) {
- ret = -ENOTCONN;
- goto out_bargs;
- }
-
- bctl = fs_info->balance_ctl;
- spin_lock(&fs_info->balance_lock);
- bctl->flags |= BTRFS_BALANCE_RESUME;
- spin_unlock(&fs_info->balance_lock);
-
- goto do_balance;
- }
- } else {
- bargs = NULL;
- }
-
- if (fs_info->balance_ctl) {
- ret = -EINPROGRESS;
- goto out_bargs;
- }
-
- bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
- if (!bctl) {
- ret = -ENOMEM;
- goto out_bargs;
- }
-
- bctl->fs_info = fs_info;
- if (arg) {
- memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
- memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
- memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));
-
- bctl->flags = bargs->flags;
- } else {
- /* balance everything - no filters */
- bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
- }
-
-do_balance:
- ret = btrfs_balance(bctl, bargs);
- /*
- * bctl is freed in __cancel_balance or in free_fs_info if
- * restriper was paused all the way until unmount
- */
- if (arg) {
- if (copy_to_user(arg, bargs, sizeof(*bargs)))
- ret = -EFAULT;
- }
-
-out_bargs:
- kfree(bargs);
-out:
- mutex_unlock(&fs_info->balance_mutex);
- mutex_unlock(&fs_info->volume_mutex);
- return ret;
-}
-
-static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
-{
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- switch (cmd) {
- case BTRFS_BALANCE_CTL_PAUSE:
- return btrfs_pause_balance(root->fs_info);
- case BTRFS_BALANCE_CTL_CANCEL:
- return btrfs_cancel_balance(root->fs_info);
- }
-
- return -EINVAL;
-}
-
-static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
- void __user *arg)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_ioctl_balance_args *bargs;
- int ret = 0;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- mutex_lock(&fs_info->balance_mutex);
- if (!fs_info->balance_ctl) {
- ret = -ENOTCONN;
- goto out;
- }
-
- bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
- if (!bargs) {
- ret = -ENOMEM;
- goto out;
- }
-
- update_ioctl_balance_args(fs_info, 1, bargs);
-
- if (copy_to_user(arg, bargs, sizeof(*bargs)))
- ret = -EFAULT;
-
- kfree(bargs);
-out:
- mutex_unlock(&fs_info->balance_mutex);
- return ret;
-}
-
-long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
- void __user *argp = (void __user *)arg;
-
- switch (cmd) {
- case FS_IOC_GETFLAGS:
- return btrfs_ioctl_getflags(file, argp);
- case FS_IOC_SETFLAGS:
- return btrfs_ioctl_setflags(file, argp);
- case FS_IOC_GETVERSION:
- return btrfs_ioctl_getversion(file, argp);
- case FITRIM:
- return btrfs_ioctl_fitrim(file, argp);
- case BTRFS_IOC_SNAP_CREATE:
- return btrfs_ioctl_snap_create(file, argp, 0);
- case BTRFS_IOC_SNAP_CREATE_V2:
- return btrfs_ioctl_snap_create_v2(file, argp, 0);
- case BTRFS_IOC_SUBVOL_CREATE:
- return btrfs_ioctl_snap_create(file, argp, 1);
- case BTRFS_IOC_SNAP_DESTROY:
- return btrfs_ioctl_snap_destroy(file, argp);
- case BTRFS_IOC_SUBVOL_GETFLAGS:
- return btrfs_ioctl_subvol_getflags(file, argp);
- case BTRFS_IOC_SUBVOL_SETFLAGS:
- return btrfs_ioctl_subvol_setflags(file, argp);
- case BTRFS_IOC_DEFAULT_SUBVOL:
- return btrfs_ioctl_default_subvol(file, argp);
- case BTRFS_IOC_DEFRAG:
- return btrfs_ioctl_defrag(file, NULL);
- case BTRFS_IOC_DEFRAG_RANGE:
- return btrfs_ioctl_defrag(file, argp);
- case BTRFS_IOC_RESIZE:
- return btrfs_ioctl_resize(root, argp);
- case BTRFS_IOC_ADD_DEV:
- return btrfs_ioctl_add_dev(root, argp);
- case BTRFS_IOC_RM_DEV:
- return btrfs_ioctl_rm_dev(root, argp);
- case BTRFS_IOC_FS_INFO:
- return btrfs_ioctl_fs_info(root, argp);
- case BTRFS_IOC_DEV_INFO:
- return btrfs_ioctl_dev_info(root, argp);
- case BTRFS_IOC_BALANCE:
- return btrfs_ioctl_balance(root, NULL);
- case BTRFS_IOC_CLONE:
- return btrfs_ioctl_clone(file, arg, 0, 0, 0);
- case BTRFS_IOC_CLONE_RANGE:
- return btrfs_ioctl_clone_range(file, argp);
- case BTRFS_IOC_TRANS_START:
- return btrfs_ioctl_trans_start(file);
- case BTRFS_IOC_TRANS_END:
- return btrfs_ioctl_trans_end(file);
- case BTRFS_IOC_TREE_SEARCH:
- return btrfs_ioctl_tree_search(file, argp);
- case BTRFS_IOC_INO_LOOKUP:
- return btrfs_ioctl_ino_lookup(file, argp);
- case BTRFS_IOC_INO_PATHS:
- return btrfs_ioctl_ino_to_path(root, argp);
- case BTRFS_IOC_LOGICAL_INO:
- return btrfs_ioctl_logical_to_ino(root, argp);
- case BTRFS_IOC_SPACE_INFO:
- return btrfs_ioctl_space_info(root, argp);
- case BTRFS_IOC_SYNC:
- btrfs_sync_fs(file->f_dentry->d_sb, 1);
- return 0;
- case BTRFS_IOC_START_SYNC:
- return btrfs_ioctl_start_sync(file, argp);
- case BTRFS_IOC_WAIT_SYNC:
- return btrfs_ioctl_wait_sync(file, argp);
- case BTRFS_IOC_SCRUB:
- return btrfs_ioctl_scrub(root, argp);
- case BTRFS_IOC_SCRUB_CANCEL:
- return btrfs_ioctl_scrub_cancel(root, argp);
- case BTRFS_IOC_SCRUB_PROGRESS:
- return btrfs_ioctl_scrub_progress(root, argp);
- case BTRFS_IOC_BALANCE_V2:
- return btrfs_ioctl_balance(root, argp);
- case BTRFS_IOC_BALANCE_CTL:
- return btrfs_ioctl_balance_ctl(root, arg);
- case BTRFS_IOC_BALANCE_PROGRESS:
- return btrfs_ioctl_balance_progress(root, argp);
- }
-
- return -ENOTTY;
-}
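
btrfs_ioctl() above is reached through ordinary ioctl(2) calls on a file descriptor
that lives on the filesystem. The sketch below is a hypothetical userspace consumer,
not part of the deleted tree: it reflinks one file into another with BTRFS_IOC_CLONE
(olen of 0 clones the whole source) and then forces a commit with BTRFS_IOC_SYNC.
The ioctl numbers are copied from the ioctl.h removed below; file names and error
handling are illustrative only.

/*
 * Hypothetical userspace sketch (not from the deleted tree): clone one
 * file into another and force a commit.  In this kernel version both
 * files must live in the same btrfs subvolume.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

#define BTRFS_IOCTL_MAGIC	0x94
#define BTRFS_IOC_CLONE		_IOW(BTRFS_IOCTL_MAGIC, 9, int)
#define BTRFS_IOC_SYNC		_IO(BTRFS_IOCTL_MAGIC, 8)

int main(int argc, char **argv)
{
	int src, dst;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <src> <dst>\n", argv[0]);
		return 1;
	}
	src = open(argv[1], O_RDONLY);
	dst = open(argv[2], O_WRONLY | O_CREAT, 0644);
	if (src < 0 || dst < 0) {
		perror("open");
		return 1;
	}
	/* share the source's extents with the destination */
	if (ioctl(dst, BTRFS_IOC_CLONE, src) < 0) {
		perror("BTRFS_IOC_CLONE");
		return 1;
	}
	/* ask the filesystem to commit the current transaction */
	if (ioctl(dst, BTRFS_IOC_SYNC, 0) < 0)
		perror("BTRFS_IOC_SYNC");
	close(src);
	close(dst);
	return 0;
}
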
diff --git a/ANDROID_3.4.5/fs/btrfs/ioctl.h b/ANDROID_3.4.5/fs/btrfs/ioctl.h
deleted file mode 100644
index 086e6bda..00000000
--- a/ANDROID_3.4.5/fs/btrfs/ioctl.h
+++ /dev/null
@@ -1,334 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __IOCTL_
-#define __IOCTL_
-#include <linux/ioctl.h>
-
-#define BTRFS_IOCTL_MAGIC 0x94
-#define BTRFS_VOL_NAME_MAX 255
-
-/* this should be 4k */
-#define BTRFS_PATH_NAME_MAX 4087
-struct btrfs_ioctl_vol_args {
- __s64 fd;
- char name[BTRFS_PATH_NAME_MAX + 1];
-};
-
-#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
-#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
-#define BTRFS_FSID_SIZE 16
-#define BTRFS_UUID_SIZE 16
-
-#define BTRFS_SUBVOL_NAME_MAX 4039
-struct btrfs_ioctl_vol_args_v2 {
- __s64 fd;
- __u64 transid;
- __u64 flags;
- __u64 unused[4];
- char name[BTRFS_SUBVOL_NAME_MAX + 1];
-};
-
-/*
- * structure to report errors and progress to userspace, either as a
- * result of a finished scrub, a canceled scrub or a progress inquiry
- */
-struct btrfs_scrub_progress {
- __u64 data_extents_scrubbed; /* # of data extents scrubbed */
- __u64 tree_extents_scrubbed; /* # of tree extents scrubbed */
- __u64 data_bytes_scrubbed; /* # of data bytes scrubbed */
- __u64 tree_bytes_scrubbed; /* # of tree bytes scrubbed */
- __u64 read_errors; /* # of read errors encountered (EIO) */
- __u64 csum_errors; /* # of failed csum checks */
-	__u64 verify_errors;		/* # of occurrences where the metadata
- * of a tree block did not match the
- * expected values, like generation or
- * logical */
-	__u64 no_csum;			/* # of 4k data blocks for which no csum
- * is present, probably the result of
- * data written with nodatasum */
-	__u64 csum_discards;		/* # of csums for which no data was found
- * in the extent tree. */
- __u64 super_errors; /* # of bad super blocks encountered */
- __u64 malloc_errors; /* # of internal kmalloc errors. These
- * will likely cause an incomplete
- * scrub */
- __u64 uncorrectable_errors; /* # of errors where either no intact
- * copy was found or the writeback
- * failed */
- __u64 corrected_errors; /* # of errors corrected */
- __u64 last_physical; /* last physical address scrubbed. In
- * case a scrub was aborted, this can
- * be used to restart the scrub */
-	__u64 unverified_errors;	/* # of occurrences where a read for a
- * full (64k) bio failed, but the re-
- * check succeeded for each 4k piece.
- * Intermittent error. */
-};
-
-#define BTRFS_SCRUB_READONLY 1
-struct btrfs_ioctl_scrub_args {
- __u64 devid; /* in */
- __u64 start; /* in */
- __u64 end; /* in */
- __u64 flags; /* in */
- struct btrfs_scrub_progress progress; /* out */
- /* pad to 1k */
- __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8];
-};
-
-#define BTRFS_DEVICE_PATH_NAME_MAX 1024
-struct btrfs_ioctl_dev_info_args {
- __u64 devid; /* in/out */
- __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */
- __u64 bytes_used; /* out */
- __u64 total_bytes; /* out */
- __u64 unused[379]; /* pad to 4k */
- __u8 path[BTRFS_DEVICE_PATH_NAME_MAX]; /* out */
-};
-
-struct btrfs_ioctl_fs_info_args {
- __u64 max_id; /* out */
- __u64 num_devices; /* out */
- __u8 fsid[BTRFS_FSID_SIZE]; /* out */
- __u64 reserved[124]; /* pad to 1k */
-};
-
-/* balance control ioctl modes */
-#define BTRFS_BALANCE_CTL_PAUSE 1
-#define BTRFS_BALANCE_CTL_CANCEL 2
-
-/*
- * this is packed, because it should be exactly the same as its disk
- * byte order counterpart (struct btrfs_disk_balance_args)
- */
-struct btrfs_balance_args {
- __u64 profiles;
- __u64 usage;
- __u64 devid;
- __u64 pstart;
- __u64 pend;
- __u64 vstart;
- __u64 vend;
-
- __u64 target;
-
- __u64 flags;
-
- __u64 unused[8];
-} __attribute__ ((__packed__));
-
-/* report balance progress to userspace */
-struct btrfs_balance_progress {
- __u64 expected; /* estimated # of chunks that will be
- * relocated to fulfill the request */
- __u64 considered; /* # of chunks we have considered so far */
- __u64 completed; /* # of chunks relocated so far */
-};
-
-#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0)
-#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1)
-#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2)
-
-struct btrfs_ioctl_balance_args {
- __u64 flags; /* in/out */
- __u64 state; /* out */
-
- struct btrfs_balance_args data; /* in/out */
- struct btrfs_balance_args meta; /* in/out */
- struct btrfs_balance_args sys; /* in/out */
-
- struct btrfs_balance_progress stat; /* out */
-
- __u64 unused[72]; /* pad to 1k */
-};
-
-#define BTRFS_INO_LOOKUP_PATH_MAX 4080
-struct btrfs_ioctl_ino_lookup_args {
- __u64 treeid;
- __u64 objectid;
- char name[BTRFS_INO_LOOKUP_PATH_MAX];
-};
-
-struct btrfs_ioctl_search_key {
- /* which root are we searching. 0 is the tree of tree roots */
- __u64 tree_id;
-
- /* keys returned will be >= min and <= max */
- __u64 min_objectid;
- __u64 max_objectid;
-
- /* keys returned will be >= min and <= max */
- __u64 min_offset;
- __u64 max_offset;
-
- /* max and min transids to search for */
- __u64 min_transid;
- __u64 max_transid;
-
- /* keys returned will be >= min and <= max */
- __u32 min_type;
- __u32 max_type;
-
- /*
- * how many items did userland ask for, and how many are we
- * returning
- */
- __u32 nr_items;
-
- /* align to 64 bits */
- __u32 unused;
-
- /* some extra for later */
- __u64 unused1;
- __u64 unused2;
- __u64 unused3;
- __u64 unused4;
-};
-
-struct btrfs_ioctl_search_header {
- __u64 transid;
- __u64 objectid;
- __u64 offset;
- __u32 type;
- __u32 len;
-};
-
-#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key))
-/*
- * the buf is an array of search headers where
- * each header is followed by the actual item
- * the type field is expanded to 32 bits for alignment
- */
-struct btrfs_ioctl_search_args {
- struct btrfs_ioctl_search_key key;
- char buf[BTRFS_SEARCH_ARGS_BUFSIZE];
-};
-
-struct btrfs_ioctl_clone_range_args {
- __s64 src_fd;
- __u64 src_offset, src_length;
- __u64 dest_offset;
-};
-
-/* flags for the defrag range ioctl */
-#define BTRFS_DEFRAG_RANGE_COMPRESS 1
-#define BTRFS_DEFRAG_RANGE_START_IO 2
-
-struct btrfs_ioctl_space_info {
- __u64 flags;
- __u64 total_bytes;
- __u64 used_bytes;
-};
-
-struct btrfs_ioctl_space_args {
- __u64 space_slots;
- __u64 total_spaces;
- struct btrfs_ioctl_space_info spaces[0];
-};
-
-struct btrfs_data_container {
- __u32 bytes_left; /* out -- bytes not needed to deliver output */
- __u32 bytes_missing; /* out -- additional bytes needed for result */
- __u32 elem_cnt; /* out */
- __u32 elem_missed; /* out */
- __u64 val[0]; /* out */
-};
-
-struct btrfs_ioctl_ino_path_args {
- __u64 inum; /* in */
- __u64 size; /* in */
- __u64 reserved[4];
- /* struct btrfs_data_container *fspath; out */
- __u64 fspath; /* out */
-};
-
-struct btrfs_ioctl_logical_ino_args {
- __u64 logical; /* in */
- __u64 size; /* in */
- __u64 reserved[4];
- /* struct btrfs_data_container *inodes; out */
- __u64 inodes;
-};
-
-#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
- struct btrfs_ioctl_vol_args)
-#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
- struct btrfs_ioctl_vol_args)
-#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \
- struct btrfs_ioctl_vol_args)
-#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
- struct btrfs_ioctl_vol_args)
-/* trans start and trans end are dangerous, and only for
- * use by applications that know how to avoid the
- * resulting deadlocks
- */
-#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6)
-#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7)
-#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8)
-
-#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int)
-#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \
- struct btrfs_ioctl_vol_args)
-#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \
- struct btrfs_ioctl_vol_args)
-#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \
- struct btrfs_ioctl_vol_args)
-
-#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \
- struct btrfs_ioctl_clone_range_args)
-
-#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
- struct btrfs_ioctl_vol_args)
-#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
- struct btrfs_ioctl_vol_args)
-#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \
- struct btrfs_ioctl_defrag_range_args)
-#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \
- struct btrfs_ioctl_search_args)
-#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \
- struct btrfs_ioctl_ino_lookup_args)
-#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
-#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
- struct btrfs_ioctl_space_args)
-#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
-#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
-#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
- struct btrfs_ioctl_vol_args_v2)
-#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
-#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
-#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
- struct btrfs_ioctl_scrub_args)
-#define BTRFS_IOC_SCRUB_CANCEL _IO(BTRFS_IOCTL_MAGIC, 28)
-#define BTRFS_IOC_SCRUB_PROGRESS _IOWR(BTRFS_IOCTL_MAGIC, 29, \
- struct btrfs_ioctl_scrub_args)
-#define BTRFS_IOC_DEV_INFO _IOWR(BTRFS_IOCTL_MAGIC, 30, \
- struct btrfs_ioctl_dev_info_args)
-#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
- struct btrfs_ioctl_fs_info_args)
-#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
- struct btrfs_ioctl_balance_args)
-#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
-#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \
- struct btrfs_ioctl_balance_args)
-#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
- struct btrfs_ioctl_ino_path_args)
-#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
- struct btrfs_ioctl_ino_path_args)
-
-#endif
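
The comment above BTRFS_SEARCH_ARGS_BUFSIZE describes the buffer returned by
BTRFS_IOC_TREE_SEARCH: a btrfs_ioctl_search_header followed by the raw item bytes,
repeated once per returned item. A hypothetical userspace sketch of walking that
layout follows; it is not part of the deleted tree, requires CAP_SYS_ADMIN, and
assumes the structures above have been copied into a local header named
"btrfs-ioctl.h".

/*
 * Hypothetical consumer of BTRFS_IOC_TREE_SEARCH: dump every ROOT_ITEM
 * in the tree of tree roots.  fd is any open file on the filesystem.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include "btrfs-ioctl.h"	/* assumed copy of the structures above */

#define BTRFS_ROOT_TREE_OBJECTID	1
#define BTRFS_ROOT_ITEM_KEY		132

static int dump_root_items(int fd)
{
	struct btrfs_ioctl_search_args args;
	struct btrfs_ioctl_search_header *sh;
	char *p;
	__u32 i;

	memset(&args, 0, sizeof(args));
	args.key.tree_id = BTRFS_ROOT_TREE_OBJECTID;
	args.key.max_objectid = (__u64)-1;
	args.key.max_offset = (__u64)-1;
	args.key.max_transid = (__u64)-1;
	args.key.min_type = BTRFS_ROOT_ITEM_KEY;
	args.key.max_type = BTRFS_ROOT_ITEM_KEY;
	args.key.nr_items = 4096;	/* kernel rewrites this with the count found */

	if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
		return -1;

	/* walk the packed header + item pairs in args.buf */
	p = args.buf;
	for (i = 0; i < args.key.nr_items; i++) {
		sh = (struct btrfs_ioctl_search_header *)p;
		printf("root %llu, item len %u\n",
		       (unsigned long long)sh->objectid, sh->len);
		p += sizeof(*sh) + sh->len;
	}
	return 0;
}
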
diff --git a/ANDROID_3.4.5/fs/btrfs/locking.c b/ANDROID_3.4.5/fs/btrfs/locking.c
deleted file mode 100644
index 272f9112..00000000
--- a/ANDROID_3.4.5/fs/btrfs/locking.c
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-#include <linux/sched.h>
-#include <linux/pagemap.h>
-#include <linux/spinlock.h>
-#include <linux/page-flags.h>
-#include <asm/bug.h>
-#include "ctree.h"
-#include "extent_io.h"
-#include "locking.h"
-
-void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
-
-/*
- * if we currently have a spinning reader or writer lock
- * (indicated by the rw flag) this will bump the count
- * of blocking holders and drop the spinlock.
- */
-void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
-{
- if (eb->lock_nested) {
- read_lock(&eb->lock);
- if (eb->lock_nested && current->pid == eb->lock_owner) {
- read_unlock(&eb->lock);
- return;
- }
- read_unlock(&eb->lock);
- }
- if (rw == BTRFS_WRITE_LOCK) {
- if (atomic_read(&eb->blocking_writers) == 0) {
- WARN_ON(atomic_read(&eb->spinning_writers) != 1);
- atomic_dec(&eb->spinning_writers);
- btrfs_assert_tree_locked(eb);
- atomic_inc(&eb->blocking_writers);
- write_unlock(&eb->lock);
- }
- } else if (rw == BTRFS_READ_LOCK) {
- btrfs_assert_tree_read_locked(eb);
- atomic_inc(&eb->blocking_readers);
- WARN_ON(atomic_read(&eb->spinning_readers) == 0);
- atomic_dec(&eb->spinning_readers);
- read_unlock(&eb->lock);
- }
- return;
-}
-
-/*
- * if we currently have a blocking lock, take the spinlock
- * and drop our blocking count
- */
-void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
-{
- if (eb->lock_nested) {
- read_lock(&eb->lock);
-		if (eb->lock_nested && current->pid == eb->lock_owner) {
- read_unlock(&eb->lock);
- return;
- }
- read_unlock(&eb->lock);
- }
- if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
- BUG_ON(atomic_read(&eb->blocking_writers) != 1);
- write_lock(&eb->lock);
- WARN_ON(atomic_read(&eb->spinning_writers));
- atomic_inc(&eb->spinning_writers);
- if (atomic_dec_and_test(&eb->blocking_writers))
- wake_up(&eb->write_lock_wq);
- } else if (rw == BTRFS_READ_LOCK_BLOCKING) {
- BUG_ON(atomic_read(&eb->blocking_readers) == 0);
- read_lock(&eb->lock);
- atomic_inc(&eb->spinning_readers);
- if (atomic_dec_and_test(&eb->blocking_readers))
- wake_up(&eb->read_lock_wq);
- }
- return;
-}
-
-/*
- * take a spinning read lock. This will wait for any blocking
- * writers
- */
-void btrfs_tree_read_lock(struct extent_buffer *eb)
-{
-again:
- read_lock(&eb->lock);
- if (atomic_read(&eb->blocking_writers) &&
- current->pid == eb->lock_owner) {
- /*
- * This extent is already write-locked by our thread. We allow
- * an additional read lock to be added because it's for the same
- * thread. btrfs_find_all_roots() depends on this as it may be
- * called on a partly (write-)locked tree.
- */
- BUG_ON(eb->lock_nested);
- eb->lock_nested = 1;
- read_unlock(&eb->lock);
- return;
- }
- read_unlock(&eb->lock);
- wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
- read_lock(&eb->lock);
- if (atomic_read(&eb->blocking_writers)) {
- read_unlock(&eb->lock);
- goto again;
- }
- atomic_inc(&eb->read_locks);
- atomic_inc(&eb->spinning_readers);
-}
-
-/*
- * returns 1 if we get the read lock and 0 if we don't
- * this won't wait for blocking writers
- */
-int btrfs_try_tree_read_lock(struct extent_buffer *eb)
-{
- if (atomic_read(&eb->blocking_writers))
- return 0;
-
- read_lock(&eb->lock);
- if (atomic_read(&eb->blocking_writers)) {
- read_unlock(&eb->lock);
- return 0;
- }
- atomic_inc(&eb->read_locks);
- atomic_inc(&eb->spinning_readers);
- return 1;
-}
-
-/*
- * returns 1 if we get the read lock and 0 if we don't
- * this won't wait for blocking writers or readers
- */
-int btrfs_try_tree_write_lock(struct extent_buffer *eb)
-{
- if (atomic_read(&eb->blocking_writers) ||
- atomic_read(&eb->blocking_readers))
- return 0;
- write_lock(&eb->lock);
- if (atomic_read(&eb->blocking_writers) ||
- atomic_read(&eb->blocking_readers)) {
- write_unlock(&eb->lock);
- return 0;
- }
- atomic_inc(&eb->write_locks);
- atomic_inc(&eb->spinning_writers);
- eb->lock_owner = current->pid;
- return 1;
-}
-
-/*
- * drop a spinning read lock
- */
-void btrfs_tree_read_unlock(struct extent_buffer *eb)
-{
- if (eb->lock_nested) {
- read_lock(&eb->lock);
- if (eb->lock_nested && current->pid == eb->lock_owner) {
- eb->lock_nested = 0;
- read_unlock(&eb->lock);
- return;
- }
- read_unlock(&eb->lock);
- }
- btrfs_assert_tree_read_locked(eb);
- WARN_ON(atomic_read(&eb->spinning_readers) == 0);
- atomic_dec(&eb->spinning_readers);
- atomic_dec(&eb->read_locks);
- read_unlock(&eb->lock);
-}
-
-/*
- * drop a blocking read lock
- */
-void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
-{
- if (eb->lock_nested) {
- read_lock(&eb->lock);
- if (eb->lock_nested && current->pid == eb->lock_owner) {
- eb->lock_nested = 0;
- read_unlock(&eb->lock);
- return;
- }
- read_unlock(&eb->lock);
- }
- btrfs_assert_tree_read_locked(eb);
- WARN_ON(atomic_read(&eb->blocking_readers) == 0);
- if (atomic_dec_and_test(&eb->blocking_readers))
- wake_up(&eb->read_lock_wq);
- atomic_dec(&eb->read_locks);
-}
-
-/*
- * take a spinning write lock. This will wait for both
- * blocking readers or writers
- */
-void btrfs_tree_lock(struct extent_buffer *eb)
-{
-again:
- wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
- wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
- write_lock(&eb->lock);
- if (atomic_read(&eb->blocking_readers)) {
- write_unlock(&eb->lock);
- wait_event(eb->read_lock_wq,
- atomic_read(&eb->blocking_readers) == 0);
- goto again;
- }
- if (atomic_read(&eb->blocking_writers)) {
- write_unlock(&eb->lock);
- wait_event(eb->write_lock_wq,
- atomic_read(&eb->blocking_writers) == 0);
- goto again;
- }
- WARN_ON(atomic_read(&eb->spinning_writers));
- atomic_inc(&eb->spinning_writers);
- atomic_inc(&eb->write_locks);
- eb->lock_owner = current->pid;
-}
-
-/*
- * drop a spinning or a blocking write lock.
- */
-void btrfs_tree_unlock(struct extent_buffer *eb)
-{
- int blockers = atomic_read(&eb->blocking_writers);
-
- BUG_ON(blockers > 1);
-
- btrfs_assert_tree_locked(eb);
- atomic_dec(&eb->write_locks);
-
- if (blockers) {
- WARN_ON(atomic_read(&eb->spinning_writers));
- atomic_dec(&eb->blocking_writers);
- smp_wmb();
- wake_up(&eb->write_lock_wq);
- } else {
- WARN_ON(atomic_read(&eb->spinning_writers) != 1);
- atomic_dec(&eb->spinning_writers);
- write_unlock(&eb->lock);
- }
-}
-
-void btrfs_assert_tree_locked(struct extent_buffer *eb)
-{
- BUG_ON(!atomic_read(&eb->write_locks));
-}
-
-void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
-{
- BUG_ON(!atomic_read(&eb->read_locks));
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/locking.h b/ANDROID_3.4.5/fs/btrfs/locking.h
deleted file mode 100644
index ca52681e..00000000
--- a/ANDROID_3.4.5/fs/btrfs/locking.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __BTRFS_LOCKING_
-#define __BTRFS_LOCKING_
-
-#define BTRFS_WRITE_LOCK 1
-#define BTRFS_READ_LOCK 2
-#define BTRFS_WRITE_LOCK_BLOCKING 3
-#define BTRFS_READ_LOCK_BLOCKING 4
-
-void btrfs_tree_lock(struct extent_buffer *eb);
-void btrfs_tree_unlock(struct extent_buffer *eb);
-int btrfs_try_spin_lock(struct extent_buffer *eb);
-
-void btrfs_tree_read_lock(struct extent_buffer *eb);
-void btrfs_tree_read_unlock(struct extent_buffer *eb);
-void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb);
-void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw);
-void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
-void btrfs_assert_tree_locked(struct extent_buffer *eb);
-int btrfs_try_tree_read_lock(struct extent_buffer *eb);
-int btrfs_try_tree_write_lock(struct extent_buffer *eb);
-
-static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
-{
- if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING)
- btrfs_tree_unlock(eb);
- else if (rw == BTRFS_READ_LOCK_BLOCKING)
- btrfs_tree_read_unlock_blocking(eb);
- else if (rw == BTRFS_READ_LOCK)
- btrfs_tree_read_unlock(eb);
- else
- BUG();
-}
-
-static inline void btrfs_set_lock_blocking(struct extent_buffer *eb)
-{
- btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK);
-}
-
-static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb)
-{
- btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING);
-}
-#endif
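The four lock-mode constants above exist so a caller can remember how it currently holds a buffer and hand that state back to btrfs_tree_unlock_rw(), which dispatches to the matching unlock. A minimal sketch of that bookkeeping follows; the helper name and the will_block parameter are hypothetical, the other calls are declared in this header.

static int example_write_lock_eb(struct extent_buffer *eb, int will_block)
{
        int lock_mode = BTRFS_WRITE_LOCK;

        btrfs_tree_lock(eb);
        if (will_block) {
                /* switch to a blocking lock before doing anything that sleeps */
                btrfs_set_lock_blocking(eb);
                lock_mode = BTRFS_WRITE_LOCK_BLOCKING;
        }
        /* the caller later releases with btrfs_tree_unlock_rw(eb, lock_mode) */
        return lock_mode;
}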
diff --git a/ANDROID_3.4.5/fs/btrfs/lzo.c b/ANDROID_3.4.5/fs/btrfs/lzo.c
deleted file mode 100644
index 743b86fa..00000000
--- a/ANDROID_3.4.5/fs/btrfs/lzo.c
+++ /dev/null
@@ -1,427 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/err.h>
-#include <linux/sched.h>
-#include <linux/pagemap.h>
-#include <linux/bio.h>
-#include <linux/lzo.h>
-#include "compression.h"
-
-#define LZO_LEN 4
-
-struct workspace {
- void *mem;
- void *buf; /* where decompressed data goes */
- void *cbuf; /* where compressed data goes */
- struct list_head list;
-};
-
-static void lzo_free_workspace(struct list_head *ws)
-{
- struct workspace *workspace = list_entry(ws, struct workspace, list);
-
- vfree(workspace->buf);
- vfree(workspace->cbuf);
- vfree(workspace->mem);
- kfree(workspace);
-}
-
-static struct list_head *lzo_alloc_workspace(void)
-{
- struct workspace *workspace;
-
- workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
- if (!workspace)
- return ERR_PTR(-ENOMEM);
-
- workspace->mem = vmalloc(LZO1X_MEM_COMPRESS);
- workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
- workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
- if (!workspace->mem || !workspace->buf || !workspace->cbuf)
- goto fail;
-
- INIT_LIST_HEAD(&workspace->list);
-
- return &workspace->list;
-fail:
- lzo_free_workspace(&workspace->list);
- return ERR_PTR(-ENOMEM);
-}
-
-static inline void write_compress_length(char *buf, size_t len)
-{
- __le32 dlen;
-
- dlen = cpu_to_le32(len);
- memcpy(buf, &dlen, LZO_LEN);
-}
-
-static inline size_t read_compress_length(char *buf)
-{
- __le32 dlen;
-
- memcpy(&dlen, buf, LZO_LEN);
- return le32_to_cpu(dlen);
-}
-
-static int lzo_compress_pages(struct list_head *ws,
- struct address_space *mapping,
- u64 start, unsigned long len,
- struct page **pages,
- unsigned long nr_dest_pages,
- unsigned long *out_pages,
- unsigned long *total_in,
- unsigned long *total_out,
- unsigned long max_out)
-{
- struct workspace *workspace = list_entry(ws, struct workspace, list);
- int ret = 0;
- char *data_in;
- char *cpage_out;
- int nr_pages = 0;
- struct page *in_page = NULL;
- struct page *out_page = NULL;
- unsigned long bytes_left;
-
- size_t in_len;
- size_t out_len;
- char *buf;
- unsigned long tot_in = 0;
- unsigned long tot_out = 0;
- unsigned long pg_bytes_left;
- unsigned long out_offset;
- unsigned long bytes;
-
- *out_pages = 0;
- *total_out = 0;
- *total_in = 0;
-
- in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
- data_in = kmap(in_page);
-
- /*
- * store the size of all chunks of compressed data in
- * the first 4 bytes
- */
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
- if (out_page == NULL) {
- ret = -ENOMEM;
- goto out;
- }
- cpage_out = kmap(out_page);
- out_offset = LZO_LEN;
- tot_out = LZO_LEN;
- pages[0] = out_page;
- nr_pages = 1;
- pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
-
- /* compress at most one page of data each time */
- in_len = min(len, PAGE_CACHE_SIZE);
- while (tot_in < len) {
- ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
- &out_len, workspace->mem);
- if (ret != LZO_E_OK) {
- printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
- ret);
- ret = -1;
- goto out;
- }
-
- /* store the size of this chunk of compressed data */
- write_compress_length(cpage_out + out_offset, out_len);
- tot_out += LZO_LEN;
- out_offset += LZO_LEN;
- pg_bytes_left -= LZO_LEN;
-
- tot_in += in_len;
- tot_out += out_len;
-
- /* copy bytes from the working buffer into the pages */
- buf = workspace->cbuf;
- while (out_len) {
- bytes = min_t(unsigned long, pg_bytes_left, out_len);
-
- memcpy(cpage_out + out_offset, buf, bytes);
-
- out_len -= bytes;
- pg_bytes_left -= bytes;
- buf += bytes;
- out_offset += bytes;
-
- /*
- * we need another page for writing out.
- *
- * Note if there are fewer than 4 bytes left, we just skip to a
- * new page, so a chunk length header never straddles pages.
- */
- if ((out_len == 0 && pg_bytes_left < LZO_LEN) ||
- pg_bytes_left == 0) {
- if (pg_bytes_left) {
- memset(cpage_out + out_offset, 0,
- pg_bytes_left);
- tot_out += pg_bytes_left;
- }
-
- /* we're done, don't allocate new page */
- if (out_len == 0 && tot_in >= len)
- break;
-
- kunmap(out_page);
- if (nr_pages == nr_dest_pages) {
- out_page = NULL;
- ret = -1;
- goto out;
- }
-
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
- if (out_page == NULL) {
- ret = -ENOMEM;
- goto out;
- }
- cpage_out = kmap(out_page);
- pages[nr_pages++] = out_page;
-
- pg_bytes_left = PAGE_CACHE_SIZE;
- out_offset = 0;
- }
- }
-
- /* we're making it bigger, give up */
- if (tot_in > 8192 && tot_in < tot_out)
- goto out;
-
- /* we're all done */
- if (tot_in >= len)
- break;
-
- if (tot_out > max_out)
- break;
-
- bytes_left = len - tot_in;
- kunmap(in_page);
- page_cache_release(in_page);
-
- start += PAGE_CACHE_SIZE;
- in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
- data_in = kmap(in_page);
- in_len = min(bytes_left, PAGE_CACHE_SIZE);
- }
-
- if (tot_out > tot_in)
- goto out;
-
- /* store the size of all chunks of compressed data */
- cpage_out = kmap(pages[0]);
- write_compress_length(cpage_out, tot_out);
-
- kunmap(pages[0]);
-
- ret = 0;
- *total_out = tot_out;
- *total_in = tot_in;
-out:
- *out_pages = nr_pages;
- if (out_page)
- kunmap(out_page);
-
- if (in_page) {
- kunmap(in_page);
- page_cache_release(in_page);
- }
-
- return ret;
-}
-
-static int lzo_decompress_biovec(struct list_head *ws,
- struct page **pages_in,
- u64 disk_start,
- struct bio_vec *bvec,
- int vcnt,
- size_t srclen)
-{
- struct workspace *workspace = list_entry(ws, struct workspace, list);
- int ret = 0, ret2;
- char *data_in;
- unsigned long page_in_index = 0;
- unsigned long page_out_index = 0;
- unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
- PAGE_CACHE_SIZE;
- unsigned long buf_start;
- unsigned long buf_offset = 0;
- unsigned long bytes;
- unsigned long working_bytes;
- unsigned long pg_offset;
-
- size_t in_len;
- size_t out_len;
- unsigned long in_offset;
- unsigned long in_page_bytes_left;
- unsigned long tot_in;
- unsigned long tot_out;
- unsigned long tot_len;
- char *buf;
- bool may_late_unmap, need_unmap;
-
- data_in = kmap(pages_in[0]);
- tot_len = read_compress_length(data_in);
-
- tot_in = LZO_LEN;
- in_offset = LZO_LEN;
- tot_len = min_t(size_t, srclen, tot_len);
- in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
-
- tot_out = 0;
- pg_offset = 0;
-
- while (tot_in < tot_len) {
- in_len = read_compress_length(data_in + in_offset);
- in_page_bytes_left -= LZO_LEN;
- in_offset += LZO_LEN;
- tot_in += LZO_LEN;
-
- tot_in += in_len;
- working_bytes = in_len;
- may_late_unmap = need_unmap = false;
-
- /* fast path: avoid using the working buffer */
- if (in_page_bytes_left >= in_len) {
- buf = data_in + in_offset;
- bytes = in_len;
- may_late_unmap = true;
- goto cont;
- }
-
- /* copy bytes from the pages into the working buffer */
- buf = workspace->cbuf;
- buf_offset = 0;
- while (working_bytes) {
- bytes = min(working_bytes, in_page_bytes_left);
-
- memcpy(buf + buf_offset, data_in + in_offset, bytes);
- buf_offset += bytes;
-cont:
- working_bytes -= bytes;
- in_page_bytes_left -= bytes;
- in_offset += bytes;
-
- /* check if we need to pick another page */
- if ((working_bytes == 0 && in_page_bytes_left < LZO_LEN)
- || in_page_bytes_left == 0) {
- tot_in += in_page_bytes_left;
-
- if (working_bytes == 0 && tot_in >= tot_len)
- break;
-
- if (page_in_index + 1 >= total_pages_in) {
- ret = -1;
- goto done;
- }
-
- if (may_late_unmap)
- need_unmap = true;
- else
- kunmap(pages_in[page_in_index]);
-
- data_in = kmap(pages_in[++page_in_index]);
-
- in_page_bytes_left = PAGE_CACHE_SIZE;
- in_offset = 0;
- }
- }
-
- out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
- ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
- &out_len);
- if (need_unmap)
- kunmap(pages_in[page_in_index - 1]);
- if (ret != LZO_E_OK) {
- printk(KERN_WARNING "btrfs decompress failed\n");
- ret = -1;
- break;
- }
-
- buf_start = tot_out;
- tot_out += out_len;
-
- ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
- tot_out, disk_start,
- bvec, vcnt,
- &page_out_index, &pg_offset);
- if (ret2 == 0)
- break;
- }
-done:
- kunmap(pages_in[page_in_index]);
- return ret;
-}
-
-static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
- struct page *dest_page,
- unsigned long start_byte,
- size_t srclen, size_t destlen)
-{
- struct workspace *workspace = list_entry(ws, struct workspace, list);
- size_t in_len;
- size_t out_len;
- size_t tot_len;
- int ret = 0;
- char *kaddr;
- unsigned long bytes;
-
- BUG_ON(srclen < LZO_LEN);
-
- tot_len = read_compress_length(data_in);
- data_in += LZO_LEN;
-
- in_len = read_compress_length(data_in);
- data_in += LZO_LEN;
-
- out_len = PAGE_CACHE_SIZE;
- ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
- if (ret != LZO_E_OK) {
- printk(KERN_WARNING "btrfs decompress failed!\n");
- ret = -1;
- goto out;
- }
-
- if (out_len < start_byte) {
- ret = -1;
- goto out;
- }
-
- bytes = min_t(unsigned long, destlen, out_len - start_byte);
-
- kaddr = kmap_atomic(dest_page);
- memcpy(kaddr, workspace->buf + start_byte, bytes);
- kunmap_atomic(kaddr);
-out:
- return ret;
-}
-
-struct btrfs_compress_op btrfs_lzo_compress = {
- .alloc_workspace = lzo_alloc_workspace,
- .free_workspace = lzo_free_workspace,
- .compress_pages = lzo_compress_pages,
- .decompress_biovec = lzo_decompress_biovec,
- .decompress = lzo_decompress,
-};
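The stream that lzo_compress_pages() emits is framed simply: the first 4 bytes hold the little-endian total length of the stream (headers and padding included), and every compressed chunk is preceded by its own 4-byte length, with short page tails zero-padded so a length header never straddles a page. The userspace sketch below walks that framing over a flat buffer; the EX_* constants and ex_* helpers are hypothetical stand-ins, the mapped pages are modelled as one contiguous buffer, and le32_to_cpu() is replaced by assuming a little-endian host.

#include <stdint.h>
#include <stddef.h>
#include <string.h>

#define EX_PAGE_SIZE 4096u   /* stand-in for PAGE_CACHE_SIZE */
#define EX_LZO_LEN   4u      /* 4-byte length field, as in the code above */

/* mirrors read_compress_length(); assumes a little-endian host */
static uint32_t ex_read_len(const unsigned char *p)
{
        uint32_t v;

        memcpy(&v, p, EX_LZO_LEN);
        return v;
}

/*
 * Count the chunks in one compressed extent: skip the stream-wide length,
 * then hop from header to header, jumping over zero-padded page tails that
 * are too small to hold another header.
 */
static int ex_count_chunks(const unsigned char *buf, size_t buf_len)
{
        size_t off = EX_LZO_LEN;
        size_t tot_len;
        int chunks = 0;

        if (buf_len < EX_LZO_LEN)
                return -1;
        tot_len = ex_read_len(buf);
        if (tot_len > buf_len)
                return -1;

        while (off + EX_LZO_LEN <= tot_len) {
                size_t left_in_page = EX_PAGE_SIZE - (off % EX_PAGE_SIZE);
                uint32_t chunk_len;

                if (left_in_page < EX_LZO_LEN) {
                        off += left_in_page;   /* padded tail, go to next page */
                        continue;
                }
                chunk_len = ex_read_len(buf + off);
                off += EX_LZO_LEN + chunk_len;
                chunks++;
        }
        return chunks;
}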
diff --git a/ANDROID_3.4.5/fs/btrfs/ordered-data.c b/ANDROID_3.4.5/fs/btrfs/ordered-data.c
deleted file mode 100644
index bbf6d0d9..00000000
--- a/ANDROID_3.4.5/fs/btrfs/ordered-data.c
+++ /dev/null
@@ -1,977 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/slab.h>
-#include <linux/blkdev.h>
-#include <linux/writeback.h>
-#include <linux/pagevec.h>
-#include "ctree.h"
-#include "transaction.h"
-#include "btrfs_inode.h"
-#include "extent_io.h"
-
-static u64 entry_end(struct btrfs_ordered_extent *entry)
-{
- if (entry->file_offset + entry->len < entry->file_offset)
- return (u64)-1;
- return entry->file_offset + entry->len;
-}
-
-/* returns NULL if the insertion worked, or the conflicting node it found
- * in the tree
- */
-static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
- struct rb_node *node)
-{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct btrfs_ordered_extent *entry;
-
- while (*p) {
- parent = *p;
- entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);
-
- if (file_offset < entry->file_offset)
- p = &(*p)->rb_left;
- else if (file_offset >= entry_end(entry))
- p = &(*p)->rb_right;
- else
- return parent;
- }
-
- rb_link_node(node, parent, p);
- rb_insert_color(node, root);
- return NULL;
-}
-
-static void ordered_data_tree_panic(struct inode *inode, int errno,
- u64 offset)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset "
- "%llu\n", (unsigned long long)offset);
-}
-
-/*
- * look for a given offset in the tree, and if it can't be found return the
- * first lesser offset
- */
-static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
- struct rb_node **prev_ret)
-{
- struct rb_node *n = root->rb_node;
- struct rb_node *prev = NULL;
- struct rb_node *test;
- struct btrfs_ordered_extent *entry;
- struct btrfs_ordered_extent *prev_entry = NULL;
-
- while (n) {
- entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
- prev = n;
- prev_entry = entry;
-
- if (file_offset < entry->file_offset)
- n = n->rb_left;
- else if (file_offset >= entry_end(entry))
- n = n->rb_right;
- else
- return n;
- }
- if (!prev_ret)
- return NULL;
-
- while (prev && file_offset >= entry_end(prev_entry)) {
- test = rb_next(prev);
- if (!test)
- break;
- prev_entry = rb_entry(test, struct btrfs_ordered_extent,
- rb_node);
- if (file_offset < entry_end(prev_entry))
- break;
-
- prev = test;
- }
- if (prev)
- prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
- rb_node);
- while (prev && file_offset < entry_end(prev_entry)) {
- test = rb_prev(prev);
- if (!test)
- break;
- prev_entry = rb_entry(test, struct btrfs_ordered_extent,
- rb_node);
- prev = test;
- }
- *prev_ret = prev;
- return NULL;
-}
-
-/*
- * helper to check if a given offset is inside a given entry
- */
-static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
-{
- if (file_offset < entry->file_offset ||
- entry->file_offset + entry->len <= file_offset)
- return 0;
- return 1;
-}
-
-static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset,
- u64 len)
-{
- if (file_offset + len <= entry->file_offset ||
- entry->file_offset + entry->len <= file_offset)
- return 0;
- return 1;
-}
-
-/*
- * look for the first ordered struct that has this offset, otherwise
- * the first one less than this offset
- */
-static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
- u64 file_offset)
-{
- struct rb_root *root = &tree->tree;
- struct rb_node *prev = NULL;
- struct rb_node *ret;
- struct btrfs_ordered_extent *entry;
-
- if (tree->last) {
- entry = rb_entry(tree->last, struct btrfs_ordered_extent,
- rb_node);
- if (offset_in_entry(entry, file_offset))
- return tree->last;
- }
- ret = __tree_search(root, file_offset, &prev);
- if (!ret)
- ret = prev;
- if (ret)
- tree->last = ret;
- return ret;
-}
-
-/* allocate and add a new ordered_extent into the per-inode tree.
- * file_offset is the logical offset in the file
- *
- * start is the disk block number of an extent already reserved in the
- * extent allocation tree
- *
- * len is the length of the extent
- *
- * The tree is given a single reference on the ordered extent that was
- * inserted.
- */
-static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
- u64 start, u64 len, u64 disk_len,
- int type, int dio, int compress_type)
-{
- struct btrfs_ordered_inode_tree *tree;
- struct rb_node *node;
- struct btrfs_ordered_extent *entry;
-
- tree = &BTRFS_I(inode)->ordered_tree;
- entry = kzalloc(sizeof(*entry), GFP_NOFS);
- if (!entry)
- return -ENOMEM;
-
- entry->file_offset = file_offset;
- entry->start = start;
- entry->len = len;
- entry->disk_len = disk_len;
- entry->bytes_left = len;
- entry->inode = inode;
- entry->compress_type = compress_type;
- if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
- set_bit(type, &entry->flags);
-
- if (dio)
- set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
-
- /* one ref for the tree */
- atomic_set(&entry->refs, 1);
- init_waitqueue_head(&entry->wait);
- INIT_LIST_HEAD(&entry->list);
- INIT_LIST_HEAD(&entry->root_extent_list);
-
- trace_btrfs_ordered_extent_add(inode, entry);
-
- spin_lock(&tree->lock);
- node = tree_insert(&tree->tree, file_offset,
- &entry->rb_node);
- if (node)
- ordered_data_tree_panic(inode, -EEXIST, file_offset);
- spin_unlock(&tree->lock);
-
- spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
- list_add_tail(&entry->root_extent_list,
- &BTRFS_I(inode)->root->fs_info->ordered_extents);
- spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
-
- return 0;
-}
-
-int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
- u64 start, u64 len, u64 disk_len, int type)
-{
- return __btrfs_add_ordered_extent(inode, file_offset, start, len,
- disk_len, type, 0,
- BTRFS_COMPRESS_NONE);
-}
-
-int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
- u64 start, u64 len, u64 disk_len, int type)
-{
- return __btrfs_add_ordered_extent(inode, file_offset, start, len,
- disk_len, type, 1,
- BTRFS_COMPRESS_NONE);
-}
-
-int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
- u64 start, u64 len, u64 disk_len,
- int type, int compress_type)
-{
- return __btrfs_add_ordered_extent(inode, file_offset, start, len,
- disk_len, type, 0,
- compress_type);
-}
-
-/*
- * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
- * when an ordered extent is finished. If the list covers more than one
- * ordered extent, it is split between them.
- */
-void btrfs_add_ordered_sum(struct inode *inode,
- struct btrfs_ordered_extent *entry,
- struct btrfs_ordered_sum *sum)
-{
- struct btrfs_ordered_inode_tree *tree;
-
- tree = &BTRFS_I(inode)->ordered_tree;
- spin_lock(&tree->lock);
- list_add_tail(&sum->list, &entry->list);
- spin_unlock(&tree->lock);
-}
-
-/*
- * this is used to account for finished IO across a given range
- * of the file. The IO may span ordered extents. If
- * a given ordered_extent is completely done, 1 is returned, otherwise
- * 0.
- *
- * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
- * to make sure this function only returns 1 once for a given ordered extent.
- *
- * file_offset is updated to one byte past the range that is recorded as
- * complete. This allows you to walk forward in the file.
- */
-int btrfs_dec_test_first_ordered_pending(struct inode *inode,
- struct btrfs_ordered_extent **cached,
- u64 *file_offset, u64 io_size)
-{
- struct btrfs_ordered_inode_tree *tree;
- struct rb_node *node;
- struct btrfs_ordered_extent *entry = NULL;
- int ret;
- u64 dec_end;
- u64 dec_start;
- u64 to_dec;
-
- tree = &BTRFS_I(inode)->ordered_tree;
- spin_lock(&tree->lock);
- node = tree_search(tree, *file_offset);
- if (!node) {
- ret = 1;
- goto out;
- }
-
- entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
- if (!offset_in_entry(entry, *file_offset)) {
- ret = 1;
- goto out;
- }
-
- dec_start = max(*file_offset, entry->file_offset);
- dec_end = min(*file_offset + io_size, entry->file_offset +
- entry->len);
- *file_offset = dec_end;
- if (dec_start > dec_end) {
- printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
- (unsigned long long)dec_start,
- (unsigned long long)dec_end);
- }
- to_dec = dec_end - dec_start;
- if (to_dec > entry->bytes_left) {
- printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
- (unsigned long long)entry->bytes_left,
- (unsigned long long)to_dec);
- }
- entry->bytes_left -= to_dec;
- if (entry->bytes_left == 0)
- ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
- else
- ret = 1;
-out:
- if (!ret && cached && entry) {
- *cached = entry;
- atomic_inc(&entry->refs);
- }
- spin_unlock(&tree->lock);
- return ret == 0;
-}
-
-/*
- * this is used to account for finished IO across a given range
- * of the file. The IO should not span ordered extents. If
- * a given ordered_extent is completely done, 1 is returned, otherwise
- * 0.
- *
- * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
- * to make sure this function only returns 1 once for a given ordered extent.
- */
-int btrfs_dec_test_ordered_pending(struct inode *inode,
- struct btrfs_ordered_extent **cached,
- u64 file_offset, u64 io_size)
-{
- struct btrfs_ordered_inode_tree *tree;
- struct rb_node *node;
- struct btrfs_ordered_extent *entry = NULL;
- int ret;
-
- tree = &BTRFS_I(inode)->ordered_tree;
- spin_lock(&tree->lock);
- node = tree_search(tree, file_offset);
- if (!node) {
- ret = 1;
- goto out;
- }
-
- entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
- if (!offset_in_entry(entry, file_offset)) {
- ret = 1;
- goto out;
- }
-
- if (io_size > entry->bytes_left) {
- printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
- (unsigned long long)entry->bytes_left,
- (unsigned long long)io_size);
- }
- entry->bytes_left -= io_size;
- if (entry->bytes_left == 0)
- ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
- else
- ret = 1;
-out:
- if (!ret && cached && entry) {
- *cached = entry;
- atomic_inc(&entry->refs);
- }
- spin_unlock(&tree->lock);
- return ret == 0;
-}
-
-/*
- * used to drop a reference on an ordered extent. This will free
- * the extent if the last reference is dropped
- */
-void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
-{
- struct list_head *cur;
- struct btrfs_ordered_sum *sum;
-
- trace_btrfs_ordered_extent_put(entry->inode, entry);
-
- if (atomic_dec_and_test(&entry->refs)) {
- while (!list_empty(&entry->list)) {
- cur = entry->list.next;
- sum = list_entry(cur, struct btrfs_ordered_sum, list);
- list_del(&sum->list);
- kfree(sum);
- }
- kfree(entry);
- }
-}
-
-/*
- * remove an ordered extent from the tree. No references are dropped
- * and you must wake_up entry->wait. You must hold the tree lock
- * while you call this function.
- */
-static void __btrfs_remove_ordered_extent(struct inode *inode,
- struct btrfs_ordered_extent *entry)
-{
- struct btrfs_ordered_inode_tree *tree;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct rb_node *node;
-
- tree = &BTRFS_I(inode)->ordered_tree;
- node = &entry->rb_node;
- rb_erase(node, &tree->tree);
- tree->last = NULL;
- set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
-
- spin_lock(&root->fs_info->ordered_extent_lock);
- list_del_init(&entry->root_extent_list);
-
- trace_btrfs_ordered_extent_remove(inode, entry);
-
- /*
- * we have no more ordered extents for this inode and
- * no dirty pages. We can safely remove the inode from the
- * list of ordered operations
- */
- if (RB_EMPTY_ROOT(&tree->tree) &&
- !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
- list_del_init(&BTRFS_I(inode)->ordered_operations);
- }
- spin_unlock(&root->fs_info->ordered_extent_lock);
-}
-
-/*
- * remove an ordered extent from the tree. No references are dropped
- * but any waiters are woken.
- */
-void btrfs_remove_ordered_extent(struct inode *inode,
- struct btrfs_ordered_extent *entry)
-{
- struct btrfs_ordered_inode_tree *tree;
-
- tree = &BTRFS_I(inode)->ordered_tree;
- spin_lock(&tree->lock);
- __btrfs_remove_ordered_extent(inode, entry);
- spin_unlock(&tree->lock);
- wake_up(&entry->wait);
-}
-
-/*
- * wait for all the ordered extents in a root. This is done when balancing
- * space between drives.
- */
-void btrfs_wait_ordered_extents(struct btrfs_root *root,
- int nocow_only, int delay_iput)
-{
- struct list_head splice;
- struct list_head *cur;
- struct btrfs_ordered_extent *ordered;
- struct inode *inode;
-
- INIT_LIST_HEAD(&splice);
-
- spin_lock(&root->fs_info->ordered_extent_lock);
- list_splice_init(&root->fs_info->ordered_extents, &splice);
- while (!list_empty(&splice)) {
- cur = splice.next;
- ordered = list_entry(cur, struct btrfs_ordered_extent,
- root_extent_list);
- if (nocow_only &&
- !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) &&
- !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
- list_move(&ordered->root_extent_list,
- &root->fs_info->ordered_extents);
- cond_resched_lock(&root->fs_info->ordered_extent_lock);
- continue;
- }
-
- list_del_init(&ordered->root_extent_list);
- atomic_inc(&ordered->refs);
-
- /*
- * the inode may be getting freed (in sys_unlink path).
- */
- inode = igrab(ordered->inode);
-
- spin_unlock(&root->fs_info->ordered_extent_lock);
-
- if (inode) {
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- if (delay_iput)
- btrfs_add_delayed_iput(inode);
- else
- iput(inode);
- } else {
- btrfs_put_ordered_extent(ordered);
- }
-
- spin_lock(&root->fs_info->ordered_extent_lock);
- }
- spin_unlock(&root->fs_info->ordered_extent_lock);
-}
-
-/*
- * this is used during transaction commit to write all the inodes
- * added to the ordered operation list. These files must be fully on
- * disk before the transaction commits.
- *
- * we have two modes here, one is to just start the IO via filemap_flush
- * and the other is to wait for all the io. When we wait, we have an
- * extra check to make sure the ordered operation list really is empty
- * before we return
- */
-void btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
-{
- struct btrfs_inode *btrfs_inode;
- struct inode *inode;
- struct list_head splice;
-
- INIT_LIST_HEAD(&splice);
-
- mutex_lock(&root->fs_info->ordered_operations_mutex);
- spin_lock(&root->fs_info->ordered_extent_lock);
-again:
- list_splice_init(&root->fs_info->ordered_operations, &splice);
-
- while (!list_empty(&splice)) {
- btrfs_inode = list_entry(splice.next, struct btrfs_inode,
- ordered_operations);
-
- inode = &btrfs_inode->vfs_inode;
-
- list_del_init(&btrfs_inode->ordered_operations);
-
- /*
- * the inode may be getting freed (in sys_unlink path).
- */
- inode = igrab(inode);
-
- if (!wait && inode) {
- list_add_tail(&BTRFS_I(inode)->ordered_operations,
- &root->fs_info->ordered_operations);
- }
- spin_unlock(&root->fs_info->ordered_extent_lock);
-
- if (inode) {
- if (wait)
- btrfs_wait_ordered_range(inode, 0, (u64)-1);
- else
- filemap_flush(inode->i_mapping);
- btrfs_add_delayed_iput(inode);
- }
-
- cond_resched();
- spin_lock(&root->fs_info->ordered_extent_lock);
- }
- if (wait && !list_empty(&root->fs_info->ordered_operations))
- goto again;
-
- spin_unlock(&root->fs_info->ordered_extent_lock);
- mutex_unlock(&root->fs_info->ordered_operations_mutex);
-}
-
-/*
- * Used to start IO or wait for a given ordered extent to finish.
- *
- * If wait is one, this effectively waits on page writeback for all the pages
- * in the extent, and it waits on the io completion code to insert
- * metadata into the btree corresponding to the extent
- */
-void btrfs_start_ordered_extent(struct inode *inode,
- struct btrfs_ordered_extent *entry,
- int wait)
-{
- u64 start = entry->file_offset;
- u64 end = start + entry->len - 1;
-
- trace_btrfs_ordered_extent_start(inode, entry);
-
- /*
- * pages in the range can be dirty, clean or writeback. We
- * start IO on any dirty ones so the wait doesn't stall waiting
- * for pdflush to find them
- */
- if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
- filemap_fdatawrite_range(inode->i_mapping, start, end);
- if (wait) {
- wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
- &entry->flags));
- }
-}
-
-/*
- * Used to wait on ordered extents across a large range of bytes.
- */
-void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
-{
- u64 end;
- u64 orig_end;
- struct btrfs_ordered_extent *ordered;
- int found;
-
- if (start + len < start) {
- orig_end = INT_LIMIT(loff_t);
- } else {
- orig_end = start + len - 1;
- if (orig_end > INT_LIMIT(loff_t))
- orig_end = INT_LIMIT(loff_t);
- }
-again:
- /* start IO across the range first to instantiate any delalloc
- * extents
- */
- filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
-
- /* The compression code will leave pages locked but return from
- * writepage without setting the page writeback. Starting again
- * with WB_SYNC_ALL will end up waiting for the IO to actually start.
- */
- filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
-
- filemap_fdatawait_range(inode->i_mapping, start, orig_end);
-
- end = orig_end;
- found = 0;
- while (1) {
- ordered = btrfs_lookup_first_ordered_extent(inode, end);
- if (!ordered)
- break;
- if (ordered->file_offset > orig_end) {
- btrfs_put_ordered_extent(ordered);
- break;
- }
- if (ordered->file_offset + ordered->len < start) {
- btrfs_put_ordered_extent(ordered);
- break;
- }
- found++;
- btrfs_start_ordered_extent(inode, ordered, 1);
- end = ordered->file_offset;
- btrfs_put_ordered_extent(ordered);
- if (end == 0 || end == start)
- break;
- end--;
- }
- if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
- EXTENT_DELALLOC, 0, NULL)) {
- schedule_timeout(1);
- goto again;
- }
-}
-
-/*
- * find an ordered extent corresponding to file_offset. return NULL if
- * nothing is found, otherwise take a reference on the extent and return it
- */
-struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
- u64 file_offset)
-{
- struct btrfs_ordered_inode_tree *tree;
- struct rb_node *node;
- struct btrfs_ordered_extent *entry = NULL;
-
- tree = &BTRFS_I(inode)->ordered_tree;
- spin_lock(&tree->lock);
- node = tree_search(tree, file_offset);
- if (!node)
- goto out;
-
- entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
- if (!offset_in_entry(entry, file_offset))
- entry = NULL;
- if (entry)
- atomic_inc(&entry->refs);
-out:
- spin_unlock(&tree->lock);
- return entry;
-}
-
-/* Since the DIO code tries to lock a wide area we need to look for any ordered
- * extents that exist in the range, rather than just the start of the range.
- */
-struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
- u64 file_offset,
- u64 len)
-{
- struct btrfs_ordered_inode_tree *tree;
- struct rb_node *node;
- struct btrfs_ordered_extent *entry = NULL;
-
- tree = &BTRFS_I(inode)->ordered_tree;
- spin_lock(&tree->lock);
- node = tree_search(tree, file_offset);
- if (!node) {
- node = tree_search(tree, file_offset + len);
- if (!node)
- goto out;
- }
-
- while (1) {
- entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
- if (range_overlaps(entry, file_offset, len))
- break;
-
- if (entry->file_offset >= file_offset + len) {
- entry = NULL;
- break;
- }
- entry = NULL;
- node = rb_next(node);
- if (!node)
- break;
- }
-out:
- if (entry)
- atomic_inc(&entry->refs);
- spin_unlock(&tree->lock);
- return entry;
-}
-
-/*
- * lookup and return any extent before 'file_offset'. NULL is returned
- * if none is found
- */
-struct btrfs_ordered_extent *
-btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
-{
- struct btrfs_ordered_inode_tree *tree;
- struct rb_node *node;
- struct btrfs_ordered_extent *entry = NULL;
-
- tree = &BTRFS_I(inode)->ordered_tree;
- spin_lock(&tree->lock);
- node = tree_search(tree, file_offset);
- if (!node)
- goto out;
-
- entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
- atomic_inc(&entry->refs);
-out:
- spin_unlock(&tree->lock);
- return entry;
-}
-
-/*
- * After an extent is done, call this to conditionally update the on disk
- * i_size. i_size is updated to cover any fully written part of the file.
- */
-int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
- struct btrfs_ordered_extent *ordered)
-{
- struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- u64 disk_i_size;
- u64 new_i_size;
- u64 i_size_test;
- u64 i_size = i_size_read(inode);
- struct rb_node *node;
- struct rb_node *prev = NULL;
- struct btrfs_ordered_extent *test;
- int ret = 1;
-
- if (ordered)
- offset = entry_end(ordered);
- else
- offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
-
- spin_lock(&tree->lock);
- disk_i_size = BTRFS_I(inode)->disk_i_size;
-
- /* truncate file */
- if (disk_i_size > i_size) {
- BTRFS_I(inode)->disk_i_size = i_size;
- ret = 0;
- goto out;
- }
-
- /*
- * if the disk i_size is already at the inode->i_size, or
- * this ordered extent is inside the disk i_size, we're done
- */
- if (disk_i_size == i_size || offset <= disk_i_size) {
- goto out;
- }
-
- /*
- * we can't update the disk_isize if there are delalloc bytes
- * between disk_i_size and this ordered extent
- */
- if (test_range_bit(io_tree, disk_i_size, offset - 1,
- EXTENT_DELALLOC, 0, NULL)) {
- goto out;
- }
- /*
- * walk backward from this ordered extent to disk_i_size.
- * if we find an ordered extent then we can't update disk i_size
- * yet
- */
- if (ordered) {
- node = rb_prev(&ordered->rb_node);
- } else {
- prev = tree_search(tree, offset);
- /*
- * we insert file extents without involving ordered struct,
- * so there should be no ordered struct covering this offset
- */
- if (prev) {
- test = rb_entry(prev, struct btrfs_ordered_extent,
- rb_node);
- BUG_ON(offset_in_entry(test, offset));
- }
- node = prev;
- }
- while (node) {
- test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
- if (test->file_offset + test->len <= disk_i_size)
- break;
- if (test->file_offset >= i_size)
- break;
- if (test->file_offset >= disk_i_size)
- goto out;
- node = rb_prev(node);
- }
- new_i_size = min_t(u64, offset, i_size);
-
- /*
- * at this point, we know we can safely update i_size to at least
- * the offset from this ordered extent. But, we need to
- * walk forward and see if ios from higher up in the file have
- * finished.
- */
- if (ordered) {
- node = rb_next(&ordered->rb_node);
- } else {
- if (prev)
- node = rb_next(prev);
- else
- node = rb_first(&tree->tree);
- }
- i_size_test = 0;
- if (node) {
- /*
- * do we have an area where IO might have finished
- * between our ordered extent and the next one.
- */
- test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
- if (test->file_offset > offset)
- i_size_test = test->file_offset;
- } else {
- i_size_test = i_size;
- }
-
- /*
- * i_size_test is the end of a region after this ordered
- * extent where there are no ordered extents. As long as there
- * are no delalloc bytes in this area, it is safe to update
- * disk_i_size to the end of the region.
- */
- if (i_size_test > offset &&
- !test_range_bit(io_tree, offset, i_size_test - 1,
- EXTENT_DELALLOC, 0, NULL)) {
- new_i_size = min_t(u64, i_size_test, i_size);
- }
- BTRFS_I(inode)->disk_i_size = new_i_size;
- ret = 0;
-out:
- /*
- * we need to remove the ordered extent with the tree lock held
- * so that other people calling this function don't find our fully
- * processed ordered entry and skip updating the i_size
- */
- if (ordered)
- __btrfs_remove_ordered_extent(inode, ordered);
- spin_unlock(&tree->lock);
- if (ordered)
- wake_up(&ordered->wait);
- return ret;
-}
-
-/*
- * search the ordered extents for one corresponding to 'offset' and
- * try to find a checksum. This is used because we allow pages to
- * be reclaimed before their checksum is actually put into the btree
- */
-int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
- u32 *sum)
-{
- struct btrfs_ordered_sum *ordered_sum;
- struct btrfs_sector_sum *sector_sums;
- struct btrfs_ordered_extent *ordered;
- struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
- unsigned long num_sectors;
- unsigned long i;
- u32 sectorsize = BTRFS_I(inode)->root->sectorsize;
- int ret = 1;
-
- ordered = btrfs_lookup_ordered_extent(inode, offset);
- if (!ordered)
- return 1;
-
- spin_lock(&tree->lock);
- list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
- if (disk_bytenr >= ordered_sum->bytenr) {
- num_sectors = ordered_sum->len / sectorsize;
- sector_sums = ordered_sum->sums;
- for (i = 0; i < num_sectors; i++) {
- if (sector_sums[i].bytenr == disk_bytenr) {
- *sum = sector_sums[i].sum;
- ret = 0;
- goto out;
- }
- }
- }
- }
-out:
- spin_unlock(&tree->lock);
- btrfs_put_ordered_extent(ordered);
- return ret;
-}
-
-
-/*
- * add a given inode to the list of inodes that must be fully on
- * disk before a transaction commit finishes.
- *
- * This basically gives us the ext3 style data=ordered mode, and it is mostly
- * used to make sure renamed files are fully on disk.
- *
- * It is a noop if the inode is already fully on disk.
- *
- * If trans is not null, we'll do a friendly check for a transaction that
- * is already flushing things and force the IO down ourselves.
- */
-void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode)
-{
- u64 last_mod;
-
- last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
-
- /*
- * if this file hasn't been changed since the last transaction
- * commit, we can safely return without doing anything
- */
- if (last_mod < root->fs_info->last_trans_committed)
- return;
-
- /*
- * the transaction is already committing. Just start the IO and
- * don't bother with all of this list nonsense
- */
- if (trans && root->fs_info->running_transaction->blocked) {
- btrfs_wait_ordered_range(inode, 0, (u64)-1);
- return;
- }
-
- spin_lock(&root->fs_info->ordered_extent_lock);
- if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
- list_add_tail(&BTRFS_I(inode)->ordered_operations,
- &root->fs_info->ordered_operations);
- }
- spin_unlock(&root->fs_info->ordered_extent_lock);
-}
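Tying the pieces above together: the write path creates an ordered extent with btrfs_add_ordered_extent(), and the IO-completion path decrements it until the whole range is on disk. The sketch below is an illustrative completion-side caller modelled loosely on that flow, not part of the deleted file; the example_* name is hypothetical, the functions it calls are the ones defined above.

static void example_finish_io(struct inode *inode, u64 file_offset, u64 io_size)
{
        struct btrfs_ordered_extent *ordered = NULL;

        /* returns 1 exactly once, when the last byte of the extent is written */
        if (!btrfs_dec_test_ordered_pending(inode, &ordered, file_offset, io_size))
                return;

        /* checksum and file extent items would be inserted here */

        btrfs_remove_ordered_extent(inode, ordered);

        /* drop the reference handed back through 'ordered' above ... */
        btrfs_put_ordered_extent(ordered);
        /* ... and the one the tree has held since __btrfs_add_ordered_extent() */
        btrfs_put_ordered_extent(ordered);
}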
diff --git a/ANDROID_3.4.5/fs/btrfs/ordered-data.h b/ANDROID_3.4.5/fs/btrfs/ordered-data.h
deleted file mode 100644
index c355ad4d..00000000
--- a/ANDROID_3.4.5/fs/btrfs/ordered-data.h
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __BTRFS_ORDERED_DATA__
-#define __BTRFS_ORDERED_DATA__
-
-/* one of these per inode */
-struct btrfs_ordered_inode_tree {
- spinlock_t lock;
- struct rb_root tree;
- struct rb_node *last;
-};
-
-/*
- * these are used to collect checksums done just before bio submission.
- * They are attached via a list into the ordered extent, and
- * checksum items are inserted into the tree after all the blocks in
- * the ordered extent are on disk
- */
-struct btrfs_sector_sum {
- /* bytenr on disk */
- u64 bytenr;
- u32 sum;
-};
-
-struct btrfs_ordered_sum {
- /* bytenr is the start of this extent on disk */
- u64 bytenr;
-
- /*
- * this is the length in bytes covered by the sums array below.
- */
- unsigned long len;
- struct list_head list;
- /* last field is a variable length array of btrfs_sector_sums */
- struct btrfs_sector_sum sums[];
-};
-
-/*
- * bits for the flags field:
- *
- * BTRFS_ORDERED_IO_DONE is set when all of the blocks are written.
- * It is used to make sure metadata is inserted into the tree only once
- * per extent.
- *
- * BTRFS_ORDERED_COMPLETE is set when the extent is removed from the
- * rbtree, just before waking any waiters. It is used to indicate the
- * IO is done and any metadata is inserted into the tree.
- */
-#define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */
-
-#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */
-
-#define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */
-
-#define BTRFS_ORDERED_COMPRESSED 3 /* writing a zlib compressed extent */
-
-#define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */
-
-#define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */
-
-struct btrfs_ordered_extent {
- /* logical offset in the file */
- u64 file_offset;
-
- /* disk byte number */
- u64 start;
-
- /* ram length of the extent in bytes */
- u64 len;
-
- /* extent length on disk */
- u64 disk_len;
-
- /* number of bytes that still need writing */
- u64 bytes_left;
-
- /* flags (described above) */
- unsigned long flags;
-
- /* compression algorithm */
- int compress_type;
-
- /* reference count */
- atomic_t refs;
-
- /* the inode we belong to */
- struct inode *inode;
-
- /* list of checksums for insertion when the extent io is done */
- struct list_head list;
-
- /* used to wait for the BTRFS_ORDERED_COMPLETE bit */
- wait_queue_head_t wait;
-
- /* our friendly rbtree entry */
- struct rb_node rb_node;
-
- /* a per root list of all the pending ordered extents */
- struct list_head root_extent_list;
-};
-
-
-/*
- * calculates the total size you need to allocate for an ordered sum
- * structure spanning 'bytes' in the file
- */
-static inline int btrfs_ordered_sum_size(struct btrfs_root *root,
- unsigned long bytes)
-{
- unsigned long num_sectors = (bytes + root->sectorsize - 1) /
- root->sectorsize;
- num_sectors++;
- return sizeof(struct btrfs_ordered_sum) +
- num_sectors * sizeof(struct btrfs_sector_sum);
-}
-
-static inline void
-btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
-{
- spin_lock_init(&t->lock);
- t->tree = RB_ROOT;
- t->last = NULL;
-}
-
-void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
-void btrfs_remove_ordered_extent(struct inode *inode,
- struct btrfs_ordered_extent *entry);
-int btrfs_dec_test_ordered_pending(struct inode *inode,
- struct btrfs_ordered_extent **cached,
- u64 file_offset, u64 io_size);
-int btrfs_dec_test_first_ordered_pending(struct inode *inode,
- struct btrfs_ordered_extent **cached,
- u64 *file_offset, u64 io_size);
-int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
- u64 start, u64 len, u64 disk_len, int type);
-int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
- u64 start, u64 len, u64 disk_len, int type);
-int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
- u64 start, u64 len, u64 disk_len,
- int type, int compress_type);
-void btrfs_add_ordered_sum(struct inode *inode,
- struct btrfs_ordered_extent *entry,
- struct btrfs_ordered_sum *sum);
-struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
- u64 file_offset);
-void btrfs_start_ordered_extent(struct inode *inode,
- struct btrfs_ordered_extent *entry, int wait);
-void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
-struct btrfs_ordered_extent *
-btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
-struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
- u64 file_offset,
- u64 len);
-int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
- struct btrfs_ordered_extent *ordered);
-int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
-void btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
-void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode);
-void btrfs_wait_ordered_extents(struct btrfs_root *root,
- int nocow_only, int delay_iput);
-#endif
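As a worked example of btrfs_ordered_sum_size(): with a 4096-byte sectorsize, a 16384-byte range gives num_sectors = (16384 + 4095) / 4096 = 4, the extra slot bumps that to 5, so the allocation is sizeof(struct btrfs_ordered_sum) + 5 * sizeof(struct btrfs_sector_sum).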
diff --git a/ANDROID_3.4.5/fs/btrfs/orphan.c b/ANDROID_3.4.5/fs/btrfs/orphan.c
deleted file mode 100644
index 24cad169..00000000
--- a/ANDROID_3.4.5/fs/btrfs/orphan.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (C) 2008 Red Hat. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include "ctree.h"
-#include "disk-io.h"
-
-int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 offset)
-{
- struct btrfs_path *path;
- struct btrfs_key key;
- int ret = 0;
-
- key.objectid = BTRFS_ORPHAN_OBJECTID;
- btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
- key.offset = offset;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
-
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 offset)
-{
- struct btrfs_path *path;
- struct btrfs_key key;
- int ret = 0;
-
- key.objectid = BTRFS_ORPHAN_OBJECTID;
- btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
- key.offset = offset;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0)
- goto out;
- if (ret) { /* JDM: Really? */
- ret = -ENOENT;
- goto out;
- }
-
- ret = btrfs_del_item(trans, root, path);
-
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset)
-{
- struct btrfs_path *path;
- struct btrfs_key key;
- int ret;
-
- key.objectid = BTRFS_ORPHAN_OBJECTID;
- key.type = BTRFS_ORPHAN_ITEM_KEY;
- key.offset = offset;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-
- btrfs_free_path(path);
- return ret;
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/print-tree.c b/ANDROID_3.4.5/fs/btrfs/print-tree.c
deleted file mode 100644
index f38e4524..00000000
--- a/ANDROID_3.4.5/fs/btrfs/print-tree.c
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include "ctree.h"
-#include "disk-io.h"
-#include "print-tree.h"
-
-static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk)
-{
- int num_stripes = btrfs_chunk_num_stripes(eb, chunk);
- int i;
- printk(KERN_INFO "\t\tchunk length %llu owner %llu type %llu "
- "num_stripes %d\n",
- (unsigned long long)btrfs_chunk_length(eb, chunk),
- (unsigned long long)btrfs_chunk_owner(eb, chunk),
- (unsigned long long)btrfs_chunk_type(eb, chunk),
- num_stripes);
- for (i = 0 ; i < num_stripes ; i++) {
- printk(KERN_INFO "\t\t\tstripe %d devid %llu offset %llu\n", i,
- (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i),
- (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i));
- }
-}
-static void print_dev_item(struct extent_buffer *eb,
- struct btrfs_dev_item *dev_item)
-{
- printk(KERN_INFO "\t\tdev item devid %llu "
- "total_bytes %llu bytes used %llu\n",
- (unsigned long long)btrfs_device_id(eb, dev_item),
- (unsigned long long)btrfs_device_total_bytes(eb, dev_item),
- (unsigned long long)btrfs_device_bytes_used(eb, dev_item));
-}
-static void print_extent_data_ref(struct extent_buffer *eb,
- struct btrfs_extent_data_ref *ref)
-{
- printk(KERN_INFO "\t\textent data backref root %llu "
- "objectid %llu offset %llu count %u\n",
- (unsigned long long)btrfs_extent_data_ref_root(eb, ref),
- (unsigned long long)btrfs_extent_data_ref_objectid(eb, ref),
- (unsigned long long)btrfs_extent_data_ref_offset(eb, ref),
- btrfs_extent_data_ref_count(eb, ref));
-}
-
-static void print_extent_item(struct extent_buffer *eb, int slot)
-{
- struct btrfs_extent_item *ei;
- struct btrfs_extent_inline_ref *iref;
- struct btrfs_extent_data_ref *dref;
- struct btrfs_shared_data_ref *sref;
- struct btrfs_disk_key key;
- unsigned long end;
- unsigned long ptr;
- int type;
- u32 item_size = btrfs_item_size_nr(eb, slot);
- u64 flags;
- u64 offset;
-
- if (item_size < sizeof(*ei)) {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- struct btrfs_extent_item_v0 *ei0;
- BUG_ON(item_size != sizeof(*ei0));
- ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
- printk(KERN_INFO "\t\textent refs %u\n",
- btrfs_extent_refs_v0(eb, ei0));
- return;
-#else
- BUG();
-#endif
- }
-
- ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
- flags = btrfs_extent_flags(eb, ei);
-
- printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n",
- (unsigned long long)btrfs_extent_refs(eb, ei),
- (unsigned long long)btrfs_extent_generation(eb, ei),
- (unsigned long long)flags);
-
- if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- struct btrfs_tree_block_info *info;
- info = (struct btrfs_tree_block_info *)(ei + 1);
- btrfs_tree_block_key(eb, info, &key);
- printk(KERN_INFO "\t\ttree block key (%llu %x %llu) "
- "level %d\n",
- (unsigned long long)btrfs_disk_key_objectid(&key),
- key.type,
- (unsigned long long)btrfs_disk_key_offset(&key),
- btrfs_tree_block_level(eb, info));
- iref = (struct btrfs_extent_inline_ref *)(info + 1);
- } else {
- iref = (struct btrfs_extent_inline_ref *)(ei + 1);
- }
-
- ptr = (unsigned long)iref;
- end = (unsigned long)ei + item_size;
- while (ptr < end) {
- iref = (struct btrfs_extent_inline_ref *)ptr;
- type = btrfs_extent_inline_ref_type(eb, iref);
- offset = btrfs_extent_inline_ref_offset(eb, iref);
- switch (type) {
- case BTRFS_TREE_BLOCK_REF_KEY:
- printk(KERN_INFO "\t\ttree block backref "
- "root %llu\n", (unsigned long long)offset);
- break;
- case BTRFS_SHARED_BLOCK_REF_KEY:
- printk(KERN_INFO "\t\tshared block backref "
- "parent %llu\n", (unsigned long long)offset);
- break;
- case BTRFS_EXTENT_DATA_REF_KEY:
- dref = (struct btrfs_extent_data_ref *)(&iref->offset);
- print_extent_data_ref(eb, dref);
- break;
- case BTRFS_SHARED_DATA_REF_KEY:
- sref = (struct btrfs_shared_data_ref *)(iref + 1);
- printk(KERN_INFO "\t\tshared data backref "
- "parent %llu count %u\n",
- (unsigned long long)offset,
- btrfs_shared_data_ref_count(eb, sref));
- break;
- default:
- BUG();
- }
- ptr += btrfs_extent_inline_ref_size(type);
- }
- WARN_ON(ptr > end);
-}
-
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-static void print_extent_ref_v0(struct extent_buffer *eb, int slot)
-{
- struct btrfs_extent_ref_v0 *ref0;
-
- ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0);
- printk("\t\textent back ref root %llu gen %llu "
- "owner %llu num_refs %lu\n",
- (unsigned long long)btrfs_ref_root_v0(eb, ref0),
- (unsigned long long)btrfs_ref_generation_v0(eb, ref0),
- (unsigned long long)btrfs_ref_objectid_v0(eb, ref0),
- (unsigned long)btrfs_ref_count_v0(eb, ref0));
-}
-#endif
-
-void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
-{
- int i;
- u32 type, nr;
- struct btrfs_item *item;
- struct btrfs_root_item *ri;
- struct btrfs_dir_item *di;
- struct btrfs_inode_item *ii;
- struct btrfs_block_group_item *bi;
- struct btrfs_file_extent_item *fi;
- struct btrfs_extent_data_ref *dref;
- struct btrfs_shared_data_ref *sref;
- struct btrfs_dev_extent *dev_extent;
- struct btrfs_key key;
- struct btrfs_key found_key;
-
- if (!l)
- return;
-
- nr = btrfs_header_nritems(l);
-
- printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n",
- (unsigned long long)btrfs_header_bytenr(l), nr,
- btrfs_leaf_free_space(root, l));
- for (i = 0 ; i < nr ; i++) {
- item = btrfs_item_nr(l, i);
- btrfs_item_key_to_cpu(l, &key, i);
- type = btrfs_key_type(&key);
- printk(KERN_INFO "\titem %d key (%llu %x %llu) itemoff %d "
- "itemsize %d\n",
- i,
- (unsigned long long)key.objectid, type,
- (unsigned long long)key.offset,
- btrfs_item_offset(l, item), btrfs_item_size(l, item));
- switch (type) {
- case BTRFS_INODE_ITEM_KEY:
- ii = btrfs_item_ptr(l, i, struct btrfs_inode_item);
- printk(KERN_INFO "\t\tinode generation %llu size %llu "
- "mode %o\n",
- (unsigned long long)
- btrfs_inode_generation(l, ii),
- (unsigned long long)btrfs_inode_size(l, ii),
- btrfs_inode_mode(l, ii));
- break;
- case BTRFS_DIR_ITEM_KEY:
- di = btrfs_item_ptr(l, i, struct btrfs_dir_item);
- btrfs_dir_item_key_to_cpu(l, di, &found_key);
- printk(KERN_INFO "\t\tdir oid %llu type %u\n",
- (unsigned long long)found_key.objectid,
- btrfs_dir_type(l, di));
- break;
- case BTRFS_ROOT_ITEM_KEY:
- ri = btrfs_item_ptr(l, i, struct btrfs_root_item);
- printk(KERN_INFO "\t\troot data bytenr %llu refs %u\n",
- (unsigned long long)
- btrfs_disk_root_bytenr(l, ri),
- btrfs_disk_root_refs(l, ri));
- break;
- case BTRFS_EXTENT_ITEM_KEY:
- print_extent_item(l, i);
- break;
- case BTRFS_TREE_BLOCK_REF_KEY:
- printk(KERN_INFO "\t\ttree block backref\n");
- break;
- case BTRFS_SHARED_BLOCK_REF_KEY:
- printk(KERN_INFO "\t\tshared block backref\n");
- break;
- case BTRFS_EXTENT_DATA_REF_KEY:
- dref = btrfs_item_ptr(l, i,
- struct btrfs_extent_data_ref);
- print_extent_data_ref(l, dref);
- break;
- case BTRFS_SHARED_DATA_REF_KEY:
- sref = btrfs_item_ptr(l, i,
- struct btrfs_shared_data_ref);
- printk(KERN_INFO "\t\tshared data backref count %u\n",
- btrfs_shared_data_ref_count(l, sref));
- break;
- case BTRFS_EXTENT_DATA_KEY:
- fi = btrfs_item_ptr(l, i,
- struct btrfs_file_extent_item);
- if (btrfs_file_extent_type(l, fi) ==
- BTRFS_FILE_EXTENT_INLINE) {
- printk(KERN_INFO "\t\tinline extent data "
- "size %u\n",
- btrfs_file_extent_inline_len(l, fi));
- break;
- }
- printk(KERN_INFO "\t\textent data disk bytenr %llu "
- "nr %llu\n",
- (unsigned long long)
- btrfs_file_extent_disk_bytenr(l, fi),
- (unsigned long long)
- btrfs_file_extent_disk_num_bytes(l, fi));
- printk(KERN_INFO "\t\textent data offset %llu "
- "nr %llu ram %llu\n",
- (unsigned long long)
- btrfs_file_extent_offset(l, fi),
- (unsigned long long)
- btrfs_file_extent_num_bytes(l, fi),
- (unsigned long long)
- btrfs_file_extent_ram_bytes(l, fi));
- break;
- case BTRFS_EXTENT_REF_V0_KEY:
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- print_extent_ref_v0(l, i);
-#else
- BUG();
-#endif
- break;
- case BTRFS_BLOCK_GROUP_ITEM_KEY:
- bi = btrfs_item_ptr(l, i,
- struct btrfs_block_group_item);
- printk(KERN_INFO "\t\tblock group used %llu\n",
- (unsigned long long)
- btrfs_disk_block_group_used(l, bi));
- break;
- case BTRFS_CHUNK_ITEM_KEY:
- print_chunk(l, btrfs_item_ptr(l, i,
- struct btrfs_chunk));
- break;
- case BTRFS_DEV_ITEM_KEY:
- print_dev_item(l, btrfs_item_ptr(l, i,
- struct btrfs_dev_item));
- break;
- case BTRFS_DEV_EXTENT_KEY:
- dev_extent = btrfs_item_ptr(l, i,
- struct btrfs_dev_extent);
- printk(KERN_INFO "\t\tdev extent chunk_tree %llu\n"
- "\t\tchunk objectid %llu chunk offset %llu "
- "length %llu\n",
- (unsigned long long)
- btrfs_dev_extent_chunk_tree(l, dev_extent),
- (unsigned long long)
- btrfs_dev_extent_chunk_objectid(l, dev_extent),
- (unsigned long long)
- btrfs_dev_extent_chunk_offset(l, dev_extent),
- (unsigned long long)
- btrfs_dev_extent_length(l, dev_extent));
- };
- }
-}
-
-void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
-{
- int i; u32 nr;
- struct btrfs_key key;
- int level;
-
- if (!c)
- return;
- nr = btrfs_header_nritems(c);
- level = btrfs_header_level(c);
- if (level == 0) {
- btrfs_print_leaf(root, c);
- return;
- }
- printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n",
- (unsigned long long)btrfs_header_bytenr(c),
- level, nr,
- (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
- for (i = 0; i < nr; i++) {
- btrfs_node_key_to_cpu(c, &key, i);
- printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n",
- i,
- (unsigned long long)key.objectid,
- key.type,
- (unsigned long long)key.offset,
- (unsigned long long)btrfs_node_blockptr(c, i));
- }
- for (i = 0; i < nr; i++) {
- struct extent_buffer *next = read_tree_block(root,
- btrfs_node_blockptr(c, i),
- btrfs_level_size(root, level - 1),
- btrfs_node_ptr_generation(c, i));
- if (btrfs_is_leaf(next) &&
- level != 1)
- BUG();
- if (btrfs_header_level(next) !=
- level - 1)
- BUG();
- btrfs_print_tree(root, next);
- free_extent_buffer(next);
- }
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/print-tree.h b/ANDROID_3.4.5/fs/btrfs/print-tree.h
deleted file mode 100644
index da75efe5..00000000
--- a/ANDROID_3.4.5/fs/btrfs/print-tree.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __PRINT_TREE_
-#define __PRINT_TREE_
-void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l);
-void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t);
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/reada.c b/ANDROID_3.4.5/fs/btrfs/reada.c
deleted file mode 100644
index ac5d0108..00000000
--- a/ANDROID_3.4.5/fs/btrfs/reada.c
+++ /dev/null
@@ -1,961 +0,0 @@
-/*
- * Copyright (C) 2011 STRATO. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/sched.h>
-#include <linux/pagemap.h>
-#include <linux/writeback.h>
-#include <linux/blkdev.h>
-#include <linux/rbtree.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
-#include "ctree.h"
-#include "volumes.h"
-#include "disk-io.h"
-#include "transaction.h"
-
-#undef DEBUG
-
-/*
- * This is the implementation for the generic read ahead framework.
- *
- * To trigger a readahead, btrfs_reada_add must be called. It will start
- * a read ahead for the given range [start, end) on tree root. The returned
- * handle can either be used to wait on the readahead to finish
- * (btrfs_reada_wait), or to send it to the background (btrfs_reada_detach).
- *
- * The read ahead works as follows:
- * On btrfs_reada_add, the root of the tree is inserted into a radix_tree.
- * reada_start_machine will then search for extents to prefetch and trigger
- * some reads. When a read finishes for a node, all contained node/leaf
- * pointers that lie in the given range will also be enqueued. The reads will
- * be triggered in sequential order, thus giving a big win over a naive
- * enumeration. It will also make use of multi-device layouts. Each disk
- * will have its own read pointer and all disks will be utilized in parallel.
- * Also, no two disks will read both sides of a mirror simultaneously, as this
- * would waste seeking capacity. Instead both disks will read different parts
- * of the filesystem.
- * Any number of readaheads can be started in parallel. The read order will be
- * determined globally, i.e. 2 parallel readaheads will normally finish faster
- * than the 2 started one after another.
- */
-
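-/*
- * Illustrative usage sketch, not part of the original file: one plausible way
- * a caller could drive this interface end to end. The helper name
- * prefetch_whole_tree() is hypothetical; btrfs_reada_add, btrfs_reada_wait
- * and btrfs_reada_detach are the entry points implemented below.
- */
-static void __maybe_unused prefetch_whole_tree(struct btrfs_root *root)
-{
-	struct reada_control *rc;
-	struct btrfs_key start = { .objectid = 0, .type = 0, .offset = 0 };
-	struct btrfs_key end = { .objectid = (u64)-1, .type = (u8)-1,
-				 .offset = (u64)-1 };
-
-	rc = btrfs_reada_add(root, &start, &end);
-	if (IS_ERR(rc))
-		return;
-
-	/* either block until the readahead has finished ... */
-	btrfs_reada_wait(rc);
-	/*
-	 * ... or, instead of waiting, hand the work off to the background
-	 * with btrfs_reada_detach(rc).
-	 */
-}
-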
-#define MAX_IN_FLIGHT 6
-
-struct reada_extctl {
- struct list_head list;
- struct reada_control *rc;
- u64 generation;
-};
-
-struct reada_extent {
- u64 logical;
- struct btrfs_key top;
- u32 blocksize;
- int err;
- struct list_head extctl;
- struct kref refcnt;
- spinlock_t lock;
- struct reada_zone *zones[BTRFS_MAX_MIRRORS];
- int nzones;
- struct btrfs_device *scheduled_for;
-};
-
-struct reada_zone {
- u64 start;
- u64 end;
- u64 elems;
- struct list_head list;
- spinlock_t lock;
- int locked;
- struct btrfs_device *device;
- struct btrfs_device *devs[BTRFS_MAX_MIRRORS]; /* full list, incl
- * self */
- int ndevs;
- struct kref refcnt;
-};
-
-struct reada_machine_work {
- struct btrfs_work work;
- struct btrfs_fs_info *fs_info;
-};
-
-static void reada_extent_put(struct btrfs_fs_info *, struct reada_extent *);
-static void reada_control_release(struct kref *kref);
-static void reada_zone_release(struct kref *kref);
-static void reada_start_machine(struct btrfs_fs_info *fs_info);
-static void __reada_start_machine(struct btrfs_fs_info *fs_info);
-
-static int reada_add_block(struct reada_control *rc, u64 logical,
- struct btrfs_key *top, int level, u64 generation);
-
-/* recurses */
-/* in case of err, eb might be NULL */
-static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
- u64 start, int err)
-{
- int level = 0;
- int nritems;
- int i;
- u64 bytenr;
- u64 generation;
- struct reada_extent *re;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct list_head list;
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- struct btrfs_device *for_dev;
-
- if (eb)
- level = btrfs_header_level(eb);
-
- /* find extent */
- spin_lock(&fs_info->reada_lock);
- re = radix_tree_lookup(&fs_info->reada_tree, index);
- if (re)
- kref_get(&re->refcnt);
- spin_unlock(&fs_info->reada_lock);
-
- if (!re)
- return -1;
-
- spin_lock(&re->lock);
- /*
- * just take the full list from the extent. afterwards we
- * don't need the lock anymore
- */
- list_replace_init(&re->extctl, &list);
- for_dev = re->scheduled_for;
- re->scheduled_for = NULL;
- spin_unlock(&re->lock);
-
- if (err == 0) {
- nritems = level ? btrfs_header_nritems(eb) : 0;
- generation = btrfs_header_generation(eb);
- /*
- * FIXME: currently we just set nritems to 0 if this is a leaf,
- * effectively ignoring the content. In a next step we could
-		 * trigger more readahead depending on the content, e.g.
- * fetch the checksums for the extents in the leaf.
- */
- } else {
- /*
- * this is the error case, the extent buffer has not been
- * read correctly. We won't access anything from it and
- * just cleanup our data structures. Effectively this will
- * cut the branch below this node from read ahead.
- */
- nritems = 0;
- generation = 0;
- }
-
- for (i = 0; i < nritems; i++) {
- struct reada_extctl *rec;
- u64 n_gen;
- struct btrfs_key key;
- struct btrfs_key next_key;
-
- btrfs_node_key_to_cpu(eb, &key, i);
- if (i + 1 < nritems)
- btrfs_node_key_to_cpu(eb, &next_key, i + 1);
- else
- next_key = re->top;
- bytenr = btrfs_node_blockptr(eb, i);
- n_gen = btrfs_node_ptr_generation(eb, i);
-
- list_for_each_entry(rec, &list, list) {
- struct reada_control *rc = rec->rc;
-
- /*
- * if the generation doesn't match, just ignore this
- * extctl. This will probably cut off a branch from
- * prefetch. Alternatively one could start a new (sub-)
- * prefetch for this branch, starting again from root.
- * FIXME: move the generation check out of this loop
- */
-#ifdef DEBUG
- if (rec->generation != generation) {
- printk(KERN_DEBUG "generation mismatch for "
- "(%llu,%d,%llu) %llu != %llu\n",
- key.objectid, key.type, key.offset,
- rec->generation, generation);
- }
-#endif
- if (rec->generation == generation &&
- btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
- btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
- reada_add_block(rc, bytenr, &next_key,
- level - 1, n_gen);
- }
- }
- /*
- * free extctl records
- */
- while (!list_empty(&list)) {
- struct reada_control *rc;
- struct reada_extctl *rec;
-
- rec = list_first_entry(&list, struct reada_extctl, list);
- list_del(&rec->list);
- rc = rec->rc;
- kfree(rec);
-
- kref_get(&rc->refcnt);
- if (atomic_dec_and_test(&rc->elems)) {
- kref_put(&rc->refcnt, reada_control_release);
- wake_up(&rc->wait);
- }
- kref_put(&rc->refcnt, reada_control_release);
-
- reada_extent_put(fs_info, re); /* one ref for each entry */
- }
- reada_extent_put(fs_info, re); /* our ref */
- if (for_dev)
- atomic_dec(&for_dev->reada_in_flight);
-
- return 0;
-}
-
-/*
- * start is passed separately in case eb is NULL, which may be the case with
- * failed I/O
- */
-int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
- u64 start, int err)
-{
- int ret;
-
- ret = __readahead_hook(root, eb, start, err);
-
- reada_start_machine(root->fs_info);
-
- return ret;
-}
-
-static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
- struct btrfs_device *dev, u64 logical,
- struct btrfs_bio *bbio)
-{
- int ret;
- struct reada_zone *zone;
- struct btrfs_block_group_cache *cache = NULL;
- u64 start;
- u64 end;
- int i;
-
- zone = NULL;
- spin_lock(&fs_info->reada_lock);
- ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
- logical >> PAGE_CACHE_SHIFT, 1);
- if (ret == 1)
- kref_get(&zone->refcnt);
- spin_unlock(&fs_info->reada_lock);
-
- if (ret == 1) {
- if (logical >= zone->start && logical < zone->end)
- return zone;
- spin_lock(&fs_info->reada_lock);
- kref_put(&zone->refcnt, reada_zone_release);
- spin_unlock(&fs_info->reada_lock);
- }
-
- cache = btrfs_lookup_block_group(fs_info, logical);
- if (!cache)
- return NULL;
-
- start = cache->key.objectid;
- end = start + cache->key.offset - 1;
- btrfs_put_block_group(cache);
-
- zone = kzalloc(sizeof(*zone), GFP_NOFS);
- if (!zone)
- return NULL;
-
- zone->start = start;
- zone->end = end;
- INIT_LIST_HEAD(&zone->list);
- spin_lock_init(&zone->lock);
- zone->locked = 0;
- kref_init(&zone->refcnt);
- zone->elems = 0;
- zone->device = dev; /* our device always sits at index 0 */
- for (i = 0; i < bbio->num_stripes; ++i) {
- /* bounds have already been checked */
- zone->devs[i] = bbio->stripes[i].dev;
- }
- zone->ndevs = bbio->num_stripes;
-
- spin_lock(&fs_info->reada_lock);
- ret = radix_tree_insert(&dev->reada_zones,
- (unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
- zone);
-
- if (ret == -EEXIST) {
- kfree(zone);
- ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
- logical >> PAGE_CACHE_SHIFT, 1);
- if (ret == 1)
- kref_get(&zone->refcnt);
- }
- spin_unlock(&fs_info->reada_lock);
-
- return zone;
-}
-
-static struct reada_extent *reada_find_extent(struct btrfs_root *root,
- u64 logical,
- struct btrfs_key *top, int level)
-{
- int ret;
- struct reada_extent *re = NULL;
- struct reada_extent *re_exist = NULL;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
- struct btrfs_bio *bbio = NULL;
- struct btrfs_device *dev;
- struct btrfs_device *prev_dev;
- u32 blocksize;
- u64 length;
- int nzones = 0;
- int i;
- unsigned long index = logical >> PAGE_CACHE_SHIFT;
-
- spin_lock(&fs_info->reada_lock);
- re = radix_tree_lookup(&fs_info->reada_tree, index);
- if (re)
- kref_get(&re->refcnt);
- spin_unlock(&fs_info->reada_lock);
-
- if (re)
- return re;
-
- re = kzalloc(sizeof(*re), GFP_NOFS);
- if (!re)
- return NULL;
-
- blocksize = btrfs_level_size(root, level);
- re->logical = logical;
- re->blocksize = blocksize;
- re->top = *top;
- INIT_LIST_HEAD(&re->extctl);
- spin_lock_init(&re->lock);
- kref_init(&re->refcnt);
-
- /*
- * map block
- */
- length = blocksize;
- ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, &bbio, 0);
- if (ret || !bbio || length < blocksize)
- goto error;
-
- if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
- printk(KERN_ERR "btrfs readahead: more than %d copies not "
- "supported", BTRFS_MAX_MIRRORS);
- goto error;
- }
-
- for (nzones = 0; nzones < bbio->num_stripes; ++nzones) {
- struct reada_zone *zone;
-
- dev = bbio->stripes[nzones].dev;
- zone = reada_find_zone(fs_info, dev, logical, bbio);
- if (!zone)
- break;
-
- re->zones[nzones] = zone;
- spin_lock(&zone->lock);
- if (!zone->elems)
- kref_get(&zone->refcnt);
- ++zone->elems;
- spin_unlock(&zone->lock);
- spin_lock(&fs_info->reada_lock);
- kref_put(&zone->refcnt, reada_zone_release);
- spin_unlock(&fs_info->reada_lock);
- }
- re->nzones = nzones;
- if (nzones == 0) {
- /* not a single zone found, error and out */
- goto error;
- }
-
- /* insert extent in reada_tree + all per-device trees, all or nothing */
- spin_lock(&fs_info->reada_lock);
- ret = radix_tree_insert(&fs_info->reada_tree, index, re);
- if (ret == -EEXIST) {
- re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
- BUG_ON(!re_exist);
- kref_get(&re_exist->refcnt);
- spin_unlock(&fs_info->reada_lock);
- goto error;
- }
- if (ret) {
- spin_unlock(&fs_info->reada_lock);
- goto error;
- }
- prev_dev = NULL;
- for (i = 0; i < nzones; ++i) {
- dev = bbio->stripes[i].dev;
- if (dev == prev_dev) {
- /*
- * in case of DUP, just add the first zone. As both
- * are on the same device, there's nothing to gain
- * from adding both.
- * Also, it wouldn't work, as the tree is per device
- * and adding would fail with EEXIST
- */
- continue;
- }
- prev_dev = dev;
- ret = radix_tree_insert(&dev->reada_extents, index, re);
- if (ret) {
- while (--i >= 0) {
- dev = bbio->stripes[i].dev;
- BUG_ON(dev == NULL);
- radix_tree_delete(&dev->reada_extents, index);
- }
- BUG_ON(fs_info == NULL);
- radix_tree_delete(&fs_info->reada_tree, index);
- spin_unlock(&fs_info->reada_lock);
- goto error;
- }
- }
- spin_unlock(&fs_info->reada_lock);
-
- kfree(bbio);
- return re;
-
-error:
- while (nzones) {
- struct reada_zone *zone;
-
- --nzones;
- zone = re->zones[nzones];
- kref_get(&zone->refcnt);
- spin_lock(&zone->lock);
- --zone->elems;
- if (zone->elems == 0) {
- /*
- * no fs_info->reada_lock needed, as this can't be
- * the last ref
- */
- kref_put(&zone->refcnt, reada_zone_release);
- }
- spin_unlock(&zone->lock);
-
- spin_lock(&fs_info->reada_lock);
- kref_put(&zone->refcnt, reada_zone_release);
- spin_unlock(&fs_info->reada_lock);
- }
- kfree(bbio);
- kfree(re);
- return re_exist;
-}
-
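-/*
- * dummy release function for kref_put: the real teardown is done by the
- * caller (reada_extent_put) while fs_info->reada_lock is held, so nothing
- * happens here; kref_put is only used to detect the last reference.
- */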
-static void reada_kref_dummy(struct kref *kr)
-{
-}
-
-static void reada_extent_put(struct btrfs_fs_info *fs_info,
- struct reada_extent *re)
-{
- int i;
- unsigned long index = re->logical >> PAGE_CACHE_SHIFT;
-
- spin_lock(&fs_info->reada_lock);
- if (!kref_put(&re->refcnt, reada_kref_dummy)) {
- spin_unlock(&fs_info->reada_lock);
- return;
- }
-
- radix_tree_delete(&fs_info->reada_tree, index);
- for (i = 0; i < re->nzones; ++i) {
- struct reada_zone *zone = re->zones[i];
-
- radix_tree_delete(&zone->device->reada_extents, index);
- }
-
- spin_unlock(&fs_info->reada_lock);
-
- for (i = 0; i < re->nzones; ++i) {
- struct reada_zone *zone = re->zones[i];
-
- kref_get(&zone->refcnt);
- spin_lock(&zone->lock);
- --zone->elems;
- if (zone->elems == 0) {
- /* no fs_info->reada_lock needed, as this can't be
- * the last ref */
- kref_put(&zone->refcnt, reada_zone_release);
- }
- spin_unlock(&zone->lock);
-
- spin_lock(&fs_info->reada_lock);
- kref_put(&zone->refcnt, reada_zone_release);
- spin_unlock(&fs_info->reada_lock);
- }
- if (re->scheduled_for)
- atomic_dec(&re->scheduled_for->reada_in_flight);
-
- kfree(re);
-}
-
-static void reada_zone_release(struct kref *kref)
-{
- struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt);
-
- radix_tree_delete(&zone->device->reada_zones,
- zone->end >> PAGE_CACHE_SHIFT);
-
- kfree(zone);
-}
-
-static void reada_control_release(struct kref *kref)
-{
- struct reada_control *rc = container_of(kref, struct reada_control,
- refcnt);
-
- kfree(rc);
-}
-
-static int reada_add_block(struct reada_control *rc, u64 logical,
- struct btrfs_key *top, int level, u64 generation)
-{
- struct btrfs_root *root = rc->root;
- struct reada_extent *re;
- struct reada_extctl *rec;
-
- re = reada_find_extent(root, logical, top, level); /* takes one ref */
- if (!re)
- return -1;
-
- rec = kzalloc(sizeof(*rec), GFP_NOFS);
- if (!rec) {
- reada_extent_put(root->fs_info, re);
- return -1;
- }
-
- rec->rc = rc;
- rec->generation = generation;
- atomic_inc(&rc->elems);
-
- spin_lock(&re->lock);
- list_add_tail(&rec->list, &re->extctl);
- spin_unlock(&re->lock);
-
- /* leave the ref on the extent */
-
- return 0;
-}
-
-/*
- * called with fs_info->reada_lock held
- */
-static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock)
-{
- int i;
- unsigned long index = zone->end >> PAGE_CACHE_SHIFT;
-
- for (i = 0; i < zone->ndevs; ++i) {
- struct reada_zone *peer;
- peer = radix_tree_lookup(&zone->devs[i]->reada_zones, index);
- if (peer && peer->device != zone->device)
- peer->locked = lock;
- }
-}
-
-/*
- * called with fs_info->reada_lock held
- */
-static int reada_pick_zone(struct btrfs_device *dev)
-{
- struct reada_zone *top_zone = NULL;
- struct reada_zone *top_locked_zone = NULL;
- u64 top_elems = 0;
- u64 top_locked_elems = 0;
- unsigned long index = 0;
- int ret;
-
- if (dev->reada_curr_zone) {
- reada_peer_zones_set_lock(dev->reada_curr_zone, 0);
- kref_put(&dev->reada_curr_zone->refcnt, reada_zone_release);
- dev->reada_curr_zone = NULL;
- }
- /* pick the zone with the most elements */
- while (1) {
- struct reada_zone *zone;
-
- ret = radix_tree_gang_lookup(&dev->reada_zones,
- (void **)&zone, index, 1);
- if (ret == 0)
- break;
- index = (zone->end >> PAGE_CACHE_SHIFT) + 1;
- if (zone->locked) {
- if (zone->elems > top_locked_elems) {
- top_locked_elems = zone->elems;
- top_locked_zone = zone;
- }
- } else {
- if (zone->elems > top_elems) {
- top_elems = zone->elems;
- top_zone = zone;
- }
- }
- }
- if (top_zone)
- dev->reada_curr_zone = top_zone;
- else if (top_locked_zone)
- dev->reada_curr_zone = top_locked_zone;
- else
- return 0;
-
- dev->reada_next = dev->reada_curr_zone->start;
- kref_get(&dev->reada_curr_zone->refcnt);
- reada_peer_zones_set_lock(dev->reada_curr_zone, 1);
-
- return 1;
-}
-
-static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
- struct btrfs_device *dev)
-{
- struct reada_extent *re = NULL;
- int mirror_num = 0;
- struct extent_buffer *eb = NULL;
- u64 logical;
- u32 blocksize;
- int ret;
- int i;
- int need_kick = 0;
-
- spin_lock(&fs_info->reada_lock);
- if (dev->reada_curr_zone == NULL) {
- ret = reada_pick_zone(dev);
- if (!ret) {
- spin_unlock(&fs_info->reada_lock);
- return 0;
- }
- }
- /*
- * FIXME currently we issue the reads one extent at a time. If we have
-	 * a contiguous block of extents, we could also coalesce them or use
- * plugging to speed things up
- */
- ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
- dev->reada_next >> PAGE_CACHE_SHIFT, 1);
- if (ret == 0 || re->logical >= dev->reada_curr_zone->end) {
- ret = reada_pick_zone(dev);
- if (!ret) {
- spin_unlock(&fs_info->reada_lock);
- return 0;
- }
- re = NULL;
- ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
- dev->reada_next >> PAGE_CACHE_SHIFT, 1);
- }
- if (ret == 0) {
- spin_unlock(&fs_info->reada_lock);
- return 0;
- }
- dev->reada_next = re->logical + re->blocksize;
- kref_get(&re->refcnt);
-
- spin_unlock(&fs_info->reada_lock);
-
- /*
- * find mirror num
- */
- for (i = 0; i < re->nzones; ++i) {
- if (re->zones[i]->device == dev) {
- mirror_num = i + 1;
- break;
- }
- }
- logical = re->logical;
- blocksize = re->blocksize;
-
- spin_lock(&re->lock);
- if (re->scheduled_for == NULL) {
- re->scheduled_for = dev;
- need_kick = 1;
- }
- spin_unlock(&re->lock);
-
- reada_extent_put(fs_info, re);
-
- if (!need_kick)
- return 0;
-
- atomic_inc(&dev->reada_in_flight);
- ret = reada_tree_block_flagged(fs_info->extent_root, logical, blocksize,
- mirror_num, &eb);
- if (ret)
- __readahead_hook(fs_info->extent_root, NULL, logical, ret);
- else if (eb)
- __readahead_hook(fs_info->extent_root, eb, eb->start, ret);
-
- if (eb)
- free_extent_buffer(eb);
-
- return 1;
-
-}
-
-static void reada_start_machine_worker(struct btrfs_work *work)
-{
- struct reada_machine_work *rmw;
- struct btrfs_fs_info *fs_info;
-
- rmw = container_of(work, struct reada_machine_work, work);
- fs_info = rmw->fs_info;
-
- kfree(rmw);
-
- __reada_start_machine(fs_info);
-}
-
-static void __reada_start_machine(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_device *device;
- struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
- u64 enqueued;
- u64 total = 0;
- int i;
-
- do {
- enqueued = 0;
- list_for_each_entry(device, &fs_devices->devices, dev_list) {
- if (atomic_read(&device->reada_in_flight) <
- MAX_IN_FLIGHT)
- enqueued += reada_start_machine_dev(fs_info,
- device);
- }
- total += enqueued;
- } while (enqueued && total < 10000);
-
- if (enqueued == 0)
- return;
-
- /*
- * If everything is already in the cache, this is effectively single
- * threaded. To a) not hold the caller for too long and b) to utilize
- * more cores, we broke the loop above after 10000 iterations and now
- * enqueue to workers to finish it. This will distribute the load to
- * the cores.
- */
- for (i = 0; i < 2; ++i)
- reada_start_machine(fs_info);
-}
-
-static void reada_start_machine(struct btrfs_fs_info *fs_info)
-{
- struct reada_machine_work *rmw;
-
- rmw = kzalloc(sizeof(*rmw), GFP_NOFS);
- if (!rmw) {
- /* FIXME we cannot handle this properly right now */
- BUG();
- }
- rmw->work.func = reada_start_machine_worker;
- rmw->fs_info = fs_info;
-
- btrfs_queue_worker(&fs_info->readahead_workers, &rmw->work);
-}
-
-#ifdef DEBUG
-static void dump_devs(struct btrfs_fs_info *fs_info, int all)
-{
- struct btrfs_device *device;
- struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
- unsigned long index;
- int ret;
- int i;
- int j;
- int cnt;
-
- spin_lock(&fs_info->reada_lock);
- list_for_each_entry(device, &fs_devices->devices, dev_list) {
- printk(KERN_DEBUG "dev %lld has %d in flight\n", device->devid,
- atomic_read(&device->reada_in_flight));
- index = 0;
- while (1) {
- struct reada_zone *zone;
- ret = radix_tree_gang_lookup(&device->reada_zones,
- (void **)&zone, index, 1);
- if (ret == 0)
- break;
- printk(KERN_DEBUG " zone %llu-%llu elems %llu locked "
- "%d devs", zone->start, zone->end, zone->elems,
- zone->locked);
- for (j = 0; j < zone->ndevs; ++j) {
- printk(KERN_CONT " %lld",
- zone->devs[j]->devid);
- }
- if (device->reada_curr_zone == zone)
- printk(KERN_CONT " curr off %llu",
- device->reada_next - zone->start);
- printk(KERN_CONT "\n");
- index = (zone->end >> PAGE_CACHE_SHIFT) + 1;
- }
- cnt = 0;
- index = 0;
- while (all) {
- struct reada_extent *re = NULL;
-
- ret = radix_tree_gang_lookup(&device->reada_extents,
- (void **)&re, index, 1);
- if (ret == 0)
- break;
- printk(KERN_DEBUG
- " re: logical %llu size %u empty %d for %lld",
- re->logical, re->blocksize,
- list_empty(&re->extctl), re->scheduled_for ?
- re->scheduled_for->devid : -1);
-
- for (i = 0; i < re->nzones; ++i) {
- printk(KERN_CONT " zone %llu-%llu devs",
- re->zones[i]->start,
- re->zones[i]->end);
- for (j = 0; j < re->zones[i]->ndevs; ++j) {
- printk(KERN_CONT " %lld",
- re->zones[i]->devs[j]->devid);
- }
- }
- printk(KERN_CONT "\n");
- index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
- if (++cnt > 15)
- break;
- }
- }
-
- index = 0;
- cnt = 0;
- while (all) {
- struct reada_extent *re = NULL;
-
- ret = radix_tree_gang_lookup(&fs_info->reada_tree, (void **)&re,
- index, 1);
- if (ret == 0)
- break;
- if (!re->scheduled_for) {
- index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
- continue;
- }
- printk(KERN_DEBUG
- "re: logical %llu size %u list empty %d for %lld",
- re->logical, re->blocksize, list_empty(&re->extctl),
- re->scheduled_for ? re->scheduled_for->devid : -1);
-		for (i = 0; i < re->nzones; ++i) {
-			printk(KERN_CONT " zone %llu-%llu devs",
-			       re->zones[i]->start,
-			       re->zones[i]->end);
-			for (j = 0; j < re->zones[i]->ndevs; ++j) {
-				printk(KERN_CONT " %lld",
-				       re->zones[i]->devs[j]->devid);
-			}
-		}
- printk(KERN_CONT "\n");
- index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
- }
- spin_unlock(&fs_info->reada_lock);
-}
-#endif
-
-/*
- * interface
- */
-struct reada_control *btrfs_reada_add(struct btrfs_root *root,
- struct btrfs_key *key_start, struct btrfs_key *key_end)
-{
- struct reada_control *rc;
- u64 start;
- u64 generation;
- int level;
- struct extent_buffer *node;
- static struct btrfs_key max_key = {
- .objectid = (u64)-1,
- .type = (u8)-1,
- .offset = (u64)-1
- };
-
- rc = kzalloc(sizeof(*rc), GFP_NOFS);
- if (!rc)
- return ERR_PTR(-ENOMEM);
-
- rc->root = root;
- rc->key_start = *key_start;
- rc->key_end = *key_end;
- atomic_set(&rc->elems, 0);
- init_waitqueue_head(&rc->wait);
- kref_init(&rc->refcnt);
- kref_get(&rc->refcnt); /* one ref for having elements */
-
- node = btrfs_root_node(root);
- start = node->start;
- level = btrfs_header_level(node);
- generation = btrfs_header_generation(node);
- free_extent_buffer(node);
-
- reada_add_block(rc, start, &max_key, level, generation);
-
- reada_start_machine(root->fs_info);
-
- return rc;
-}
-
-#ifdef DEBUG
-int btrfs_reada_wait(void *handle)
-{
- struct reada_control *rc = handle;
-
- while (atomic_read(&rc->elems)) {
- wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
- 5 * HZ);
- dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0);
- }
-
- dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0);
-
- kref_put(&rc->refcnt, reada_control_release);
-
- return 0;
-}
-#else
-int btrfs_reada_wait(void *handle)
-{
- struct reada_control *rc = handle;
-
- while (atomic_read(&rc->elems)) {
- wait_event(rc->wait, atomic_read(&rc->elems) == 0);
- }
-
- kref_put(&rc->refcnt, reada_control_release);
-
- return 0;
-}
-#endif
-
-void btrfs_reada_detach(void *handle)
-{
- struct reada_control *rc = handle;
-
- kref_put(&rc->refcnt, reada_control_release);
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/relocation.c b/ANDROID_3.4.5/fs/btrfs/relocation.c
deleted file mode 100644
index 646ee21b..00000000
--- a/ANDROID_3.4.5/fs/btrfs/relocation.c
+++ /dev/null
@@ -1,4464 +0,0 @@
-/*
- * Copyright (C) 2009 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/sched.h>
-#include <linux/pagemap.h>
-#include <linux/writeback.h>
-#include <linux/blkdev.h>
-#include <linux/rbtree.h>
-#include <linux/slab.h>
-#include "ctree.h"
-#include "disk-io.h"
-#include "transaction.h"
-#include "volumes.h"
-#include "locking.h"
-#include "btrfs_inode.h"
-#include "async-thread.h"
-#include "free-space-cache.h"
-#include "inode-map.h"
-
-/*
- * backref_node, mapping_node and tree_block start with this
- */
-struct tree_entry {
- struct rb_node rb_node;
- u64 bytenr;
-};
-
-/*
- * present a tree block in the backref cache
- */
-struct backref_node {
- struct rb_node rb_node;
- u64 bytenr;
-
- u64 new_bytenr;
-	/* objectid of tree block owner, may not be up to date */
- u64 owner;
- /* link to pending, changed or detached list */
- struct list_head list;
- /* list of upper level blocks reference this block */
- struct list_head upper;
- /* list of child blocks in the cache */
- struct list_head lower;
- /* NULL if this node is not tree root */
- struct btrfs_root *root;
- /* extent buffer got by COW the block */
- struct extent_buffer *eb;
- /* level of tree block */
- unsigned int level:8;
- /* is the block in non-reference counted tree */
- unsigned int cowonly:1;
- /* 1 if no child node in the cache */
- unsigned int lowest:1;
- /* is the extent buffer locked */
- unsigned int locked:1;
- /* has the block been processed */
- unsigned int processed:1;
- /* have backrefs of this block been checked */
- unsigned int checked:1;
- /*
- * 1 if corresponding block has been cowed but some upper
- * level block pointers may not point to the new location
- */
- unsigned int pending:1;
- /*
- * 1 if the backref node isn't connected to any other
- * backref node.
- */
- unsigned int detached:1;
-};
-
-/*
- * present a block pointer in the backref cache
- */
-struct backref_edge {
- struct list_head list[2];
- struct backref_node *node[2];
-};
-
-#define LOWER 0
-#define UPPER 1
-
-struct backref_cache {
- /* red black tree of all backref nodes in the cache */
- struct rb_root rb_root;
- /* for passing backref nodes to btrfs_reloc_cow_block */
- struct backref_node *path[BTRFS_MAX_LEVEL];
- /*
- * list of blocks that have been cowed but some block
- * pointers in upper level blocks may not reflect the
- * new location
- */
- struct list_head pending[BTRFS_MAX_LEVEL];
- /* list of backref nodes with no child node */
- struct list_head leaves;
- /* list of blocks that have been cowed in current transaction */
- struct list_head changed;
- /* list of detached backref node. */
- struct list_head detached;
-
- u64 last_trans;
-
- int nr_nodes;
- int nr_edges;
-};
-
-/*
- * map address of tree root to tree
- */
-struct mapping_node {
- struct rb_node rb_node;
- u64 bytenr;
- void *data;
-};
-
-struct mapping_tree {
- struct rb_root rb_root;
- spinlock_t lock;
-};
-
-/*
- * present a tree block to process
- */
-struct tree_block {
- struct rb_node rb_node;
- u64 bytenr;
- struct btrfs_key key;
- unsigned int level:8;
- unsigned int key_ready:1;
-};
-
-#define MAX_EXTENTS 128
-
-struct file_extent_cluster {
- u64 start;
- u64 end;
- u64 boundary[MAX_EXTENTS];
- unsigned int nr;
-};
-
-struct reloc_control {
- /* block group to relocate */
- struct btrfs_block_group_cache *block_group;
- /* extent tree */
- struct btrfs_root *extent_root;
- /* inode for moving data */
- struct inode *data_inode;
-
- struct btrfs_block_rsv *block_rsv;
-
- struct backref_cache backref_cache;
-
- struct file_extent_cluster cluster;
- /* tree blocks have been processed */
- struct extent_io_tree processed_blocks;
- /* map start of tree root to corresponding reloc tree */
- struct mapping_tree reloc_root_tree;
- /* list of reloc trees */
- struct list_head reloc_roots;
- /* size of metadata reservation for merging reloc trees */
- u64 merging_rsv_size;
- /* size of relocated tree nodes */
- u64 nodes_relocated;
-
- u64 search_start;
- u64 extents_found;
-
- unsigned int stage:8;
- unsigned int create_reloc_tree:1;
- unsigned int merge_reloc_tree:1;
- unsigned int found_file_extent:1;
- unsigned int commit_transaction:1;
-};
-
-/* stages of data relocation */
-#define MOVE_DATA_EXTENTS 0
-#define UPDATE_DATA_PTRS 1
-
-static void remove_backref_node(struct backref_cache *cache,
- struct backref_node *node);
-static void __mark_block_processed(struct reloc_control *rc,
- struct backref_node *node);
-
-static void mapping_tree_init(struct mapping_tree *tree)
-{
- tree->rb_root = RB_ROOT;
- spin_lock_init(&tree->lock);
-}
-
-static void backref_cache_init(struct backref_cache *cache)
-{
- int i;
- cache->rb_root = RB_ROOT;
- for (i = 0; i < BTRFS_MAX_LEVEL; i++)
- INIT_LIST_HEAD(&cache->pending[i]);
- INIT_LIST_HEAD(&cache->changed);
- INIT_LIST_HEAD(&cache->detached);
- INIT_LIST_HEAD(&cache->leaves);
-}
-
-static void backref_cache_cleanup(struct backref_cache *cache)
-{
- struct backref_node *node;
- int i;
-
- while (!list_empty(&cache->detached)) {
- node = list_entry(cache->detached.next,
- struct backref_node, list);
- remove_backref_node(cache, node);
- }
-
- while (!list_empty(&cache->leaves)) {
- node = list_entry(cache->leaves.next,
- struct backref_node, lower);
- remove_backref_node(cache, node);
- }
-
- cache->last_trans = 0;
-
- for (i = 0; i < BTRFS_MAX_LEVEL; i++)
- BUG_ON(!list_empty(&cache->pending[i]));
- BUG_ON(!list_empty(&cache->changed));
- BUG_ON(!list_empty(&cache->detached));
- BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
- BUG_ON(cache->nr_nodes);
- BUG_ON(cache->nr_edges);
-}
-
-static struct backref_node *alloc_backref_node(struct backref_cache *cache)
-{
- struct backref_node *node;
-
- node = kzalloc(sizeof(*node), GFP_NOFS);
- if (node) {
- INIT_LIST_HEAD(&node->list);
- INIT_LIST_HEAD(&node->upper);
- INIT_LIST_HEAD(&node->lower);
- RB_CLEAR_NODE(&node->rb_node);
- cache->nr_nodes++;
- }
- return node;
-}
-
-static void free_backref_node(struct backref_cache *cache,
- struct backref_node *node)
-{
- if (node) {
- cache->nr_nodes--;
- kfree(node);
- }
-}
-
-static struct backref_edge *alloc_backref_edge(struct backref_cache *cache)
-{
- struct backref_edge *edge;
-
- edge = kzalloc(sizeof(*edge), GFP_NOFS);
- if (edge)
- cache->nr_edges++;
- return edge;
-}
-
-static void free_backref_edge(struct backref_cache *cache,
- struct backref_edge *edge)
-{
- if (edge) {
- cache->nr_edges--;
- kfree(edge);
- }
-}
-
-static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
- struct rb_node *node)
-{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct tree_entry *entry;
-
- while (*p) {
- parent = *p;
- entry = rb_entry(parent, struct tree_entry, rb_node);
-
- if (bytenr < entry->bytenr)
- p = &(*p)->rb_left;
- else if (bytenr > entry->bytenr)
- p = &(*p)->rb_right;
- else
- return parent;
- }
-
- rb_link_node(node, parent, p);
- rb_insert_color(node, root);
- return NULL;
-}
-
-static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
-{
- struct rb_node *n = root->rb_node;
- struct tree_entry *entry;
-
- while (n) {
- entry = rb_entry(n, struct tree_entry, rb_node);
-
- if (bytenr < entry->bytenr)
- n = n->rb_left;
- else if (bytenr > entry->bytenr)
- n = n->rb_right;
- else
- return n;
- }
- return NULL;
-}
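-
-/*
- * Illustrative sketch, not part of the original file: backref_node,
- * mapping_node and tree_block all start with the tree_entry layout
- * (rb_node + bytenr), so the two generic helpers above work for any of
- * them. A lookup of a backref_node by bytenr, for instance, could read
- * as follows (backref_cache_search is a hypothetical name):
- */
-static struct backref_node *__maybe_unused
-backref_cache_search(struct backref_cache *cache, u64 bytenr)
-{
-	struct rb_node *rb_node;
-
-	rb_node = tree_search(&cache->rb_root, bytenr);
-	if (!rb_node)
-		return NULL;
-	return rb_entry(rb_node, struct backref_node, rb_node);
-}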
-
-void backref_tree_panic(struct rb_node *rb_node, int errno,
- u64 bytenr)
-{
-
- struct btrfs_fs_info *fs_info = NULL;
- struct backref_node *bnode = rb_entry(rb_node, struct backref_node,
- rb_node);
- if (bnode->root)
- fs_info = bnode->root->fs_info;
- btrfs_panic(fs_info, errno, "Inconsistency in backref cache "
- "found at offset %llu\n", (unsigned long long)bytenr);
-}
-
-/*
- * walk up backref nodes until reach node presents tree root
- */
-static struct backref_node *walk_up_backref(struct backref_node *node,
- struct backref_edge *edges[],
- int *index)
-{
- struct backref_edge *edge;
- int idx = *index;
-
- while (!list_empty(&node->upper)) {
- edge = list_entry(node->upper.next,
- struct backref_edge, list[LOWER]);
- edges[idx++] = edge;
- node = edge->node[UPPER];
- }
- BUG_ON(node->detached);
- *index = idx;
- return node;
-}
-
-/*
- * walk down backref nodes to find start of next reference path
- */
-static struct backref_node *walk_down_backref(struct backref_edge *edges[],
- int *index)
-{
- struct backref_edge *edge;
- struct backref_node *lower;
- int idx = *index;
-
- while (idx > 0) {
- edge = edges[idx - 1];
- lower = edge->node[LOWER];
- if (list_is_last(&edge->list[LOWER], &lower->upper)) {
- idx--;
- continue;
- }
- edge = list_entry(edge->list[LOWER].next,
- struct backref_edge, list[LOWER]);
- edges[idx - 1] = edge;
- *index = idx;
- return edge->node[UPPER];
- }
- *index = 0;
- return NULL;
-}
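-
-/*
- * Illustrative sketch, not part of the original file: the two walkers above
- * are meant to be used together to visit every reference path of a node,
- * roughly as below (visit_all_paths is a hypothetical name; callers in this
- * file inspect the root reached at the top of each path).
- */
-static void __maybe_unused visit_all_paths(struct backref_node *node)
-{
-	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
-	struct backref_node *next = node;
-	int index = 0;
-
-	while (1) {
-		/* climb to the top of the current reference path */
-		next = walk_up_backref(next, edges, &index);
-		/* ... the tree root for this path is now 'next' ... */
-
-		/* step over to the start of the next path, if any is left */
-		next = walk_down_backref(edges, &index);
-		if (!next)
-			break;
-	}
-}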
-
-static void unlock_node_buffer(struct backref_node *node)
-{
- if (node->locked) {
- btrfs_tree_unlock(node->eb);
- node->locked = 0;
- }
-}
-
-static void drop_node_buffer(struct backref_node *node)
-{
- if (node->eb) {
- unlock_node_buffer(node);
- free_extent_buffer(node->eb);
- node->eb = NULL;
- }
-}
-
-static void drop_backref_node(struct backref_cache *tree,
- struct backref_node *node)
-{
- BUG_ON(!list_empty(&node->upper));
-
- drop_node_buffer(node);
- list_del(&node->list);
- list_del(&node->lower);
- if (!RB_EMPTY_NODE(&node->rb_node))
- rb_erase(&node->rb_node, &tree->rb_root);
- free_backref_node(tree, node);
-}
-
-/*
- * remove a backref node from the backref cache
- */
-static void remove_backref_node(struct backref_cache *cache,
- struct backref_node *node)
-{
- struct backref_node *upper;
- struct backref_edge *edge;
-
- if (!node)
- return;
-
- BUG_ON(!node->lowest && !node->detached);
- while (!list_empty(&node->upper)) {
- edge = list_entry(node->upper.next, struct backref_edge,
- list[LOWER]);
- upper = edge->node[UPPER];
- list_del(&edge->list[LOWER]);
- list_del(&edge->list[UPPER]);
- free_backref_edge(cache, edge);
-
- if (RB_EMPTY_NODE(&upper->rb_node)) {
- BUG_ON(!list_empty(&node->upper));
- drop_backref_node(cache, node);
- node = upper;
- node->lowest = 1;
- continue;
- }
- /*
- * add the node to leaf node list if no other
-		 * child block is cached.
- */
- if (list_empty(&upper->lower)) {
- list_add_tail(&upper->lower, &cache->leaves);
- upper->lowest = 1;
- }
- }
-
- drop_backref_node(cache, node);
-}
-
-static void update_backref_node(struct backref_cache *cache,
- struct backref_node *node, u64 bytenr)
-{
- struct rb_node *rb_node;
- rb_erase(&node->rb_node, &cache->rb_root);
- node->bytenr = bytenr;
- rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
- if (rb_node)
- backref_tree_panic(rb_node, -EEXIST, bytenr);
-}
-
-/*
- * update backref cache after a transaction commit
- */
-static int update_backref_cache(struct btrfs_trans_handle *trans,
- struct backref_cache *cache)
-{
- struct backref_node *node;
- int level = 0;
-
- if (cache->last_trans == 0) {
- cache->last_trans = trans->transid;
- return 0;
- }
-
- if (cache->last_trans == trans->transid)
- return 0;
-
- /*
- * detached nodes are used to avoid unnecessary backref
-	 * lookup. transaction commit changes the extent tree,
-	 * so the detached nodes are no longer useful.
- */
- while (!list_empty(&cache->detached)) {
- node = list_entry(cache->detached.next,
- struct backref_node, list);
- remove_backref_node(cache, node);
- }
-
- while (!list_empty(&cache->changed)) {
- node = list_entry(cache->changed.next,
- struct backref_node, list);
- list_del_init(&node->list);
- BUG_ON(node->pending);
- update_backref_node(cache, node, node->new_bytenr);
- }
-
- /*
- * some nodes can be left in the pending list if there were
- * errors during processing the pending nodes.
- */
- for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
- list_for_each_entry(node, &cache->pending[level], list) {
- BUG_ON(!node->pending);
- if (node->bytenr == node->new_bytenr)
- continue;
- update_backref_node(cache, node, node->new_bytenr);
- }
- }
-
- cache->last_trans = 0;
- return 1;
-}
-
-
-static int should_ignore_root(struct btrfs_root *root)
-{
- struct btrfs_root *reloc_root;
-
- if (!root->ref_cows)
- return 0;
-
- reloc_root = root->reloc_root;
- if (!reloc_root)
- return 0;
-
- if (btrfs_root_last_snapshot(&reloc_root->root_item) ==
- root->fs_info->running_transaction->transid - 1)
- return 0;
- /*
- * if there is reloc tree and it was created in previous
- * transaction backref lookup can find the reloc tree,
- * so backref node for the fs tree root is useless for
- * relocation.
- */
- return 1;
-}
-/*
- * find reloc tree by address of tree root
- */
-static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
- u64 bytenr)
-{
- struct rb_node *rb_node;
- struct mapping_node *node;
- struct btrfs_root *root = NULL;
-
- spin_lock(&rc->reloc_root_tree.lock);
- rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
- if (rb_node) {
- node = rb_entry(rb_node, struct mapping_node, rb_node);
- root = (struct btrfs_root *)node->data;
- }
- spin_unlock(&rc->reloc_root_tree.lock);
- return root;
-}
-
-static int is_cowonly_root(u64 root_objectid)
-{
- if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
- root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
- root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
- root_objectid == BTRFS_DEV_TREE_OBJECTID ||
- root_objectid == BTRFS_TREE_LOG_OBJECTID ||
- root_objectid == BTRFS_CSUM_TREE_OBJECTID)
- return 1;
- return 0;
-}
-
-static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
- u64 root_objectid)
-{
- struct btrfs_key key;
-
- key.objectid = root_objectid;
- key.type = BTRFS_ROOT_ITEM_KEY;
- if (is_cowonly_root(root_objectid))
- key.offset = 0;
- else
- key.offset = (u64)-1;
-
- return btrfs_read_fs_root_no_name(fs_info, &key);
-}
-
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-static noinline_for_stack
-struct btrfs_root *find_tree_root(struct reloc_control *rc,
- struct extent_buffer *leaf,
- struct btrfs_extent_ref_v0 *ref0)
-{
- struct btrfs_root *root;
- u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
- u64 generation = btrfs_ref_generation_v0(leaf, ref0);
-
- BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);
-
- root = read_fs_root(rc->extent_root->fs_info, root_objectid);
- BUG_ON(IS_ERR(root));
-
- if (root->ref_cows &&
- generation != btrfs_root_generation(&root->root_item))
- return NULL;
-
- return root;
-}
-#endif
-
-static noinline_for_stack
-int find_inline_backref(struct extent_buffer *leaf, int slot,
- unsigned long *ptr, unsigned long *end)
-{
- struct btrfs_extent_item *ei;
- struct btrfs_tree_block_info *bi;
- u32 item_size;
-
- item_size = btrfs_item_size_nr(leaf, slot);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (item_size < sizeof(*ei)) {
- WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
- return 1;
- }
-#endif
- ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
- WARN_ON(!(btrfs_extent_flags(leaf, ei) &
- BTRFS_EXTENT_FLAG_TREE_BLOCK));
-
- if (item_size <= sizeof(*ei) + sizeof(*bi)) {
- WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
- return 1;
- }
-
- bi = (struct btrfs_tree_block_info *)(ei + 1);
- *ptr = (unsigned long)(bi + 1);
- *end = (unsigned long)ei + item_size;
- return 0;
-}
-
-/*
- * build backref tree for a given tree block. root of the backref tree
- * corresponds to the tree block, leaves of the backref tree correspond to
- * roots of b-trees that reference the tree block.
- *
- * the basic idea of this function is to check backrefs of a given block
- * to find upper level blocks that reference the block, and then check
- * backrefs of these upper level blocks recursively. the recursion stops
- * when the tree root is reached or backrefs for the block are cached.
- *
- * NOTE: if we find backrefs for a block are cached, we know backrefs
- * for all upper level blocks that directly/indirectly reference the
- * block are also cached.
- */
-static noinline_for_stack
-struct backref_node *build_backref_tree(struct reloc_control *rc,
- struct btrfs_key *node_key,
- int level, u64 bytenr)
-{
- struct backref_cache *cache = &rc->backref_cache;
- struct btrfs_path *path1;
- struct btrfs_path *path2;
- struct extent_buffer *eb;
- struct btrfs_root *root;
- struct backref_node *cur;
- struct backref_node *upper;
- struct backref_node *lower;
- struct backref_node *node = NULL;
- struct backref_node *exist = NULL;
- struct backref_edge *edge;
- struct rb_node *rb_node;
- struct btrfs_key key;
- unsigned long end;
- unsigned long ptr;
- LIST_HEAD(list);
- LIST_HEAD(useless);
- int cowonly;
- int ret;
- int err = 0;
-
- path1 = btrfs_alloc_path();
- path2 = btrfs_alloc_path();
- if (!path1 || !path2) {
- err = -ENOMEM;
- goto out;
- }
- path1->reada = 1;
- path2->reada = 2;
-
- node = alloc_backref_node(cache);
- if (!node) {
- err = -ENOMEM;
- goto out;
- }
-
- node->bytenr = bytenr;
- node->level = level;
- node->lowest = 1;
- cur = node;
-again:
- end = 0;
- ptr = 0;
- key.objectid = cur->bytenr;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = (u64)-1;
-
- path1->search_commit_root = 1;
- path1->skip_locking = 1;
- ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
- 0, 0);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- BUG_ON(!ret || !path1->slots[0]);
-
- path1->slots[0]--;
-
- WARN_ON(cur->checked);
- if (!list_empty(&cur->upper)) {
- /*
- * the backref was added previously when processing
- * backref of type BTRFS_TREE_BLOCK_REF_KEY
- */
- BUG_ON(!list_is_singular(&cur->upper));
- edge = list_entry(cur->upper.next, struct backref_edge,
- list[LOWER]);
- BUG_ON(!list_empty(&edge->list[UPPER]));
- exist = edge->node[UPPER];
- /*
-		 * add the upper level block to pending list if we need to
- * check its backrefs
- */
- if (!exist->checked)
- list_add_tail(&edge->list[UPPER], &list);
- } else {
- exist = NULL;
- }
-
- while (1) {
- cond_resched();
- eb = path1->nodes[0];
-
- if (ptr >= end) {
- if (path1->slots[0] >= btrfs_header_nritems(eb)) {
- ret = btrfs_next_leaf(rc->extent_root, path1);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- if (ret > 0)
- break;
- eb = path1->nodes[0];
- }
-
- btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
- if (key.objectid != cur->bytenr) {
- WARN_ON(exist);
- break;
- }
-
- if (key.type == BTRFS_EXTENT_ITEM_KEY) {
- ret = find_inline_backref(eb, path1->slots[0],
- &ptr, &end);
- if (ret)
- goto next;
- }
- }
-
- if (ptr < end) {
- /* update key for inline back ref */
- struct btrfs_extent_inline_ref *iref;
- iref = (struct btrfs_extent_inline_ref *)ptr;
- key.type = btrfs_extent_inline_ref_type(eb, iref);
- key.offset = btrfs_extent_inline_ref_offset(eb, iref);
- WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY &&
- key.type != BTRFS_SHARED_BLOCK_REF_KEY);
- }
-
- if (exist &&
- ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
- exist->owner == key.offset) ||
- (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
- exist->bytenr == key.offset))) {
- exist = NULL;
- goto next;
- }
-
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
- key.type == BTRFS_EXTENT_REF_V0_KEY) {
- if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
- struct btrfs_extent_ref_v0 *ref0;
- ref0 = btrfs_item_ptr(eb, path1->slots[0],
- struct btrfs_extent_ref_v0);
- if (key.objectid == key.offset) {
- root = find_tree_root(rc, eb, ref0);
- if (root && !should_ignore_root(root))
- cur->root = root;
- else
- list_add(&cur->list, &useless);
- break;
- }
- if (is_cowonly_root(btrfs_ref_root_v0(eb,
- ref0)))
- cur->cowonly = 1;
- }
-#else
- BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
- if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
-#endif
- if (key.objectid == key.offset) {
- /*
- * only root blocks of reloc trees use
- * backref of this type.
- */
- root = find_reloc_root(rc, cur->bytenr);
- BUG_ON(!root);
- cur->root = root;
- break;
- }
-
- edge = alloc_backref_edge(cache);
- if (!edge) {
- err = -ENOMEM;
- goto out;
- }
- rb_node = tree_search(&cache->rb_root, key.offset);
- if (!rb_node) {
- upper = alloc_backref_node(cache);
- if (!upper) {
- free_backref_edge(cache, edge);
- err = -ENOMEM;
- goto out;
- }
- upper->bytenr = key.offset;
- upper->level = cur->level + 1;
- /*
-				 * backrefs for the upper level block aren't
- * cached, add the block to pending list
- */
- list_add_tail(&edge->list[UPPER], &list);
- } else {
- upper = rb_entry(rb_node, struct backref_node,
- rb_node);
- BUG_ON(!upper->checked);
- INIT_LIST_HEAD(&edge->list[UPPER]);
- }
- list_add_tail(&edge->list[LOWER], &cur->upper);
- edge->node[LOWER] = cur;
- edge->node[UPPER] = upper;
-
- goto next;
- } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
- goto next;
- }
-
- /* key.type == BTRFS_TREE_BLOCK_REF_KEY */
- root = read_fs_root(rc->extent_root->fs_info, key.offset);
- if (IS_ERR(root)) {
- err = PTR_ERR(root);
- goto out;
- }
-
- if (!root->ref_cows)
- cur->cowonly = 1;
-
- if (btrfs_root_level(&root->root_item) == cur->level) {
- /* tree root */
- BUG_ON(btrfs_root_bytenr(&root->root_item) !=
- cur->bytenr);
- if (should_ignore_root(root))
- list_add(&cur->list, &useless);
- else
- cur->root = root;
- break;
- }
-
- level = cur->level + 1;
-
- /*
-		 * search the tree to find upper level blocks that
-		 * reference the block.
- */
- path2->search_commit_root = 1;
- path2->skip_locking = 1;
- path2->lowest_level = level;
- ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
- path2->lowest_level = 0;
- if (ret < 0) {
- err = ret;
- goto out;
- }
- if (ret > 0 && path2->slots[level] > 0)
- path2->slots[level]--;
-
- eb = path2->nodes[level];
- WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
- cur->bytenr);
-
- lower = cur;
- for (; level < BTRFS_MAX_LEVEL; level++) {
- if (!path2->nodes[level]) {
- BUG_ON(btrfs_root_bytenr(&root->root_item) !=
- lower->bytenr);
- if (should_ignore_root(root))
- list_add(&lower->list, &useless);
- else
- lower->root = root;
- break;
- }
-
- edge = alloc_backref_edge(cache);
- if (!edge) {
- err = -ENOMEM;
- goto out;
- }
-
- eb = path2->nodes[level];
- rb_node = tree_search(&cache->rb_root, eb->start);
- if (!rb_node) {
- upper = alloc_backref_node(cache);
- if (!upper) {
- free_backref_edge(cache, edge);
- err = -ENOMEM;
- goto out;
- }
- upper->bytenr = eb->start;
- upper->owner = btrfs_header_owner(eb);
- upper->level = lower->level + 1;
- if (!root->ref_cows)
- upper->cowonly = 1;
-
- /*
- * if we know the block isn't shared
-				 * we can avoid checking its backrefs.
- */
- if (btrfs_block_can_be_shared(root, eb))
- upper->checked = 0;
- else
- upper->checked = 1;
-
- /*
-				 * add the block to pending list if we
-				 * need to check its backrefs. only blocks
-				 * at 'cur->level + 1' are added to the
- * tail of pending list. this guarantees
- * we check backrefs from lower level
- * blocks to upper level blocks.
- */
- if (!upper->checked &&
- level == cur->level + 1) {
- list_add_tail(&edge->list[UPPER],
- &list);
- } else
- INIT_LIST_HEAD(&edge->list[UPPER]);
- } else {
- upper = rb_entry(rb_node, struct backref_node,
- rb_node);
- BUG_ON(!upper->checked);
- INIT_LIST_HEAD(&edge->list[UPPER]);
- if (!upper->owner)
- upper->owner = btrfs_header_owner(eb);
- }
- list_add_tail(&edge->list[LOWER], &lower->upper);
- edge->node[LOWER] = lower;
- edge->node[UPPER] = upper;
-
- if (rb_node)
- break;
- lower = upper;
- upper = NULL;
- }
- btrfs_release_path(path2);
-next:
- if (ptr < end) {
- ptr += btrfs_extent_inline_ref_size(key.type);
- if (ptr >= end) {
- WARN_ON(ptr > end);
- ptr = 0;
- end = 0;
- }
- }
- if (ptr >= end)
- path1->slots[0]++;
- }
- btrfs_release_path(path1);
-
- cur->checked = 1;
- WARN_ON(exist);
-
- /* the pending list isn't empty, take the first block to process */
- if (!list_empty(&list)) {
- edge = list_entry(list.next, struct backref_edge, list[UPPER]);
- list_del_init(&edge->list[UPPER]);
- cur = edge->node[UPPER];
- goto again;
- }
-
- /*
- * everything goes well, connect backref nodes and insert backref nodes
- * into the cache.
- */
- BUG_ON(!node->checked);
- cowonly = node->cowonly;
- if (!cowonly) {
- rb_node = tree_insert(&cache->rb_root, node->bytenr,
- &node->rb_node);
- if (rb_node)
- backref_tree_panic(rb_node, -EEXIST, node->bytenr);
- list_add_tail(&node->lower, &cache->leaves);
- }
-
- list_for_each_entry(edge, &node->upper, list[LOWER])
- list_add_tail(&edge->list[UPPER], &list);
-
- while (!list_empty(&list)) {
- edge = list_entry(list.next, struct backref_edge, list[UPPER]);
- list_del_init(&edge->list[UPPER]);
- upper = edge->node[UPPER];
- if (upper->detached) {
- list_del(&edge->list[LOWER]);
- lower = edge->node[LOWER];
- free_backref_edge(cache, edge);
- if (list_empty(&lower->upper))
- list_add(&lower->list, &useless);
- continue;
- }
-
- if (!RB_EMPTY_NODE(&upper->rb_node)) {
- if (upper->lowest) {
- list_del_init(&upper->lower);
- upper->lowest = 0;
- }
-
- list_add_tail(&edge->list[UPPER], &upper->lower);
- continue;
- }
-
- BUG_ON(!upper->checked);
- BUG_ON(cowonly != upper->cowonly);
- if (!cowonly) {
- rb_node = tree_insert(&cache->rb_root, upper->bytenr,
- &upper->rb_node);
- if (rb_node)
- backref_tree_panic(rb_node, -EEXIST,
- upper->bytenr);
- }
-
- list_add_tail(&edge->list[UPPER], &upper->lower);
-
- list_for_each_entry(edge, &upper->upper, list[LOWER])
- list_add_tail(&edge->list[UPPER], &list);
- }
- /*
- * process useless backref nodes. backref nodes for tree leaves
- * are deleted from the cache. backref nodes for upper level
- * tree blocks are left in the cache to avoid unnecessary backref
- * lookup.
- */
- while (!list_empty(&useless)) {
- upper = list_entry(useless.next, struct backref_node, list);
- list_del_init(&upper->list);
- BUG_ON(!list_empty(&upper->upper));
- if (upper == node)
- node = NULL;
- if (upper->lowest) {
- list_del_init(&upper->lower);
- upper->lowest = 0;
- }
- while (!list_empty(&upper->lower)) {
- edge = list_entry(upper->lower.next,
- struct backref_edge, list[UPPER]);
- list_del(&edge->list[UPPER]);
- list_del(&edge->list[LOWER]);
- lower = edge->node[LOWER];
- free_backref_edge(cache, edge);
-
- if (list_empty(&lower->upper))
- list_add(&lower->list, &useless);
- }
- __mark_block_processed(rc, upper);
- if (upper->level > 0) {
- list_add(&upper->list, &cache->detached);
- upper->detached = 1;
- } else {
- rb_erase(&upper->rb_node, &cache->rb_root);
- free_backref_node(cache, upper);
- }
- }
-out:
- btrfs_free_path(path1);
- btrfs_free_path(path2);
- if (err) {
- while (!list_empty(&useless)) {
- lower = list_entry(useless.next,
- struct backref_node, upper);
- list_del_init(&lower->upper);
- }
- upper = node;
- INIT_LIST_HEAD(&list);
- while (upper) {
- if (RB_EMPTY_NODE(&upper->rb_node)) {
- list_splice_tail(&upper->upper, &list);
- free_backref_node(cache, upper);
- }
-
- if (list_empty(&list))
- break;
-
- edge = list_entry(list.next, struct backref_edge,
- list[LOWER]);
- list_del(&edge->list[LOWER]);
- upper = edge->node[UPPER];
- free_backref_edge(cache, edge);
- }
- return ERR_PTR(err);
- }
- BUG_ON(node && node->detached);
- return node;
-}
-
-/*
- * helper to add backref node for the newly created snapshot.
- * the backref node is created by cloning backref node that
- * corresponds to root of source tree
- */
-static int clone_backref_node(struct btrfs_trans_handle *trans,
- struct reloc_control *rc,
- struct btrfs_root *src,
- struct btrfs_root *dest)
-{
- struct btrfs_root *reloc_root = src->reloc_root;
- struct backref_cache *cache = &rc->backref_cache;
- struct backref_node *node = NULL;
- struct backref_node *new_node;
- struct backref_edge *edge;
- struct backref_edge *new_edge;
- struct rb_node *rb_node;
-
- if (cache->last_trans > 0)
- update_backref_cache(trans, cache);
-
- rb_node = tree_search(&cache->rb_root, src->commit_root->start);
- if (rb_node) {
- node = rb_entry(rb_node, struct backref_node, rb_node);
- if (node->detached)
- node = NULL;
- else
- BUG_ON(node->new_bytenr != reloc_root->node->start);
- }
-
- if (!node) {
- rb_node = tree_search(&cache->rb_root,
- reloc_root->commit_root->start);
- if (rb_node) {
- node = rb_entry(rb_node, struct backref_node,
- rb_node);
- BUG_ON(node->detached);
- }
- }
-
- if (!node)
- return 0;
-
- new_node = alloc_backref_node(cache);
- if (!new_node)
- return -ENOMEM;
-
- new_node->bytenr = dest->node->start;
- new_node->level = node->level;
- new_node->lowest = node->lowest;
- new_node->checked = 1;
- new_node->root = dest;
-
- if (!node->lowest) {
- list_for_each_entry(edge, &node->lower, list[UPPER]) {
- new_edge = alloc_backref_edge(cache);
- if (!new_edge)
- goto fail;
-
- new_edge->node[UPPER] = new_node;
- new_edge->node[LOWER] = edge->node[LOWER];
- list_add_tail(&new_edge->list[UPPER],
- &new_node->lower);
- }
- } else {
- list_add_tail(&new_node->lower, &cache->leaves);
- }
-
- rb_node = tree_insert(&cache->rb_root, new_node->bytenr,
- &new_node->rb_node);
- if (rb_node)
- backref_tree_panic(rb_node, -EEXIST, new_node->bytenr);
-
- if (!new_node->lowest) {
- list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
- list_add_tail(&new_edge->list[LOWER],
- &new_edge->node[LOWER]->upper);
- }
- }
- return 0;
-fail:
- while (!list_empty(&new_node->lower)) {
- new_edge = list_entry(new_node->lower.next,
- struct backref_edge, list[UPPER]);
- list_del(&new_edge->list[UPPER]);
- free_backref_edge(cache, new_edge);
- }
- free_backref_node(cache, new_node);
- return -ENOMEM;
-}
-
-/*
- * helper to add 'address of tree root -> reloc tree' mapping
- */
-static int __must_check __add_reloc_root(struct btrfs_root *root)
-{
- struct rb_node *rb_node;
- struct mapping_node *node;
- struct reloc_control *rc = root->fs_info->reloc_ctl;
-
- node = kmalloc(sizeof(*node), GFP_NOFS);
- if (!node)
- return -ENOMEM;
-
- node->bytenr = root->node->start;
- node->data = root;
-
- spin_lock(&rc->reloc_root_tree.lock);
- rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
- node->bytenr, &node->rb_node);
- spin_unlock(&rc->reloc_root_tree.lock);
- if (rb_node) {
- kfree(node);
- btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found "
- "for start=%llu while inserting into relocation "
- "tree\n");
- }
-
- list_add_tail(&root->root_list, &rc->reloc_roots);
- return 0;
-}
-
-/*
- * helper to update/delete the 'address of tree root -> reloc tree'
- * mapping
- */
-static int __update_reloc_root(struct btrfs_root *root, int del)
-{
- struct rb_node *rb_node;
- struct mapping_node *node = NULL;
- struct reloc_control *rc = root->fs_info->reloc_ctl;
-
- spin_lock(&rc->reloc_root_tree.lock);
- rb_node = tree_search(&rc->reloc_root_tree.rb_root,
- root->commit_root->start);
- if (rb_node) {
- node = rb_entry(rb_node, struct mapping_node, rb_node);
- rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
- }
- spin_unlock(&rc->reloc_root_tree.lock);
-
- BUG_ON((struct btrfs_root *)node->data != root);
-
- if (!del) {
- spin_lock(&rc->reloc_root_tree.lock);
- node->bytenr = root->node->start;
- rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
- node->bytenr, &node->rb_node);
- spin_unlock(&rc->reloc_root_tree.lock);
- if (rb_node)
- backref_tree_panic(rb_node, -EEXIST, node->bytenr);
- } else {
- spin_lock(&root->fs_info->trans_lock);
- list_del_init(&root->root_list);
- spin_unlock(&root->fs_info->trans_lock);
- kfree(node);
- }
- return 0;
-}
-
-static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 objectid)
-{
- struct btrfs_root *reloc_root;
- struct extent_buffer *eb;
- struct btrfs_root_item *root_item;
- struct btrfs_key root_key;
- int ret;
-
- root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
- BUG_ON(!root_item);
-
- root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
- root_key.type = BTRFS_ROOT_ITEM_KEY;
- root_key.offset = objectid;
-
- if (root->root_key.objectid == objectid) {
- /* called by btrfs_init_reloc_root */
- ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
- BTRFS_TREE_RELOC_OBJECTID);
- BUG_ON(ret);
-
- btrfs_set_root_last_snapshot(&root->root_item,
- trans->transid - 1);
- } else {
- /*
- * called by btrfs_reloc_post_snapshot_hook.
- * the source tree is a reloc tree, all tree blocks
- * modified after it was created have RELOC flag
- * set in their headers. so it's OK to not update
- * the 'last_snapshot'.
- */
- ret = btrfs_copy_root(trans, root, root->node, &eb,
- BTRFS_TREE_RELOC_OBJECTID);
- BUG_ON(ret);
- }
-
- memcpy(root_item, &root->root_item, sizeof(*root_item));
- btrfs_set_root_bytenr(root_item, eb->start);
- btrfs_set_root_level(root_item, btrfs_header_level(eb));
- btrfs_set_root_generation(root_item, trans->transid);
-
- if (root->root_key.objectid == objectid) {
- btrfs_set_root_refs(root_item, 0);
- memset(&root_item->drop_progress, 0,
- sizeof(struct btrfs_disk_key));
- root_item->drop_level = 0;
- }
-
- btrfs_tree_unlock(eb);
- free_extent_buffer(eb);
-
- ret = btrfs_insert_root(trans, root->fs_info->tree_root,
- &root_key, root_item);
- BUG_ON(ret);
- kfree(root_item);
-
- reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
- &root_key);
- BUG_ON(IS_ERR(reloc_root));
- reloc_root->last_trans = trans->transid;
- return reloc_root;
-}
-
-/*
- * create reloc tree for a given fs tree. reloc tree is just a
- * snapshot of the fs tree with special root objectid.
- */
-int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_root *reloc_root;
- struct reloc_control *rc = root->fs_info->reloc_ctl;
- int clear_rsv = 0;
- int ret;
-
- if (root->reloc_root) {
- reloc_root = root->reloc_root;
- reloc_root->last_trans = trans->transid;
- return 0;
- }
-
- if (!rc || !rc->create_reloc_tree ||
- root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
- return 0;
-
- if (!trans->block_rsv) {
- trans->block_rsv = rc->block_rsv;
- clear_rsv = 1;
- }
- reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
- if (clear_rsv)
- trans->block_rsv = NULL;
-
- ret = __add_reloc_root(reloc_root);
- BUG_ON(ret < 0);
- root->reloc_root = reloc_root;
- return 0;
-}
-
-/*
- * update root item of reloc tree
- */
-int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_root *reloc_root;
- struct btrfs_root_item *root_item;
- int del = 0;
- int ret;
-
- if (!root->reloc_root)
- goto out;
-
- reloc_root = root->reloc_root;
- root_item = &reloc_root->root_item;
-
- if (root->fs_info->reloc_ctl->merge_reloc_tree &&
- btrfs_root_refs(root_item) == 0) {
- root->reloc_root = NULL;
- del = 1;
- }
-
- __update_reloc_root(reloc_root, del);
-
- if (reloc_root->commit_root != reloc_root->node) {
- btrfs_set_root_node(root_item, reloc_root->node);
- free_extent_buffer(reloc_root->commit_root);
- reloc_root->commit_root = btrfs_root_node(reloc_root);
- }
-
- ret = btrfs_update_root(trans, root->fs_info->tree_root,
- &reloc_root->root_key, root_item);
- BUG_ON(ret);
-
-out:
- return 0;
-}
-
-/*
- * helper to find first cached inode with inode number >= objectid
- * in a subvolume
- */
-static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
-{
- struct rb_node *node;
- struct rb_node *prev;
- struct btrfs_inode *entry;
- struct inode *inode;
-
- spin_lock(&root->inode_lock);
-again:
- node = root->inode_tree.rb_node;
- prev = NULL;
- while (node) {
- prev = node;
- entry = rb_entry(node, struct btrfs_inode, rb_node);
-
- if (objectid < btrfs_ino(&entry->vfs_inode))
- node = node->rb_left;
- else if (objectid > btrfs_ino(&entry->vfs_inode))
- node = node->rb_right;
- else
- break;
- }
- if (!node) {
- while (prev) {
- entry = rb_entry(prev, struct btrfs_inode, rb_node);
- if (objectid <= btrfs_ino(&entry->vfs_inode)) {
- node = prev;
- break;
- }
- prev = rb_next(prev);
- }
- }
- while (node) {
- entry = rb_entry(node, struct btrfs_inode, rb_node);
- inode = igrab(&entry->vfs_inode);
- if (inode) {
- spin_unlock(&root->inode_lock);
- return inode;
- }
-
- objectid = btrfs_ino(&entry->vfs_inode) + 1;
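-		/*
-		 * cond_resched_lock() may drop inode_lock; if it did, the
-		 * rbtree may have changed under us, so restart the search
-		 * from the objectid recorded above.
-		 */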
- if (cond_resched_lock(&root->inode_lock))
- goto again;
-
- node = rb_next(node);
- }
- spin_unlock(&root->inode_lock);
- return NULL;
-}
-
-static int in_block_group(u64 bytenr,
- struct btrfs_block_group_cache *block_group)
-{
- if (bytenr >= block_group->key.objectid &&
- bytenr < block_group->key.objectid + block_group->key.offset)
- return 1;
- return 0;
-}
-
-/*
- * get new location of data
- */
-static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
- u64 bytenr, u64 num_bytes)
-{
- struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
- struct btrfs_path *path;
- struct btrfs_file_extent_item *fi;
- struct extent_buffer *leaf;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
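-	/*
-	 * index_cnt of the reloc data inode holds the start bytenr of the
-	 * block group being relocated, so this converts the extent bytenr
-	 * into a file offset inside that inode.
-	 */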
- bytenr -= BTRFS_I(reloc_inode)->index_cnt;
- ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode),
- bytenr, 0);
- if (ret < 0)
- goto out;
- if (ret > 0) {
- ret = -ENOENT;
- goto out;
- }
-
- leaf = path->nodes[0];
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
-
- BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
- btrfs_file_extent_compression(leaf, fi) ||
- btrfs_file_extent_encryption(leaf, fi) ||
- btrfs_file_extent_other_encoding(leaf, fi));
-
- if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
- ret = 1;
- goto out;
- }
-
- *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
- ret = 0;
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-/*
- * update file extent items in the tree leaf to point to
- * the new locations.
- */
-static noinline_for_stack
-int replace_file_extents(struct btrfs_trans_handle *trans,
- struct reloc_control *rc,
- struct btrfs_root *root,
- struct extent_buffer *leaf)
-{
- struct btrfs_key key;
- struct btrfs_file_extent_item *fi;
- struct inode *inode = NULL;
- u64 parent;
- u64 bytenr;
- u64 new_bytenr = 0;
- u64 num_bytes;
- u64 end;
- u32 nritems;
- u32 i;
- int ret;
- int first = 1;
- int dirty = 0;
-
- if (rc->stage != UPDATE_DATA_PTRS)
- return 0;
-
- /* reloc trees always use full backref */
- if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
- parent = leaf->start;
- else
- parent = 0;
-
- nritems = btrfs_header_nritems(leaf);
- for (i = 0; i < nritems; i++) {
- cond_resched();
- btrfs_item_key_to_cpu(leaf, &key, i);
- if (key.type != BTRFS_EXTENT_DATA_KEY)
- continue;
- fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
- if (btrfs_file_extent_type(leaf, fi) ==
- BTRFS_FILE_EXTENT_INLINE)
- continue;
- bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
- num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
- if (bytenr == 0)
- continue;
- if (!in_block_group(bytenr, rc->block_group))
- continue;
-
- /*
-		 * if we are modifying a block in the fs tree, wait for readpage
- * to complete and drop the extent cache
- */
- if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
- if (first) {
- inode = find_next_inode(root, key.objectid);
- first = 0;
- } else if (inode && btrfs_ino(inode) < key.objectid) {
- btrfs_add_delayed_iput(inode);
- inode = find_next_inode(root, key.objectid);
- }
- if (inode && btrfs_ino(inode) == key.objectid) {
- end = key.offset +
- btrfs_file_extent_num_bytes(leaf, fi);
- WARN_ON(!IS_ALIGNED(key.offset,
- root->sectorsize));
- WARN_ON(!IS_ALIGNED(end, root->sectorsize));
- end--;
- ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
- key.offset, end);
- if (!ret)
- continue;
-
- btrfs_drop_extent_cache(inode, key.offset, end,
- 1);
- unlock_extent(&BTRFS_I(inode)->io_tree,
- key.offset, end);
- }
- }
-
- ret = get_new_location(rc->data_inode, &new_bytenr,
- bytenr, num_bytes);
- if (ret > 0) {
- WARN_ON(1);
- continue;
- }
- BUG_ON(ret < 0);
-
- btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
- dirty = 1;
-
- key.offset -= btrfs_file_extent_offset(leaf, fi);
- ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
- num_bytes, parent,
- btrfs_header_owner(leaf),
- key.objectid, key.offset, 1);
- BUG_ON(ret);
-
- ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
- parent, btrfs_header_owner(leaf),
- key.objectid, key.offset, 1);
- BUG_ON(ret);
- }
- if (dirty)
- btrfs_mark_buffer_dirty(leaf);
- if (inode)
- btrfs_add_delayed_iput(inode);
- return 0;
-}
-
-static noinline_for_stack
-int memcmp_node_keys(struct extent_buffer *eb, int slot,
- struct btrfs_path *path, int level)
-{
- struct btrfs_disk_key key1;
- struct btrfs_disk_key key2;
- btrfs_node_key(eb, &key1, slot);
- btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
- return memcmp(&key1, &key2, sizeof(key1));
-}
-
-/*
- * try to replace tree blocks in fs tree with the new blocks
- * in reloc tree. tree blocks that haven't been modified since the
- * reloc tree was created can be replaced.
- *
- * if a block was replaced, level of the block + 1 is returned.
- * if no block got replaced, 0 is returned. if there are other
- * errors, a negative error number is returned.
- */
-static noinline_for_stack
-int replace_path(struct btrfs_trans_handle *trans,
- struct btrfs_root *dest, struct btrfs_root *src,
- struct btrfs_path *path, struct btrfs_key *next_key,
- int lowest_level, int max_level)
-{
- struct extent_buffer *eb;
- struct extent_buffer *parent;
- struct btrfs_key key;
- u64 old_bytenr;
- u64 new_bytenr;
- u64 old_ptr_gen;
- u64 new_ptr_gen;
- u64 last_snapshot;
- u32 blocksize;
- int cow = 0;
- int level;
- int ret;
- int slot;
-
- BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
- BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
-
- last_snapshot = btrfs_root_last_snapshot(&src->root_item);
-again:
- slot = path->slots[lowest_level];
- btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
-
- eb = btrfs_lock_root_node(dest);
- btrfs_set_lock_blocking(eb);
- level = btrfs_header_level(eb);
-
- if (level < lowest_level) {
- btrfs_tree_unlock(eb);
- free_extent_buffer(eb);
- return 0;
- }
-
- if (cow) {
- ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
- BUG_ON(ret);
- }
- btrfs_set_lock_blocking(eb);
-
- if (next_key) {
- next_key->objectid = (u64)-1;
- next_key->type = (u8)-1;
- next_key->offset = (u64)-1;
- }
-
- parent = eb;
- while (1) {
- level = btrfs_header_level(parent);
- BUG_ON(level < lowest_level);
-
- ret = btrfs_bin_search(parent, &key, level, &slot);
- if (ret && slot > 0)
- slot--;
-
- if (next_key && slot + 1 < btrfs_header_nritems(parent))
- btrfs_node_key_to_cpu(parent, next_key, slot + 1);
-
- old_bytenr = btrfs_node_blockptr(parent, slot);
- blocksize = btrfs_level_size(dest, level - 1);
- old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
-
- if (level <= max_level) {
- eb = path->nodes[level];
- new_bytenr = btrfs_node_blockptr(eb,
- path->slots[level]);
- new_ptr_gen = btrfs_node_ptr_generation(eb,
- path->slots[level]);
- } else {
- new_bytenr = 0;
- new_ptr_gen = 0;
- }
-
- if (new_bytenr > 0 && new_bytenr == old_bytenr) {
- WARN_ON(1);
- ret = level;
- break;
- }
-
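-		/*
-		 * the block in the fs tree was modified after the reloc tree
-		 * was created, or the reloc tree has no counterpart for this
-		 * slot; descend one more level instead of swapping here.
-		 */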
- if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
- memcmp_node_keys(parent, slot, path, level)) {
- if (level <= lowest_level) {
- ret = 0;
- break;
- }
-
- eb = read_tree_block(dest, old_bytenr, blocksize,
- old_ptr_gen);
- BUG_ON(!eb);
- btrfs_tree_lock(eb);
- if (cow) {
- ret = btrfs_cow_block(trans, dest, eb, parent,
- slot, &eb);
- BUG_ON(ret);
- }
- btrfs_set_lock_blocking(eb);
-
- btrfs_tree_unlock(parent);
- free_extent_buffer(parent);
-
- parent = eb;
- continue;
- }
-
- if (!cow) {
- btrfs_tree_unlock(parent);
- free_extent_buffer(parent);
- cow = 1;
- goto again;
- }
-
- btrfs_node_key_to_cpu(path->nodes[level], &key,
- path->slots[level]);
- btrfs_release_path(path);
-
- path->lowest_level = level;
- ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
- path->lowest_level = 0;
- BUG_ON(ret);
-
- /*
- * swap blocks in fs tree and reloc tree.
- */
- btrfs_set_node_blockptr(parent, slot, new_bytenr);
- btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
- btrfs_mark_buffer_dirty(parent);
-
- btrfs_set_node_blockptr(path->nodes[level],
- path->slots[level], old_bytenr);
- btrfs_set_node_ptr_generation(path->nodes[level],
- path->slots[level], old_ptr_gen);
- btrfs_mark_buffer_dirty(path->nodes[level]);
-
- ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
- path->nodes[level]->start,
- src->root_key.objectid, level - 1, 0,
- 1);
- BUG_ON(ret);
- ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
- 0, dest->root_key.objectid, level - 1,
- 0, 1);
- BUG_ON(ret);
-
- ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
- path->nodes[level]->start,
- src->root_key.objectid, level - 1, 0,
- 1);
- BUG_ON(ret);
-
- ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
- 0, dest->root_key.objectid, level - 1,
- 0, 1);
- BUG_ON(ret);
-
- btrfs_unlock_up_safe(path, 0);
-
- ret = level;
- break;
- }
- btrfs_tree_unlock(parent);
- free_extent_buffer(parent);
- return ret;
-}
-
-/*
- * helper to find next relocated block in reloc tree
- */
-static noinline_for_stack
-int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
- int *level)
-{
- struct extent_buffer *eb;
- int i;
- u64 last_snapshot;
- u32 nritems;
-
- last_snapshot = btrfs_root_last_snapshot(&root->root_item);
-
- for (i = 0; i < *level; i++) {
- free_extent_buffer(path->nodes[i]);
- path->nodes[i] = NULL;
- }
-
- for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
- eb = path->nodes[i];
- nritems = btrfs_header_nritems(eb);
- while (path->slots[i] + 1 < nritems) {
- path->slots[i]++;
- if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
- last_snapshot)
- continue;
-
- *level = i;
- return 0;
- }
- free_extent_buffer(path->nodes[i]);
- path->nodes[i] = NULL;
- }
- return 1;
-}
-
-/*
- * walk down reloc tree to find relocated block of lowest level
- */
-static noinline_for_stack
-int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
- int *level)
-{
- struct extent_buffer *eb = NULL;
- int i;
- u64 bytenr;
- u64 ptr_gen = 0;
- u64 last_snapshot;
- u32 blocksize;
- u32 nritems;
-
- last_snapshot = btrfs_root_last_snapshot(&root->root_item);
-
- for (i = *level; i > 0; i--) {
- eb = path->nodes[i];
- nritems = btrfs_header_nritems(eb);
- while (path->slots[i] < nritems) {
- ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
- if (ptr_gen > last_snapshot)
- break;
- path->slots[i]++;
- }
- if (path->slots[i] >= nritems) {
- if (i == *level)
- break;
- *level = i + 1;
- return 0;
- }
- if (i == 1) {
- *level = i;
- return 0;
- }
-
- bytenr = btrfs_node_blockptr(eb, path->slots[i]);
- blocksize = btrfs_level_size(root, i - 1);
- eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
- BUG_ON(btrfs_header_level(eb) != i - 1);
- path->nodes[i - 1] = eb;
- path->slots[i - 1] = 0;
- }
- return 1;
-}
-
-/*
- * invalidate extent cache for file extents whose key in range of
- * [min_key, max_key)
- */
-static int invalidate_extent_cache(struct btrfs_root *root,
- struct btrfs_key *min_key,
- struct btrfs_key *max_key)
-{
- struct inode *inode = NULL;
- u64 objectid;
- u64 start, end;
- u64 ino;
-
- objectid = min_key->objectid;
- while (1) {
- cond_resched();
- iput(inode);
-
- if (objectid > max_key->objectid)
- break;
-
- inode = find_next_inode(root, objectid);
- if (!inode)
- break;
- ino = btrfs_ino(inode);
-
- if (ino > max_key->objectid) {
- iput(inode);
- break;
- }
-
- objectid = ino + 1;
- if (!S_ISREG(inode->i_mode))
- continue;
-
- if (unlikely(min_key->objectid == ino)) {
- if (min_key->type > BTRFS_EXTENT_DATA_KEY)
- continue;
- if (min_key->type < BTRFS_EXTENT_DATA_KEY)
- start = 0;
- else {
- start = min_key->offset;
- WARN_ON(!IS_ALIGNED(start, root->sectorsize));
- }
- } else {
- start = 0;
- }
-
- if (unlikely(max_key->objectid == ino)) {
- if (max_key->type < BTRFS_EXTENT_DATA_KEY)
- continue;
- if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
- end = (u64)-1;
- } else {
- if (max_key->offset == 0)
- continue;
- end = max_key->offset;
- WARN_ON(!IS_ALIGNED(end, root->sectorsize));
- end--;
- }
- } else {
- end = (u64)-1;
- }
-
- /* the lock_extent waits for readpage to complete */
- lock_extent(&BTRFS_I(inode)->io_tree, start, end);
- btrfs_drop_extent_cache(inode, start, end, 1);
- unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
- }
- return 0;
-}
-
-static int find_next_key(struct btrfs_path *path, int level,
- struct btrfs_key *key)
-{
- while (level < BTRFS_MAX_LEVEL) {
- if (!path->nodes[level])
- break;
- if (path->slots[level] + 1 <
- btrfs_header_nritems(path->nodes[level])) {
- btrfs_node_key_to_cpu(path->nodes[level], key,
- path->slots[level] + 1);
- return 0;
- }
- level++;
- }
- return 1;
-}
-
-/*
- * merge the relocated tree blocks in reloc tree with corresponding
- * fs tree.
- */
-static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
- struct btrfs_root *root)
-{
- LIST_HEAD(inode_list);
- struct btrfs_key key;
- struct btrfs_key next_key;
- struct btrfs_trans_handle *trans;
- struct btrfs_root *reloc_root;
- struct btrfs_root_item *root_item;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- unsigned long nr;
- int level;
- int max_level;
- int replaced = 0;
- int ret;
- int err = 0;
- u32 min_reserved;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->reada = 1;
-
- reloc_root = root->reloc_root;
- root_item = &reloc_root->root_item;
-
- if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
- level = btrfs_root_level(root_item);
- extent_buffer_get(reloc_root->node);
- path->nodes[level] = reloc_root->node;
- path->slots[level] = 0;
- } else {
- btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
-
- level = root_item->drop_level;
- BUG_ON(level == 0);
- path->lowest_level = level;
- ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
- path->lowest_level = 0;
- if (ret < 0) {
- btrfs_free_path(path);
- return ret;
- }
-
- btrfs_node_key_to_cpu(path->nodes[level], &next_key,
- path->slots[level]);
- WARN_ON(memcmp(&key, &next_key, sizeof(key)));
-
- btrfs_unlock_up_safe(path, 0);
- }
-
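-	/*
-	 * reserve enough metadata space to COW a full path of tree blocks
-	 * in both the fs tree and the reloc tree.
-	 */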
- min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
- memset(&next_key, 0, sizeof(next_key));
-
- while (1) {
- trans = btrfs_start_transaction(root, 0);
- BUG_ON(IS_ERR(trans));
- trans->block_rsv = rc->block_rsv;
-
- ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved);
- if (ret) {
- BUG_ON(ret != -EAGAIN);
- ret = btrfs_commit_transaction(trans, root);
- BUG_ON(ret);
- continue;
- }
-
- replaced = 0;
- max_level = level;
-
- ret = walk_down_reloc_tree(reloc_root, path, &level);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- if (ret > 0)
- break;
-
- if (!find_next_key(path, level, &key) &&
- btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
- ret = 0;
- } else {
- ret = replace_path(trans, root, reloc_root, path,
- &next_key, level, max_level);
- }
- if (ret < 0) {
- err = ret;
- goto out;
- }
-
- if (ret > 0) {
- level = ret;
- btrfs_node_key_to_cpu(path->nodes[level], &key,
- path->slots[level]);
- replaced = 1;
- }
-
- ret = walk_up_reloc_tree(reloc_root, path, &level);
- if (ret > 0)
- break;
-
- BUG_ON(level == 0);
- /*
- * save the merging progress in the drop_progress.
- * this is OK since root refs == 1 in this case.
- */
- btrfs_node_key(path->nodes[level], &root_item->drop_progress,
- path->slots[level]);
- root_item->drop_level = level;
-
- nr = trans->blocks_used;
- btrfs_end_transaction_throttle(trans, root);
-
- btrfs_btree_balance_dirty(root, nr);
-
- if (replaced && rc->stage == UPDATE_DATA_PTRS)
- invalidate_extent_cache(root, &key, &next_key);
- }
-
- /*
-	 * handle the case where only one block in the fs tree needs to be
-	 * relocated and the block is the tree root.
- */
- leaf = btrfs_lock_root_node(root);
- ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
- btrfs_tree_unlock(leaf);
- free_extent_buffer(leaf);
- if (ret < 0)
- err = ret;
-out:
- btrfs_free_path(path);
-
- if (err == 0) {
- memset(&root_item->drop_progress, 0,
- sizeof(root_item->drop_progress));
- root_item->drop_level = 0;
- btrfs_set_root_refs(root_item, 0);
- btrfs_update_reloc_root(trans, root);
- }
-
- nr = trans->blocks_used;
- btrfs_end_transaction_throttle(trans, root);
-
- btrfs_btree_balance_dirty(root, nr);
-
- if (replaced && rc->stage == UPDATE_DATA_PTRS)
- invalidate_extent_cache(root, &key, &next_key);
-
- return err;
-}
-
-static noinline_for_stack
-int prepare_to_merge(struct reloc_control *rc, int err)
-{
- struct btrfs_root *root = rc->extent_root;
- struct btrfs_root *reloc_root;
- struct btrfs_trans_handle *trans;
- LIST_HEAD(reloc_roots);
- u64 num_bytes = 0;
- int ret;
-
- mutex_lock(&root->fs_info->reloc_mutex);
- rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
- rc->merging_rsv_size += rc->nodes_relocated * 2;
- mutex_unlock(&root->fs_info->reloc_mutex);
-
-again:
- if (!err) {
- num_bytes = rc->merging_rsv_size;
- ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes);
- if (ret)
- err = ret;
- }
-
- trans = btrfs_join_transaction(rc->extent_root);
- if (IS_ERR(trans)) {
- if (!err)
- btrfs_block_rsv_release(rc->extent_root,
- rc->block_rsv, num_bytes);
- return PTR_ERR(trans);
- }
-
- if (!err) {
- if (num_bytes != rc->merging_rsv_size) {
- btrfs_end_transaction(trans, rc->extent_root);
- btrfs_block_rsv_release(rc->extent_root,
- rc->block_rsv, num_bytes);
- goto again;
- }
- }
-
- rc->merge_reloc_tree = 1;
-
- while (!list_empty(&rc->reloc_roots)) {
- reloc_root = list_entry(rc->reloc_roots.next,
- struct btrfs_root, root_list);
- list_del_init(&reloc_root->root_list);
-
- root = read_fs_root(reloc_root->fs_info,
- reloc_root->root_key.offset);
- BUG_ON(IS_ERR(root));
- BUG_ON(root->reloc_root != reloc_root);
-
- /*
- * set reference count to 1, so btrfs_recover_relocation
-		 * knows it should resume merging
- */
- if (!err)
- btrfs_set_root_refs(&reloc_root->root_item, 1);
- btrfs_update_reloc_root(trans, root);
-
- list_add(&reloc_root->root_list, &reloc_roots);
- }
-
- list_splice(&reloc_roots, &rc->reloc_roots);
-
- if (!err)
- btrfs_commit_transaction(trans, rc->extent_root);
- else
- btrfs_end_transaction(trans, rc->extent_root);
- return err;
-}
-
-static noinline_for_stack
-int merge_reloc_roots(struct reloc_control *rc)
-{
- struct btrfs_root *root;
- struct btrfs_root *reloc_root;
- LIST_HEAD(reloc_roots);
- int found = 0;
- int ret;
-again:
- root = rc->extent_root;
-
- /*
-	 * this serializes us with btrfs_record_root_in_transaction;
- * we have to make sure nobody is in the middle of
- * adding their roots to the list while we are
- * doing this splice
- */
- mutex_lock(&root->fs_info->reloc_mutex);
- list_splice_init(&rc->reloc_roots, &reloc_roots);
- mutex_unlock(&root->fs_info->reloc_mutex);
-
- while (!list_empty(&reloc_roots)) {
- found = 1;
- reloc_root = list_entry(reloc_roots.next,
- struct btrfs_root, root_list);
-
- if (btrfs_root_refs(&reloc_root->root_item) > 0) {
- root = read_fs_root(reloc_root->fs_info,
- reloc_root->root_key.offset);
- BUG_ON(IS_ERR(root));
- BUG_ON(root->reloc_root != reloc_root);
-
- ret = merge_reloc_root(rc, root);
- BUG_ON(ret);
- } else {
- list_del_init(&reloc_root->root_list);
- }
- ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
- BUG_ON(ret < 0);
- }
-
- if (found) {
- found = 0;
- goto again;
- }
- BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
- return 0;
-}
-
-static void free_block_list(struct rb_root *blocks)
-{
- struct tree_block *block;
- struct rb_node *rb_node;
- while ((rb_node = rb_first(blocks))) {
- block = rb_entry(rb_node, struct tree_block, rb_node);
- rb_erase(rb_node, blocks);
- kfree(block);
- }
-}
-
-static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
- struct btrfs_root *reloc_root)
-{
- struct btrfs_root *root;
-
- if (reloc_root->last_trans == trans->transid)
- return 0;
-
- root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset);
- BUG_ON(IS_ERR(root));
- BUG_ON(root->reloc_root != reloc_root);
-
- return btrfs_record_root_in_trans(trans, root);
-}
-
-static noinline_for_stack
-struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
- struct reloc_control *rc,
- struct backref_node *node,
- struct backref_edge *edges[], int *nr)
-{
- struct backref_node *next;
- struct btrfs_root *root;
- int index = 0;
-
- next = node;
- while (1) {
- cond_resched();
- next = walk_up_backref(next, edges, &index);
- root = next->root;
- BUG_ON(!root);
- BUG_ON(!root->ref_cows);
-
- if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
- record_reloc_root_in_trans(trans, root);
- break;
- }
-
- btrfs_record_root_in_trans(trans, root);
- root = root->reloc_root;
-
- if (next->new_bytenr != root->node->start) {
- BUG_ON(next->new_bytenr);
- BUG_ON(!list_empty(&next->list));
- next->new_bytenr = root->node->start;
- next->root = root;
- list_add_tail(&next->list,
- &rc->backref_cache.changed);
- __mark_block_processed(rc, next);
- break;
- }
-
- WARN_ON(1);
- root = NULL;
- next = walk_down_backref(edges, &index);
- if (!next || next->level <= node->level)
- break;
- }
- if (!root)
- return NULL;
-
- *nr = index;
- next = node;
- /* setup backref node path for btrfs_reloc_cow_block */
- while (1) {
- rc->backref_cache.path[next->level] = next;
- if (--index < 0)
- break;
- next = edges[index]->node[UPPER];
- }
- return root;
-}
-
-/*
- * select a tree root for relocation. return NULL if the block
- * is reference counted. we should use do_relocation() in this
- * case. return a tree root pointer if the block isn't reference
- * counted. return -ENOENT if the block is root of reloc tree.
- */
-static noinline_for_stack
-struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
- struct backref_node *node)
-{
- struct backref_node *next;
- struct btrfs_root *root;
- struct btrfs_root *fs_root = NULL;
- struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
- int index = 0;
-
- next = node;
- while (1) {
- cond_resched();
- next = walk_up_backref(next, edges, &index);
- root = next->root;
- BUG_ON(!root);
-
-		/* no other choice for a non-reference-counted tree */
- if (!root->ref_cows)
- return root;
-
- if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
- fs_root = root;
-
- if (next != node)
- return NULL;
-
- next = walk_down_backref(edges, &index);
- if (!next || next->level <= node->level)
- break;
- }
-
- if (!fs_root)
- return ERR_PTR(-ENOENT);
- return fs_root;
-}
-
-static noinline_for_stack
-u64 calcu_metadata_size(struct reloc_control *rc,
- struct backref_node *node, int reserve)
-{
- struct backref_node *next = node;
- struct backref_edge *edge;
- struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
- u64 num_bytes = 0;
- int index = 0;
-
- BUG_ON(reserve && node->processed);
-
- while (next) {
- cond_resched();
- while (1) {
- if (next->processed && (reserve || next != node))
- break;
-
- num_bytes += btrfs_level_size(rc->extent_root,
- next->level);
-
- if (list_empty(&next->upper))
- break;
-
- edge = list_entry(next->upper.next,
- struct backref_edge, list[LOWER]);
- edges[index++] = edge;
- next = edge->node[UPPER];
- }
- next = walk_down_backref(edges, &index);
- }
- return num_bytes;
-}
-
-static int reserve_metadata_space(struct btrfs_trans_handle *trans,
- struct reloc_control *rc,
- struct backref_node *node)
-{
- struct btrfs_root *root = rc->extent_root;
- u64 num_bytes;
- int ret;
-
- num_bytes = calcu_metadata_size(rc, node, 1) * 2;
-
- trans->block_rsv = rc->block_rsv;
- ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes);
- if (ret) {
- if (ret == -EAGAIN)
- rc->commit_transaction = 1;
- return ret;
- }
-
- return 0;
-}
-
-static void release_metadata_space(struct reloc_control *rc,
- struct backref_node *node)
-{
- u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2;
- btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes);
-}
-
-/*
- * relocate a tree block, and then update pointers in upper level
- * blocks that reference the block to point to the new location.
- *
- * if called by link_to_upper, the block has already been relocated.
- * in that case this function just updates pointers.
- */
-static int do_relocation(struct btrfs_trans_handle *trans,
- struct reloc_control *rc,
- struct backref_node *node,
- struct btrfs_key *key,
- struct btrfs_path *path, int lowest)
-{
- struct backref_node *upper;
- struct backref_edge *edge;
- struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
- struct btrfs_root *root;
- struct extent_buffer *eb;
- u32 blocksize;
- u64 bytenr;
- u64 generation;
- int nr;
- int slot;
- int ret;
- int err = 0;
-
- BUG_ON(lowest && node->eb);
-
- path->lowest_level = node->level + 1;
- rc->backref_cache.path[node->level] = node;
- list_for_each_entry(edge, &node->upper, list[LOWER]) {
- cond_resched();
-
- upper = edge->node[UPPER];
- root = select_reloc_root(trans, rc, upper, edges, &nr);
- BUG_ON(!root);
-
- if (upper->eb && !upper->locked) {
- if (!lowest) {
- ret = btrfs_bin_search(upper->eb, key,
- upper->level, &slot);
- BUG_ON(ret);
- bytenr = btrfs_node_blockptr(upper->eb, slot);
- if (node->eb->start == bytenr)
- goto next;
- }
- drop_node_buffer(upper);
- }
-
- if (!upper->eb) {
- ret = btrfs_search_slot(trans, root, key, path, 0, 1);
- if (ret < 0) {
- err = ret;
- break;
- }
- BUG_ON(ret > 0);
-
- if (!upper->eb) {
- upper->eb = path->nodes[upper->level];
- path->nodes[upper->level] = NULL;
- } else {
- BUG_ON(upper->eb != path->nodes[upper->level]);
- }
-
- upper->locked = 1;
- path->locks[upper->level] = 0;
-
- slot = path->slots[upper->level];
- btrfs_release_path(path);
- } else {
- ret = btrfs_bin_search(upper->eb, key, upper->level,
- &slot);
- BUG_ON(ret);
- }
-
- bytenr = btrfs_node_blockptr(upper->eb, slot);
- if (lowest) {
- BUG_ON(bytenr != node->bytenr);
- } else {
- if (node->eb->start == bytenr)
- goto next;
- }
-
- blocksize = btrfs_level_size(root, node->level);
- generation = btrfs_node_ptr_generation(upper->eb, slot);
- eb = read_tree_block(root, bytenr, blocksize, generation);
- if (!eb) {
- err = -EIO;
- goto next;
- }
- btrfs_tree_lock(eb);
- btrfs_set_lock_blocking(eb);
-
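-		/*
-		 * if the block hasn't been relocated yet, COW it into its new
-		 * location. otherwise just repoint the parent slot at the
-		 * already relocated block and drop the old subtree.
-		 */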
- if (!node->eb) {
- ret = btrfs_cow_block(trans, root, eb, upper->eb,
- slot, &eb);
- btrfs_tree_unlock(eb);
- free_extent_buffer(eb);
- if (ret < 0) {
- err = ret;
- goto next;
- }
- BUG_ON(node->eb != eb);
- } else {
- btrfs_set_node_blockptr(upper->eb, slot,
- node->eb->start);
- btrfs_set_node_ptr_generation(upper->eb, slot,
- trans->transid);
- btrfs_mark_buffer_dirty(upper->eb);
-
- ret = btrfs_inc_extent_ref(trans, root,
- node->eb->start, blocksize,
- upper->eb->start,
- btrfs_header_owner(upper->eb),
- node->level, 0, 1);
- BUG_ON(ret);
-
- ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
- BUG_ON(ret);
- }
-next:
- if (!upper->pending)
- drop_node_buffer(upper);
- else
- unlock_node_buffer(upper);
- if (err)
- break;
- }
-
- if (!err && node->pending) {
- drop_node_buffer(node);
- list_move_tail(&node->list, &rc->backref_cache.changed);
- node->pending = 0;
- }
-
- path->lowest_level = 0;
- BUG_ON(err == -ENOSPC);
- return err;
-}
-
-static int link_to_upper(struct btrfs_trans_handle *trans,
- struct reloc_control *rc,
- struct backref_node *node,
- struct btrfs_path *path)
-{
- struct btrfs_key key;
-
- btrfs_node_key_to_cpu(node->eb, &key, 0);
- return do_relocation(trans, rc, node, &key, path, 0);
-}
-
-static int finish_pending_nodes(struct btrfs_trans_handle *trans,
- struct reloc_control *rc,
- struct btrfs_path *path, int err)
-{
- LIST_HEAD(list);
- struct backref_cache *cache = &rc->backref_cache;
- struct backref_node *node;
- int level;
- int ret;
-
- for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
- while (!list_empty(&cache->pending[level])) {
- node = list_entry(cache->pending[level].next,
- struct backref_node, list);
- list_move_tail(&node->list, &list);
- BUG_ON(!node->pending);
-
- if (!err) {
- ret = link_to_upper(trans, rc, node, path);
- if (ret < 0)
- err = ret;
- }
- }
- list_splice_init(&list, &cache->pending[level]);
- }
- return err;
-}
-
-static void mark_block_processed(struct reloc_control *rc,
- u64 bytenr, u32 blocksize)
-{
- set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1,
- EXTENT_DIRTY, GFP_NOFS);
-}
-
-static void __mark_block_processed(struct reloc_control *rc,
- struct backref_node *node)
-{
- u32 blocksize;
- if (node->level == 0 ||
- in_block_group(node->bytenr, rc->block_group)) {
- blocksize = btrfs_level_size(rc->extent_root, node->level);
- mark_block_processed(rc, node->bytenr, blocksize);
- }
- node->processed = 1;
-}
-
-/*
- * mark a block and all blocks that directly/indirectly reference the block
- * as processed.
- */
-static void update_processed_blocks(struct reloc_control *rc,
- struct backref_node *node)
-{
- struct backref_node *next = node;
- struct backref_edge *edge;
- struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
- int index = 0;
-
- while (next) {
- cond_resched();
- while (1) {
- if (next->processed)
- break;
-
- __mark_block_processed(rc, next);
-
- if (list_empty(&next->upper))
- break;
-
- edge = list_entry(next->upper.next,
- struct backref_edge, list[LOWER]);
- edges[index++] = edge;
- next = edge->node[UPPER];
- }
- next = walk_down_backref(edges, &index);
- }
-}
-
-static int tree_block_processed(u64 bytenr, u32 blocksize,
- struct reloc_control *rc)
-{
- if (test_range_bit(&rc->processed_blocks, bytenr,
- bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
- return 1;
- return 0;
-}
-
-static int get_tree_block_key(struct reloc_control *rc,
- struct tree_block *block)
-{
- struct extent_buffer *eb;
-
- BUG_ON(block->key_ready);
- eb = read_tree_block(rc->extent_root, block->bytenr,
- block->key.objectid, block->key.offset);
- BUG_ON(!eb);
- WARN_ON(btrfs_header_level(eb) != block->level);
- if (block->level == 0)
- btrfs_item_key_to_cpu(eb, &block->key, 0);
- else
- btrfs_node_key_to_cpu(eb, &block->key, 0);
- free_extent_buffer(eb);
- block->key_ready = 1;
- return 0;
-}
-
-static int reada_tree_block(struct reloc_control *rc,
- struct tree_block *block)
-{
- BUG_ON(block->key_ready);
- readahead_tree_block(rc->extent_root, block->bytenr,
- block->key.objectid, block->key.offset);
- return 0;
-}
-
-/*
- * helper function to relocate a tree block
- */
-static int relocate_tree_block(struct btrfs_trans_handle *trans,
- struct reloc_control *rc,
- struct backref_node *node,
- struct btrfs_key *key,
- struct btrfs_path *path)
-{
- struct btrfs_root *root;
- int release = 0;
- int ret = 0;
-
- if (!node)
- return 0;
-
- BUG_ON(node->processed);
- root = select_one_root(trans, node);
- if (root == ERR_PTR(-ENOENT)) {
- update_processed_blocks(rc, node);
- goto out;
- }
-
- if (!root || root->ref_cows) {
- ret = reserve_metadata_space(trans, rc, node);
- if (ret)
- goto out;
- release = 1;
- }
-
- if (root) {
- if (root->ref_cows) {
- BUG_ON(node->new_bytenr);
- BUG_ON(!list_empty(&node->list));
- btrfs_record_root_in_trans(trans, root);
- root = root->reloc_root;
- node->new_bytenr = root->node->start;
- node->root = root;
- list_add_tail(&node->list, &rc->backref_cache.changed);
- } else {
- path->lowest_level = node->level;
- ret = btrfs_search_slot(trans, root, key, path, 0, 1);
- btrfs_release_path(path);
- if (ret > 0)
- ret = 0;
- }
- if (!ret)
- update_processed_blocks(rc, node);
- } else {
- ret = do_relocation(trans, rc, node, key, path, 1);
- }
-out:
- if (ret || node->level == 0 || node->cowonly) {
- if (release)
- release_metadata_space(rc, node);
- remove_backref_node(&rc->backref_cache, node);
- }
- return ret;
-}
-
-/*
- * relocate a list of blocks
- */
-static noinline_for_stack
-int relocate_tree_blocks(struct btrfs_trans_handle *trans,
- struct reloc_control *rc, struct rb_root *blocks)
-{
- struct backref_node *node;
- struct btrfs_path *path;
- struct tree_block *block;
- struct rb_node *rb_node;
- int ret;
- int err = 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
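-	/*
-	 * three passes over the blocks: issue readahead for blocks whose
-	 * keys are unknown, then read them to fill in the keys, and
-	 * finally build backref trees and relocate each block.
-	 */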
- rb_node = rb_first(blocks);
- while (rb_node) {
- block = rb_entry(rb_node, struct tree_block, rb_node);
- if (!block->key_ready)
- reada_tree_block(rc, block);
- rb_node = rb_next(rb_node);
- }
-
- rb_node = rb_first(blocks);
- while (rb_node) {
- block = rb_entry(rb_node, struct tree_block, rb_node);
- if (!block->key_ready)
- get_tree_block_key(rc, block);
- rb_node = rb_next(rb_node);
- }
-
- rb_node = rb_first(blocks);
- while (rb_node) {
- block = rb_entry(rb_node, struct tree_block, rb_node);
-
- node = build_backref_tree(rc, &block->key,
- block->level, block->bytenr);
- if (IS_ERR(node)) {
- err = PTR_ERR(node);
- goto out;
- }
-
- ret = relocate_tree_block(trans, rc, node, &block->key,
- path);
- if (ret < 0) {
- if (ret != -EAGAIN || rb_node == rb_first(blocks))
- err = ret;
- goto out;
- }
- rb_node = rb_next(rb_node);
- }
-out:
- free_block_list(blocks);
- err = finish_pending_nodes(trans, rc, path, err);
-
- btrfs_free_path(path);
- return err;
-}
-
-static noinline_for_stack
-int prealloc_file_extent_cluster(struct inode *inode,
- struct file_extent_cluster *cluster)
-{
- u64 alloc_hint = 0;
- u64 start;
- u64 end;
- u64 offset = BTRFS_I(inode)->index_cnt;
- u64 num_bytes;
- int nr = 0;
- int ret = 0;
-
- BUG_ON(cluster->start != cluster->boundary[0]);
- mutex_lock(&inode->i_mutex);
-
- ret = btrfs_check_data_free_space(inode, cluster->end +
- 1 - cluster->start);
- if (ret)
- goto out;
-
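-	/*
-	 * preallocate one range per cluster boundary so each original
-	 * extent gets its own extent at the new location.
-	 */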
- while (nr < cluster->nr) {
- start = cluster->boundary[nr] - offset;
- if (nr + 1 < cluster->nr)
- end = cluster->boundary[nr + 1] - 1 - offset;
- else
- end = cluster->end - offset;
-
- lock_extent(&BTRFS_I(inode)->io_tree, start, end);
- num_bytes = end + 1 - start;
- ret = btrfs_prealloc_file_range(inode, 0, start,
- num_bytes, num_bytes,
- end + 1, &alloc_hint);
- unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
- if (ret)
- break;
- nr++;
- }
- btrfs_free_reserved_data_space(inode, cluster->end +
- 1 - cluster->start);
-out:
- mutex_unlock(&inode->i_mutex);
- return ret;
-}
-
-static noinline_for_stack
-int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
- u64 block_start)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- struct extent_map *em;
- int ret = 0;
-
- em = alloc_extent_map();
- if (!em)
- return -ENOMEM;
-
- em->start = start;
- em->len = end + 1 - start;
- em->block_len = em->len;
- em->block_start = block_start;
- em->bdev = root->fs_info->fs_devices->latest_bdev;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
-
- lock_extent(&BTRFS_I(inode)->io_tree, start, end);
- while (1) {
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- write_unlock(&em_tree->lock);
- if (ret != -EEXIST) {
- free_extent_map(em);
- break;
- }
- btrfs_drop_extent_cache(inode, start, end, 0);
- }
- unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
- return ret;
-}
-
-static int relocate_file_extent_cluster(struct inode *inode,
- struct file_extent_cluster *cluster)
-{
- u64 page_start;
- u64 page_end;
- u64 offset = BTRFS_I(inode)->index_cnt;
- unsigned long index;
- unsigned long last_index;
- struct page *page;
- struct file_ra_state *ra;
- gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
- int nr = 0;
- int ret = 0;
-
- if (!cluster->nr)
- return 0;
-
- ra = kzalloc(sizeof(*ra), GFP_NOFS);
- if (!ra)
- return -ENOMEM;
-
- ret = prealloc_file_extent_cluster(inode, cluster);
- if (ret)
- goto out;
-
- file_ra_state_init(ra, inode->i_mapping);
-
- ret = setup_extent_mapping(inode, cluster->start - offset,
- cluster->end - offset, cluster->start);
- if (ret)
- goto out;
-
- index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
- last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
- while (index <= last_index) {
- ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
- if (ret)
- goto out;
-
- page = find_lock_page(inode->i_mapping, index);
- if (!page) {
- page_cache_sync_readahead(inode->i_mapping,
- ra, NULL, index,
- last_index + 1 - index);
- page = find_or_create_page(inode->i_mapping, index,
- mask);
- if (!page) {
- btrfs_delalloc_release_metadata(inode,
- PAGE_CACHE_SIZE);
- ret = -ENOMEM;
- goto out;
- }
- }
-
- if (PageReadahead(page)) {
- page_cache_async_readahead(inode->i_mapping,
- ra, NULL, page, index,
- last_index + 1 - index);
- }
-
- if (!PageUptodate(page)) {
- btrfs_readpage(NULL, page);
- lock_page(page);
- if (!PageUptodate(page)) {
- unlock_page(page);
- page_cache_release(page);
- btrfs_delalloc_release_metadata(inode,
- PAGE_CACHE_SIZE);
- ret = -EIO;
- goto out;
- }
- }
-
- page_start = (u64)page->index << PAGE_CACHE_SHIFT;
- page_end = page_start + PAGE_CACHE_SIZE - 1;
-
- lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
-
- set_page_extent_mapped(page);
-
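-		/*
-		 * pages at a cluster boundary are tagged with EXTENT_BOUNDARY
-		 * so that writeback keeps the original extent boundaries
-		 * intact.
-		 */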
- if (nr < cluster->nr &&
- page_start + offset == cluster->boundary[nr]) {
- set_extent_bits(&BTRFS_I(inode)->io_tree,
- page_start, page_end,
- EXTENT_BOUNDARY, GFP_NOFS);
- nr++;
- }
-
- btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
- set_page_dirty(page);
-
- unlock_extent(&BTRFS_I(inode)->io_tree,
- page_start, page_end);
- unlock_page(page);
- page_cache_release(page);
-
- index++;
- balance_dirty_pages_ratelimited(inode->i_mapping);
- btrfs_throttle(BTRFS_I(inode)->root);
- }
- WARN_ON(nr != cluster->nr);
-out:
- kfree(ra);
- return ret;
-}
-
-static noinline_for_stack
-int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
- struct file_extent_cluster *cluster)
-{
- int ret;
-
- if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
- ret = relocate_file_extent_cluster(inode, cluster);
- if (ret)
- return ret;
- cluster->nr = 0;
- }
-
- if (!cluster->nr)
- cluster->start = extent_key->objectid;
- else
- BUG_ON(cluster->nr >= MAX_EXTENTS);
- cluster->end = extent_key->objectid + extent_key->offset - 1;
- cluster->boundary[cluster->nr] = extent_key->objectid;
- cluster->nr++;
-
- if (cluster->nr >= MAX_EXTENTS) {
- ret = relocate_file_extent_cluster(inode, cluster);
- if (ret)
- return ret;
- cluster->nr = 0;
- }
- return 0;
-}
-
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
-static int get_ref_objectid_v0(struct reloc_control *rc,
- struct btrfs_path *path,
- struct btrfs_key *extent_key,
- u64 *ref_objectid, int *path_change)
-{
- struct btrfs_key key;
- struct extent_buffer *leaf;
- struct btrfs_extent_ref_v0 *ref0;
- int ret;
- int slot;
-
- leaf = path->nodes[0];
- slot = path->slots[0];
- while (1) {
- if (slot >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(rc->extent_root, path);
- if (ret < 0)
- return ret;
- BUG_ON(ret > 0);
- leaf = path->nodes[0];
- slot = path->slots[0];
- if (path_change)
- *path_change = 1;
- }
- btrfs_item_key_to_cpu(leaf, &key, slot);
- if (key.objectid != extent_key->objectid)
- return -ENOENT;
-
- if (key.type != BTRFS_EXTENT_REF_V0_KEY) {
- slot++;
- continue;
- }
- ref0 = btrfs_item_ptr(leaf, slot,
- struct btrfs_extent_ref_v0);
- *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
- break;
- }
- return 0;
-}
-#endif
-
-/*
- * helper to add a tree block to the list.
- * the major work is getting the generation and level of the block
- */
-static int add_tree_block(struct reloc_control *rc,
- struct btrfs_key *extent_key,
- struct btrfs_path *path,
- struct rb_root *blocks)
-{
- struct extent_buffer *eb;
- struct btrfs_extent_item *ei;
- struct btrfs_tree_block_info *bi;
- struct tree_block *block;
- struct rb_node *rb_node;
- u32 item_size;
- int level = -1;
- int generation;
-
- eb = path->nodes[0];
- item_size = btrfs_item_size_nr(eb, path->slots[0]);
-
- if (item_size >= sizeof(*ei) + sizeof(*bi)) {
- ei = btrfs_item_ptr(eb, path->slots[0],
- struct btrfs_extent_item);
- bi = (struct btrfs_tree_block_info *)(ei + 1);
- generation = btrfs_extent_generation(eb, ei);
- level = btrfs_tree_block_level(eb, bi);
- } else {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- u64 ref_owner;
- int ret;
-
- BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
- ret = get_ref_objectid_v0(rc, path, extent_key,
- &ref_owner, NULL);
- if (ret < 0)
- return ret;
- BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
- level = (int)ref_owner;
- /* FIXME: get real generation */
- generation = 0;
-#else
- BUG();
-#endif
- }
-
- btrfs_release_path(path);
-
- BUG_ON(level == -1);
-
- block = kmalloc(sizeof(*block), GFP_NOFS);
- if (!block)
- return -ENOMEM;
-
- block->bytenr = extent_key->objectid;
- block->key.objectid = extent_key->offset;
- block->key.offset = generation;
- block->level = level;
- block->key_ready = 0;
-
- rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
- if (rb_node)
- backref_tree_panic(rb_node, -EEXIST, block->bytenr);
-
- return 0;
-}
-
-/*
- * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
- */
-static int __add_tree_block(struct reloc_control *rc,
- u64 bytenr, u32 blocksize,
- struct rb_root *blocks)
-{
- struct btrfs_path *path;
- struct btrfs_key key;
- int ret;
-
- if (tree_block_processed(bytenr, blocksize, rc))
- return 0;
-
- if (tree_search(blocks, bytenr))
- return 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = bytenr;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = blocksize;
-
- path->search_commit_root = 1;
- path->skip_locking = 1;
- ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- BUG_ON(ret);
-
- btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
- ret = add_tree_block(rc, &key, path, blocks);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-/*
- * helper to check if the block uses full backrefs for pointers in it
- */
-static int block_use_full_backref(struct reloc_control *rc,
- struct extent_buffer *eb)
-{
- u64 flags;
- int ret;
-
- if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
- btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
- return 1;
-
- ret = btrfs_lookup_extent_info(NULL, rc->extent_root,
- eb->start, eb->len, NULL, &flags);
- BUG_ON(ret);
-
- if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
- ret = 1;
- else
- ret = 0;
- return ret;
-}
-
-static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
- struct inode *inode, u64 ino)
-{
- struct btrfs_key key;
- struct btrfs_path *path;
- struct btrfs_root *root = fs_info->tree_root;
- struct btrfs_trans_handle *trans;
- unsigned long nr;
- int ret = 0;
-
- if (inode)
- goto truncate;
-
- key.objectid = ino;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
-
- inode = btrfs_iget(fs_info->sb, &key, root, NULL);
- if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) {
- if (inode && !IS_ERR(inode))
- iput(inode);
- return -ENOENT;
- }
-
-truncate:
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- btrfs_free_path(path);
- ret = PTR_ERR(trans);
- goto out;
- }
-
- ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
-
- btrfs_free_path(path);
- nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
-out:
- iput(inode);
- return ret;
-}
-
-/*
- * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
- * this function scans the fs tree to find blocks that reference the data extent
- */
-static int find_data_references(struct reloc_control *rc,
- struct btrfs_key *extent_key,
- struct extent_buffer *leaf,
- struct btrfs_extent_data_ref *ref,
- struct rb_root *blocks)
-{
- struct btrfs_path *path;
- struct tree_block *block;
- struct btrfs_root *root;
- struct btrfs_file_extent_item *fi;
- struct rb_node *rb_node;
- struct btrfs_key key;
- u64 ref_root;
- u64 ref_objectid;
- u64 ref_offset;
- u32 ref_count;
- u32 nritems;
- int err = 0;
- int added = 0;
- int counted;
- int ret;
-
- ref_root = btrfs_extent_data_ref_root(leaf, ref);
- ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
- ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
- ref_count = btrfs_extent_data_ref_count(leaf, ref);
-
- /*
-	 * This is an extent belonging to the free space cache, let's just delete
- * it and redo the search.
- */
- if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
- ret = delete_block_group_cache(rc->extent_root->fs_info,
- NULL, ref_objectid);
- if (ret != -ENOENT)
- return ret;
- ret = 0;
- }
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->reada = 1;
-
- root = read_fs_root(rc->extent_root->fs_info, ref_root);
- if (IS_ERR(root)) {
- err = PTR_ERR(root);
- goto out;
- }
-
- key.objectid = ref_objectid;
- key.type = BTRFS_EXTENT_DATA_KEY;
- if (ref_offset > ((u64)-1 << 32))
- key.offset = 0;
- else
- key.offset = ref_offset;
-
- path->search_commit_root = 1;
- path->skip_locking = 1;
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0) {
- err = ret;
- goto out;
- }
-
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- /*
- * the references in tree blocks that use full backrefs
-	 * are not counted in ref_count
- */
- if (block_use_full_backref(rc, leaf))
- counted = 0;
- else
- counted = 1;
- rb_node = tree_search(blocks, leaf->start);
- if (rb_node) {
- if (counted)
- added = 1;
- else
- path->slots[0] = nritems;
- }
-
- while (ref_count > 0) {
- while (path->slots[0] >= nritems) {
- ret = btrfs_next_leaf(root, path);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- if (ret > 0) {
- WARN_ON(1);
- goto out;
- }
-
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- added = 0;
-
- if (block_use_full_backref(rc, leaf))
- counted = 0;
- else
- counted = 1;
- rb_node = tree_search(blocks, leaf->start);
- if (rb_node) {
- if (counted)
- added = 1;
- else
- path->slots[0] = nritems;
- }
- }
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.objectid != ref_objectid ||
- key.type != BTRFS_EXTENT_DATA_KEY) {
- WARN_ON(1);
- break;
- }
-
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
-
- if (btrfs_file_extent_type(leaf, fi) ==
- BTRFS_FILE_EXTENT_INLINE)
- goto next;
-
- if (btrfs_file_extent_disk_bytenr(leaf, fi) !=
- extent_key->objectid)
- goto next;
-
- key.offset -= btrfs_file_extent_offset(leaf, fi);
- if (key.offset != ref_offset)
- goto next;
-
- if (counted)
- ref_count--;
- if (added)
- goto next;
-
- if (!tree_block_processed(leaf->start, leaf->len, rc)) {
- block = kmalloc(sizeof(*block), GFP_NOFS);
- if (!block) {
- err = -ENOMEM;
- break;
- }
- block->bytenr = leaf->start;
- btrfs_item_key_to_cpu(leaf, &block->key, 0);
- block->level = 0;
- block->key_ready = 1;
- rb_node = tree_insert(blocks, block->bytenr,
- &block->rb_node);
- if (rb_node)
- backref_tree_panic(rb_node, -EEXIST,
- block->bytenr);
- }
- if (counted)
- added = 1;
- else
- path->slots[0] = nritems;
-next:
- path->slots[0]++;
-	}
-out:
- btrfs_free_path(path);
- return err;
-}
-
-/*
- * helper to find all tree blocks that reference a given data extent
- */
-static noinline_for_stack
-int add_data_references(struct reloc_control *rc,
- struct btrfs_key *extent_key,
- struct btrfs_path *path,
- struct rb_root *blocks)
-{
- struct btrfs_key key;
- struct extent_buffer *eb;
- struct btrfs_extent_data_ref *dref;
- struct btrfs_extent_inline_ref *iref;
- unsigned long ptr;
- unsigned long end;
- u32 blocksize = btrfs_level_size(rc->extent_root, 0);
- int ret;
- int err = 0;
-
- eb = path->nodes[0];
- ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
- end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
- ptr = end;
- else
-#endif
- ptr += sizeof(struct btrfs_extent_item);
-
- while (ptr < end) {
- iref = (struct btrfs_extent_inline_ref *)ptr;
- key.type = btrfs_extent_inline_ref_type(eb, iref);
- if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
- key.offset = btrfs_extent_inline_ref_offset(eb, iref);
- ret = __add_tree_block(rc, key.offset, blocksize,
- blocks);
- } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
- dref = (struct btrfs_extent_data_ref *)(&iref->offset);
- ret = find_data_references(rc, extent_key,
- eb, dref, blocks);
- } else {
- BUG();
- }
- ptr += btrfs_extent_inline_ref_size(key.type);
- }
- WARN_ON(ptr > end);
-
- while (1) {
- cond_resched();
- eb = path->nodes[0];
- if (path->slots[0] >= btrfs_header_nritems(eb)) {
- ret = btrfs_next_leaf(rc->extent_root, path);
- if (ret < 0) {
- err = ret;
- break;
- }
- if (ret > 0)
- break;
- eb = path->nodes[0];
- }
-
- btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
- if (key.objectid != extent_key->objectid)
- break;
-
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
- key.type == BTRFS_EXTENT_REF_V0_KEY) {
-#else
- BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
- if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
-#endif
- ret = __add_tree_block(rc, key.offset, blocksize,
- blocks);
- } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
- dref = btrfs_item_ptr(eb, path->slots[0],
- struct btrfs_extent_data_ref);
- ret = find_data_references(rc, extent_key,
- eb, dref, blocks);
- } else {
- ret = 0;
- }
- if (ret) {
- err = ret;
- break;
- }
- path->slots[0]++;
- }
- btrfs_release_path(path);
- if (err)
- free_block_list(blocks);
- return err;
-}
-
-/*
- * helper to find the next unprocessed extent
- */
-static noinline_for_stack
-int find_next_extent(struct btrfs_trans_handle *trans,
- struct reloc_control *rc, struct btrfs_path *path,
- struct btrfs_key *extent_key)
-{
- struct btrfs_key key;
- struct extent_buffer *leaf;
- u64 start, end, last;
- int ret;
-
- last = rc->block_group->key.objectid + rc->block_group->key.offset;
- while (1) {
- cond_resched();
- if (rc->search_start >= last) {
- ret = 1;
- break;
- }
-
- key.objectid = rc->search_start;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = 0;
-
- path->search_commit_root = 1;
- path->skip_locking = 1;
- ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
- 0, 0);
- if (ret < 0)
- break;
-next:
- leaf = path->nodes[0];
- if (path->slots[0] >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(rc->extent_root, path);
- if (ret != 0)
- break;
- leaf = path->nodes[0];
- }
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.objectid >= last) {
- ret = 1;
- break;
- }
-
- if (key.type != BTRFS_EXTENT_ITEM_KEY ||
- key.objectid + key.offset <= rc->search_start) {
- path->slots[0]++;
- goto next;
- }
-
- ret = find_first_extent_bit(&rc->processed_blocks,
- key.objectid, &start, &end,
- EXTENT_DIRTY);
-
- if (ret == 0 && start <= key.objectid) {
- btrfs_release_path(path);
- rc->search_start = end + 1;
- } else {
- rc->search_start = key.objectid + key.offset;
- memcpy(extent_key, &key, sizeof(key));
- return 0;
- }
- }
- btrfs_release_path(path);
- return ret;
-}
-
-static void set_reloc_control(struct reloc_control *rc)
-{
- struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
-
- mutex_lock(&fs_info->reloc_mutex);
- fs_info->reloc_ctl = rc;
- mutex_unlock(&fs_info->reloc_mutex);
-}
-
-static void unset_reloc_control(struct reloc_control *rc)
-{
- struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
-
- mutex_lock(&fs_info->reloc_mutex);
- fs_info->reloc_ctl = NULL;
- mutex_unlock(&fs_info->reloc_mutex);
-}
-
-static int check_extent_flags(u64 flags)
-{
- if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
- (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
- return 1;
- if (!(flags & BTRFS_EXTENT_FLAG_DATA) &&
- !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
- return 1;
- if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
- (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
- return 1;
- return 0;
-}
-
-static noinline_for_stack
-int prepare_to_relocate(struct reloc_control *rc)
-{
- struct btrfs_trans_handle *trans;
- int ret;
-
- rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root);
- if (!rc->block_rsv)
- return -ENOMEM;
-
- /*
- * reserve some space for creating reloc trees.
- * btrfs_init_reloc_root will use it when there
- * is no reservation in the transaction handle.
- */
- ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv,
- rc->extent_root->nodesize * 256);
- if (ret)
- return ret;
-
- memset(&rc->cluster, 0, sizeof(rc->cluster));
- rc->search_start = rc->block_group->key.objectid;
- rc->extents_found = 0;
- rc->nodes_relocated = 0;
- rc->merging_rsv_size = 0;
-
- rc->create_reloc_tree = 1;
- set_reloc_control(rc);
-
- trans = btrfs_join_transaction(rc->extent_root);
- BUG_ON(IS_ERR(trans));
- btrfs_commit_transaction(trans, rc->extent_root);
- return 0;
-}
-
-static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
-{
- struct rb_root blocks = RB_ROOT;
- struct btrfs_key key;
- struct btrfs_trans_handle *trans = NULL;
- struct btrfs_path *path;
- struct btrfs_extent_item *ei;
- unsigned long nr;
- u64 flags;
- u32 item_size;
- int ret;
- int err = 0;
- int progress = 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->reada = 1;
-
- ret = prepare_to_relocate(rc);
- if (ret) {
- err = ret;
- goto out_free;
- }
-
- while (1) {
- progress++;
- trans = btrfs_start_transaction(rc->extent_root, 0);
- BUG_ON(IS_ERR(trans));
-restart:
- if (update_backref_cache(trans, &rc->backref_cache)) {
- btrfs_end_transaction(trans, rc->extent_root);
- continue;
- }
-
- ret = find_next_extent(trans, rc, path, &key);
- if (ret < 0)
- err = ret;
- if (ret != 0)
- break;
-
- rc->extents_found++;
-
- ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_extent_item);
- item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
- if (item_size >= sizeof(*ei)) {
- flags = btrfs_extent_flags(path->nodes[0], ei);
- ret = check_extent_flags(flags);
- BUG_ON(ret);
-
- } else {
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- u64 ref_owner;
- int path_change = 0;
-
- BUG_ON(item_size !=
- sizeof(struct btrfs_extent_item_v0));
- ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
- &path_change);
- if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
- flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
- else
- flags = BTRFS_EXTENT_FLAG_DATA;
-
- if (path_change) {
- btrfs_release_path(path);
-
- path->search_commit_root = 1;
- path->skip_locking = 1;
- ret = btrfs_search_slot(NULL, rc->extent_root,
- &key, path, 0, 0);
- if (ret < 0) {
- err = ret;
- break;
- }
- BUG_ON(ret > 0);
- }
-#else
- BUG();
-#endif
- }
-
- if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- ret = add_tree_block(rc, &key, path, &blocks);
- } else if (rc->stage == UPDATE_DATA_PTRS &&
- (flags & BTRFS_EXTENT_FLAG_DATA)) {
- ret = add_data_references(rc, &key, path, &blocks);
- } else {
- btrfs_release_path(path);
- ret = 0;
- }
- if (ret < 0) {
- err = ret;
- break;
- }
-
- if (!RB_EMPTY_ROOT(&blocks)) {
- ret = relocate_tree_blocks(trans, rc, &blocks);
- if (ret < 0) {
- if (ret != -EAGAIN) {
- err = ret;
- break;
- }
- rc->extents_found--;
- rc->search_start = key.objectid;
- }
- }
-
- ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5);
- if (ret < 0) {
- if (ret != -ENOSPC) {
- err = ret;
- WARN_ON(1);
- break;
- }
- rc->commit_transaction = 1;
- }
-
- if (rc->commit_transaction) {
- rc->commit_transaction = 0;
- ret = btrfs_commit_transaction(trans, rc->extent_root);
- BUG_ON(ret);
- } else {
- nr = trans->blocks_used;
- btrfs_end_transaction_throttle(trans, rc->extent_root);
- btrfs_btree_balance_dirty(rc->extent_root, nr);
- }
- trans = NULL;
-
- if (rc->stage == MOVE_DATA_EXTENTS &&
- (flags & BTRFS_EXTENT_FLAG_DATA)) {
- rc->found_file_extent = 1;
- ret = relocate_data_extent(rc->data_inode,
- &key, &rc->cluster);
- if (ret < 0) {
- err = ret;
- break;
- }
- }
- }
- if (trans && progress && err == -ENOSPC) {
- ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
- rc->block_group->flags);
- if (ret == 0) {
- err = 0;
- progress = 0;
- goto restart;
- }
- }
-
- btrfs_release_path(path);
- clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
- GFP_NOFS);
-
- if (trans) {
- nr = trans->blocks_used;
- btrfs_end_transaction_throttle(trans, rc->extent_root);
- btrfs_btree_balance_dirty(rc->extent_root, nr);
- }
-
- if (!err) {
- ret = relocate_file_extent_cluster(rc->data_inode,
- &rc->cluster);
- if (ret < 0)
- err = ret;
- }
-
- rc->create_reloc_tree = 0;
- set_reloc_control(rc);
-
- backref_cache_cleanup(&rc->backref_cache);
- btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);
-
- err = prepare_to_merge(rc, err);
-
- merge_reloc_roots(rc);
-
- rc->merge_reloc_tree = 0;
- unset_reloc_control(rc);
- btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);
-
- /* get rid of pinned extents */
- trans = btrfs_join_transaction(rc->extent_root);
- if (IS_ERR(trans))
- err = PTR_ERR(trans);
- else
- btrfs_commit_transaction(trans, rc->extent_root);
-out_free:
- btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
- btrfs_free_path(path);
- return err;
-}
-
-static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 objectid)
-{
- struct btrfs_path *path;
- struct btrfs_inode_item *item;
- struct extent_buffer *leaf;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- ret = btrfs_insert_empty_inode(trans, root, path, objectid);
- if (ret)
- goto out;
-
- leaf = path->nodes[0];
- item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
- memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
- btrfs_set_inode_generation(leaf, item, 1);
- btrfs_set_inode_size(leaf, item, 0);
- btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
- btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
- BTRFS_INODE_PREALLOC);
- btrfs_mark_buffer_dirty(leaf);
- btrfs_release_path(path);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-/*
- * helper to create an inode for data relocation.
- * the inode is in the data relocation tree and its link count is 0
- */
-static noinline_for_stack
-struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *group)
-{
- struct inode *inode = NULL;
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root;
- struct btrfs_key key;
- unsigned long nr;
- u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
- int err = 0;
-
- root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
- if (IS_ERR(root))
- return ERR_CAST(root);
-
- trans = btrfs_start_transaction(root, 6);
- if (IS_ERR(trans))
- return ERR_CAST(trans);
-
- err = btrfs_find_free_objectid(root, &objectid);
- if (err)
- goto out;
-
- err = __insert_orphan_inode(trans, root, objectid);
- BUG_ON(err);
-
- key.objectid = objectid;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
- inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
- BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
- BTRFS_I(inode)->index_cnt = group->key.objectid;
-
- err = btrfs_orphan_add(trans, inode);
-out:
- nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
- if (err) {
- if (inode)
- iput(inode);
- inode = ERR_PTR(err);
- }
- return inode;
-}
-
-static struct reloc_control *alloc_reloc_control(void)
-{
- struct reloc_control *rc;
-
- rc = kzalloc(sizeof(*rc), GFP_NOFS);
- if (!rc)
- return NULL;
-
- INIT_LIST_HEAD(&rc->reloc_roots);
- backref_cache_init(&rc->backref_cache);
- mapping_tree_init(&rc->reloc_root_tree);
- extent_io_tree_init(&rc->processed_blocks, NULL);
- return rc;
-}
-
-/*
- * function to relocate all extents in a block group.
- */
-int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
-{
- struct btrfs_fs_info *fs_info = extent_root->fs_info;
- struct reloc_control *rc;
- struct inode *inode;
- struct btrfs_path *path;
- int ret;
- int rw = 0;
- int err = 0;
-
- rc = alloc_reloc_control();
- if (!rc)
- return -ENOMEM;
-
- rc->extent_root = extent_root;
-
- rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
- BUG_ON(!rc->block_group);
-
- if (!rc->block_group->ro) {
- ret = btrfs_set_block_group_ro(extent_root, rc->block_group);
- if (ret) {
- err = ret;
- goto out;
- }
- rw = 1;
- }
-
- path = btrfs_alloc_path();
- if (!path) {
- err = -ENOMEM;
- goto out;
- }
-
- inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
- path);
- btrfs_free_path(path);
-
- if (!IS_ERR(inode))
- ret = delete_block_group_cache(fs_info, inode, 0);
- else
- ret = PTR_ERR(inode);
-
- if (ret && ret != -ENOENT) {
- err = ret;
- goto out;
- }
-
- rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
- if (IS_ERR(rc->data_inode)) {
- err = PTR_ERR(rc->data_inode);
- rc->data_inode = NULL;
- goto out;
- }
-
- printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
- (unsigned long long)rc->block_group->key.objectid,
- (unsigned long long)rc->block_group->flags);
-
- btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
- btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
-
- while (1) {
- mutex_lock(&fs_info->cleaner_mutex);
-
- btrfs_clean_old_snapshots(fs_info->tree_root);
- ret = relocate_block_group(rc);
-
- mutex_unlock(&fs_info->cleaner_mutex);
- if (ret < 0) {
- err = ret;
- goto out;
- }
-
- if (rc->extents_found == 0)
- break;
-
- printk(KERN_INFO "btrfs: found %llu extents\n",
- (unsigned long long)rc->extents_found);
-
- if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
- btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
- invalidate_mapping_pages(rc->data_inode->i_mapping,
- 0, -1);
- rc->stage = UPDATE_DATA_PTRS;
- }
- }
-
- filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
- rc->block_group->key.objectid,
- rc->block_group->key.objectid +
- rc->block_group->key.offset - 1);
-
- WARN_ON(rc->block_group->pinned > 0);
- WARN_ON(rc->block_group->reserved > 0);
- WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
-out:
- if (err && rw)
- btrfs_set_block_group_rw(extent_root, rc->block_group);
- iput(rc->data_inode);
- btrfs_put_block_group(rc->block_group);
- kfree(rc);
- return err;
-}
-
-static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
-{
- struct btrfs_trans_handle *trans;
- int ret, err;
-
- trans = btrfs_start_transaction(root->fs_info->tree_root, 0);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- memset(&root->root_item.drop_progress, 0,
- sizeof(root->root_item.drop_progress));
- root->root_item.drop_level = 0;
- btrfs_set_root_refs(&root->root_item, 0);
- ret = btrfs_update_root(trans, root->fs_info->tree_root,
- &root->root_key, &root->root_item);
-
- err = btrfs_end_transaction(trans, root->fs_info->tree_root);
- if (err)
- return err;
- return ret;
-}
-
-/*
- * recover relocation interrupted by system crash.
- *
- * this function resumes merging reloc trees with corresponding fs trees.
- * this is important for preserving the sharing of tree blocks
- */
-int btrfs_recover_relocation(struct btrfs_root *root)
-{
- LIST_HEAD(reloc_roots);
- struct btrfs_key key;
- struct btrfs_root *fs_root;
- struct btrfs_root *reloc_root;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct reloc_control *rc = NULL;
- struct btrfs_trans_handle *trans;
- int ret;
- int err = 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->reada = -1;
-
- key.objectid = BTRFS_TREE_RELOC_OBJECTID;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
-
- while (1) {
- ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key,
- path, 0, 0);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- if (ret > 0) {
- if (path->slots[0] == 0)
- break;
- path->slots[0]--;
- }
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- btrfs_release_path(path);
-
- if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
- key.type != BTRFS_ROOT_ITEM_KEY)
- break;
-
- reloc_root = btrfs_read_fs_root_no_radix(root, &key);
- if (IS_ERR(reloc_root)) {
- err = PTR_ERR(reloc_root);
- goto out;
- }
-
- list_add(&reloc_root->root_list, &reloc_roots);
-
- if (btrfs_root_refs(&reloc_root->root_item) > 0) {
- fs_root = read_fs_root(root->fs_info,
- reloc_root->root_key.offset);
- if (IS_ERR(fs_root)) {
- ret = PTR_ERR(fs_root);
- if (ret != -ENOENT) {
- err = ret;
- goto out;
- }
- ret = mark_garbage_root(reloc_root);
- if (ret < 0) {
- err = ret;
- goto out;
- }
- }
- }
-
- if (key.offset == 0)
- break;
-
- key.offset--;
- }
- btrfs_release_path(path);
-
- if (list_empty(&reloc_roots))
- goto out;
-
- rc = alloc_reloc_control();
- if (!rc) {
- err = -ENOMEM;
- goto out;
- }
-
- rc->extent_root = root->fs_info->extent_root;
-
- set_reloc_control(rc);
-
- trans = btrfs_join_transaction(rc->extent_root);
- if (IS_ERR(trans)) {
- unset_reloc_control(rc);
- err = PTR_ERR(trans);
- goto out_free;
- }
-
- rc->merge_reloc_tree = 1;
-
- while (!list_empty(&reloc_roots)) {
- reloc_root = list_entry(reloc_roots.next,
- struct btrfs_root, root_list);
- list_del(&reloc_root->root_list);
-
- if (btrfs_root_refs(&reloc_root->root_item) == 0) {
- list_add_tail(&reloc_root->root_list,
- &rc->reloc_roots);
- continue;
- }
-
- fs_root = read_fs_root(root->fs_info,
- reloc_root->root_key.offset);
- if (IS_ERR(fs_root)) {
- err = PTR_ERR(fs_root);
- goto out_free;
- }
-
- err = __add_reloc_root(reloc_root);
- BUG_ON(err < 0); /* -ENOMEM or logic error */
- fs_root->reloc_root = reloc_root;
- }
-
- err = btrfs_commit_transaction(trans, rc->extent_root);
- if (err)
- goto out_free;
-
- merge_reloc_roots(rc);
-
- unset_reloc_control(rc);
-
- trans = btrfs_join_transaction(rc->extent_root);
- if (IS_ERR(trans))
- err = PTR_ERR(trans);
- else
- err = btrfs_commit_transaction(trans, rc->extent_root);
-out_free:
- kfree(rc);
-out:
- while (!list_empty(&reloc_roots)) {
- reloc_root = list_entry(reloc_roots.next,
- struct btrfs_root, root_list);
- list_del(&reloc_root->root_list);
- free_extent_buffer(reloc_root->node);
- free_extent_buffer(reloc_root->commit_root);
- kfree(reloc_root);
- }
- btrfs_free_path(path);
-
- if (err == 0) {
- /* cleanup orphan inode in data relocation tree */
- fs_root = read_fs_root(root->fs_info,
- BTRFS_DATA_RELOC_TREE_OBJECTID);
- if (IS_ERR(fs_root))
- err = PTR_ERR(fs_root);
- else
- err = btrfs_orphan_cleanup(fs_root);
- }
- return err;
-}
-
-/*
- * helper to add ordered checksum for data relocation.
- *
- * cloning checksums properly handles the nodatasum extents.
- * it also saves the CPU time of re-calculating the checksums.
- */
-int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
-{
- struct btrfs_ordered_sum *sums;
- struct btrfs_sector_sum *sector_sum;
- struct btrfs_ordered_extent *ordered;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- size_t offset;
- int ret;
- u64 disk_bytenr;
- LIST_HEAD(list);
-
- ordered = btrfs_lookup_ordered_extent(inode, file_pos);
- BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
-
- disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
- ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
- disk_bytenr + len - 1, &list, 0);
- if (ret)
- goto out;
-
- while (!list_empty(&list)) {
- sums = list_entry(list.next, struct btrfs_ordered_sum, list);
- list_del_init(&sums->list);
-
- sector_sum = sums->sums;
- sums->bytenr = ordered->start;
-
- offset = 0;
- while (offset < sums->len) {
- sector_sum->bytenr += ordered->start - disk_bytenr;
- sector_sum++;
- offset += root->sectorsize;
- }
-
- btrfs_add_ordered_sum(inode, ordered, sums);
- }
-out:
- btrfs_put_ordered_extent(ordered);
- return ret;
-}
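The rebasing in btrfs_reloc_clone_csums() keeps the checksum data itself and only re-keys each per-sector checksum from the old disk bytenr to the newly allocated extent (sector_sum->bytenr += ordered->start - disk_bytenr). A minimal userspace sketch of that arithmetic; the bytenrs, sector size and sector count below are made-up illustration values, not anything read from a real filesystem:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical values: where the data used to live and where the
	 * relocated extent was allocated. */
	uint64_t disk_bytenr = 1048576;   /* old location of the extent   */
	uint64_t ordered_start = 4194304; /* new location after relocation */
	uint64_t sectorsize = 4096;
	int nr_sectors = 4;
	int i;

	/* Same adjustment as in the loop above: every per-sector checksum
	 * is shifted by the distance the extent moved. */
	for (i = 0; i < nr_sectors; i++) {
		uint64_t old_sector = disk_bytenr + i * sectorsize;
		uint64_t new_sector = old_sector + (ordered_start - disk_bytenr);

		printf("csum for sector %llu now indexed at %llu\n",
		       (unsigned long long)old_sector,
		       (unsigned long long)new_sector);
	}
	return 0;
}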
-
-void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *buf,
- struct extent_buffer *cow)
-{
- struct reloc_control *rc;
- struct backref_node *node;
- int first_cow = 0;
- int level;
- int ret;
-
- rc = root->fs_info->reloc_ctl;
- if (!rc)
- return;
-
- BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
- root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
-
- level = btrfs_header_level(buf);
- if (btrfs_header_generation(buf) <=
- btrfs_root_last_snapshot(&root->root_item))
- first_cow = 1;
-
- if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
- rc->create_reloc_tree) {
- WARN_ON(!first_cow && level == 0);
-
- node = rc->backref_cache.path[level];
- BUG_ON(node->bytenr != buf->start &&
- node->new_bytenr != buf->start);
-
- drop_node_buffer(node);
- extent_buffer_get(cow);
- node->eb = cow;
- node->new_bytenr = cow->start;
-
- if (!node->pending) {
- list_move_tail(&node->list,
- &rc->backref_cache.pending[level]);
- node->pending = 1;
- }
-
- if (first_cow)
- __mark_block_processed(rc, node);
-
- if (first_cow && level > 0)
- rc->nodes_relocated += buf->len;
- }
-
- if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) {
- ret = replace_file_extents(trans, rc, root, cow);
- BUG_ON(ret);
- }
-}
-
-/*
- * called before creating a snapshot. it calculates the metadata reservation
- * required for relocating tree blocks in the snapshot
- */
-void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
- struct btrfs_pending_snapshot *pending,
- u64 *bytes_to_reserve)
-{
- struct btrfs_root *root;
- struct reloc_control *rc;
-
- root = pending->root;
- if (!root->reloc_root)
- return;
-
- rc = root->fs_info->reloc_ctl;
- if (!rc->merge_reloc_tree)
- return;
-
- root = root->reloc_root;
- BUG_ON(btrfs_root_refs(&root->root_item) == 0);
- /*
- * relocation is in the stage of merging trees. the space
- * used by merging a reloc tree is twice the size of
- * relocated tree nodes in the worst case. half for cowing
- * the reloc tree, half for cowing the fs tree. the space
- * used by cowing the reloc tree will be freed after the
- * tree is dropped. if we create snapshot, cowing the fs
- * tree may use more space than it frees. so we need
- * reserve extra space.
- */
- *bytes_to_reserve += rc->nodes_relocated;
-}
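A worked example of the reservation reasoning in the comment above, using a made-up amount of relocated metadata (in the kernel the figure is accumulated in rc->nodes_relocated as tree blocks are COWed); this is only an illustration of the accounting, not kernel code:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical: 1024 relocated tree nodes of 4 KiB each. */
	uint64_t nodes_relocated = 1024 * 4096ULL;

	/* Worst case for merging a reloc tree: COW the reloc tree plus
	 * COW the fs tree, i.e. twice the relocated metadata. */
	uint64_t merge_worst_case = 2 * nodes_relocated;

	/* A snapshot created while merging is in progress additionally
	 * reserves the relocated metadata, as btrfs_reloc_pre_snapshot()
	 * does with *bytes_to_reserve += rc->nodes_relocated. */
	uint64_t bytes_to_reserve = 0;
	bytes_to_reserve += nodes_relocated;

	printf("merge worst case  : %llu bytes\n",
	       (unsigned long long)merge_worst_case);
	printf("extra for snapshot: %llu bytes\n",
	       (unsigned long long)bytes_to_reserve);
	return 0;
}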
-
-/*
- * called after the snapshot is created. migrate the block reservation
- * and create a reloc root for the newly created snapshot
- */
-int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
- struct btrfs_pending_snapshot *pending)
-{
- struct btrfs_root *root = pending->root;
- struct btrfs_root *reloc_root;
- struct btrfs_root *new_root;
- struct reloc_control *rc;
- int ret;
-
- if (!root->reloc_root)
- return 0;
-
- rc = root->fs_info->reloc_ctl;
- rc->merging_rsv_size += rc->nodes_relocated;
-
- if (rc->merge_reloc_tree) {
- ret = btrfs_block_rsv_migrate(&pending->block_rsv,
- rc->block_rsv,
- rc->nodes_relocated);
- if (ret)
- return ret;
- }
-
- new_root = pending->snap;
- reloc_root = create_reloc_root(trans, root->reloc_root,
- new_root->root_key.objectid);
- if (IS_ERR(reloc_root))
- return PTR_ERR(reloc_root);
-
- ret = __add_reloc_root(reloc_root);
- BUG_ON(ret < 0);
- new_root->reloc_root = reloc_root;
-
- if (rc->create_reloc_tree)
- ret = clone_backref_node(trans, rc, root, reloc_root);
- return ret;
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/root-tree.c b/ANDROID_3.4.5/fs/btrfs/root-tree.c
deleted file mode 100644
index 24fb8ce4..00000000
--- a/ANDROID_3.4.5/fs/btrfs/root-tree.c
+++ /dev/null
@@ -1,456 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include "ctree.h"
-#include "transaction.h"
-#include "disk-io.h"
-#include "print-tree.h"
-
-/*
- * lookup the root with the highest offset for a given objectid. The key we
- * find is copied into 'key'. If we find something, return 0; otherwise
- * return 1; return < 0 on error.
- */
-int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
- struct btrfs_root_item *item, struct btrfs_key *key)
-{
- struct btrfs_path *path;
- struct btrfs_key search_key;
- struct btrfs_key found_key;
- struct extent_buffer *l;
- int ret;
- int slot;
-
- search_key.objectid = objectid;
- search_key.type = BTRFS_ROOT_ITEM_KEY;
- search_key.offset = (u64)-1;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
- if (ret < 0)
- goto out;
-
- BUG_ON(ret == 0);
- if (path->slots[0] == 0) {
- ret = 1;
- goto out;
- }
- l = path->nodes[0];
- slot = path->slots[0] - 1;
- btrfs_item_key_to_cpu(l, &found_key, slot);
- if (found_key.objectid != objectid ||
- found_key.type != BTRFS_ROOT_ITEM_KEY) {
- ret = 1;
- goto out;
- }
- if (item)
- read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
- sizeof(*item));
- if (key)
- memcpy(key, &found_key, sizeof(found_key));
- ret = 0;
-out:
- btrfs_free_path(path);
- return ret;
-}
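The lookup above uses a common btree idiom: search for (objectid, BTRFS_ROOT_ITEM_KEY, (u64)-1), a key that is never expected to exist (note the BUG_ON(ret == 0)), so the search lands just past the last item for that objectid; stepping back one slot yields the item with the highest offset. A small userspace illustration of the same idea over a sorted array of hypothetical (objectid, offset) keys:

#include <stdio.h>
#include <stdint.h>

struct key { uint64_t objectid; uint64_t offset; };

/* Sorted the same way a btrfs leaf is: by objectid, then offset. */
static const struct key items[] = {
	{ 256, 0 }, { 256, 10 }, { 256, 37 }, { 257, 5 },
};

/* Return the index of the first item greater than *target, like a
 * non-zero btrfs_search_slot() that leaves the path at the insertion
 * point. */
static int first_greater(const struct key *target)
{
	int i;

	for (i = 0; i < (int)(sizeof(items) / sizeof(items[0])); i++) {
		if (items[i].objectid > target->objectid ||
		    (items[i].objectid == target->objectid &&
		     items[i].offset > target->offset))
			break;
	}
	return i;
}

int main(void)
{
	struct key search = { 256, UINT64_MAX }; /* offset (u64)-1 */
	int slot = first_greater(&search);

	if (slot == 0) {
		printf("nothing found\n");
		return 1;
	}
	slot--; /* step back one slot, as btrfs_find_last_root() does */
	if (items[slot].objectid != search.objectid) {
		printf("no item for objectid %llu\n",
		       (unsigned long long)search.objectid);
		return 1;
	}
	printf("highest offset for objectid %llu is %llu\n",
	       (unsigned long long)items[slot].objectid,
	       (unsigned long long)items[slot].offset);
	return 0;
}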
-
-void btrfs_set_root_node(struct btrfs_root_item *item,
- struct extent_buffer *node)
-{
- btrfs_set_root_bytenr(item, node->start);
- btrfs_set_root_level(item, btrfs_header_level(node));
- btrfs_set_root_generation(item, btrfs_header_generation(node));
-}
-
-/*
- * copy the data in 'item' into the btree
- */
-int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_key *key, struct btrfs_root_item
- *item)
-{
- struct btrfs_path *path;
- struct extent_buffer *l;
- int ret;
- int slot;
- unsigned long ptr;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- ret = btrfs_search_slot(trans, root, key, path, 0, 1);
- if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
- goto out;
- }
-
- if (ret != 0) {
- btrfs_print_leaf(root, path->nodes[0]);
- printk(KERN_CRIT "unable to update root key %llu %u %llu\n",
- (unsigned long long)key->objectid, key->type,
- (unsigned long long)key->offset);
- BUG_ON(1);
- }
-
- l = path->nodes[0];
- slot = path->slots[0];
- ptr = btrfs_item_ptr_offset(l, slot);
- write_extent_buffer(l, item, ptr, sizeof(*item));
- btrfs_mark_buffer_dirty(path->nodes[0]);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_key *key, struct btrfs_root_item *item)
-{
- return btrfs_insert_item(trans, root, key, item, sizeof(*item));
-}
-
-/*
- * at mount time we want to find all the old transaction snapshots that were in
- * the process of being deleted if we crashed. This is any root item with an
- * offset lower than the latest root. They need to be queued for deletion to
- * finish what was happening when we crashed.
- */
-int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
-{
- struct btrfs_root *dead_root;
- struct btrfs_root_item *ri;
- struct btrfs_key key;
- struct btrfs_key found_key;
- struct btrfs_path *path;
- int ret;
- u32 nritems;
- struct extent_buffer *leaf;
- int slot;
-
- key.objectid = objectid;
- btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
- key.offset = 0;
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
-again:
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto err;
- while (1) {
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- slot = path->slots[0];
- if (slot >= nritems) {
- ret = btrfs_next_leaf(root, path);
- if (ret)
- break;
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- slot = path->slots[0];
- }
- btrfs_item_key_to_cpu(leaf, &key, slot);
- if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY)
- goto next;
-
- if (key.objectid < objectid)
- goto next;
-
- if (key.objectid > objectid)
- break;
-
- ri = btrfs_item_ptr(leaf, slot, struct btrfs_root_item);
- if (btrfs_disk_root_refs(leaf, ri) != 0)
- goto next;
-
- memcpy(&found_key, &key, sizeof(key));
- key.offset++;
- btrfs_release_path(path);
- dead_root =
- btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
- &found_key);
- if (IS_ERR(dead_root)) {
- ret = PTR_ERR(dead_root);
- goto err;
- }
-
- ret = btrfs_add_dead_root(dead_root);
- if (ret)
- goto err;
- goto again;
-next:
- slot++;
- path->slots[0]++;
- }
- ret = 0;
-err:
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
-{
- struct extent_buffer *leaf;
- struct btrfs_path *path;
- struct btrfs_key key;
- struct btrfs_key root_key;
- struct btrfs_root *root;
- int err = 0;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = BTRFS_ORPHAN_OBJECTID;
- key.type = BTRFS_ORPHAN_ITEM_KEY;
- key.offset = 0;
-
- root_key.type = BTRFS_ROOT_ITEM_KEY;
- root_key.offset = (u64)-1;
-
- while (1) {
- ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
- if (ret < 0) {
- err = ret;
- break;
- }
-
- leaf = path->nodes[0];
- if (path->slots[0] >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(tree_root, path);
- if (ret < 0)
- err = ret;
- if (ret != 0)
- break;
- leaf = path->nodes[0];
- }
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- btrfs_release_path(path);
-
- if (key.objectid != BTRFS_ORPHAN_OBJECTID ||
- key.type != BTRFS_ORPHAN_ITEM_KEY)
- break;
-
- root_key.objectid = key.offset;
- key.offset++;
-
- root = btrfs_read_fs_root_no_name(tree_root->fs_info,
- &root_key);
- if (!IS_ERR(root))
- continue;
-
- ret = PTR_ERR(root);
- if (ret != -ENOENT) {
- err = ret;
- break;
- }
-
- ret = btrfs_find_dead_roots(tree_root, root_key.objectid);
- if (ret) {
- err = ret;
- break;
- }
- }
-
- btrfs_free_path(path);
- return err;
-}
-
-/* drop the root item for 'key' from 'root' */
-int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_key *key)
-{
- struct btrfs_path *path;
- int ret;
- struct btrfs_root_item *ri;
- struct extent_buffer *leaf;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- ret = btrfs_search_slot(trans, root, key, path, -1, 1);
- if (ret < 0)
- goto out;
-
- BUG_ON(ret != 0);
- leaf = path->nodes[0];
- ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item);
-
- ret = btrfs_del_item(trans, root, path);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *tree_root,
- u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
- const char *name, int name_len)
-
-{
- struct btrfs_path *path;
- struct btrfs_root_ref *ref;
- struct extent_buffer *leaf;
- struct btrfs_key key;
- unsigned long ptr;
- int err = 0;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = root_id;
- key.type = BTRFS_ROOT_BACKREF_KEY;
- key.offset = ref_id;
-again:
- ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
- BUG_ON(ret < 0);
- if (ret == 0) {
- leaf = path->nodes[0];
- ref = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_root_ref);
-
- WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid);
- WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len);
- ptr = (unsigned long)(ref + 1);
- WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len));
- *sequence = btrfs_root_ref_sequence(leaf, ref);
-
- ret = btrfs_del_item(trans, tree_root, path);
- if (ret) {
- err = ret;
- goto out;
- }
- } else
- err = -ENOENT;
-
- if (key.type == BTRFS_ROOT_BACKREF_KEY) {
- btrfs_release_path(path);
- key.objectid = ref_id;
- key.type = BTRFS_ROOT_REF_KEY;
- key.offset = root_id;
- goto again;
- }
-
-out:
- btrfs_free_path(path);
- return err;
-}
-
-int btrfs_find_root_ref(struct btrfs_root *tree_root,
- struct btrfs_path *path,
- u64 root_id, u64 ref_id)
-{
- struct btrfs_key key;
- int ret;
-
- key.objectid = root_id;
- key.type = BTRFS_ROOT_REF_KEY;
- key.offset = ref_id;
-
- ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
- return ret;
-}
-
-/*
- * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY
- * or BTRFS_ROOT_BACKREF_KEY.
- *
- * The dirid, sequence, name and name_len refer to the directory entry
- * that is referencing the root.
- *
- * For a forward ref, the root_id is the id of the tree referencing
- * the root and ref_id is the id of the subvol or snapshot.
- *
- * For a back ref the root_id is the id of the subvol or snapshot and
- * ref_id is the id of the tree referencing it.
- *
- * Will return 0, -ENOMEM, or anything from the CoW path
- */
-int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *tree_root,
- u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
- const char *name, int name_len)
-{
- struct btrfs_key key;
- int ret;
- struct btrfs_path *path;
- struct btrfs_root_ref *ref;
- struct extent_buffer *leaf;
- unsigned long ptr;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = root_id;
- key.type = BTRFS_ROOT_BACKREF_KEY;
- key.offset = ref_id;
-again:
- ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
- sizeof(*ref) + name_len);
- if (ret) {
- btrfs_abort_transaction(trans, tree_root, ret);
- btrfs_free_path(path);
- return ret;
- }
-
- leaf = path->nodes[0];
- ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
- btrfs_set_root_ref_dirid(leaf, ref, dirid);
- btrfs_set_root_ref_sequence(leaf, ref, sequence);
- btrfs_set_root_ref_name_len(leaf, ref, name_len);
- ptr = (unsigned long)(ref + 1);
- write_extent_buffer(leaf, name, ptr, name_len);
- btrfs_mark_buffer_dirty(leaf);
-
- if (key.type == BTRFS_ROOT_BACKREF_KEY) {
- btrfs_release_path(path);
- key.objectid = ref_id;
- key.type = BTRFS_ROOT_REF_KEY;
- key.offset = root_id;
- goto again;
- }
-
- btrfs_free_path(path);
- return 0;
-}
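btrfs_add_root_ref() writes the same payload twice, once under each key direction, as described in the comment above it. A minimal sketch of the two keys that end up in the root tree for a hypothetical subvolume (id 256) referenced from a hypothetical parent tree (id 5); the IDs are illustration values only, and the key types are printed symbolically rather than with their on-disk numeric values:

#include <stdio.h>
#include <stdint.h>

struct root_ref_key {
	uint64_t objectid;
	const char *type;
	uint64_t offset;
};

int main(void)
{
	uint64_t parent_tree = 5; /* hypothetical tree holding the dir entry  */
	uint64_t subvol = 256;    /* hypothetical subvolume being referenced  */

	/* Forward ref: keyed by the referencing tree, points at the subvol. */
	struct root_ref_key fwd  = { parent_tree, "BTRFS_ROOT_REF_KEY", subvol };
	/* Back ref: keyed by the subvol, points back at the referencing tree. */
	struct root_ref_key back = { subvol, "BTRFS_ROOT_BACKREF_KEY", parent_tree };

	printf("forward ref: (%llu, %s, %llu)\n",
	       (unsigned long long)fwd.objectid, fwd.type,
	       (unsigned long long)fwd.offset);
	printf("back ref   : (%llu, %s, %llu)\n",
	       (unsigned long long)back.objectid, back.type,
	       (unsigned long long)back.offset);
	return 0;
}

Both items carry the same dirid, sequence and name, so either direction can be used to resolve the relationship between a directory entry and the subvolume it refers to.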
-
-/*
- * Old btrfs versions forget to init root_item->flags and root_item->byte_limit
- * for subvolumes. To work around this problem, we steal a bit from
- * root_item->inode.flags, and use it to indicate if those fields
- * have been properly initialized.
- */
-void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item)
-{
- u64 inode_flags = le64_to_cpu(root_item->inode.flags);
-
- if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) {
- inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT;
- root_item->inode.flags = cpu_to_le64(inode_flags);
- root_item->flags = 0;
- root_item->byte_limit = 0;
- }
-}
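The work-around above is an instance of a general pattern: reserve one bit in a field that every writer has always filled in, and use it to tell properly initialized structures from stale ones, zeroing the untrusted fields lazily on first use. A self-contained sketch of the same idea; the struct layout and flag name are illustrative only, not the btrfs on-disk format:

#include <stdio.h>
#include <stdint.h>

#define EXAMPLE_ITEM_INIT (1ULL << 31) /* illustrative "was initialized" bit */

struct example_root_item {
	uint64_t inode_flags; /* field old writers always filled in       */
	uint64_t flags;       /* fields old writers left uninitialized... */
	uint64_t byte_limit;  /* ...and that therefore cannot be trusted  */
};

static void check_and_init(struct example_root_item *item)
{
	if (!(item->inode_flags & EXAMPLE_ITEM_INIT)) {
		item->inode_flags |= EXAMPLE_ITEM_INIT;
		item->flags = 0;
		item->byte_limit = 0;
	}
}

int main(void)
{
	/* Pretend this came from an old filesystem: garbage in the new fields. */
	struct example_root_item item = { 0, 0xdeadbeef, 42 };

	check_and_init(&item);
	printf("flags=%llu byte_limit=%llu\n",
	       (unsigned long long)item.flags,
	       (unsigned long long)item.byte_limit);
	return 0;
}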
diff --git a/ANDROID_3.4.5/fs/btrfs/scrub.c b/ANDROID_3.4.5/fs/btrfs/scrub.c
deleted file mode 100644
index 2f3d6f91..00000000
--- a/ANDROID_3.4.5/fs/btrfs/scrub.c
+++ /dev/null
@@ -1,2440 +0,0 @@
-/*
- * Copyright (C) 2011 STRATO. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/blkdev.h>
-#include <linux/ratelimit.h>
-#include "ctree.h"
-#include "volumes.h"
-#include "disk-io.h"
-#include "ordered-data.h"
-#include "transaction.h"
-#include "backref.h"
-#include "extent_io.h"
-#include "check-integrity.h"
-
-/*
- * This is only the first step towards a full-featured scrub. It reads all
- * extents and super blocks and verifies the checksums. In case a bad checksum
- * is found or the extent cannot be read, good data will be written back if
- * any can be found.
- *
- * Future enhancements:
- * - In case an unrepairable extent is encountered, track which files are
- * affected and report them
- * - track and record media errors, throw out bad devices
- * - add a mode to also read unallocated space
- */
-
-struct scrub_block;
-struct scrub_dev;
-
-#define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */
-#define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */
-#define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */
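Assuming a 4 KiB PAGE_SIZE (an assumption; the real value is architecture dependent), these constants give the per-bio and per-device budgets noted in the inline comments. A quick userspace check of that arithmetic:

#include <stdio.h>

#define EXAMPLE_PAGE_SIZE   4096 /* assumed page size                   */
#define SCRUB_PAGES_PER_BIO   16 /* same values as the scrub code above */
#define SCRUB_BIOS_PER_DEV    16

int main(void)
{
	unsigned long per_bio = (unsigned long)SCRUB_PAGES_PER_BIO * EXAMPLE_PAGE_SIZE;
	unsigned long in_flight = per_bio * SCRUB_BIOS_PER_DEV;

	printf("per bio   : %lu KiB\n", per_bio >> 10);   /* 64 KiB            */
	printf("per device: %lu KiB\n", in_flight >> 10); /* 1024 KiB in flight */
	return 0;
}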
-
-struct scrub_page {
- struct scrub_block *sblock;
- struct page *page;
- struct block_device *bdev;
- u64 flags; /* extent flags */
- u64 generation;
- u64 logical;
- u64 physical;
- struct {
- unsigned int mirror_num:8;
- unsigned int have_csum:1;
- unsigned int io_error:1;
- };
- u8 csum[BTRFS_CSUM_SIZE];
-};
-
-struct scrub_bio {
- int index;
- struct scrub_dev *sdev;
- struct bio *bio;
- int err;
- u64 logical;
- u64 physical;
- struct scrub_page *pagev[SCRUB_PAGES_PER_BIO];
- int page_count;
- int next_free;
- struct btrfs_work work;
-};
-
-struct scrub_block {
- struct scrub_page pagev[SCRUB_MAX_PAGES_PER_BLOCK];
- int page_count;
- atomic_t outstanding_pages;
- atomic_t ref_count; /* free mem on transition to zero */
- struct scrub_dev *sdev;
- struct {
- unsigned int header_error:1;
- unsigned int checksum_error:1;
- unsigned int no_io_error_seen:1;
- };
-};
-
-struct scrub_dev {
- struct scrub_bio *bios[SCRUB_BIOS_PER_DEV];
- struct btrfs_device *dev;
- int first_free;
- int curr;
- atomic_t in_flight;
- atomic_t fixup_cnt;
- spinlock_t list_lock;
- wait_queue_head_t list_wait;
- u16 csum_size;
- struct list_head csum_list;
- atomic_t cancel_req;
- int readonly;
- int pages_per_bio; /* <= SCRUB_PAGES_PER_BIO */
- u32 sectorsize;
- u32 nodesize;
- u32 leafsize;
- /*
- * statistics
- */
- struct btrfs_scrub_progress stat;
- spinlock_t stat_lock;
-};
-
-struct scrub_fixup_nodatasum {
- struct scrub_dev *sdev;
- u64 logical;
- struct btrfs_root *root;
- struct btrfs_work work;
- int mirror_num;
-};
-
-struct scrub_warning {
- struct btrfs_path *path;
- u64 extent_item_size;
- char *scratch_buf;
- char *msg_buf;
- const char *errstr;
- sector_t sector;
- u64 logical;
- struct btrfs_device *dev;
- int msg_bufsize;
- int scratch_bufsize;
-};
-
-
-static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
-static int scrub_setup_recheck_block(struct scrub_dev *sdev,
- struct btrfs_mapping_tree *map_tree,
- u64 length, u64 logical,
- struct scrub_block *sblock);
-static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
- struct scrub_block *sblock, int is_metadata,
- int have_csum, u8 *csum, u64 generation,
- u16 csum_size);
-static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
- struct scrub_block *sblock,
- int is_metadata, int have_csum,
- const u8 *csum, u64 generation,
- u16 csum_size);
-static void scrub_complete_bio_end_io(struct bio *bio, int err);
-static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
- struct scrub_block *sblock_good,
- int force_write);
-static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
- struct scrub_block *sblock_good,
- int page_num, int force_write);
-static int scrub_checksum_data(struct scrub_block *sblock);
-static int scrub_checksum_tree_block(struct scrub_block *sblock);
-static int scrub_checksum_super(struct scrub_block *sblock);
-static void scrub_block_get(struct scrub_block *sblock);
-static void scrub_block_put(struct scrub_block *sblock);
-static int scrub_add_page_to_bio(struct scrub_dev *sdev,
- struct scrub_page *spage);
-static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
- u64 physical, u64 flags, u64 gen, int mirror_num,
- u8 *csum, int force);
-static void scrub_bio_end_io(struct bio *bio, int err);
-static void scrub_bio_end_io_worker(struct btrfs_work *work);
-static void scrub_block_complete(struct scrub_block *sblock);
-
-
-static void scrub_free_csums(struct scrub_dev *sdev)
-{
- while (!list_empty(&sdev->csum_list)) {
- struct btrfs_ordered_sum *sum;
- sum = list_first_entry(&sdev->csum_list,
- struct btrfs_ordered_sum, list);
- list_del(&sum->list);
- kfree(sum);
- }
-}
-
-static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
-{
- int i;
-
- if (!sdev)
- return;
-
- /* this can happen when scrub is cancelled */
- if (sdev->curr != -1) {
- struct scrub_bio *sbio = sdev->bios[sdev->curr];
-
- for (i = 0; i < sbio->page_count; i++) {
- BUG_ON(!sbio->pagev[i]);
- BUG_ON(!sbio->pagev[i]->page);
- scrub_block_put(sbio->pagev[i]->sblock);
- }
- bio_put(sbio->bio);
- }
-
- for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
- struct scrub_bio *sbio = sdev->bios[i];
-
- if (!sbio)
- break;
- kfree(sbio);
- }
-
- scrub_free_csums(sdev);
- kfree(sdev);
-}
-
-static noinline_for_stack
-struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
-{
- struct scrub_dev *sdev;
- int i;
- struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
- int pages_per_bio;
-
- pages_per_bio = min_t(int, SCRUB_PAGES_PER_BIO,
- bio_get_nr_vecs(dev->bdev));
- sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
- if (!sdev)
- goto nomem;
- sdev->dev = dev;
- sdev->pages_per_bio = pages_per_bio;
- sdev->curr = -1;
- for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
- struct scrub_bio *sbio;
-
- sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
- if (!sbio)
- goto nomem;
- sdev->bios[i] = sbio;
-
- sbio->index = i;
- sbio->sdev = sdev;
- sbio->page_count = 0;
- sbio->work.func = scrub_bio_end_io_worker;
-
- if (i != SCRUB_BIOS_PER_DEV-1)
- sdev->bios[i]->next_free = i + 1;
- else
- sdev->bios[i]->next_free = -1;
- }
- sdev->first_free = 0;
- sdev->nodesize = dev->dev_root->nodesize;
- sdev->leafsize = dev->dev_root->leafsize;
- sdev->sectorsize = dev->dev_root->sectorsize;
- atomic_set(&sdev->in_flight, 0);
- atomic_set(&sdev->fixup_cnt, 0);
- atomic_set(&sdev->cancel_req, 0);
- sdev->csum_size = btrfs_super_csum_size(fs_info->super_copy);
- INIT_LIST_HEAD(&sdev->csum_list);
-
- spin_lock_init(&sdev->list_lock);
- spin_lock_init(&sdev->stat_lock);
- init_waitqueue_head(&sdev->list_wait);
- return sdev;
-
-nomem:
- scrub_free_dev(sdev);
- return ERR_PTR(-ENOMEM);
-}
-
-static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
-{
- u64 isize;
- u32 nlink;
- int ret;
- int i;
- struct extent_buffer *eb;
- struct btrfs_inode_item *inode_item;
- struct scrub_warning *swarn = ctx;
- struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info;
- struct inode_fs_paths *ipath = NULL;
- struct btrfs_root *local_root;
- struct btrfs_key root_key;
-
- root_key.objectid = root;
- root_key.type = BTRFS_ROOT_ITEM_KEY;
- root_key.offset = (u64)-1;
- local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
- if (IS_ERR(local_root)) {
- ret = PTR_ERR(local_root);
- goto err;
- }
-
- ret = inode_item_info(inum, 0, local_root, swarn->path);
- if (ret) {
- btrfs_release_path(swarn->path);
- goto err;
- }
-
- eb = swarn->path->nodes[0];
- inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
- struct btrfs_inode_item);
- isize = btrfs_inode_size(eb, inode_item);
- nlink = btrfs_inode_nlink(eb, inode_item);
- btrfs_release_path(swarn->path);
-
- ipath = init_ipath(4096, local_root, swarn->path);
- if (IS_ERR(ipath)) {
- ret = PTR_ERR(ipath);
- ipath = NULL;
- goto err;
- }
- ret = paths_from_inode(inum, ipath);
-
- if (ret < 0)
- goto err;
-
- /*
- * we deliberately ignore the fact that ipath might have been too small to
- * hold all of the paths here
- */
- for (i = 0; i < ipath->fspath->elem_cnt; ++i)
- printk(KERN_WARNING "btrfs: %s at logical %llu on dev "
- "%s, sector %llu, root %llu, inode %llu, offset %llu, "
- "length %llu, links %u (path: %s)\n", swarn->errstr,
- swarn->logical, swarn->dev->name,
- (unsigned long long)swarn->sector, root, inum, offset,
- min(isize - offset, (u64)PAGE_SIZE), nlink,
- (char *)(unsigned long)ipath->fspath->val[i]);
-
- free_ipath(ipath);
- return 0;
-
-err:
- printk(KERN_WARNING "btrfs: %s at logical %llu on dev "
- "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
- "resolving failed with ret=%d\n", swarn->errstr,
- swarn->logical, swarn->dev->name,
- (unsigned long long)swarn->sector, root, inum, offset, ret);
-
- free_ipath(ipath);
- return 0;
-}
-
-static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
-{
- struct btrfs_device *dev = sblock->sdev->dev;
- struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
- struct btrfs_path *path;
- struct btrfs_key found_key;
- struct extent_buffer *eb;
- struct btrfs_extent_item *ei;
- struct scrub_warning swarn;
- u32 item_size;
- int ret;
- u64 ref_root;
- u8 ref_level;
- unsigned long ptr = 0;
- const int bufsize = 4096;
- u64 extent_item_pos;
-
- path = btrfs_alloc_path();
-
- swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS);
- swarn.msg_buf = kmalloc(bufsize, GFP_NOFS);
- BUG_ON(sblock->page_count < 1);
- swarn.sector = (sblock->pagev[0].physical) >> 9;
- swarn.logical = sblock->pagev[0].logical;
- swarn.errstr = errstr;
- swarn.dev = dev;
- swarn.msg_bufsize = bufsize;
- swarn.scratch_bufsize = bufsize;
-
- if (!path || !swarn.scratch_buf || !swarn.msg_buf)
- goto out;
-
- ret = extent_from_logical(fs_info, swarn.logical, path, &found_key);
- if (ret < 0)
- goto out;
-
- extent_item_pos = swarn.logical - found_key.objectid;
- swarn.extent_item_size = found_key.offset;
-
- eb = path->nodes[0];
- ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
- item_size = btrfs_item_size_nr(eb, path->slots[0]);
- btrfs_release_path(path);
-
- if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- do {
- ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
- &ref_root, &ref_level);
- printk(KERN_WARNING
- "btrfs: %s at logical %llu on dev %s, "
- "sector %llu: metadata %s (level %d) in tree "
- "%llu\n", errstr, swarn.logical, dev->name,
- (unsigned long long)swarn.sector,
- ref_level ? "node" : "leaf",
- ret < 0 ? -1 : ref_level,
- ret < 0 ? -1 : ref_root);
- } while (ret != 1);
- } else {
- swarn.path = path;
- iterate_extent_inodes(fs_info, found_key.objectid,
- extent_item_pos, 1,
- scrub_print_warning_inode, &swarn);
- }
-
-out:
- btrfs_free_path(path);
- kfree(swarn.scratch_buf);
- kfree(swarn.msg_buf);
-}
-
-static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx)
-{
- struct page *page = NULL;
- unsigned long index;
- struct scrub_fixup_nodatasum *fixup = ctx;
- int ret;
- int corrected = 0;
- struct btrfs_key key;
- struct inode *inode = NULL;
- u64 end = offset + PAGE_SIZE - 1;
- struct btrfs_root *local_root;
-
- key.objectid = root;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
- local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key);
- if (IS_ERR(local_root))
- return PTR_ERR(local_root);
-
- key.type = BTRFS_INODE_ITEM_KEY;
- key.objectid = inum;
- key.offset = 0;
- inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
-
- index = offset >> PAGE_CACHE_SHIFT;
-
- page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
- if (!page) {
- ret = -ENOMEM;
- goto out;
- }
-
- if (PageUptodate(page)) {
- struct btrfs_mapping_tree *map_tree;
- if (PageDirty(page)) {
- /*
- * we need to write the data to the defective sector. the
- * data that was in that sector is not in memory,
- * because the page was modified. we must not write the
- * modified page to that sector.
- *
- * TODO: what could be done here: wait for the delalloc
- * runner to write out that page (might involve
- * COW) and see whether the sector is still
- * referenced afterwards.
- *
- * For the meantime, we'll treat this error as
- * uncorrectable, although there is a chance that a
- * later scrub will find the bad sector again and that
- * there's no dirty page in memory, then.
- */
- ret = -EIO;
- goto out;
- }
- map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree;
- ret = repair_io_failure(map_tree, offset, PAGE_SIZE,
- fixup->logical, page,
- fixup->mirror_num);
- unlock_page(page);
- corrected = !ret;
- } else {
- /*
- * we need to get good data first. the general readpage path
- * will call repair_io_failure for us, we just have to make
- * sure we read the bad mirror.
- */
- ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
- EXTENT_DAMAGED, GFP_NOFS);
- if (ret) {
- /* set_extent_bits should give proper error */
- WARN_ON(ret > 0);
- if (ret > 0)
- ret = -EFAULT;
- goto out;
- }
-
- ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
- btrfs_get_extent,
- fixup->mirror_num);
- wait_on_page_locked(page);
-
- corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
- end, EXTENT_DAMAGED, 0, NULL);
- if (!corrected)
- clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
- EXTENT_DAMAGED, GFP_NOFS);
- }
-
-out:
- if (page)
- put_page(page);
- if (inode)
- iput(inode);
-
- if (ret < 0)
- return ret;
-
- if (ret == 0 && corrected) {
- /*
- * we only need to call readpage for one of the inodes belonging
- * to this extent. so make iterate_extent_inodes stop
- */
- return 1;
- }
-
- return -EIO;
-}
-
-static void scrub_fixup_nodatasum(struct btrfs_work *work)
-{
- int ret;
- struct scrub_fixup_nodatasum *fixup;
- struct scrub_dev *sdev;
- struct btrfs_trans_handle *trans = NULL;
- struct btrfs_fs_info *fs_info;
- struct btrfs_path *path;
- int uncorrectable = 0;
-
- fixup = container_of(work, struct scrub_fixup_nodatasum, work);
- sdev = fixup->sdev;
- fs_info = fixup->root->fs_info;
-
- path = btrfs_alloc_path();
- if (!path) {
- spin_lock(&sdev->stat_lock);
- ++sdev->stat.malloc_errors;
- spin_unlock(&sdev->stat_lock);
- uncorrectable = 1;
- goto out;
- }
-
- trans = btrfs_join_transaction(fixup->root);
- if (IS_ERR(trans)) {
- uncorrectable = 1;
- goto out;
- }
-
- /*
- * the idea is to trigger a regular read through the standard path. we
- * read a page from the (failed) logical address by specifying the
- * corresponding copynum of the failed sector. thus, that readpage is
- * expected to fail.
- * that is the point where on-the-fly error correction will kick in
- * (once it's finished) and rewrite the failed sector if a good copy
- * can be found.
- */
- ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info,
- path, scrub_fixup_readpage,
- fixup);
- if (ret < 0) {
- uncorrectable = 1;
- goto out;
- }
- WARN_ON(ret != 1);
-
- spin_lock(&sdev->stat_lock);
- ++sdev->stat.corrected_errors;
- spin_unlock(&sdev->stat_lock);
-
-out:
- if (trans && !IS_ERR(trans))
- btrfs_end_transaction(trans, fixup->root);
- if (uncorrectable) {
- spin_lock(&sdev->stat_lock);
- ++sdev->stat.uncorrectable_errors;
- spin_unlock(&sdev->stat_lock);
- printk_ratelimited(KERN_ERR
- "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
- (unsigned long long)fixup->logical, sdev->dev->name);
- }
-
- btrfs_free_path(path);
- kfree(fixup);
-
- /* see caller why we're pretending to be paused in the scrub counters */
- mutex_lock(&fs_info->scrub_lock);
- atomic_dec(&fs_info->scrubs_running);
- atomic_dec(&fs_info->scrubs_paused);
- mutex_unlock(&fs_info->scrub_lock);
- atomic_dec(&sdev->fixup_cnt);
- wake_up(&fs_info->scrub_pause_wait);
- wake_up(&sdev->list_wait);
-}
-
-/*
- * scrub_handle_errored_block gets called when either verification of the
- * pages failed or the bio failed to read, e.g. with EIO. In the latter
- * case, this function handles all pages in the bio, even though only one
- * may be bad.
- * The goal of this function is to repair the errored block by using the
- * contents of one of the mirrors.
- */
-static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
-{
- struct scrub_dev *sdev = sblock_to_check->sdev;
- struct btrfs_fs_info *fs_info;
- u64 length;
- u64 logical;
- u64 generation;
- unsigned int failed_mirror_index;
- unsigned int is_metadata;
- unsigned int have_csum;
- u8 *csum;
- struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
- struct scrub_block *sblock_bad;
- int ret;
- int mirror_index;
- int page_num;
- int success;
- static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
- DEFAULT_RATELIMIT_BURST);
-
- BUG_ON(sblock_to_check->page_count < 1);
- fs_info = sdev->dev->dev_root->fs_info;
- length = sblock_to_check->page_count * PAGE_SIZE;
- logical = sblock_to_check->pagev[0].logical;
- generation = sblock_to_check->pagev[0].generation;
- BUG_ON(sblock_to_check->pagev[0].mirror_num < 1);
- failed_mirror_index = sblock_to_check->pagev[0].mirror_num - 1;
- is_metadata = !(sblock_to_check->pagev[0].flags &
- BTRFS_EXTENT_FLAG_DATA);
- have_csum = sblock_to_check->pagev[0].have_csum;
- csum = sblock_to_check->pagev[0].csum;
-
- /*
- * read all mirrors one after the other. This includes
- * re-reading the extent or metadata block that failed (which is
- * what caused this fixup code to be called), this time
- * page by page in order to know which pages
- * caused I/O errors and which ones are good (for all mirrors).
- * It is the goal to handle the situation when more than one
- * mirror contains I/O errors, but the errors do not
- * overlap, i.e. the data can be repaired by selecting the
- * pages from those mirrors without I/O error on the
- * particular pages. One example (with blocks >= 2 * PAGE_SIZE)
- * would be that mirror #1 has an I/O error on the first page,
- * the second page is good, and mirror #2 has an I/O error on
- * the second page, but the first page is good.
- * Then the first page of the first mirror can be repaired by
- * taking the first page of the second mirror, and the
- * second page of the second mirror can be repaired by
- * copying the contents of the 2nd page of the 1st mirror.
- * One more note: if the pages of one mirror contain I/O
- * errors, the checksum cannot be verified. In order to get
- * the best data for repairing, the first attempt is to find
- * a mirror without I/O errors and with a validated checksum.
- * Only if this is not possible, the pages are picked from
- * mirrors with I/O errors without considering the checksum.
- * If the latter is the case, at the end, the checksum of the
- * repaired area is verified in order to correctly maintain
- * the statistics.
- */
-
- sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
- sizeof(*sblocks_for_recheck),
- GFP_NOFS);
- if (!sblocks_for_recheck) {
- spin_lock(&sdev->stat_lock);
- sdev->stat.malloc_errors++;
- sdev->stat.read_errors++;
- sdev->stat.uncorrectable_errors++;
- spin_unlock(&sdev->stat_lock);
- goto out;
- }
-
- /* setup the context, map the logical blocks and alloc the pages */
- ret = scrub_setup_recheck_block(sdev, &fs_info->mapping_tree, length,
- logical, sblocks_for_recheck);
- if (ret) {
- spin_lock(&sdev->stat_lock);
- sdev->stat.read_errors++;
- sdev->stat.uncorrectable_errors++;
- spin_unlock(&sdev->stat_lock);
- goto out;
- }
- BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
- sblock_bad = sblocks_for_recheck + failed_mirror_index;
-
- /* build and submit the bios for the failed mirror, check checksums */
- ret = scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
- csum, generation, sdev->csum_size);
- if (ret) {
- spin_lock(&sdev->stat_lock);
- sdev->stat.read_errors++;
- sdev->stat.uncorrectable_errors++;
- spin_unlock(&sdev->stat_lock);
- goto out;
- }
-
- if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
- sblock_bad->no_io_error_seen) {
- /*
- * the error disappeared after reading page by page, or
- * the area was part of a huge bio and other parts of the
- * bio caused I/O errors, or the block layer merged several
- * read requests into one and the error is caused by a
- * different bio (usually one of the two latter cases is
- * the cause)
- */
- spin_lock(&sdev->stat_lock);
- sdev->stat.unverified_errors++;
- spin_unlock(&sdev->stat_lock);
-
- goto out;
- }
-
- if (!sblock_bad->no_io_error_seen) {
- spin_lock(&sdev->stat_lock);
- sdev->stat.read_errors++;
- spin_unlock(&sdev->stat_lock);
- if (__ratelimit(&_rs))
- scrub_print_warning("i/o error", sblock_to_check);
- } else if (sblock_bad->checksum_error) {
- spin_lock(&sdev->stat_lock);
- sdev->stat.csum_errors++;
- spin_unlock(&sdev->stat_lock);
- if (__ratelimit(&_rs))
- scrub_print_warning("checksum error", sblock_to_check);
- } else if (sblock_bad->header_error) {
- spin_lock(&sdev->stat_lock);
- sdev->stat.verify_errors++;
- spin_unlock(&sdev->stat_lock);
- if (__ratelimit(&_rs))
- scrub_print_warning("checksum/header error",
- sblock_to_check);
- }
-
- if (sdev->readonly)
- goto did_not_correct_error;
-
- if (!is_metadata && !have_csum) {
- struct scrub_fixup_nodatasum *fixup_nodatasum;
-
- /*
- * !is_metadata and !have_csum, this means that the data
- * might not be COW'ed, that it might be modified
- * concurrently. The general strategy to work on the
- * commit root does not help in the case when COW is not
- * used.
- */
- fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
- if (!fixup_nodatasum)
- goto did_not_correct_error;
- fixup_nodatasum->sdev = sdev;
- fixup_nodatasum->logical = logical;
- fixup_nodatasum->root = fs_info->extent_root;
- fixup_nodatasum->mirror_num = failed_mirror_index + 1;
- /*
- * increment scrubs_running to prevent cancel requests from
- * completing as long as a fixup worker is running. we must also
- * increment scrubs_paused to prevent deadlocking on pause
- * requests used for transactions commits (as the worker uses a
- * transaction context). it is safe to regard the fixup worker
- * as paused for all practical matters. effectively, we only
- * prevent cancellation requests from completing.
- */
- mutex_lock(&fs_info->scrub_lock);
- atomic_inc(&fs_info->scrubs_running);
- atomic_inc(&fs_info->scrubs_paused);
- mutex_unlock(&fs_info->scrub_lock);
- atomic_inc(&sdev->fixup_cnt);
- fixup_nodatasum->work.func = scrub_fixup_nodatasum;
- btrfs_queue_worker(&fs_info->scrub_workers,
- &fixup_nodatasum->work);
- goto out;
- }
-
- /*
- * now build and submit the bios for the other mirrors, check
- * checksums
- */
- for (mirror_index = 0;
- mirror_index < BTRFS_MAX_MIRRORS &&
- sblocks_for_recheck[mirror_index].page_count > 0;
- mirror_index++) {
- if (mirror_index == failed_mirror_index)
- continue;
-
- /* build and submit the bios, check checksums */
- ret = scrub_recheck_block(fs_info,
- sblocks_for_recheck + mirror_index,
- is_metadata, have_csum, csum,
- generation, sdev->csum_size);
- if (ret)
- goto did_not_correct_error;
- }
-
- /*
- * first try to pick the mirror which is completely without I/O
- * errors and also does not have a checksum error.
- * If one is found, and if a checksum is present, the full block
- * that is known to contain an error is rewritten. Afterwards
- * the block is known to be corrected.
- * If a mirror is found which is completely correct, and no
- * checksum is present, only those pages are rewritten that had
- * an I/O error in the block to be repaired, since it cannot be
- * determined, which copy of the other pages is better (and it
- * could happen otherwise that a correct page would be
- * overwritten by a bad one).
- */
- for (mirror_index = 0;
- mirror_index < BTRFS_MAX_MIRRORS &&
- sblocks_for_recheck[mirror_index].page_count > 0;
- mirror_index++) {
- struct scrub_block *sblock_other = sblocks_for_recheck +
- mirror_index;
-
- if (!sblock_other->header_error &&
- !sblock_other->checksum_error &&
- sblock_other->no_io_error_seen) {
- int force_write = is_metadata || have_csum;
-
- ret = scrub_repair_block_from_good_copy(sblock_bad,
- sblock_other,
- force_write);
- if (0 == ret)
- goto corrected_error;
- }
- }
-
- /*
- * in case of I/O errors in the area that is supposed to be
- * repaired, continue by picking good copies of those pages.
- * Select the good pages from mirrors to rewrite bad pages from
- * the area to fix. Afterwards verify the checksum of the block
- * that is supposed to be repaired. This verification step is
-	 * only done for the purpose of statistics counting and for the
-	 * final scrub report, which states whether errors remain.
- * A perfect algorithm could make use of the checksum and try
- * all possible combinations of pages from the different mirrors
- * until the checksum verification succeeds. For example, when
- * the 2nd page of mirror #1 faces I/O errors, and the 2nd page
- * of mirror #2 is readable but the final checksum test fails,
-	 * then the 2nd page of mirror #3 could be tried to see whether
-	 * the final checksum now succeeds. But this would be a rare
-	 * exception and is therefore not implemented. At least this
-	 * avoids overwriting a good copy.
- * A more useful improvement would be to pick the sectors
- * without I/O error based on sector sizes (512 bytes on legacy
-	 * disks) instead of on PAGE_SIZE. Then maybe 512 bytes of one
-	 * mirror could be repaired by taking 512 bytes of a different
- * mirror, even if other 512 byte sectors in the same PAGE_SIZE
- * area are unreadable.
- */
-
- /* can only fix I/O errors from here on */
- if (sblock_bad->no_io_error_seen)
- goto did_not_correct_error;
-
- success = 1;
- for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
- struct scrub_page *page_bad = sblock_bad->pagev + page_num;
-
- if (!page_bad->io_error)
- continue;
-
- for (mirror_index = 0;
- mirror_index < BTRFS_MAX_MIRRORS &&
- sblocks_for_recheck[mirror_index].page_count > 0;
- mirror_index++) {
- struct scrub_block *sblock_other = sblocks_for_recheck +
- mirror_index;
- struct scrub_page *page_other = sblock_other->pagev +
- page_num;
-
- if (!page_other->io_error) {
- ret = scrub_repair_page_from_good_copy(
- sblock_bad, sblock_other, page_num, 0);
- if (0 == ret) {
- page_bad->io_error = 0;
- break; /* succeeded for this page */
- }
- }
- }
-
- if (page_bad->io_error) {
- /* did not find a mirror to copy the page from */
- success = 0;
- }
- }
-
- if (success) {
- if (is_metadata || have_csum) {
- /*
- * need to verify the checksum now that all
- * sectors on disk are repaired (the write
- * request for data to be repaired is on its way).
- * Just be lazy and use scrub_recheck_block()
- * which re-reads the data before the checksum
- * is verified, but most likely the data comes out
- * of the page cache.
- */
- ret = scrub_recheck_block(fs_info, sblock_bad,
- is_metadata, have_csum, csum,
- generation, sdev->csum_size);
- if (!ret && !sblock_bad->header_error &&
- !sblock_bad->checksum_error &&
- sblock_bad->no_io_error_seen)
- goto corrected_error;
- else
- goto did_not_correct_error;
- } else {
-corrected_error:
- spin_lock(&sdev->stat_lock);
- sdev->stat.corrected_errors++;
- spin_unlock(&sdev->stat_lock);
- printk_ratelimited(KERN_ERR
- "btrfs: fixed up error at logical %llu on dev %s\n",
- (unsigned long long)logical, sdev->dev->name);
- }
- } else {
-did_not_correct_error:
- spin_lock(&sdev->stat_lock);
- sdev->stat.uncorrectable_errors++;
- spin_unlock(&sdev->stat_lock);
- printk_ratelimited(KERN_ERR
- "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
- (unsigned long long)logical, sdev->dev->name);
- }
-
-out:
- if (sblocks_for_recheck) {
- for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
- mirror_index++) {
- struct scrub_block *sblock = sblocks_for_recheck +
- mirror_index;
- int page_index;
-
- for (page_index = 0; page_index < SCRUB_PAGES_PER_BIO;
- page_index++)
- if (sblock->pagev[page_index].page)
- __free_page(
- sblock->pagev[page_index].page);
- }
- kfree(sblocks_for_recheck);
- }
-
- return 0;
-}
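The repair policy implemented above (prefer a mirror that is completely clean, otherwise patch only the unreadable pages from whichever mirror still has them) can be modeled in a short userspace sketch. The types and sizes below are hypothetical stand-ins, not the kernel structures; the sketch assumes the per-page io_error flags were already filled in by a recheck pass.

#include <stdbool.h>
#include <string.h>

#define MAX_MIRRORS 3           /* stand-in for BTRFS_MAX_MIRRORS */
#define PAGES_PER_BLOCK 16      /* hypothetical block size in pages */

struct mirror_page {
	bool io_error;
	unsigned char data[4096];
};

struct mirror_block {
	int page_count;
	bool header_error;
	bool checksum_error;
	bool no_io_error_seen;
	struct mirror_page pagev[PAGES_PER_BLOCK];
};

/* try to repair "bad" from the other mirrors; return true on full success */
static bool repair_block(struct mirror_block *bad,
			 struct mirror_block mirrors[], int nr_mirrors)
{
	/* phase 1: a completely clean mirror rewrites the whole block */
	for (int m = 0; m < nr_mirrors; m++) {
		struct mirror_block *other = &mirrors[m];

		if (other == bad || other->page_count == 0)
			continue;
		if (!other->header_error && !other->checksum_error &&
		    other->no_io_error_seen) {
			for (int p = 0; p < bad->page_count; p++)
				memcpy(bad->pagev[p].data,
				       other->pagev[p].data, 4096);
			return true;
		}
	}

	/* phase 2: patch each unreadable page from any mirror that has it */
	bool success = true;
	for (int p = 0; p < bad->page_count; p++) {
		if (!bad->pagev[p].io_error)
			continue;
		for (int m = 0; m < nr_mirrors; m++) {
			struct mirror_page *src = &mirrors[m].pagev[p];

			if (&mirrors[m] == bad || src->io_error)
				continue;
			memcpy(bad->pagev[p].data, src->data, 4096);
			bad->pagev[p].io_error = false;
			break;
		}
		if (bad->pagev[p].io_error)
			success = false;   /* no mirror had this page intact */
	}
	return success;
}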
-
-static int scrub_setup_recheck_block(struct scrub_dev *sdev,
- struct btrfs_mapping_tree *map_tree,
- u64 length, u64 logical,
- struct scrub_block *sblocks_for_recheck)
-{
- int page_index;
- int mirror_index;
- int ret;
-
- /*
- * note: the three members sdev, ref_count and outstanding_pages
- * are not used (and not set) in the blocks that are used for
- * the recheck procedure
- */
-
- page_index = 0;
- while (length > 0) {
- u64 sublen = min_t(u64, length, PAGE_SIZE);
- u64 mapped_length = sublen;
- struct btrfs_bio *bbio = NULL;
-
- /*
- * with a length of PAGE_SIZE, each returned stripe
- * represents one mirror
- */
- ret = btrfs_map_block(map_tree, WRITE, logical, &mapped_length,
- &bbio, 0);
- if (ret || !bbio || mapped_length < sublen) {
- kfree(bbio);
- return -EIO;
- }
-
- BUG_ON(page_index >= SCRUB_PAGES_PER_BIO);
- for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
- mirror_index++) {
- struct scrub_block *sblock;
- struct scrub_page *page;
-
- if (mirror_index >= BTRFS_MAX_MIRRORS)
- continue;
-
- sblock = sblocks_for_recheck + mirror_index;
- page = sblock->pagev + page_index;
- page->logical = logical;
- page->physical = bbio->stripes[mirror_index].physical;
- /* for missing devices, bdev is NULL */
- page->bdev = bbio->stripes[mirror_index].dev->bdev;
- page->mirror_num = mirror_index + 1;
- page->page = alloc_page(GFP_NOFS);
- if (!page->page) {
- spin_lock(&sdev->stat_lock);
- sdev->stat.malloc_errors++;
- spin_unlock(&sdev->stat_lock);
- return -ENOMEM;
- }
- sblock->page_count++;
- }
- kfree(bbio);
- length -= sublen;
- logical += sublen;
- page_index++;
- }
-
- return 0;
-}
-
-/*
- * this function will check the on-disk data for checksum errors, header
- * errors and read I/O errors. If any I/O errors happen, the exact pages
- * in error are marked as bad. The goal is to enable scrub to take the
- * pages that are not in error from all the mirrors so that the pages
- * in error in the mirror just handled can be repaired.
- */
-static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
- struct scrub_block *sblock, int is_metadata,
- int have_csum, u8 *csum, u64 generation,
- u16 csum_size)
-{
- int page_num;
-
- sblock->no_io_error_seen = 1;
- sblock->header_error = 0;
- sblock->checksum_error = 0;
-
- for (page_num = 0; page_num < sblock->page_count; page_num++) {
- struct bio *bio;
- int ret;
- struct scrub_page *page = sblock->pagev + page_num;
- DECLARE_COMPLETION_ONSTACK(complete);
-
- if (page->bdev == NULL) {
- page->io_error = 1;
- sblock->no_io_error_seen = 0;
- continue;
- }
-
- BUG_ON(!page->page);
- bio = bio_alloc(GFP_NOFS, 1);
- if (!bio)
- return -EIO;
- bio->bi_bdev = page->bdev;
- bio->bi_sector = page->physical >> 9;
- bio->bi_end_io = scrub_complete_bio_end_io;
- bio->bi_private = &complete;
-
- ret = bio_add_page(bio, page->page, PAGE_SIZE, 0);
- if (PAGE_SIZE != ret) {
- bio_put(bio);
- return -EIO;
- }
- btrfsic_submit_bio(READ, bio);
-
- /* this will also unplug the queue */
- wait_for_completion(&complete);
-
- page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
- sblock->no_io_error_seen = 0;
- bio_put(bio);
- }
-
- if (sblock->no_io_error_seen)
- scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
- have_csum, csum, generation,
- csum_size);
-
- return 0;
-}
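A rough userspace analogue of the recheck read above: every page of the block is read synchronously, and only the failing pages are flagged, so a later repair pass can be limited to exactly those pages. pread() stands in for the submit-and-wait bio; the structures are hypothetical.

#include <stdbool.h>
#include <sys/types.h>
#include <unistd.h>

#define PAGE_SZ 4096

struct check_page {
	int fd;                  /* device file descriptor, -1 if missing */
	off_t physical;          /* byte offset on that device */
	bool io_error;
	unsigned char data[PAGE_SZ];
};

struct check_block {
	int page_count;
	bool no_io_error_seen;
	struct check_page pagev[16];
};

/* read every page; mark the failing ones instead of giving up early */
static void recheck_block(struct check_block *blk)
{
	blk->no_io_error_seen = true;

	for (int i = 0; i < blk->page_count; i++) {
		struct check_page *pg = &blk->pagev[i];

		if (pg->fd < 0) {        /* missing device */
			pg->io_error = true;
			blk->no_io_error_seen = false;
			continue;
		}
		ssize_t n = pread(pg->fd, pg->data, PAGE_SZ, pg->physical);
		pg->io_error = (n != PAGE_SZ);
		if (pg->io_error)
			blk->no_io_error_seen = false;
	}
	/* checksum/header verification would follow only if all reads worked */
}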
-
-static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
- struct scrub_block *sblock,
- int is_metadata, int have_csum,
- const u8 *csum, u64 generation,
- u16 csum_size)
-{
- int page_num;
- u8 calculated_csum[BTRFS_CSUM_SIZE];
- u32 crc = ~(u32)0;
- struct btrfs_root *root = fs_info->extent_root;
- void *mapped_buffer;
-
- BUG_ON(!sblock->pagev[0].page);
- if (is_metadata) {
- struct btrfs_header *h;
-
- mapped_buffer = kmap_atomic(sblock->pagev[0].page);
- h = (struct btrfs_header *)mapped_buffer;
-
- if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) ||
- generation != le64_to_cpu(h->generation) ||
- memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
- memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
- BTRFS_UUID_SIZE))
- sblock->header_error = 1;
- csum = h->csum;
- } else {
- if (!have_csum)
- return;
-
- mapped_buffer = kmap_atomic(sblock->pagev[0].page);
- }
-
- for (page_num = 0;;) {
- if (page_num == 0 && is_metadata)
- crc = btrfs_csum_data(root,
- ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
- crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
- else
- crc = btrfs_csum_data(root, mapped_buffer, crc,
- PAGE_SIZE);
-
- kunmap_atomic(mapped_buffer);
- page_num++;
- if (page_num >= sblock->page_count)
- break;
- BUG_ON(!sblock->pagev[page_num].page);
-
- mapped_buffer = kmap_atomic(sblock->pagev[page_num].page);
- }
-
- btrfs_csum_final(crc, calculated_csum);
- if (memcmp(calculated_csum, csum, csum_size))
- sblock->checksum_error = 1;
-}
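The checksum pass walks the block page by page and, for metadata, skips the checksum bytes embedded at the start of the first page before feeding the rest into a rolling CRC. The sketch below models only that loop structure; it uses a plain bitwise CRC-32 as a placeholder (btrfs uses crc32c here), so the constants are illustrative rather than the real on-disk checksum.

#include <stddef.h>
#include <stdint.h>

#define PAGE_SZ   4096
#define CSUM_SIZE 32             /* stand-in for BTRFS_CSUM_SIZE */

/* simple bitwise CRC-32, standing in for crc32c */
static uint32_t crc_update(uint32_t crc, const unsigned char *buf, size_t len)
{
	for (size_t i = 0; i < len; i++) {
		crc ^= buf[i];
		for (int b = 0; b < 8; b++)
			crc = (crc >> 1) ^ (0xEDB88320u & -(crc & 1));
	}
	return crc;
}

/*
 * checksum a block made of nr_pages pages; when is_metadata is set the
 * on-disk checksum lives in the first CSUM_SIZE bytes and must be skipped
 */
static uint32_t checksum_block(unsigned char *pages[], int nr_pages,
			       int is_metadata)
{
	uint32_t crc = ~(uint32_t)0;

	for (int p = 0; p < nr_pages; p++) {
		const unsigned char *buf = pages[p];
		size_t len = PAGE_SZ;

		if (p == 0 && is_metadata) {
			buf += CSUM_SIZE;
			len -= CSUM_SIZE;
		}
		crc = crc_update(crc, buf, len);
	}
	return ~crc;             /* final inversion, as btrfs_csum_final does */
}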
-
-static void scrub_complete_bio_end_io(struct bio *bio, int err)
-{
- complete((struct completion *)bio->bi_private);
-}
-
-static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
- struct scrub_block *sblock_good,
- int force_write)
-{
- int page_num;
- int ret = 0;
-
- for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
- int ret_sub;
-
- ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
- sblock_good,
- page_num,
- force_write);
- if (ret_sub)
- ret = ret_sub;
- }
-
- return ret;
-}
-
-static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
- struct scrub_block *sblock_good,
- int page_num, int force_write)
-{
- struct scrub_page *page_bad = sblock_bad->pagev + page_num;
- struct scrub_page *page_good = sblock_good->pagev + page_num;
-
- BUG_ON(sblock_bad->pagev[page_num].page == NULL);
- BUG_ON(sblock_good->pagev[page_num].page == NULL);
- if (force_write || sblock_bad->header_error ||
- sblock_bad->checksum_error || page_bad->io_error) {
- struct bio *bio;
- int ret;
- DECLARE_COMPLETION_ONSTACK(complete);
-
- bio = bio_alloc(GFP_NOFS, 1);
- if (!bio)
- return -EIO;
- bio->bi_bdev = page_bad->bdev;
- bio->bi_sector = page_bad->physical >> 9;
- bio->bi_end_io = scrub_complete_bio_end_io;
- bio->bi_private = &complete;
-
- ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
- if (PAGE_SIZE != ret) {
- bio_put(bio);
- return -EIO;
- }
- btrfsic_submit_bio(WRITE, bio);
-
- /* this will also unplug the queue */
- wait_for_completion(&complete);
- bio_put(bio);
- }
-
- return 0;
-}
-
-static void scrub_checksum(struct scrub_block *sblock)
-{
- u64 flags;
- int ret;
-
- BUG_ON(sblock->page_count < 1);
- flags = sblock->pagev[0].flags;
- ret = 0;
- if (flags & BTRFS_EXTENT_FLAG_DATA)
- ret = scrub_checksum_data(sblock);
- else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
- ret = scrub_checksum_tree_block(sblock);
- else if (flags & BTRFS_EXTENT_FLAG_SUPER)
- (void)scrub_checksum_super(sblock);
- else
- WARN_ON(1);
- if (ret)
- scrub_handle_errored_block(sblock);
-}
-
-static int scrub_checksum_data(struct scrub_block *sblock)
-{
- struct scrub_dev *sdev = sblock->sdev;
- u8 csum[BTRFS_CSUM_SIZE];
- u8 *on_disk_csum;
- struct page *page;
- void *buffer;
- u32 crc = ~(u32)0;
- int fail = 0;
- struct btrfs_root *root = sdev->dev->dev_root;
- u64 len;
- int index;
-
- BUG_ON(sblock->page_count < 1);
- if (!sblock->pagev[0].have_csum)
- return 0;
-
- on_disk_csum = sblock->pagev[0].csum;
- page = sblock->pagev[0].page;
- buffer = kmap_atomic(page);
-
- len = sdev->sectorsize;
- index = 0;
- for (;;) {
- u64 l = min_t(u64, len, PAGE_SIZE);
-
- crc = btrfs_csum_data(root, buffer, crc, l);
- kunmap_atomic(buffer);
- len -= l;
- if (len == 0)
- break;
- index++;
- BUG_ON(index >= sblock->page_count);
- BUG_ON(!sblock->pagev[index].page);
- page = sblock->pagev[index].page;
- buffer = kmap_atomic(page);
- }
-
- btrfs_csum_final(crc, csum);
- if (memcmp(csum, on_disk_csum, sdev->csum_size))
- fail = 1;
-
- return fail;
-}
-
-static int scrub_checksum_tree_block(struct scrub_block *sblock)
-{
- struct scrub_dev *sdev = sblock->sdev;
- struct btrfs_header *h;
- struct btrfs_root *root = sdev->dev->dev_root;
- struct btrfs_fs_info *fs_info = root->fs_info;
- u8 calculated_csum[BTRFS_CSUM_SIZE];
- u8 on_disk_csum[BTRFS_CSUM_SIZE];
- struct page *page;
- void *mapped_buffer;
- u64 mapped_size;
- void *p;
- u32 crc = ~(u32)0;
- int fail = 0;
- int crc_fail = 0;
- u64 len;
- int index;
-
- BUG_ON(sblock->page_count < 1);
- page = sblock->pagev[0].page;
- mapped_buffer = kmap_atomic(page);
- h = (struct btrfs_header *)mapped_buffer;
- memcpy(on_disk_csum, h->csum, sdev->csum_size);
-
- /*
- * we don't use the getter functions here, as we
- * a) don't have an extent buffer and
- * b) the page is already kmapped
- */
-
- if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr))
- ++fail;
-
- if (sblock->pagev[0].generation != le64_to_cpu(h->generation))
- ++fail;
-
- if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
- ++fail;
-
- if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
- BTRFS_UUID_SIZE))
- ++fail;
-
- BUG_ON(sdev->nodesize != sdev->leafsize);
- len = sdev->nodesize - BTRFS_CSUM_SIZE;
- mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
- p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
- index = 0;
- for (;;) {
- u64 l = min_t(u64, len, mapped_size);
-
- crc = btrfs_csum_data(root, p, crc, l);
- kunmap_atomic(mapped_buffer);
- len -= l;
- if (len == 0)
- break;
- index++;
- BUG_ON(index >= sblock->page_count);
- BUG_ON(!sblock->pagev[index].page);
- page = sblock->pagev[index].page;
- mapped_buffer = kmap_atomic(page);
- mapped_size = PAGE_SIZE;
- p = mapped_buffer;
- }
-
- btrfs_csum_final(crc, calculated_csum);
- if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
- ++crc_fail;
-
- return fail || crc_fail;
-}
-
-static int scrub_checksum_super(struct scrub_block *sblock)
-{
- struct btrfs_super_block *s;
- struct scrub_dev *sdev = sblock->sdev;
- struct btrfs_root *root = sdev->dev->dev_root;
- struct btrfs_fs_info *fs_info = root->fs_info;
- u8 calculated_csum[BTRFS_CSUM_SIZE];
- u8 on_disk_csum[BTRFS_CSUM_SIZE];
- struct page *page;
- void *mapped_buffer;
- u64 mapped_size;
- void *p;
- u32 crc = ~(u32)0;
- int fail = 0;
- u64 len;
- int index;
-
- BUG_ON(sblock->page_count < 1);
- page = sblock->pagev[0].page;
- mapped_buffer = kmap_atomic(page);
- s = (struct btrfs_super_block *)mapped_buffer;
- memcpy(on_disk_csum, s->csum, sdev->csum_size);
-
- if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr))
- ++fail;
-
- if (sblock->pagev[0].generation != le64_to_cpu(s->generation))
- ++fail;
-
- if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
- ++fail;
-
- len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
- mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
- p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
- index = 0;
- for (;;) {
- u64 l = min_t(u64, len, mapped_size);
-
- crc = btrfs_csum_data(root, p, crc, l);
- kunmap_atomic(mapped_buffer);
- len -= l;
- if (len == 0)
- break;
- index++;
- BUG_ON(index >= sblock->page_count);
- BUG_ON(!sblock->pagev[index].page);
- page = sblock->pagev[index].page;
- mapped_buffer = kmap_atomic(page);
- mapped_size = PAGE_SIZE;
- p = mapped_buffer;
- }
-
- btrfs_csum_final(crc, calculated_csum);
- if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
- ++fail;
-
- if (fail) {
- /*
-		 * if we find an error in a super block, we just report it;
-		 * super blocks get rewritten with the next transaction
-		 * commit anyway
- */
- spin_lock(&sdev->stat_lock);
- ++sdev->stat.super_errors;
- spin_unlock(&sdev->stat_lock);
- }
-
- return fail;
-}
-
-static void scrub_block_get(struct scrub_block *sblock)
-{
- atomic_inc(&sblock->ref_count);
-}
-
-static void scrub_block_put(struct scrub_block *sblock)
-{
- if (atomic_dec_and_test(&sblock->ref_count)) {
- int i;
-
- for (i = 0; i < sblock->page_count; i++)
- if (sblock->pagev[i].page)
- __free_page(sblock->pagev[i].page);
- kfree(sblock);
- }
-}
-
-static void scrub_submit(struct scrub_dev *sdev)
-{
- struct scrub_bio *sbio;
-
- if (sdev->curr == -1)
- return;
-
- sbio = sdev->bios[sdev->curr];
- sdev->curr = -1;
- atomic_inc(&sdev->in_flight);
-
- btrfsic_submit_bio(READ, sbio->bio);
-}
-
-static int scrub_add_page_to_bio(struct scrub_dev *sdev,
- struct scrub_page *spage)
-{
- struct scrub_block *sblock = spage->sblock;
- struct scrub_bio *sbio;
- int ret;
-
-again:
- /*
- * grab a fresh bio or wait for one to become available
- */
- while (sdev->curr == -1) {
- spin_lock(&sdev->list_lock);
- sdev->curr = sdev->first_free;
- if (sdev->curr != -1) {
- sdev->first_free = sdev->bios[sdev->curr]->next_free;
- sdev->bios[sdev->curr]->next_free = -1;
- sdev->bios[sdev->curr]->page_count = 0;
- spin_unlock(&sdev->list_lock);
- } else {
- spin_unlock(&sdev->list_lock);
- wait_event(sdev->list_wait, sdev->first_free != -1);
- }
- }
- sbio = sdev->bios[sdev->curr];
- if (sbio->page_count == 0) {
- struct bio *bio;
-
- sbio->physical = spage->physical;
- sbio->logical = spage->logical;
- bio = sbio->bio;
- if (!bio) {
- bio = bio_alloc(GFP_NOFS, sdev->pages_per_bio);
- if (!bio)
- return -ENOMEM;
- sbio->bio = bio;
- }
-
- bio->bi_private = sbio;
- bio->bi_end_io = scrub_bio_end_io;
- bio->bi_bdev = sdev->dev->bdev;
- bio->bi_sector = spage->physical >> 9;
- sbio->err = 0;
- } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
- spage->physical ||
- sbio->logical + sbio->page_count * PAGE_SIZE !=
- spage->logical) {
- scrub_submit(sdev);
- goto again;
- }
-
- sbio->pagev[sbio->page_count] = spage;
- ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
- if (ret != PAGE_SIZE) {
- if (sbio->page_count < 1) {
- bio_put(sbio->bio);
- sbio->bio = NULL;
- return -EIO;
- }
- scrub_submit(sdev);
- goto again;
- }
-
- scrub_block_get(sblock); /* one for the added page */
- atomic_inc(&sblock->outstanding_pages);
- sbio->page_count++;
- if (sbio->page_count == sdev->pages_per_bio)
- scrub_submit(sdev);
-
- return 0;
-}
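scrub_add_page_to_bio() keeps appending pages to the current bio while they stay physically and logically contiguous, and submits early when the next page would break the run or the bio is full. A compact userspace model of that batching decision, with hypothetical types, could look like this:

#include <stdint.h>

#define PAGE_SZ       4096
#define PAGES_PER_BIO 16         /* stand-in for SCRUB_PAGES_PER_BIO */

struct pending_bio {
	uint64_t physical;        /* start of the run on disk */
	uint64_t logical;         /* start of the run in the logical space */
	int page_count;
	const void *pages[PAGES_PER_BIO];
};

static void submit_bio(struct pending_bio *bio)
{
	/* hand the run to the device here, then reset for the next run */
	bio->page_count = 0;
}

static void add_page(struct pending_bio *bio, const void *page,
		     uint64_t physical, uint64_t logical)
{
	/* a non-contiguous page forces the current run out first */
	if (bio->page_count &&
	    (bio->physical + (uint64_t)bio->page_count * PAGE_SZ != physical ||
	     bio->logical  + (uint64_t)bio->page_count * PAGE_SZ != logical))
		submit_bio(bio);

	if (bio->page_count == 0) {
		bio->physical = physical;
		bio->logical = logical;
	}
	bio->pages[bio->page_count++] = page;

	if (bio->page_count == PAGES_PER_BIO)
		submit_bio(bio);
}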
-
-static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
- u64 physical, u64 flags, u64 gen, int mirror_num,
- u8 *csum, int force)
-{
- struct scrub_block *sblock;
- int index;
-
- sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
- if (!sblock) {
- spin_lock(&sdev->stat_lock);
- sdev->stat.malloc_errors++;
- spin_unlock(&sdev->stat_lock);
- return -ENOMEM;
- }
-
- /* one ref inside this function, plus one for each page later on */
- atomic_set(&sblock->ref_count, 1);
- sblock->sdev = sdev;
- sblock->no_io_error_seen = 1;
-
- for (index = 0; len > 0; index++) {
- struct scrub_page *spage = sblock->pagev + index;
- u64 l = min_t(u64, len, PAGE_SIZE);
-
- BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
- spage->page = alloc_page(GFP_NOFS);
- if (!spage->page) {
- spin_lock(&sdev->stat_lock);
- sdev->stat.malloc_errors++;
- spin_unlock(&sdev->stat_lock);
- while (index > 0) {
- index--;
- __free_page(sblock->pagev[index].page);
- }
- kfree(sblock);
- return -ENOMEM;
- }
- spage->sblock = sblock;
- spage->bdev = sdev->dev->bdev;
- spage->flags = flags;
- spage->generation = gen;
- spage->logical = logical;
- spage->physical = physical;
- spage->mirror_num = mirror_num;
- if (csum) {
- spage->have_csum = 1;
- memcpy(spage->csum, csum, sdev->csum_size);
- } else {
- spage->have_csum = 0;
- }
- sblock->page_count++;
- len -= l;
- logical += l;
- physical += l;
- }
-
- BUG_ON(sblock->page_count == 0);
- for (index = 0; index < sblock->page_count; index++) {
- struct scrub_page *spage = sblock->pagev + index;
- int ret;
-
- ret = scrub_add_page_to_bio(sdev, spage);
- if (ret) {
- scrub_block_put(sblock);
- return ret;
- }
- }
-
- if (force)
- scrub_submit(sdev);
-
- /* last one frees, either here or in bio completion for last page */
- scrub_block_put(sblock);
- return 0;
-}
-
-static void scrub_bio_end_io(struct bio *bio, int err)
-{
- struct scrub_bio *sbio = bio->bi_private;
- struct scrub_dev *sdev = sbio->sdev;
- struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
-
- sbio->err = err;
- sbio->bio = bio;
-
- btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
-}
-
-static void scrub_bio_end_io_worker(struct btrfs_work *work)
-{
- struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
- struct scrub_dev *sdev = sbio->sdev;
- int i;
-
- BUG_ON(sbio->page_count > SCRUB_PAGES_PER_BIO);
- if (sbio->err) {
- for (i = 0; i < sbio->page_count; i++) {
- struct scrub_page *spage = sbio->pagev[i];
-
- spage->io_error = 1;
- spage->sblock->no_io_error_seen = 0;
- }
- }
-
- /* now complete the scrub_block items that have all pages completed */
- for (i = 0; i < sbio->page_count; i++) {
- struct scrub_page *spage = sbio->pagev[i];
- struct scrub_block *sblock = spage->sblock;
-
- if (atomic_dec_and_test(&sblock->outstanding_pages))
- scrub_block_complete(sblock);
- scrub_block_put(sblock);
- }
-
- if (sbio->err) {
- /* what is this good for??? */
- sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
- sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
- sbio->bio->bi_phys_segments = 0;
- sbio->bio->bi_idx = 0;
-
- for (i = 0; i < sbio->page_count; i++) {
- struct bio_vec *bi;
- bi = &sbio->bio->bi_io_vec[i];
- bi->bv_offset = 0;
- bi->bv_len = PAGE_SIZE;
- }
- }
-
- bio_put(sbio->bio);
- sbio->bio = NULL;
- spin_lock(&sdev->list_lock);
- sbio->next_free = sdev->first_free;
- sdev->first_free = sbio->index;
- spin_unlock(&sdev->list_lock);
- atomic_dec(&sdev->in_flight);
- wake_up(&sdev->list_wait);
-}
-
-static void scrub_block_complete(struct scrub_block *sblock)
-{
- if (!sblock->no_io_error_seen)
- scrub_handle_errored_block(sblock);
- else
- scrub_checksum(sblock);
-}
-
-static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
- u8 *csum)
-{
- struct btrfs_ordered_sum *sum = NULL;
- int ret = 0;
- unsigned long i;
- unsigned long num_sectors;
-
- while (!list_empty(&sdev->csum_list)) {
- sum = list_first_entry(&sdev->csum_list,
- struct btrfs_ordered_sum, list);
- if (sum->bytenr > logical)
- return 0;
- if (sum->bytenr + sum->len > logical)
- break;
-
- ++sdev->stat.csum_discards;
- list_del(&sum->list);
- kfree(sum);
- sum = NULL;
- }
- if (!sum)
- return 0;
-
- num_sectors = sum->len / sdev->sectorsize;
- for (i = 0; i < num_sectors; ++i) {
- if (sum->sums[i].bytenr == logical) {
- memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
- ret = 1;
- break;
- }
- }
- if (ret && i == num_sectors - 1) {
- list_del(&sum->list);
- kfree(sum);
- }
- return ret;
-}
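scrub_find_csum() consumes a sorted list of checksum items: entries that end before the requested logical address are dropped, and the item that covers the address yields the per-sector checksum. The same idea over a plain sorted array, with hypothetical names and a 4-byte per-sector checksum assumed, looks roughly like this:

#include <stdint.h>
#include <string.h>

#define SECTOR_SZ 4096
#define CSUM_SZ   4               /* assumed: one 4-byte csum per sector */

struct ordered_sum {
	uint64_t bytenr;           /* first byte covered by this item */
	uint64_t len;              /* bytes covered (multiple of SECTOR_SZ) */
	const uint8_t *sums;       /* CSUM_SZ bytes per sector */
};

/* find the checksum of the sector starting at 'logical'; return 1 if found */
static int find_csum(const struct ordered_sum *items, int nr_items,
		     uint64_t logical, uint8_t *csum_out)
{
	for (int i = 0; i < nr_items; i++) {
		const struct ordered_sum *sum = &items[i];

		if (sum->bytenr > logical)
			return 0;                  /* list is sorted: no match */
		if (sum->bytenr + sum->len <= logical)
			continue;                  /* item ends before 'logical' */

		uint64_t sector = (logical - sum->bytenr) / SECTOR_SZ;
		memcpy(csum_out, sum->sums + sector * CSUM_SZ, CSUM_SZ);
		return 1;
	}
	return 0;
}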
-
-/* scrub_extent tries to collect up to 64 kB for each bio */
-static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
- u64 physical, u64 flags, u64 gen, int mirror_num)
-{
- int ret;
- u8 csum[BTRFS_CSUM_SIZE];
- u32 blocksize;
-
- if (flags & BTRFS_EXTENT_FLAG_DATA) {
- blocksize = sdev->sectorsize;
- spin_lock(&sdev->stat_lock);
- sdev->stat.data_extents_scrubbed++;
- sdev->stat.data_bytes_scrubbed += len;
- spin_unlock(&sdev->stat_lock);
- } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- BUG_ON(sdev->nodesize != sdev->leafsize);
- blocksize = sdev->nodesize;
- spin_lock(&sdev->stat_lock);
- sdev->stat.tree_extents_scrubbed++;
- sdev->stat.tree_bytes_scrubbed += len;
- spin_unlock(&sdev->stat_lock);
- } else {
- blocksize = sdev->sectorsize;
- BUG_ON(1);
- }
-
- while (len) {
- u64 l = min_t(u64, len, blocksize);
- int have_csum = 0;
-
- if (flags & BTRFS_EXTENT_FLAG_DATA) {
- /* push csums to sbio */
- have_csum = scrub_find_csum(sdev, logical, l, csum);
- if (have_csum == 0)
- ++sdev->stat.no_csum;
- }
- ret = scrub_pages(sdev, logical, l, physical, flags, gen,
- mirror_num, have_csum ? csum : NULL, 0);
- if (ret)
- return ret;
- len -= l;
- logical += l;
- physical += l;
- }
- return 0;
-}
-
-static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
- struct map_lookup *map, int num, u64 base, u64 length)
-{
- struct btrfs_path *path;
- struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
- struct btrfs_root *root = fs_info->extent_root;
- struct btrfs_root *csum_root = fs_info->csum_root;
- struct btrfs_extent_item *extent;
- struct blk_plug plug;
- u64 flags;
- int ret;
- int slot;
- int i;
- u64 nstripes;
- struct extent_buffer *l;
- struct btrfs_key key;
- u64 physical;
- u64 logical;
- u64 generation;
- int mirror_num;
- struct reada_control *reada1;
- struct reada_control *reada2;
- struct btrfs_key key_start;
- struct btrfs_key key_end;
-
- u64 increment = map->stripe_len;
- u64 offset;
-
- nstripes = length;
- offset = 0;
- do_div(nstripes, map->stripe_len);
- if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
- offset = map->stripe_len * num;
- increment = map->stripe_len * map->num_stripes;
- mirror_num = 1;
- } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
- int factor = map->num_stripes / map->sub_stripes;
- offset = map->stripe_len * (num / map->sub_stripes);
- increment = map->stripe_len * factor;
- mirror_num = num % map->sub_stripes + 1;
- } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
- increment = map->stripe_len;
- mirror_num = num % map->num_stripes + 1;
- } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
- increment = map->stripe_len;
- mirror_num = num % map->num_stripes + 1;
- } else {
- increment = map->stripe_len;
- mirror_num = 1;
- }
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- /*
- * work on commit root. The related disk blocks are static as
-	 * long as COW is applied. This means it is safe to rewrite
- * them to repair disk errors without any race conditions
- */
- path->search_commit_root = 1;
- path->skip_locking = 1;
-
- /*
-	 * trigger the readahead for the extent tree and csum tree and wait for
- * completion. During readahead, the scrub is officially paused
- * to not hold off transaction commits
- */
- logical = base + offset;
-
- wait_event(sdev->list_wait,
- atomic_read(&sdev->in_flight) == 0);
- atomic_inc(&fs_info->scrubs_paused);
- wake_up(&fs_info->scrub_pause_wait);
-
- /* FIXME it might be better to start readahead at commit root */
- key_start.objectid = logical;
- key_start.type = BTRFS_EXTENT_ITEM_KEY;
- key_start.offset = (u64)0;
- key_end.objectid = base + offset + nstripes * increment;
- key_end.type = BTRFS_EXTENT_ITEM_KEY;
- key_end.offset = (u64)0;
- reada1 = btrfs_reada_add(root, &key_start, &key_end);
-
- key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
- key_start.type = BTRFS_EXTENT_CSUM_KEY;
- key_start.offset = logical;
- key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
- key_end.type = BTRFS_EXTENT_CSUM_KEY;
- key_end.offset = base + offset + nstripes * increment;
- reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
-
- if (!IS_ERR(reada1))
- btrfs_reada_wait(reada1);
- if (!IS_ERR(reada2))
- btrfs_reada_wait(reada2);
-
- mutex_lock(&fs_info->scrub_lock);
- while (atomic_read(&fs_info->scrub_pause_req)) {
- mutex_unlock(&fs_info->scrub_lock);
- wait_event(fs_info->scrub_pause_wait,
- atomic_read(&fs_info->scrub_pause_req) == 0);
- mutex_lock(&fs_info->scrub_lock);
- }
- atomic_dec(&fs_info->scrubs_paused);
- mutex_unlock(&fs_info->scrub_lock);
- wake_up(&fs_info->scrub_pause_wait);
-
- /*
- * collect all data csums for the stripe to avoid seeking during
-	 * the scrub. This might currently (crc32) end up being about 1MB
- */
- blk_start_plug(&plug);
-
- /*
- * now find all extents for each stripe and scrub them
- */
- logical = base + offset;
- physical = map->stripes[num].physical;
- ret = 0;
- for (i = 0; i < nstripes; ++i) {
- /*
- * canceled?
- */
- if (atomic_read(&fs_info->scrub_cancel_req) ||
- atomic_read(&sdev->cancel_req)) {
- ret = -ECANCELED;
- goto out;
- }
- /*
- * check to see if we have to pause
- */
- if (atomic_read(&fs_info->scrub_pause_req)) {
- /* push queued extents */
- scrub_submit(sdev);
- wait_event(sdev->list_wait,
- atomic_read(&sdev->in_flight) == 0);
- atomic_inc(&fs_info->scrubs_paused);
- wake_up(&fs_info->scrub_pause_wait);
- mutex_lock(&fs_info->scrub_lock);
- while (atomic_read(&fs_info->scrub_pause_req)) {
- mutex_unlock(&fs_info->scrub_lock);
- wait_event(fs_info->scrub_pause_wait,
- atomic_read(&fs_info->scrub_pause_req) == 0);
- mutex_lock(&fs_info->scrub_lock);
- }
- atomic_dec(&fs_info->scrubs_paused);
- mutex_unlock(&fs_info->scrub_lock);
- wake_up(&fs_info->scrub_pause_wait);
- }
-
- ret = btrfs_lookup_csums_range(csum_root, logical,
- logical + map->stripe_len - 1,
- &sdev->csum_list, 1);
- if (ret)
- goto out;
-
- key.objectid = logical;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = (u64)0;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- if (ret > 0) {
- ret = btrfs_previous_item(root, path, 0,
- BTRFS_EXTENT_ITEM_KEY);
- if (ret < 0)
- goto out;
- if (ret > 0) {
- /* there's no smaller item, so stick with the
- * larger one */
- btrfs_release_path(path);
- ret = btrfs_search_slot(NULL, root, &key,
- path, 0, 0);
- if (ret < 0)
- goto out;
- }
- }
-
- while (1) {
- l = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(l)) {
- ret = btrfs_next_leaf(root, path);
- if (ret == 0)
- continue;
- if (ret < 0)
- goto out;
-
- break;
- }
- btrfs_item_key_to_cpu(l, &key, slot);
-
- if (key.objectid + key.offset <= logical)
- goto next;
-
- if (key.objectid >= logical + map->stripe_len)
- break;
-
- if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
- goto next;
-
- extent = btrfs_item_ptr(l, slot,
- struct btrfs_extent_item);
- flags = btrfs_extent_flags(l, extent);
- generation = btrfs_extent_generation(l, extent);
-
- if (key.objectid < logical &&
- (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
- printk(KERN_ERR
- "btrfs scrub: tree block %llu spanning "
- "stripes, ignored. logical=%llu\n",
- (unsigned long long)key.objectid,
- (unsigned long long)logical);
- goto next;
- }
-
- /*
- * trim extent to this stripe
- */
- if (key.objectid < logical) {
- key.offset -= logical - key.objectid;
- key.objectid = logical;
- }
- if (key.objectid + key.offset >
- logical + map->stripe_len) {
- key.offset = logical + map->stripe_len -
- key.objectid;
- }
-
- ret = scrub_extent(sdev, key.objectid, key.offset,
- key.objectid - logical + physical,
- flags, generation, mirror_num);
- if (ret)
- goto out;
-
-next:
- path->slots[0]++;
- }
- btrfs_release_path(path);
- logical += increment;
- physical += map->stripe_len;
- spin_lock(&sdev->stat_lock);
- sdev->stat.last_physical = physical;
- spin_unlock(&sdev->stat_lock);
- }
- /* push queued extents */
- scrub_submit(sdev);
-
-out:
- blk_finish_plug(&plug);
- btrfs_free_path(path);
- return ret < 0 ? ret : 0;
-}
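The RAID-profile cases at the top of scrub_stripe() reduce to three values for a given stripe index: the starting offset of this device's data inside the chunk, the increment to the next stripe owned by this device, and the mirror number handed down for repairs. A standalone sketch of that arithmetic, using stand-in profile values rather than the real BTRFS_BLOCK_GROUP_* bits:

#include <stdint.h>

/* hypothetical profile identifiers; the kernel uses BTRFS_BLOCK_GROUP_* bits */
enum raid_profile { PROFILE_SINGLE, PROFILE_RAID0, PROFILE_RAID1,
		    PROFILE_DUP, PROFILE_RAID10 };

struct stripe_geometry {
	uint64_t offset;       /* first byte of this device's data in the chunk */
	uint64_t increment;    /* distance between consecutive stripes we own */
	int mirror_num;        /* which copy this device holds (1-based) */
};

static struct stripe_geometry stripe_geometry(enum raid_profile profile,
					      uint64_t stripe_len,
					      int num,          /* stripe index */
					      int num_stripes,
					      int sub_stripes)
{
	struct stripe_geometry g = { 0, stripe_len, 1 };

	switch (profile) {
	case PROFILE_RAID0:
		g.offset = stripe_len * num;
		g.increment = stripe_len * num_stripes;
		g.mirror_num = 1;
		break;
	case PROFILE_RAID10:
		g.offset = stripe_len * (num / sub_stripes);
		g.increment = stripe_len * (num_stripes / sub_stripes);
		g.mirror_num = num % sub_stripes + 1;
		break;
	case PROFILE_RAID1:
	case PROFILE_DUP:
		g.increment = stripe_len;
		g.mirror_num = num % num_stripes + 1;
		break;
	default:               /* single: defaults from the initializer */
		break;
	}
	return g;
}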
-
-static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
- u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length,
- u64 dev_offset)
-{
- struct btrfs_mapping_tree *map_tree =
- &sdev->dev->dev_root->fs_info->mapping_tree;
- struct map_lookup *map;
- struct extent_map *em;
- int i;
- int ret = -EINVAL;
-
- read_lock(&map_tree->map_tree.lock);
- em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
- read_unlock(&map_tree->map_tree.lock);
-
- if (!em)
- return -EINVAL;
-
- map = (struct map_lookup *)em->bdev;
- if (em->start != chunk_offset)
- goto out;
-
- if (em->len < length)
- goto out;
-
- for (i = 0; i < map->num_stripes; ++i) {
- if (map->stripes[i].dev == sdev->dev &&
- map->stripes[i].physical == dev_offset) {
- ret = scrub_stripe(sdev, map, i, chunk_offset, length);
- if (ret)
- goto out;
- }
- }
-out:
- free_extent_map(em);
-
- return ret;
-}
-
-static noinline_for_stack
-int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
-{
- struct btrfs_dev_extent *dev_extent = NULL;
- struct btrfs_path *path;
- struct btrfs_root *root = sdev->dev->dev_root;
- struct btrfs_fs_info *fs_info = root->fs_info;
- u64 length;
- u64 chunk_tree;
- u64 chunk_objectid;
- u64 chunk_offset;
- int ret;
- int slot;
- struct extent_buffer *l;
- struct btrfs_key key;
- struct btrfs_key found_key;
- struct btrfs_block_group_cache *cache;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- path->reada = 2;
- path->search_commit_root = 1;
- path->skip_locking = 1;
-
- key.objectid = sdev->dev->devid;
- key.offset = 0ull;
- key.type = BTRFS_DEV_EXTENT_KEY;
-
-
- while (1) {
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- break;
- if (ret > 0) {
- if (path->slots[0] >=
- btrfs_header_nritems(path->nodes[0])) {
- ret = btrfs_next_leaf(root, path);
- if (ret)
- break;
- }
- }
-
- l = path->nodes[0];
- slot = path->slots[0];
-
- btrfs_item_key_to_cpu(l, &found_key, slot);
-
- if (found_key.objectid != sdev->dev->devid)
- break;
-
- if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
- break;
-
- if (found_key.offset >= end)
- break;
-
- if (found_key.offset < key.offset)
- break;
-
- dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
- length = btrfs_dev_extent_length(l, dev_extent);
-
- if (found_key.offset + length <= start) {
- key.offset = found_key.offset + length;
- btrfs_release_path(path);
- continue;
- }
-
- chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
- chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
- chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
-
- /*
- * get a reference on the corresponding block group to prevent
- * the chunk from going away while we scrub it
- */
- cache = btrfs_lookup_block_group(fs_info, chunk_offset);
- if (!cache) {
- ret = -ENOENT;
- break;
- }
- ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
- chunk_offset, length, found_key.offset);
- btrfs_put_block_group(cache);
- if (ret)
- break;
-
- key.offset = found_key.offset + length;
- btrfs_release_path(path);
- }
-
- btrfs_free_path(path);
-
- /*
- * ret can still be 1 from search_slot or next_leaf,
- * that's not an error
- */
- return ret < 0 ? ret : 0;
-}
-
-static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
-{
- int i;
- u64 bytenr;
- u64 gen;
- int ret;
- struct btrfs_device *device = sdev->dev;
- struct btrfs_root *root = device->dev_root;
-
- if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
- return -EIO;
-
- gen = root->fs_info->last_trans_committed;
-
- for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
- bytenr = btrfs_sb_offset(i);
- if (bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
- break;
-
- ret = scrub_pages(sdev, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
- BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
- if (ret)
- return ret;
- }
- wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
-
- return 0;
-}
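scrub_supers() queues one read per superblock copy and skips copies that would extend past the end of the device. The sketch below mimics that loop; the mirror placement (primary at 64 KiB, mirrors at 64 MiB and 256 GiB) and the 4 KiB copy size are assumptions taken from the btrfs on-disk format, not from the code shown here.

#include <stdint.h>
#include <stdio.h>

#define SUPER_INFO_SIZE   4096   /* assumed size of one superblock copy */
#define SUPER_MIRROR_MAX  3

/* mirror 0 at 64 KiB, mirror i at 16 KiB << (12 * i): 64 MiB, 256 GiB */
static uint64_t sb_offset(int mirror)
{
	if (mirror)
		return (uint64_t)16384 << (12 * mirror);
	return 64 * 1024;
}

static void scrub_supers_sketch(uint64_t device_bytes)
{
	for (int i = 0; i < SUPER_MIRROR_MAX; i++) {
		uint64_t bytenr = sb_offset(i);

		if (bytenr + SUPER_INFO_SIZE > device_bytes)
			break;           /* copy does not fit on this device */
		printf("would scrub super copy %d at byte %llu\n",
		       i, (unsigned long long)bytenr);
	}
}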
-
-/*
- * get a reference count on fs_info->scrub_workers. start workers if necessary
- */
-static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- int ret = 0;
-
- mutex_lock(&fs_info->scrub_lock);
- if (fs_info->scrub_workers_refcnt == 0) {
- btrfs_init_workers(&fs_info->scrub_workers, "scrub",
- fs_info->thread_pool_size, &fs_info->generic_worker);
- fs_info->scrub_workers.idle_thresh = 4;
- ret = btrfs_start_workers(&fs_info->scrub_workers);
- if (ret)
- goto out;
- }
- ++fs_info->scrub_workers_refcnt;
-out:
- mutex_unlock(&fs_info->scrub_lock);
-
- return ret;
-}
-
-static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
-
- mutex_lock(&fs_info->scrub_lock);
- if (--fs_info->scrub_workers_refcnt == 0)
- btrfs_stop_workers(&fs_info->scrub_workers);
- WARN_ON(fs_info->scrub_workers_refcnt < 0);
- mutex_unlock(&fs_info->scrub_lock);
-}
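scrub_workers_get()/scrub_workers_put() form a mutex-protected reference count around a shared worker pool: the first getter starts the pool, the last putter stops it, and a failed start leaves the count at zero. The same pattern in plain pthreads, with hypothetical start/stop hooks:

#include <pthread.h>

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static int pool_refcnt;

/* stand-ins for btrfs_start_workers()/btrfs_stop_workers() */
static int start_pool(void) { return 0; }
static void stop_pool(void) { }

static int pool_get(void)
{
	int ret = 0;

	pthread_mutex_lock(&pool_lock);
	if (pool_refcnt == 0) {
		ret = start_pool();
		if (ret)
			goto out;         /* start failed: keep refcnt at zero */
	}
	pool_refcnt++;
out:
	pthread_mutex_unlock(&pool_lock);
	return ret;
}

static void pool_put(void)
{
	pthread_mutex_lock(&pool_lock);
	if (--pool_refcnt == 0)
		stop_pool();              /* last user tears the pool down */
	pthread_mutex_unlock(&pool_lock);
}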
-
-
-int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
- struct btrfs_scrub_progress *progress, int readonly)
-{
- struct scrub_dev *sdev;
- struct btrfs_fs_info *fs_info = root->fs_info;
- int ret;
- struct btrfs_device *dev;
-
- if (btrfs_fs_closing(root->fs_info))
- return -EINVAL;
-
- /*
- * check some assumptions
- */
- if (root->nodesize != root->leafsize) {
- printk(KERN_ERR
- "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n",
- root->nodesize, root->leafsize);
- return -EINVAL;
- }
-
- if (root->nodesize > BTRFS_STRIPE_LEN) {
- /*
-		 * the way scrub is implemented, it is unable to calculate
-		 * the checksum in this case. Do not handle this
-		 * situation at all because it won't ever happen.
- */
- printk(KERN_ERR
- "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n",
- root->nodesize, BTRFS_STRIPE_LEN);
- return -EINVAL;
- }
-
- if (root->sectorsize != PAGE_SIZE) {
- /* not supported for data w/o checksums */
- printk(KERN_ERR
- "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n",
- root->sectorsize, (unsigned long long)PAGE_SIZE);
- return -EINVAL;
- }
-
- ret = scrub_workers_get(root);
- if (ret)
- return ret;
-
- mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
- dev = btrfs_find_device(root, devid, NULL, NULL);
- if (!dev || dev->missing) {
- mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
- scrub_workers_put(root);
- return -ENODEV;
- }
- mutex_lock(&fs_info->scrub_lock);
-
- if (!dev->in_fs_metadata) {
- mutex_unlock(&fs_info->scrub_lock);
- mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
- scrub_workers_put(root);
- return -ENODEV;
- }
-
- if (dev->scrub_device) {
- mutex_unlock(&fs_info->scrub_lock);
- mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
- scrub_workers_put(root);
- return -EINPROGRESS;
- }
- sdev = scrub_setup_dev(dev);
- if (IS_ERR(sdev)) {
- mutex_unlock(&fs_info->scrub_lock);
- mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
- scrub_workers_put(root);
- return PTR_ERR(sdev);
- }
- sdev->readonly = readonly;
- dev->scrub_device = sdev;
-
- atomic_inc(&fs_info->scrubs_running);
- mutex_unlock(&fs_info->scrub_lock);
- mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
-
- down_read(&fs_info->scrub_super_lock);
- ret = scrub_supers(sdev);
- up_read(&fs_info->scrub_super_lock);
-
- if (!ret)
- ret = scrub_enumerate_chunks(sdev, start, end);
-
- wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
- atomic_dec(&fs_info->scrubs_running);
- wake_up(&fs_info->scrub_pause_wait);
-
- wait_event(sdev->list_wait, atomic_read(&sdev->fixup_cnt) == 0);
-
- if (progress)
- memcpy(progress, &sdev->stat, sizeof(*progress));
-
- mutex_lock(&fs_info->scrub_lock);
- dev->scrub_device = NULL;
- mutex_unlock(&fs_info->scrub_lock);
-
- scrub_free_dev(sdev);
- scrub_workers_put(root);
-
- return ret;
-}
-
-void btrfs_scrub_pause(struct btrfs_root *root)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
-
- mutex_lock(&fs_info->scrub_lock);
- atomic_inc(&fs_info->scrub_pause_req);
- while (atomic_read(&fs_info->scrubs_paused) !=
- atomic_read(&fs_info->scrubs_running)) {
- mutex_unlock(&fs_info->scrub_lock);
- wait_event(fs_info->scrub_pause_wait,
- atomic_read(&fs_info->scrubs_paused) ==
- atomic_read(&fs_info->scrubs_running));
- mutex_lock(&fs_info->scrub_lock);
- }
- mutex_unlock(&fs_info->scrub_lock);
-}
-
-void btrfs_scrub_continue(struct btrfs_root *root)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
-
- atomic_dec(&fs_info->scrub_pause_req);
- wake_up(&fs_info->scrub_pause_wait);
-}
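The pause protocol is a rendezvous: the pauser raises scrub_pause_req and waits until every running scrub has parked itself (scrubs_paused catching up to scrubs_running); resuming drops the request and wakes the parked workers. A condensed pthread model of that handshake, with hypothetical names and a condition variable in place of the kernel wait queues:

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static int pause_req;             /* outstanding pause requests */
static int running;               /* scrub workers currently active */
static int paused;                /* scrub workers currently parked */

static void scrub_pause(void)
{
	pthread_mutex_lock(&lock);
	pause_req++;
	while (paused != running)         /* wait for every worker to park */
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
}

static void scrub_continue(void)
{
	pthread_mutex_lock(&lock);
	pause_req--;
	pthread_cond_broadcast(&cond);    /* release the parked workers */
	pthread_mutex_unlock(&lock);
}

/* called from the worker loop at its pause check points */
static void scrub_maybe_park(void)
{
	pthread_mutex_lock(&lock);
	if (pause_req) {
		paused++;
		pthread_cond_broadcast(&cond);    /* tell the pauser */
		while (pause_req)
			pthread_cond_wait(&cond, &lock);
		paused--;
	}
	pthread_mutex_unlock(&lock);
}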
-
-void btrfs_scrub_pause_super(struct btrfs_root *root)
-{
- down_write(&root->fs_info->scrub_super_lock);
-}
-
-void btrfs_scrub_continue_super(struct btrfs_root *root)
-{
- up_write(&root->fs_info->scrub_super_lock);
-}
-
-int __btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
-{
-
- mutex_lock(&fs_info->scrub_lock);
- if (!atomic_read(&fs_info->scrubs_running)) {
- mutex_unlock(&fs_info->scrub_lock);
- return -ENOTCONN;
- }
-
- atomic_inc(&fs_info->scrub_cancel_req);
- while (atomic_read(&fs_info->scrubs_running)) {
- mutex_unlock(&fs_info->scrub_lock);
- wait_event(fs_info->scrub_pause_wait,
- atomic_read(&fs_info->scrubs_running) == 0);
- mutex_lock(&fs_info->scrub_lock);
- }
- atomic_dec(&fs_info->scrub_cancel_req);
- mutex_unlock(&fs_info->scrub_lock);
-
- return 0;
-}
-
-int btrfs_scrub_cancel(struct btrfs_root *root)
-{
- return __btrfs_scrub_cancel(root->fs_info);
-}
-
-int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct scrub_dev *sdev;
-
- mutex_lock(&fs_info->scrub_lock);
- sdev = dev->scrub_device;
- if (!sdev) {
- mutex_unlock(&fs_info->scrub_lock);
- return -ENOTCONN;
- }
- atomic_inc(&sdev->cancel_req);
- while (dev->scrub_device) {
- mutex_unlock(&fs_info->scrub_lock);
- wait_event(fs_info->scrub_pause_wait,
- dev->scrub_device == NULL);
- mutex_lock(&fs_info->scrub_lock);
- }
- mutex_unlock(&fs_info->scrub_lock);
-
- return 0;
-}
-
-int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_device *dev;
- int ret;
-
- /*
- * we have to hold the device_list_mutex here so the device
- * does not go away in cancel_dev. FIXME: find a better solution
- */
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
- dev = btrfs_find_device(root, devid, NULL, NULL);
- if (!dev) {
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- return -ENODEV;
- }
- ret = btrfs_scrub_cancel_dev(root, dev);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-
- return ret;
-}
-
-int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
- struct btrfs_scrub_progress *progress)
-{
- struct btrfs_device *dev;
- struct scrub_dev *sdev = NULL;
-
- mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
- dev = btrfs_find_device(root, devid, NULL, NULL);
- if (dev)
- sdev = dev->scrub_device;
- if (sdev)
- memcpy(progress, &sdev->stat, sizeof(*progress));
- mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
-
- return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV;
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/struct-funcs.c b/ANDROID_3.4.5/fs/btrfs/struct-funcs.c
deleted file mode 100644
index c6ffa581..00000000
--- a/ANDROID_3.4.5/fs/btrfs/struct-funcs.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/highmem.h>
-
-/* this is some deeply nasty code. ctree.h has a different
- * definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef
- *
- * The end result is that anyone who #includes ctree.h gets a
- * declaration for the btrfs_set_foo functions and btrfs_foo functions
- *
- * This file declares the macros and then #includes ctree.h, which results
- * in cpp creating the function here based on the template below.
- *
- * These setget functions do all the extent_buffer related mapping
- * required to efficiently read and write specific fields in the extent
- * buffers. Every pointer to metadata items in btrfs is really just
- * an unsigned long offset into the extent buffer which has been
- * cast to a specific type. This gives us all the gcc type checking.
- *
- * The extent buffer api is used to do all the kmapping and page
- * spanning work required to get extent buffers in highmem and have
- * a metadata blocksize different from the page size.
- *
- * The macro starts with a simple function prototype declaration so that
- * sparse won't complain about it being static.
- */
-
-#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
-u##bits btrfs_##name(struct extent_buffer *eb, type *s); \
-void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); \
-void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token); \
-u##bits btrfs_token_##name(struct extent_buffer *eb, \
- type *s, struct btrfs_map_token *token) \
-{ \
- unsigned long part_offset = (unsigned long)s; \
- unsigned long offset = part_offset + offsetof(type, member); \
- type *p; \
- int err; \
- char *kaddr; \
- unsigned long map_start; \
- unsigned long map_len; \
- unsigned long mem_len = sizeof(((type *)0)->member); \
- u##bits res; \
- if (token && token->kaddr && token->offset <= offset && \
- token->eb == eb && \
- (token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \
- kaddr = token->kaddr; \
- p = (type *)(kaddr + part_offset - token->offset); \
- res = le##bits##_to_cpu(p->member); \
- return res; \
- } \
- err = map_private_extent_buffer(eb, offset, \
- mem_len, \
- &kaddr, &map_start, &map_len); \
- if (err) { \
- __le##bits leres; \
- read_eb_member(eb, s, type, member, &leres); \
- return le##bits##_to_cpu(leres); \
- } \
- p = (type *)(kaddr + part_offset - map_start); \
- res = le##bits##_to_cpu(p->member); \
- if (token) { \
- token->kaddr = kaddr; \
- token->offset = map_start; \
- token->eb = eb; \
- } \
- return res; \
-} \
-void btrfs_set_token_##name(struct extent_buffer *eb, \
- type *s, u##bits val, struct btrfs_map_token *token) \
-{ \
- unsigned long part_offset = (unsigned long)s; \
- unsigned long offset = part_offset + offsetof(type, member); \
- type *p; \
- int err; \
- char *kaddr; \
- unsigned long map_start; \
- unsigned long map_len; \
- unsigned long mem_len = sizeof(((type *)0)->member); \
- if (token && token->kaddr && token->offset <= offset && \
- token->eb == eb && \
- (token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \
- kaddr = token->kaddr; \
- p = (type *)(kaddr + part_offset - token->offset); \
- p->member = cpu_to_le##bits(val); \
- return; \
- } \
- err = map_private_extent_buffer(eb, offset, \
- mem_len, \
- &kaddr, &map_start, &map_len); \
- if (err) { \
- __le##bits val2; \
- val2 = cpu_to_le##bits(val); \
- write_eb_member(eb, s, type, member, &val2); \
- return; \
- } \
- p = (type *)(kaddr + part_offset - map_start); \
- p->member = cpu_to_le##bits(val); \
- if (token) { \
- token->kaddr = kaddr; \
- token->offset = map_start; \
- token->eb = eb; \
- } \
-} \
-void btrfs_set_##name(struct extent_buffer *eb, \
- type *s, u##bits val) \
-{ \
- btrfs_set_token_##name(eb, s, val, NULL); \
-} \
-u##bits btrfs_##name(struct extent_buffer *eb, \
- type *s) \
-{ \
- return btrfs_token_##name(eb, s, NULL); \
-} \
-
-#include "ctree.h"
-
-void btrfs_node_key(struct extent_buffer *eb,
- struct btrfs_disk_key *disk_key, int nr)
-{
- unsigned long ptr = btrfs_node_key_ptr_offset(nr);
- read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
- struct btrfs_key_ptr, key, disk_key);
-}
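The generated setget helpers cache the last mapped window in a btrfs_map_token so that repeated field accesses on the same extent-buffer page avoid remapping. Stripped of the extent-buffer and endianness machinery, the caching idea reduces to the sketch below; the token layout and the mapping callback are hypothetical.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* remembers the last mapped window so consecutive reads can reuse it */
struct map_token {
	const void *object;      /* which buffer the window belongs to */
	const char *kaddr;       /* start of the mapped window */
	size_t offset;           /* buffer offset the window starts at */
	size_t len;              /* window length */
};

/* hypothetical mapping callback: map a window of 'object' covering 'offset' */
typedef const char *(*map_fn)(const void *object, size_t offset,
			      size_t *map_start, size_t *map_len);

static uint32_t read_u32_cached(const void *object, size_t offset,
				struct map_token *token, map_fn map)
{
	uint32_t val;

	if (token->kaddr && token->object == object &&
	    token->offset <= offset &&
	    token->offset + token->len >= offset + sizeof(val)) {
		/* fast path: the field lies inside the cached window */
		memcpy(&val, token->kaddr + (offset - token->offset),
		       sizeof(val));
		return val;
	}

	/* slow path: map a new window and remember it in the token */
	size_t map_start, map_len;
	const char *kaddr = map(object, offset, &map_start, &map_len);

	memcpy(&val, kaddr + (offset - map_start), sizeof(val));
	token->object = object;
	token->kaddr = kaddr;
	token->offset = map_start;
	token->len = map_len;
	return val;
}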
diff --git a/ANDROID_3.4.5/fs/btrfs/super.c b/ANDROID_3.4.5/fs/btrfs/super.c
deleted file mode 100644
index c5f8fca4..00000000
--- a/ANDROID_3.4.5/fs/btrfs/super.c
+++ /dev/null
@@ -1,1578 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/blkdev.h>
-#include <linux/module.h>
-#include <linux/buffer_head.h>
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-#include <linux/highmem.h>
-#include <linux/time.h>
-#include <linux/init.h>
-#include <linux/seq_file.h>
-#include <linux/string.h>
-#include <linux/backing-dev.h>
-#include <linux/mount.h>
-#include <linux/mpage.h>
-#include <linux/swap.h>
-#include <linux/writeback.h>
-#include <linux/statfs.h>
-#include <linux/compat.h>
-#include <linux/parser.h>
-#include <linux/ctype.h>
-#include <linux/namei.h>
-#include <linux/miscdevice.h>
-#include <linux/magic.h>
-#include <linux/slab.h>
-#include <linux/cleancache.h>
-#include <linux/ratelimit.h>
-#include "compat.h"
-#include "delayed-inode.h"
-#include "ctree.h"
-#include "disk-io.h"
-#include "transaction.h"
-#include "btrfs_inode.h"
-#include "ioctl.h"
-#include "print-tree.h"
-#include "xattr.h"
-#include "volumes.h"
-#include "version.h"
-#include "export.h"
-#include "compression.h"
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/btrfs.h>
-
-static const struct super_operations btrfs_super_ops;
-static struct file_system_type btrfs_fs_type;
-
-static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
- char nbuf[16])
-{
- char *errstr = NULL;
-
- switch (errno) {
- case -EIO:
- errstr = "IO failure";
- break;
- case -ENOMEM:
- errstr = "Out of memory";
- break;
- case -EROFS:
- errstr = "Readonly filesystem";
- break;
- case -EEXIST:
- errstr = "Object already exists";
- break;
- default:
- if (nbuf) {
- if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
- errstr = nbuf;
- }
- break;
- }
-
- return errstr;
-}
-
-static void __save_error_info(struct btrfs_fs_info *fs_info)
-{
- /*
-	 * today we only save the error info in RAM. Long term we'll
-	 * also write it down to disk
- */
- fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR;
-}
-
-/* NOTE:
- * We move the write_super work to umount time in order to avoid a
- * deadlock, since umount holds all the locks.
- */
-static void save_error_info(struct btrfs_fs_info *fs_info)
-{
- __save_error_info(fs_info);
-}
-
-/* btrfs handle error by forcing the filesystem readonly */
-static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
-{
- struct super_block *sb = fs_info->sb;
-
- if (sb->s_flags & MS_RDONLY)
- return;
-
- if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
- sb->s_flags |= MS_RDONLY;
- printk(KERN_INFO "btrfs is forced readonly\n");
- __btrfs_scrub_cancel(fs_info);
-// WARN_ON(1);
- }
-}
-
-/*
- * __btrfs_std_error decodes expected errors from the caller and
- * invokes the appropriate error response.
- */
-void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
- unsigned int line, int errno, const char *fmt, ...)
-{
- struct super_block *sb = fs_info->sb;
- char nbuf[16];
- const char *errstr;
- va_list args;
- va_start(args, fmt);
-
- /*
- * Special case: if the error is EROFS, and we're already
- * under MS_RDONLY, then it is safe here.
- */
- if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
- return;
-
- errstr = btrfs_decode_error(fs_info, errno, nbuf);
- if (fmt) {
- struct va_format vaf = {
- .fmt = fmt,
- .va = &args,
- };
-
- printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s (%pV)\n",
- sb->s_id, function, line, errstr, &vaf);
- } else {
- printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
- sb->s_id, function, line, errstr);
- }
-
- /* Don't go through full error handling during mount */
- if (sb->s_flags & MS_BORN) {
- save_error_info(fs_info);
- btrfs_handle_error(fs_info);
- }
- va_end(args);
-}
-
-const char *logtypes[] = {
- "emergency",
- "alert",
- "critical",
- "error",
- "warning",
- "notice",
- "info",
- "debug",
-};
-
-void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
-{
- struct super_block *sb = fs_info->sb;
- char lvl[4];
- struct va_format vaf;
- va_list args;
- const char *type = logtypes[4];
-
- va_start(args, fmt);
-
- if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') {
-		strncpy(lvl, fmt, 3);
-		lvl[3] = '\0';
-		type = logtypes[fmt[1] - '0'];
-		fmt += 3;
- } else
- *lvl = '\0';
-
- vaf.fmt = fmt;
- vaf.va = &args;
- printk("%sBTRFS %s (device %s): %pV", lvl, type, sb->s_id, &vaf);
-}
-
-/*
- * We only mark the transaction aborted and then set the file system read-only.
- * This will prevent new transactions from starting or trying to join this
- * one.
- *
- * This means that error recovery at the call site is limited to freeing
- * any local memory allocations and passing the error code up without
- * further cleanup. The transaction should complete as it normally would
- * in the call path but will return -EIO.
- *
- * We'll complete the cleanup in btrfs_end_transaction and
- * btrfs_commit_transaction.
- */
-void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, const char *function,
- unsigned int line, int errno)
-{
- WARN_ONCE(1, KERN_DEBUG "btrfs: Transaction aborted");
- trans->aborted = errno;
-	/* No blocks were used. The other threads that have joined this
-	 * transaction may be able to continue. */
- if (!trans->blocks_used) {
- btrfs_printk(root->fs_info, "Aborting unused transaction.\n");
- return;
- }
- trans->transaction->aborted = errno;
- __btrfs_std_error(root->fs_info, function, line, errno, NULL);
-}
-/*
- * __btrfs_panic decodes unexpected, fatal errors from the caller,
- * issues an alert, and either panics or BUGs, depending on mount options.
- */
-void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
- unsigned int line, int errno, const char *fmt, ...)
-{
- char nbuf[16];
- char *s_id = "<unknown>";
- const char *errstr;
- struct va_format vaf = { .fmt = fmt };
- va_list args;
-
- if (fs_info)
- s_id = fs_info->sb->s_id;
-
- va_start(args, fmt);
- vaf.va = &args;
-
- errstr = btrfs_decode_error(fs_info, errno, nbuf);
- if (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)
- panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n",
- s_id, function, line, &vaf, errstr);
-
- printk(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n",
- s_id, function, line, &vaf, errstr);
- va_end(args);
- /* Caller calls BUG() */
-}
-
-static void btrfs_put_super(struct super_block *sb)
-{
- (void)close_ctree(btrfs_sb(sb)->tree_root);
- /* FIXME: need to fix VFS to return error? */
- /* AV: return it _where_? ->put_super() can be triggered by any number
- * of async events, up to and including delivery of SIGKILL to the
- * last process that kept it busy. Or segfault in the aforementioned
- * process... Whom would you report that to?
- */
-}
-
-enum {
- Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
- Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
- Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
- Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
- Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
- Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
- Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
- Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
- Opt_check_integrity, Opt_check_integrity_including_extent_data,
- Opt_check_integrity_print_mask, Opt_fatal_errors,
- Opt_err,
-};
-
-static match_table_t tokens = {
- {Opt_degraded, "degraded"},
- {Opt_subvol, "subvol=%s"},
- {Opt_subvolid, "subvolid=%d"},
- {Opt_device, "device=%s"},
- {Opt_nodatasum, "nodatasum"},
- {Opt_nodatacow, "nodatacow"},
- {Opt_nobarrier, "nobarrier"},
- {Opt_max_inline, "max_inline=%s"},
- {Opt_alloc_start, "alloc_start=%s"},
- {Opt_thread_pool, "thread_pool=%d"},
- {Opt_compress, "compress"},
- {Opt_compress_type, "compress=%s"},
- {Opt_compress_force, "compress-force"},
- {Opt_compress_force_type, "compress-force=%s"},
- {Opt_ssd, "ssd"},
- {Opt_ssd_spread, "ssd_spread"},
- {Opt_nossd, "nossd"},
- {Opt_noacl, "noacl"},
- {Opt_notreelog, "notreelog"},
- {Opt_flushoncommit, "flushoncommit"},
- {Opt_ratio, "metadata_ratio=%d"},
- {Opt_discard, "discard"},
- {Opt_space_cache, "space_cache"},
- {Opt_clear_cache, "clear_cache"},
- {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
- {Opt_enospc_debug, "enospc_debug"},
- {Opt_subvolrootid, "subvolrootid=%d"},
- {Opt_defrag, "autodefrag"},
- {Opt_inode_cache, "inode_cache"},
- {Opt_no_space_cache, "nospace_cache"},
- {Opt_recovery, "recovery"},
- {Opt_skip_balance, "skip_balance"},
- {Opt_check_integrity, "check_int"},
- {Opt_check_integrity_including_extent_data, "check_int_data"},
- {Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
- {Opt_fatal_errors, "fatal_errors=%s"},
- {Opt_err, NULL},
-};
-
-/*
- * Regular mount options parser. Everything that is needed only when
- * reading in a new superblock is parsed here.
- * XXX JDM: This needs to be cleaned up for remount.
- */
-int btrfs_parse_options(struct btrfs_root *root, char *options)
-{
- struct btrfs_fs_info *info = root->fs_info;
- substring_t args[MAX_OPT_ARGS];
- char *p, *num, *orig = NULL;
- u64 cache_gen;
- int intarg;
- int ret = 0;
- char *compress_type;
- bool compress_force = false;
-
- cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
- if (cache_gen)
- btrfs_set_opt(info->mount_opt, SPACE_CACHE);
-
- if (!options)
- goto out;
-
- /*
- * strsep changes the string, duplicate it because parse_options
- * gets called twice
- */
- options = kstrdup(options, GFP_NOFS);
- if (!options)
- return -ENOMEM;
-
- orig = options;
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_degraded:
- printk(KERN_INFO "btrfs: allowing degraded mounts\n");
- btrfs_set_opt(info->mount_opt, DEGRADED);
- break;
- case Opt_subvol:
- case Opt_subvolid:
- case Opt_subvolrootid:
- case Opt_device:
- /*
- * These are parsed by btrfs_parse_early_options
- * and can be happily ignored here.
- */
- break;
- case Opt_nodatasum:
- printk(KERN_INFO "btrfs: setting nodatasum\n");
- btrfs_set_opt(info->mount_opt, NODATASUM);
- break;
- case Opt_nodatacow:
- printk(KERN_INFO "btrfs: setting nodatacow\n");
- btrfs_set_opt(info->mount_opt, NODATACOW);
- btrfs_set_opt(info->mount_opt, NODATASUM);
- break;
- case Opt_compress_force:
- case Opt_compress_force_type:
- compress_force = true;
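-			/* fall through to the shared compression type handling below */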
- case Opt_compress:
- case Opt_compress_type:
- if (token == Opt_compress ||
- token == Opt_compress_force ||
- strcmp(args[0].from, "zlib") == 0) {
- compress_type = "zlib";
- info->compress_type = BTRFS_COMPRESS_ZLIB;
- } else if (strcmp(args[0].from, "lzo") == 0) {
- compress_type = "lzo";
- info->compress_type = BTRFS_COMPRESS_LZO;
- } else {
- ret = -EINVAL;
- goto out;
- }
-
- btrfs_set_opt(info->mount_opt, COMPRESS);
- if (compress_force) {
- btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
- pr_info("btrfs: force %s compression\n",
- compress_type);
- } else
- pr_info("btrfs: use %s compression\n",
- compress_type);
- break;
- case Opt_ssd:
- printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
- btrfs_set_opt(info->mount_opt, SSD);
- break;
- case Opt_ssd_spread:
- printk(KERN_INFO "btrfs: use spread ssd "
- "allocation scheme\n");
- btrfs_set_opt(info->mount_opt, SSD);
- btrfs_set_opt(info->mount_opt, SSD_SPREAD);
- break;
- case Opt_nossd:
- printk(KERN_INFO "btrfs: not using ssd allocation "
- "scheme\n");
- btrfs_set_opt(info->mount_opt, NOSSD);
- btrfs_clear_opt(info->mount_opt, SSD);
- btrfs_clear_opt(info->mount_opt, SSD_SPREAD);
- break;
- case Opt_nobarrier:
- printk(KERN_INFO "btrfs: turning off barriers\n");
- btrfs_set_opt(info->mount_opt, NOBARRIER);
- break;
- case Opt_thread_pool:
- intarg = 0;
- match_int(&args[0], &intarg);
- if (intarg) {
- info->thread_pool_size = intarg;
- printk(KERN_INFO "btrfs: thread pool %d\n",
- info->thread_pool_size);
- }
- break;
- case Opt_max_inline:
- num = match_strdup(&args[0]);
- if (num) {
- info->max_inline = memparse(num, NULL);
- kfree(num);
-
- if (info->max_inline) {
- info->max_inline = max_t(u64,
- info->max_inline,
- root->sectorsize);
- }
- printk(KERN_INFO "btrfs: max_inline at %llu\n",
- (unsigned long long)info->max_inline);
- }
- break;
- case Opt_alloc_start:
- num = match_strdup(&args[0]);
- if (num) {
- info->alloc_start = memparse(num, NULL);
- kfree(num);
- printk(KERN_INFO
- "btrfs: allocations start at %llu\n",
- (unsigned long long)info->alloc_start);
- }
- break;
- case Opt_noacl:
- root->fs_info->sb->s_flags &= ~MS_POSIXACL;
- break;
- case Opt_notreelog:
- printk(KERN_INFO "btrfs: disabling tree log\n");
- btrfs_set_opt(info->mount_opt, NOTREELOG);
- break;
- case Opt_flushoncommit:
- printk(KERN_INFO "btrfs: turning on flush-on-commit\n");
- btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
- break;
- case Opt_ratio:
- intarg = 0;
- match_int(&args[0], &intarg);
- if (intarg) {
- info->metadata_ratio = intarg;
- printk(KERN_INFO "btrfs: metadata ratio %d\n",
- info->metadata_ratio);
- }
- break;
- case Opt_discard:
- btrfs_set_opt(info->mount_opt, DISCARD);
- break;
- case Opt_space_cache:
- btrfs_set_opt(info->mount_opt, SPACE_CACHE);
- break;
- case Opt_no_space_cache:
- printk(KERN_INFO "btrfs: disabling disk space caching\n");
- btrfs_clear_opt(info->mount_opt, SPACE_CACHE);
- break;
- case Opt_inode_cache:
- printk(KERN_INFO "btrfs: enabling inode map caching\n");
- btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE);
- break;
- case Opt_clear_cache:
- printk(KERN_INFO "btrfs: force clearing of disk cache\n");
- btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
- break;
- case Opt_user_subvol_rm_allowed:
- btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
- break;
- case Opt_enospc_debug:
- btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
- break;
- case Opt_defrag:
-			printk(KERN_INFO "btrfs: enabling auto defrag\n");
- btrfs_set_opt(info->mount_opt, AUTO_DEFRAG);
- break;
- case Opt_recovery:
-			printk(KERN_INFO "btrfs: enabling auto recovery\n");
- btrfs_set_opt(info->mount_opt, RECOVERY);
- break;
- case Opt_skip_balance:
- btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
- break;
-#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
- case Opt_check_integrity_including_extent_data:
- printk(KERN_INFO "btrfs: enabling check integrity"
- " including extent data\n");
- btrfs_set_opt(info->mount_opt,
- CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
- btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
- break;
- case Opt_check_integrity:
- printk(KERN_INFO "btrfs: enabling check integrity\n");
- btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
- break;
- case Opt_check_integrity_print_mask:
- intarg = 0;
- match_int(&args[0], &intarg);
- if (intarg) {
- info->check_integrity_print_mask = intarg;
- printk(KERN_INFO "btrfs:"
- " check_integrity_print_mask 0x%x\n",
- info->check_integrity_print_mask);
- }
- break;
-#else
- case Opt_check_integrity_including_extent_data:
- case Opt_check_integrity:
- case Opt_check_integrity_print_mask:
- printk(KERN_ERR "btrfs: support for check_integrity*"
- " not compiled in!\n");
- ret = -EINVAL;
- goto out;
-#endif
- case Opt_fatal_errors:
- if (strcmp(args[0].from, "panic") == 0)
- btrfs_set_opt(info->mount_opt,
- PANIC_ON_FATAL_ERROR);
- else if (strcmp(args[0].from, "bug") == 0)
- btrfs_clear_opt(info->mount_opt,
- PANIC_ON_FATAL_ERROR);
- else {
- ret = -EINVAL;
- goto out;
- }
- break;
- case Opt_err:
- printk(KERN_INFO "btrfs: unrecognized mount option "
- "'%s'\n", p);
- ret = -EINVAL;
- goto out;
- default:
- break;
- }
- }
-out:
- if (!ret && btrfs_test_opt(root, SPACE_CACHE))
- printk(KERN_INFO "btrfs: disk space caching is enabled\n");
- kfree(orig);
- return ret;
-}
-
-/*
- * Parse mount options that are required early in the mount process.
- *
- * All other options will be parsed much later in the mount process and
- * only when we need to allocate a new super block.
- */
-static int btrfs_parse_early_options(const char *options, fmode_t flags,
- void *holder, char **subvol_name, u64 *subvol_objectid,
- u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices)
-{
- substring_t args[MAX_OPT_ARGS];
- char *device_name, *opts, *orig, *p;
- int error = 0;
- int intarg;
-
- if (!options)
- return 0;
-
- /*
- * strsep changes the string, duplicate it because parse_options
- * gets called twice
- */
- opts = kstrdup(options, GFP_KERNEL);
- if (!opts)
- return -ENOMEM;
- orig = opts;
-
- while ((p = strsep(&opts, ",")) != NULL) {
- int token;
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_subvol:
- kfree(*subvol_name);
- *subvol_name = match_strdup(&args[0]);
- break;
- case Opt_subvolid:
- intarg = 0;
- error = match_int(&args[0], &intarg);
- if (!error) {
- /* we want the original fs_tree */
- if (!intarg)
- *subvol_objectid =
- BTRFS_FS_TREE_OBJECTID;
- else
- *subvol_objectid = intarg;
- }
- break;
- case Opt_subvolrootid:
- intarg = 0;
- error = match_int(&args[0], &intarg);
- if (!error) {
- /* we want the original fs_tree */
- if (!intarg)
- *subvol_rootid =
- BTRFS_FS_TREE_OBJECTID;
- else
- *subvol_rootid = intarg;
- }
- break;
- case Opt_device:
- device_name = match_strdup(&args[0]);
- if (!device_name) {
- error = -ENOMEM;
- goto out;
- }
- error = btrfs_scan_one_device(device_name,
- flags, holder, fs_devices);
- kfree(device_name);
- if (error)
- goto out;
- break;
- default:
- break;
- }
- }
-
-out:
- kfree(orig);
- return error;
-}
-
-static struct dentry *get_default_root(struct super_block *sb,
- u64 subvol_objectid)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(sb);
- struct btrfs_root *root = fs_info->tree_root;
- struct btrfs_root *new_root;
- struct btrfs_dir_item *di;
- struct btrfs_path *path;
- struct btrfs_key location;
- struct inode *inode;
- u64 dir_id;
- int new = 0;
-
- /*
- * We have a specific subvol we want to mount, just setup location and
- * go look up the root.
- */
- if (subvol_objectid) {
- location.objectid = subvol_objectid;
- location.type = BTRFS_ROOT_ITEM_KEY;
- location.offset = (u64)-1;
- goto find_root;
- }
-
- path = btrfs_alloc_path();
- if (!path)
- return ERR_PTR(-ENOMEM);
- path->leave_spinning = 1;
-
- /*
- * Find the "default" dir item which points to the root item that we
- * will mount by default if we haven't been given a specific subvolume
- * to mount.
- */
- dir_id = btrfs_super_root_dir(fs_info->super_copy);
- di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
- if (IS_ERR(di)) {
- btrfs_free_path(path);
- return ERR_CAST(di);
- }
- if (!di) {
- /*
-		 * OK, the default dir item isn't there.  This is weird since
-		 * it's always been there, but don't freak out, just try to
-		 * mount the top-level subvolume.
- */
- btrfs_free_path(path);
- dir_id = BTRFS_FIRST_FREE_OBJECTID;
- new_root = fs_info->fs_root;
- goto setup_root;
- }
-
- btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
- btrfs_free_path(path);
-
-find_root:
- new_root = btrfs_read_fs_root_no_name(fs_info, &location);
- if (IS_ERR(new_root))
- return ERR_CAST(new_root);
-
- if (btrfs_root_refs(&new_root->root_item) == 0)
- return ERR_PTR(-ENOENT);
-
- dir_id = btrfs_root_dirid(&new_root->root_item);
-setup_root:
- location.objectid = dir_id;
- location.type = BTRFS_INODE_ITEM_KEY;
- location.offset = 0;
-
- inode = btrfs_iget(sb, &location, new_root, &new);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
-
- /*
-	 * If we're just mounting the top-level subvol, put the inode and return
- * a reference to the dentry. We will have already gotten a reference
- * to the inode in btrfs_fill_super so we're good to go.
- */
- if (!new && sb->s_root->d_inode == inode) {
- iput(inode);
- return dget(sb->s_root);
- }
-
- return d_obtain_alias(inode);
-}
-
-static int btrfs_fill_super(struct super_block *sb,
- struct btrfs_fs_devices *fs_devices,
- void *data, int silent)
-{
- struct inode *inode;
- struct btrfs_fs_info *fs_info = btrfs_sb(sb);
- struct btrfs_key key;
- int err;
-
- sb->s_maxbytes = MAX_LFS_FILESIZE;
- sb->s_magic = BTRFS_SUPER_MAGIC;
- sb->s_op = &btrfs_super_ops;
- sb->s_d_op = &btrfs_dentry_operations;
- sb->s_export_op = &btrfs_export_ops;
- sb->s_xattr = btrfs_xattr_handlers;
- sb->s_time_gran = 1;
-#ifdef CONFIG_BTRFS_FS_POSIX_ACL
- sb->s_flags |= MS_POSIXACL;
-#endif
-
- err = open_ctree(sb, fs_devices, (char *)data);
- if (err) {
- printk("btrfs: open_ctree failed\n");
- return err;
- }
-
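-	/*
-	 * Read the root directory inode of the fs tree; it always has
-	 * objectid BTRFS_FIRST_FREE_OBJECTID (256).
-	 */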
- key.objectid = BTRFS_FIRST_FREE_OBJECTID;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
- inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- goto fail_close;
- }
-
- sb->s_root = d_make_root(inode);
- if (!sb->s_root) {
- err = -ENOMEM;
- goto fail_close;
- }
-
- save_mount_options(sb, data);
- cleancache_init_fs(sb);
- sb->s_flags |= MS_ACTIVE;
- return 0;
-
-fail_close:
- close_ctree(fs_info->tree_root);
- return err;
-}
-
-int btrfs_sync_fs(struct super_block *sb, int wait)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_fs_info *fs_info = btrfs_sb(sb);
- struct btrfs_root *root = fs_info->tree_root;
- int ret;
-
- trace_btrfs_sync_fs(wait);
-
- if (!wait) {
- filemap_flush(fs_info->btree_inode->i_mapping);
- return 0;
- }
-
- btrfs_wait_ordered_extents(root, 0, 0);
-
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans, root);
- return ret;
-}
-
-static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
-{
- struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
- struct btrfs_root *root = info->tree_root;
- char *compress_type;
-
- if (btrfs_test_opt(root, DEGRADED))
- seq_puts(seq, ",degraded");
- if (btrfs_test_opt(root, NODATASUM))
- seq_puts(seq, ",nodatasum");
- if (btrfs_test_opt(root, NODATACOW))
- seq_puts(seq, ",nodatacow");
- if (btrfs_test_opt(root, NOBARRIER))
- seq_puts(seq, ",nobarrier");
- if (info->max_inline != 8192 * 1024)
- seq_printf(seq, ",max_inline=%llu",
- (unsigned long long)info->max_inline);
- if (info->alloc_start != 0)
- seq_printf(seq, ",alloc_start=%llu",
- (unsigned long long)info->alloc_start);
- if (info->thread_pool_size != min_t(unsigned long,
- num_online_cpus() + 2, 8))
- seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
- if (btrfs_test_opt(root, COMPRESS)) {
- if (info->compress_type == BTRFS_COMPRESS_ZLIB)
- compress_type = "zlib";
- else
- compress_type = "lzo";
- if (btrfs_test_opt(root, FORCE_COMPRESS))
- seq_printf(seq, ",compress-force=%s", compress_type);
- else
- seq_printf(seq, ",compress=%s", compress_type);
- }
- if (btrfs_test_opt(root, NOSSD))
- seq_puts(seq, ",nossd");
- if (btrfs_test_opt(root, SSD_SPREAD))
- seq_puts(seq, ",ssd_spread");
- else if (btrfs_test_opt(root, SSD))
- seq_puts(seq, ",ssd");
- if (btrfs_test_opt(root, NOTREELOG))
- seq_puts(seq, ",notreelog");
- if (btrfs_test_opt(root, FLUSHONCOMMIT))
- seq_puts(seq, ",flushoncommit");
- if (btrfs_test_opt(root, DISCARD))
- seq_puts(seq, ",discard");
- if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
- seq_puts(seq, ",noacl");
- if (btrfs_test_opt(root, SPACE_CACHE))
- seq_puts(seq, ",space_cache");
- else
- seq_puts(seq, ",nospace_cache");
- if (btrfs_test_opt(root, CLEAR_CACHE))
- seq_puts(seq, ",clear_cache");
- if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
- seq_puts(seq, ",user_subvol_rm_allowed");
- if (btrfs_test_opt(root, ENOSPC_DEBUG))
- seq_puts(seq, ",enospc_debug");
- if (btrfs_test_opt(root, AUTO_DEFRAG))
- seq_puts(seq, ",autodefrag");
- if (btrfs_test_opt(root, INODE_MAP_CACHE))
- seq_puts(seq, ",inode_cache");
- if (btrfs_test_opt(root, SKIP_BALANCE))
- seq_puts(seq, ",skip_balance");
- if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR))
- seq_puts(seq, ",fatal_errors=panic");
- return 0;
-}
-
-static int btrfs_test_super(struct super_block *s, void *data)
-{
- struct btrfs_fs_info *p = data;
- struct btrfs_fs_info *fs_info = btrfs_sb(s);
-
- return fs_info->fs_devices == p->fs_devices;
-}
-
-static int btrfs_set_super(struct super_block *s, void *data)
-{
- int err = set_anon_super(s, data);
- if (!err)
- s->s_fs_info = data;
- return err;
-}
-
-/*
- * subvolumes are identified by ino 256
- */
-static inline int is_subvolume_inode(struct inode *inode)
-{
- if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
- return 1;
- return 0;
-}
-
-/*
- * This will strip the subvol=%s argument out of the argument string and add
- * subvolid=0 to make sure we get the actual tree root for path walking to the
- * subvol we want.
- */
-static char *setup_root_args(char *args)
-{
- unsigned copied = 0;
- unsigned len = strlen(args) + 2;
- char *pos;
- char *ret;
-
- /*
- * We need the same args as before, but minus
- *
- * subvol=a
- *
- * and add
- *
- * subvolid=0
- *
- * which is a difference of 2 characters, so we allocate strlen(args) +
- * 2 characters.
- */
- ret = kzalloc(len * sizeof(char), GFP_NOFS);
- if (!ret)
- return NULL;
- pos = strstr(args, "subvol=");
-
- /* This shouldn't happen, but just in case.. */
- if (!pos) {
- kfree(ret);
- return NULL;
- }
-
- /*
-	 * The subvol=<> arg is not at the front of the string, so copy everything
-	 * up to it into ret.
- */
- if (pos != args) {
- *pos = '\0';
- strcpy(ret, args);
- copied += strlen(args);
- pos++;
- }
-
- strncpy(ret + copied, "subvolid=0", len - copied);
-
- /* Length of subvolid=0 */
- copied += 10;
-
- /*
- * If there is no , after the subvol= option then we know there's no
- * other options and we can just return.
- */
- pos = strchr(pos, ',');
- if (!pos)
- return ret;
-
- /* Copy the rest of the arguments into our buffer */
- strncpy(ret + copied, pos, len - copied);
- copied += strlen(pos);
-
- return ret;
-}
-
-static struct dentry *mount_subvol(const char *subvol_name, int flags,
- const char *device_name, char *data)
-{
- struct dentry *root;
- struct vfsmount *mnt;
- char *newargs;
-
- newargs = setup_root_args(data);
- if (!newargs)
- return ERR_PTR(-ENOMEM);
- mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name,
- newargs);
- kfree(newargs);
- if (IS_ERR(mnt))
- return ERR_CAST(mnt);
-
- root = mount_subtree(mnt, subvol_name);
-
- if (!IS_ERR(root) && !is_subvolume_inode(root->d_inode)) {
- struct super_block *s = root->d_sb;
- dput(root);
- root = ERR_PTR(-EINVAL);
- deactivate_locked_super(s);
- printk(KERN_ERR "btrfs: '%s' is not a valid subvolume\n",
- subvol_name);
- }
-
- return root;
-}
-
-/*
- * Find a superblock for the given device / mount point.
- *
- * Note: This is based on get_sb_bdev from fs/super.c with a few additions
- * for multiple device setup. Make sure to keep it in sync.
- */
-static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
- const char *device_name, void *data)
-{
- struct block_device *bdev = NULL;
- struct super_block *s;
- struct dentry *root;
- struct btrfs_fs_devices *fs_devices = NULL;
- struct btrfs_fs_info *fs_info = NULL;
- fmode_t mode = FMODE_READ;
- char *subvol_name = NULL;
- u64 subvol_objectid = 0;
- u64 subvol_rootid = 0;
- int error = 0;
-
- if (!(flags & MS_RDONLY))
- mode |= FMODE_WRITE;
-
- error = btrfs_parse_early_options(data, mode, fs_type,
- &subvol_name, &subvol_objectid,
- &subvol_rootid, &fs_devices);
- if (error) {
- kfree(subvol_name);
- return ERR_PTR(error);
- }
-
- if (subvol_name) {
- root = mount_subvol(subvol_name, flags, device_name, data);
- kfree(subvol_name);
- return root;
- }
-
- error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
- if (error)
- return ERR_PTR(error);
-
- /*
- * Setup a dummy root and fs_info for test/set super. This is because
- * we don't actually fill this stuff out until open_ctree, but we need
- * it for searching for existing supers, so this lets us do that and
- * then open_ctree will properly initialize everything later.
- */
- fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
- if (!fs_info)
- return ERR_PTR(-ENOMEM);
-
- fs_info->fs_devices = fs_devices;
-
- fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
- fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
- if (!fs_info->super_copy || !fs_info->super_for_commit) {
- error = -ENOMEM;
- goto error_fs_info;
- }
-
- error = btrfs_open_devices(fs_devices, mode, fs_type);
- if (error)
- goto error_fs_info;
-
- if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
- error = -EACCES;
- goto error_close_devices;
- }
-
- bdev = fs_devices->latest_bdev;
- s = sget(fs_type, btrfs_test_super, btrfs_set_super, fs_info);
- if (IS_ERR(s)) {
- error = PTR_ERR(s);
- goto error_close_devices;
- }
-
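-	/*
-	 * If sget() returned an already-active superblock for these devices,
-	 * s_root is set and we reuse it instead of our temporary fs_info.
-	 */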
- if (s->s_root) {
- btrfs_close_devices(fs_devices);
- free_fs_info(fs_info);
- if ((flags ^ s->s_flags) & MS_RDONLY)
- error = -EBUSY;
- } else {
- char b[BDEVNAME_SIZE];
-
- s->s_flags = flags | MS_NOSEC;
- strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
- btrfs_sb(s)->bdev_holder = fs_type;
- error = btrfs_fill_super(s, fs_devices, data,
- flags & MS_SILENT ? 1 : 0);
- }
-
- root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error);
- if (IS_ERR(root))
- deactivate_locked_super(s);
-
- return root;
-
-error_close_devices:
- btrfs_close_devices(fs_devices);
-error_fs_info:
- free_fs_info(fs_info);
- return ERR_PTR(error);
-}
-
-static int btrfs_remount(struct super_block *sb, int *flags, char *data)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(sb);
- struct btrfs_root *root = fs_info->tree_root;
- unsigned old_flags = sb->s_flags;
- unsigned long old_opts = fs_info->mount_opt;
- unsigned long old_compress_type = fs_info->compress_type;
- u64 old_max_inline = fs_info->max_inline;
- u64 old_alloc_start = fs_info->alloc_start;
- int old_thread_pool_size = fs_info->thread_pool_size;
- unsigned int old_metadata_ratio = fs_info->metadata_ratio;
- int ret;
-
- ret = btrfs_parse_options(root, data);
- if (ret) {
- ret = -EINVAL;
- goto restore;
- }
-
- if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
- return 0;
-
- if (*flags & MS_RDONLY) {
- sb->s_flags |= MS_RDONLY;
-
- ret = btrfs_commit_super(root);
- if (ret)
- goto restore;
- } else {
- if (fs_info->fs_devices->rw_devices == 0) {
- ret = -EACCES;
- goto restore;
- }
-
- if (btrfs_super_log_root(fs_info->super_copy) != 0) {
- ret = -EINVAL;
- goto restore;
- }
-
- ret = btrfs_cleanup_fs_roots(fs_info);
- if (ret)
- goto restore;
-
- /* recover relocation */
- ret = btrfs_recover_relocation(root);
- if (ret)
- goto restore;
-
- sb->s_flags &= ~MS_RDONLY;
- }
-
- return 0;
-
-restore:
- /* We've hit an error - don't reset MS_RDONLY */
- if (sb->s_flags & MS_RDONLY)
- old_flags |= MS_RDONLY;
- sb->s_flags = old_flags;
- fs_info->mount_opt = old_opts;
- fs_info->compress_type = old_compress_type;
- fs_info->max_inline = old_max_inline;
- fs_info->alloc_start = old_alloc_start;
- fs_info->thread_pool_size = old_thread_pool_size;
- fs_info->metadata_ratio = old_metadata_ratio;
- return ret;
-}
-
-/* Used to sort the devices by max_avail (descending sort) */
-static int btrfs_cmp_device_free_bytes(const void *dev_info1,
- const void *dev_info2)
-{
- if (((struct btrfs_device_info *)dev_info1)->max_avail >
- ((struct btrfs_device_info *)dev_info2)->max_avail)
- return -1;
- else if (((struct btrfs_device_info *)dev_info1)->max_avail <
- ((struct btrfs_device_info *)dev_info2)->max_avail)
- return 1;
- else
- return 0;
-}
-
-/*
- * sort the devices by max_avail, which stores the maximum free extent size
- * of each device (descending sort)
- */
-static inline void btrfs_descending_sort_devices(
- struct btrfs_device_info *devices,
- size_t nr_devices)
-{
- sort(devices, nr_devices, sizeof(struct btrfs_device_info),
- btrfs_cmp_device_free_bytes, NULL);
-}
-
-/*
- * Helper to calculate the free space on the devices that can be used to store
- * file data.
- */
-static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_device_info *devices_info;
- struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
- struct btrfs_device *device;
- u64 skip_space;
- u64 type;
- u64 avail_space;
- u64 used_space;
- u64 min_stripe_size;
- int min_stripes = 1, num_stripes = 1;
- int i = 0, nr_devices;
- int ret;
-
- nr_devices = fs_info->fs_devices->open_devices;
- BUG_ON(!nr_devices);
-
- devices_info = kmalloc(sizeof(*devices_info) * nr_devices,
- GFP_NOFS);
- if (!devices_info)
- return -ENOMEM;
-
-	/* calc min stripe number for data space allocation */
- type = btrfs_get_alloc_profile(root, 1);
- if (type & BTRFS_BLOCK_GROUP_RAID0) {
- min_stripes = 2;
- num_stripes = nr_devices;
- } else if (type & BTRFS_BLOCK_GROUP_RAID1) {
- min_stripes = 2;
- num_stripes = 2;
- } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
- min_stripes = 4;
- num_stripes = 4;
- }
-
- if (type & BTRFS_BLOCK_GROUP_DUP)
- min_stripe_size = 2 * BTRFS_STRIPE_LEN;
- else
- min_stripe_size = BTRFS_STRIPE_LEN;
-
- list_for_each_entry(device, &fs_devices->devices, dev_list) {
- if (!device->in_fs_metadata || !device->bdev)
- continue;
-
- avail_space = device->total_bytes - device->bytes_used;
-
- /* align with stripe_len */
- do_div(avail_space, BTRFS_STRIPE_LEN);
- avail_space *= BTRFS_STRIPE_LEN;
-
- /*
-		 * In order to avoid overwriting the superblock on the drive,
- * btrfs starts at an offset of at least 1MB when doing chunk
- * allocation.
- */
- skip_space = 1024 * 1024;
-
- /* user can set the offset in fs_info->alloc_start. */
- if (fs_info->alloc_start + BTRFS_STRIPE_LEN <=
- device->total_bytes)
- skip_space = max(fs_info->alloc_start, skip_space);
-
- /*
-		 * btrfs cannot use the free space in [0, skip_space - 1], so
-		 * we must subtract it from the total.  In order to implement
-		 * this, we account the used space in this range first.
- */
- ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1,
- &used_space);
- if (ret) {
- kfree(devices_info);
- return ret;
- }
-
- /* calc the free space in [0, skip_space - 1] */
- skip_space -= used_space;
-
- /*
-		 * skip_space now holds the unusable free space in
-		 * [0, skip_space - 1]; subtract it from the total.
- */
- if (avail_space && avail_space >= skip_space)
- avail_space -= skip_space;
- else
- avail_space = 0;
-
- if (avail_space < min_stripe_size)
- continue;
-
- devices_info[i].dev = device;
- devices_info[i].max_avail = avail_space;
-
- i++;
- }
-
- nr_devices = i;
-
- btrfs_descending_sort_devices(devices_info, nr_devices);
-
- i = nr_devices - 1;
- avail_space = 0;
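-	/*
-	 * Simulate chunk allocation over the sorted devices, starting from the
-	 * one with the least free space, to estimate how much data space can
-	 * really be allocated.
-	 */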
- while (nr_devices >= min_stripes) {
- if (num_stripes > nr_devices)
- num_stripes = nr_devices;
-
- if (devices_info[i].max_avail >= min_stripe_size) {
- int j;
- u64 alloc_size;
-
- avail_space += devices_info[i].max_avail * num_stripes;
- alloc_size = devices_info[i].max_avail;
- for (j = i + 1 - num_stripes; j <= i; j++)
- devices_info[j].max_avail -= alloc_size;
- }
- i--;
- nr_devices--;
- }
-
- kfree(devices_info);
- *free_bytes = avail_space;
- return 0;
-}
-
-static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
- struct btrfs_super_block *disk_super = fs_info->super_copy;
- struct list_head *head = &fs_info->space_info;
- struct btrfs_space_info *found;
- u64 total_used = 0;
- u64 total_free_data = 0;
- int bits = dentry->d_sb->s_blocksize_bits;
- __be32 *fsid = (__be32 *)fs_info->fsid;
- int ret;
-
-	/* holding chunk_mutex to avoid allocating new chunks */
- mutex_lock(&fs_info->chunk_mutex);
- rcu_read_lock();
- list_for_each_entry_rcu(found, head, list) {
- if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
- total_free_data += found->disk_total - found->disk_used;
- total_free_data -=
- btrfs_account_ro_block_groups_free_space(found);
- }
-
- total_used += found->disk_used;
- }
- rcu_read_unlock();
-
- buf->f_namelen = BTRFS_NAME_LEN;
- buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
- buf->f_bfree = buf->f_blocks - (total_used >> bits);
- buf->f_bsize = dentry->d_sb->s_blocksize;
- buf->f_type = BTRFS_SUPER_MAGIC;
- buf->f_bavail = total_free_data;
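-	/* also count device space that has not been allocated to any chunk yet */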
- ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data);
- if (ret) {
- mutex_unlock(&fs_info->chunk_mutex);
- return ret;
- }
- buf->f_bavail += total_free_data;
- buf->f_bavail = buf->f_bavail >> bits;
- mutex_unlock(&fs_info->chunk_mutex);
-
- /* We treat it as constant endianness (it doesn't matter _which_)
- because we want the fsid to come out the same whether mounted
- on a big-endian or little-endian host */
- buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
- buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
- /* Mask in the root object ID too, to disambiguate subvols */
- buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32;
- buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid;
-
- return 0;
-}
-
-static void btrfs_kill_super(struct super_block *sb)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(sb);
- kill_anon_super(sb);
- free_fs_info(fs_info);
-}
-
-static struct file_system_type btrfs_fs_type = {
- .owner = THIS_MODULE,
- .name = "btrfs",
- .mount = btrfs_mount,
- .kill_sb = btrfs_kill_super,
- .fs_flags = FS_REQUIRES_DEV,
-};
-
-/*
- * used by btrfsctl to scan devices when no FS is mounted
- */
-static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- struct btrfs_ioctl_vol_args *vol;
- struct btrfs_fs_devices *fs_devices;
- int ret = -ENOTTY;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- vol = memdup_user((void __user *)arg, sizeof(*vol));
- if (IS_ERR(vol))
- return PTR_ERR(vol);
-
- switch (cmd) {
- case BTRFS_IOC_SCAN_DEV:
- ret = btrfs_scan_one_device(vol->name, FMODE_READ,
- &btrfs_fs_type, &fs_devices);
- break;
- }
-
- kfree(vol);
- return ret;
-}
-
-static int btrfs_freeze(struct super_block *sb)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(sb);
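-	/*
-	 * Holding these mutexes keeps the transaction kthread and the cleaner
-	 * from doing any work while the filesystem is frozen.
-	 */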
- mutex_lock(&fs_info->transaction_kthread_mutex);
- mutex_lock(&fs_info->cleaner_mutex);
- return 0;
-}
-
-static int btrfs_unfreeze(struct super_block *sb)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(sb);
- mutex_unlock(&fs_info->cleaner_mutex);
- mutex_unlock(&fs_info->transaction_kthread_mutex);
- return 0;
-}
-
-static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
-{
- int ret;
-
- ret = btrfs_dirty_inode(inode);
- if (ret)
-		printk_ratelimited(KERN_ERR "btrfs: failed to dirty inode %Lu "
- "error %d\n", btrfs_ino(inode), ret);
-}
-
-static const struct super_operations btrfs_super_ops = {
- .drop_inode = btrfs_drop_inode,
- .evict_inode = btrfs_evict_inode,
- .put_super = btrfs_put_super,
- .sync_fs = btrfs_sync_fs,
- .show_options = btrfs_show_options,
- .write_inode = btrfs_write_inode,
- .dirty_inode = btrfs_fs_dirty_inode,
- .alloc_inode = btrfs_alloc_inode,
- .destroy_inode = btrfs_destroy_inode,
- .statfs = btrfs_statfs,
- .remount_fs = btrfs_remount,
- .freeze_fs = btrfs_freeze,
- .unfreeze_fs = btrfs_unfreeze,
-};
-
-static const struct file_operations btrfs_ctl_fops = {
- .unlocked_ioctl = btrfs_control_ioctl,
- .compat_ioctl = btrfs_control_ioctl,
- .owner = THIS_MODULE,
- .llseek = noop_llseek,
-};
-
-static struct miscdevice btrfs_misc = {
- .minor = BTRFS_MINOR,
- .name = "btrfs-control",
- .fops = &btrfs_ctl_fops
-};
-
-MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
-MODULE_ALIAS("devname:btrfs-control");
-
-static int btrfs_interface_init(void)
-{
- return misc_register(&btrfs_misc);
-}
-
-static void btrfs_interface_exit(void)
-{
- if (misc_deregister(&btrfs_misc) < 0)
-		printk(KERN_INFO "misc_deregister failed for control device\n");
-}
-
-static int __init init_btrfs_fs(void)
-{
- int err;
-
- err = btrfs_init_sysfs();
- if (err)
- return err;
-
- btrfs_init_compress();
-
- err = btrfs_init_cachep();
- if (err)
- goto free_compress;
-
- err = extent_io_init();
- if (err)
- goto free_cachep;
-
- err = extent_map_init();
- if (err)
- goto free_extent_io;
-
- err = btrfs_delayed_inode_init();
- if (err)
- goto free_extent_map;
-
- err = btrfs_interface_init();
- if (err)
- goto free_delayed_inode;
-
- err = register_filesystem(&btrfs_fs_type);
- if (err)
- goto unregister_ioctl;
-
- btrfs_init_lockdep();
-
- printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION);
- return 0;
-
-unregister_ioctl:
- btrfs_interface_exit();
-free_delayed_inode:
- btrfs_delayed_inode_exit();
-free_extent_map:
- extent_map_exit();
-free_extent_io:
- extent_io_exit();
-free_cachep:
- btrfs_destroy_cachep();
-free_compress:
- btrfs_exit_compress();
- btrfs_exit_sysfs();
- return err;
-}
-
-static void __exit exit_btrfs_fs(void)
-{
- btrfs_destroy_cachep();
- btrfs_delayed_inode_exit();
- extent_map_exit();
- extent_io_exit();
- btrfs_interface_exit();
- unregister_filesystem(&btrfs_fs_type);
- btrfs_exit_sysfs();
- btrfs_cleanup_fs_uuids();
- btrfs_exit_compress();
-}
-
-module_init(init_btrfs_fs)
-module_exit(exit_btrfs_fs)
-
-MODULE_LICENSE("GPL");
diff --git a/ANDROID_3.4.5/fs/btrfs/sysfs.c b/ANDROID_3.4.5/fs/btrfs/sysfs.c
deleted file mode 100644
index daac9ae6..00000000
--- a/ANDROID_3.4.5/fs/btrfs/sysfs.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <linux/module.h>
-#include <linux/kobject.h>
-
-#include "ctree.h"
-#include "disk-io.h"
-#include "transaction.h"
-
-/* /sys/fs/btrfs/ entry */
-static struct kset *btrfs_kset;
-
-int btrfs_init_sysfs(void)
-{
- btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj);
- if (!btrfs_kset)
- return -ENOMEM;
- return 0;
-}
-
-void btrfs_exit_sysfs(void)
-{
- kset_unregister(btrfs_kset);
-}
-
diff --git a/ANDROID_3.4.5/fs/btrfs/transaction.c b/ANDROID_3.4.5/fs/btrfs/transaction.c
deleted file mode 100644
index 36422254..00000000
--- a/ANDROID_3.4.5/fs/btrfs/transaction.c
+++ /dev/null
@@ -1,1539 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/sched.h>
-#include <linux/writeback.h>
-#include <linux/pagemap.h>
-#include <linux/blkdev.h>
-#include "ctree.h"
-#include "disk-io.h"
-#include "transaction.h"
-#include "locking.h"
-#include "tree-log.h"
-#include "inode-map.h"
-
-#define BTRFS_ROOT_TRANS_TAG 0
-
-void put_transaction(struct btrfs_transaction *transaction)
-{
- WARN_ON(atomic_read(&transaction->use_count) == 0);
- if (atomic_dec_and_test(&transaction->use_count)) {
- BUG_ON(!list_empty(&transaction->list));
- WARN_ON(transaction->delayed_refs.root.rb_node);
- WARN_ON(!list_empty(&transaction->delayed_refs.seq_head));
- memset(transaction, 0, sizeof(*transaction));
- kmem_cache_free(btrfs_transaction_cachep, transaction);
- }
-}
-
-static noinline void switch_commit_root(struct btrfs_root *root)
-{
- free_extent_buffer(root->commit_root);
- root->commit_root = btrfs_root_node(root);
-}
-
-/*
- * either allocate a new transaction or hop into the existing one
- */
-static noinline int join_transaction(struct btrfs_root *root, int nofail)
-{
- struct btrfs_transaction *cur_trans;
-
- spin_lock(&root->fs_info->trans_lock);
-loop:
- /* The file system has been taken offline. No new transactions. */
- if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
- spin_unlock(&root->fs_info->trans_lock);
- return -EROFS;
- }
-
- if (root->fs_info->trans_no_join) {
- if (!nofail) {
- spin_unlock(&root->fs_info->trans_lock);
- return -EBUSY;
- }
- }
-
- cur_trans = root->fs_info->running_transaction;
- if (cur_trans) {
- if (cur_trans->aborted) {
- spin_unlock(&root->fs_info->trans_lock);
- return cur_trans->aborted;
- }
- atomic_inc(&cur_trans->use_count);
- atomic_inc(&cur_trans->num_writers);
- cur_trans->num_joined++;
- spin_unlock(&root->fs_info->trans_lock);
- return 0;
- }
- spin_unlock(&root->fs_info->trans_lock);
-
- cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
- if (!cur_trans)
- return -ENOMEM;
-
- spin_lock(&root->fs_info->trans_lock);
- if (root->fs_info->running_transaction) {
- /*
- * someone started a transaction after we unlocked. Make sure
- * to redo the trans_no_join checks above
- */
- kmem_cache_free(btrfs_transaction_cachep, cur_trans);
- cur_trans = root->fs_info->running_transaction;
- goto loop;
- }
-
- atomic_set(&cur_trans->num_writers, 1);
- cur_trans->num_joined = 0;
- init_waitqueue_head(&cur_trans->writer_wait);
- init_waitqueue_head(&cur_trans->commit_wait);
- cur_trans->in_commit = 0;
- cur_trans->blocked = 0;
- /*
- * One for this trans handle, one so it will live on until we
- * commit the transaction.
- */
- atomic_set(&cur_trans->use_count, 2);
- cur_trans->commit_done = 0;
- cur_trans->start_time = get_seconds();
-
- cur_trans->delayed_refs.root = RB_ROOT;
- cur_trans->delayed_refs.num_entries = 0;
- cur_trans->delayed_refs.num_heads_ready = 0;
- cur_trans->delayed_refs.num_heads = 0;
- cur_trans->delayed_refs.flushing = 0;
- cur_trans->delayed_refs.run_delayed_start = 0;
- cur_trans->delayed_refs.seq = 1;
- init_waitqueue_head(&cur_trans->delayed_refs.seq_wait);
- spin_lock_init(&cur_trans->commit_lock);
- spin_lock_init(&cur_trans->delayed_refs.lock);
- INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head);
-
- INIT_LIST_HEAD(&cur_trans->pending_snapshots);
- list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
- extent_io_tree_init(&cur_trans->dirty_pages,
- root->fs_info->btree_inode->i_mapping);
- root->fs_info->generation++;
- cur_trans->transid = root->fs_info->generation;
- root->fs_info->running_transaction = cur_trans;
- cur_trans->aborted = 0;
- spin_unlock(&root->fs_info->trans_lock);
-
- return 0;
-}
-
-/*
- * this does all the record keeping required to make sure that a reference
- * counted root is properly recorded in a given transaction. This is required
- * to make sure the old root from before we joined the transaction is deleted
- * when the transaction commits
- */
-static int record_root_in_trans(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- if (root->ref_cows && root->last_trans < trans->transid) {
- WARN_ON(root == root->fs_info->extent_root);
- WARN_ON(root->commit_root != root->node);
-
- /*
- * see below for in_trans_setup usage rules
- * we have the reloc mutex held now, so there
- * is only one writer in this function
- */
- root->in_trans_setup = 1;
-
- /* make sure readers find in_trans_setup before
- * they find our root->last_trans update
- */
- smp_wmb();
-
- spin_lock(&root->fs_info->fs_roots_radix_lock);
- if (root->last_trans == trans->transid) {
- spin_unlock(&root->fs_info->fs_roots_radix_lock);
- return 0;
- }
- radix_tree_tag_set(&root->fs_info->fs_roots_radix,
- (unsigned long)root->root_key.objectid,
- BTRFS_ROOT_TRANS_TAG);
- spin_unlock(&root->fs_info->fs_roots_radix_lock);
- root->last_trans = trans->transid;
-
- /* this is pretty tricky. We don't want to
- * take the relocation lock in btrfs_record_root_in_trans
- * unless we're really doing the first setup for this root in
- * this transaction.
- *
- * Normally we'd use root->last_trans as a flag to decide
- * if we want to take the expensive mutex.
- *
- * But, we have to set root->last_trans before we
- * init the relocation root, otherwise, we trip over warnings
- * in ctree.c. The solution used here is to flag ourselves
- * with root->in_trans_setup. When this is 1, we're still
- * fixing up the reloc trees and everyone must wait.
- *
- * When this is zero, they can trust root->last_trans and fly
- * through btrfs_record_root_in_trans without having to take the
- * lock. smp_wmb() makes sure that all the writes above are
- * done before we pop in the zero below
- */
- btrfs_init_reloc_root(trans, root);
- smp_wmb();
- root->in_trans_setup = 0;
- }
- return 0;
-}
-
-
-int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- if (!root->ref_cows)
- return 0;
-
- /*
- * see record_root_in_trans for comments about in_trans_setup usage
- * and barriers
- */
- smp_rmb();
- if (root->last_trans == trans->transid &&
- !root->in_trans_setup)
- return 0;
-
- mutex_lock(&root->fs_info->reloc_mutex);
- record_root_in_trans(trans, root);
- mutex_unlock(&root->fs_info->reloc_mutex);
-
- return 0;
-}
-
-/* wait for commit against the current transaction to become unblocked.
- * When this is done, it is safe to start a new transaction, but the current
- * transaction might not be fully on disk.
- */
-static void wait_current_trans(struct btrfs_root *root)
-{
- struct btrfs_transaction *cur_trans;
-
- spin_lock(&root->fs_info->trans_lock);
- cur_trans = root->fs_info->running_transaction;
- if (cur_trans && cur_trans->blocked) {
- atomic_inc(&cur_trans->use_count);
- spin_unlock(&root->fs_info->trans_lock);
-
- wait_event(root->fs_info->transaction_wait,
- !cur_trans->blocked);
- put_transaction(cur_trans);
- } else {
- spin_unlock(&root->fs_info->trans_lock);
- }
-}
-
-enum btrfs_trans_type {
- TRANS_START,
- TRANS_JOIN,
- TRANS_USERSPACE,
- TRANS_JOIN_NOLOCK,
-};
-
-static int may_wait_transaction(struct btrfs_root *root, int type)
-{
- if (root->fs_info->log_root_recovering)
- return 0;
-
- if (type == TRANS_USERSPACE)
- return 1;
-
- if (type == TRANS_START &&
- !atomic_read(&root->fs_info->open_ioctl_trans))
- return 1;
-
- return 0;
-}
-
-static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
- u64 num_items, int type)
-{
- struct btrfs_trans_handle *h;
- struct btrfs_transaction *cur_trans;
- u64 num_bytes = 0;
- int ret;
-
- if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
- return ERR_PTR(-EROFS);
-
- if (current->journal_info) {
- WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK);
- h = current->journal_info;
- h->use_count++;
- h->orig_rsv = h->block_rsv;
- h->block_rsv = NULL;
- goto got_it;
- }
-
- /*
- * Do the reservation before we join the transaction so we can do all
- * the appropriate flushing if need be.
- */
- if (num_items > 0 && root != root->fs_info->chunk_root) {
- num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
- ret = btrfs_block_rsv_add(root,
- &root->fs_info->trans_block_rsv,
- num_bytes);
- if (ret)
- return ERR_PTR(ret);
- }
-again:
- h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
- if (!h)
- return ERR_PTR(-ENOMEM);
-
- if (may_wait_transaction(root, type))
- wait_current_trans(root);
-
- do {
- ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
- if (ret == -EBUSY)
- wait_current_trans(root);
- } while (ret == -EBUSY);
-
- if (ret < 0) {
- kmem_cache_free(btrfs_trans_handle_cachep, h);
- return ERR_PTR(ret);
- }
-
- cur_trans = root->fs_info->running_transaction;
-
- h->transid = cur_trans->transid;
- h->transaction = cur_trans;
- h->blocks_used = 0;
- h->bytes_reserved = 0;
- h->delayed_ref_updates = 0;
- h->use_count = 1;
- h->block_rsv = NULL;
- h->orig_rsv = NULL;
- h->aborted = 0;
-
- smp_mb();
- if (cur_trans->blocked && may_wait_transaction(root, type)) {
- btrfs_commit_transaction(h, root);
- goto again;
- }
-
- if (num_bytes) {
- trace_btrfs_space_reservation(root->fs_info, "transaction",
- h->transid, num_bytes, 1);
- h->block_rsv = &root->fs_info->trans_block_rsv;
- h->bytes_reserved = num_bytes;
- }
-
-got_it:
- btrfs_record_root_in_trans(h, root);
-
- if (!current->journal_info && type != TRANS_USERSPACE)
- current->journal_info = h;
- return h;
-}
-
-struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
- int num_items)
-{
- return start_transaction(root, num_items, TRANS_START);
-}
-struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
-{
- return start_transaction(root, 0, TRANS_JOIN);
-}
-
-struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
-{
- return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
-}
-
-struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
-{
- return start_transaction(root, 0, TRANS_USERSPACE);
-}
-
-/* wait for a transaction commit to be fully complete */
-static noinline void wait_for_commit(struct btrfs_root *root,
- struct btrfs_transaction *commit)
-{
- wait_event(commit->commit_wait, commit->commit_done);
-}
-
-int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
-{
- struct btrfs_transaction *cur_trans = NULL, *t;
- int ret;
-
- ret = 0;
- if (transid) {
- if (transid <= root->fs_info->last_trans_committed)
- goto out;
-
- /* find specified transaction */
- spin_lock(&root->fs_info->trans_lock);
- list_for_each_entry(t, &root->fs_info->trans_list, list) {
- if (t->transid == transid) {
- cur_trans = t;
- atomic_inc(&cur_trans->use_count);
- break;
- }
- if (t->transid > transid)
- break;
- }
- spin_unlock(&root->fs_info->trans_lock);
- ret = -EINVAL;
- if (!cur_trans)
- goto out; /* bad transid */
- } else {
- /* find newest transaction that is committing | committed */
- spin_lock(&root->fs_info->trans_lock);
- list_for_each_entry_reverse(t, &root->fs_info->trans_list,
- list) {
- if (t->in_commit) {
- if (t->commit_done)
- break;
- cur_trans = t;
- atomic_inc(&cur_trans->use_count);
- break;
- }
- }
- spin_unlock(&root->fs_info->trans_lock);
- if (!cur_trans)
- goto out; /* nothing committing|committed */
- }
-
- wait_for_commit(root, cur_trans);
-
- put_transaction(cur_trans);
- ret = 0;
-out:
- return ret;
-}
-
-void btrfs_throttle(struct btrfs_root *root)
-{
- if (!atomic_read(&root->fs_info->open_ioctl_trans))
- wait_current_trans(root);
-}
-
-static int should_end_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- int ret;
-
- ret = btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
- return ret ? 1 : 0;
-}
-
-int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_transaction *cur_trans = trans->transaction;
- struct btrfs_block_rsv *rsv = trans->block_rsv;
- int updates;
- int err;
-
- smp_mb();
- if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
- return 1;
-
- /*
- * We need to do this in case we're deleting csums so the global block
-	 * rsv gets used instead of the csum block rsv.
- */
- trans->block_rsv = NULL;
-
- updates = trans->delayed_ref_updates;
- trans->delayed_ref_updates = 0;
- if (updates) {
- err = btrfs_run_delayed_refs(trans, root, updates);
- if (err) /* Error code will also eval true */
- return err;
- }
-
- trans->block_rsv = rsv;
-
- return should_end_transaction(trans, root);
-}
-
-static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int throttle, int lock)
-{
- struct btrfs_transaction *cur_trans = trans->transaction;
- struct btrfs_fs_info *info = root->fs_info;
- int count = 0;
- int err = 0;
-
- if (--trans->use_count) {
- trans->block_rsv = trans->orig_rsv;
- return 0;
- }
-
- btrfs_trans_release_metadata(trans, root);
- trans->block_rsv = NULL;
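-	/*
-	 * Run up to two batches of accumulated delayed refs before dropping
-	 * the handle.
-	 */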
- while (count < 2) {
- unsigned long cur = trans->delayed_ref_updates;
- trans->delayed_ref_updates = 0;
- if (cur &&
- trans->transaction->delayed_refs.num_heads_ready > 64) {
- trans->delayed_ref_updates = 0;
- btrfs_run_delayed_refs(trans, root, cur);
- } else {
- break;
- }
- count++;
- }
-
- if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
- should_end_transaction(trans, root)) {
- trans->transaction->blocked = 1;
- smp_wmb();
- }
-
- if (lock && cur_trans->blocked && !cur_trans->in_commit) {
- if (throttle) {
- /*
- * We may race with somebody else here so end up having
- * to call end_transaction on ourselves again, so inc
- * our use_count.
- */
- trans->use_count++;
- return btrfs_commit_transaction(trans, root);
- } else {
- wake_up_process(info->transaction_kthread);
- }
- }
-
- WARN_ON(cur_trans != info->running_transaction);
- WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
- atomic_dec(&cur_trans->num_writers);
-
- smp_mb();
- if (waitqueue_active(&cur_trans->writer_wait))
- wake_up(&cur_trans->writer_wait);
- put_transaction(cur_trans);
-
- if (current->journal_info == trans)
- current->journal_info = NULL;
-
- if (throttle)
- btrfs_run_delayed_iputs(root);
-
- if (trans->aborted ||
- root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
- err = -EIO;
- }
-
- memset(trans, 0, sizeof(*trans));
- kmem_cache_free(btrfs_trans_handle_cachep, trans);
- return err;
-}
-
-int btrfs_end_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- int ret;
-
- ret = __btrfs_end_transaction(trans, root, 0, 1);
- if (ret)
- return ret;
- return 0;
-}
-
-int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- int ret;
-
- ret = __btrfs_end_transaction(trans, root, 1, 1);
- if (ret)
- return ret;
- return 0;
-}
-
-int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- int ret;
-
- ret = __btrfs_end_transaction(trans, root, 0, 0);
- if (ret)
- return ret;
- return 0;
-}
-
-int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- return __btrfs_end_transaction(trans, root, 1, 1);
-}
-
-/*
- * when btree blocks are allocated, they have some corresponding bits set for
- * them in one of two extent_io trees. This is used to make sure all of
- * those extents are sent to disk but does not wait on them
- */
-int btrfs_write_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages, int mark)
-{
- int err = 0;
- int werr = 0;
- struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
- u64 start = 0;
- u64 end;
-
- while (!find_first_extent_bit(dirty_pages, start, &start, &end,
- mark)) {
- convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, mark,
- GFP_NOFS);
- err = filemap_fdatawrite_range(mapping, start, end);
- if (err)
- werr = err;
- cond_resched();
- start = end + 1;
- }
- if (err)
- werr = err;
- return werr;
-}
-
-/*
- * when btree blocks are allocated, they have some corresponding bits set for
- * them in one of two extent_io trees. This is used to make sure all of
- * those extents are on disk for transaction or log commit. We wait
- * on all the pages and clear them from the dirty pages state tree
- */
-int btrfs_wait_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages, int mark)
-{
- int err = 0;
- int werr = 0;
- struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
- u64 start = 0;
- u64 end;
-
- while (!find_first_extent_bit(dirty_pages, start, &start, &end,
- EXTENT_NEED_WAIT)) {
- clear_extent_bits(dirty_pages, start, end, EXTENT_NEED_WAIT, GFP_NOFS);
- err = filemap_fdatawait_range(mapping, start, end);
- if (err)
- werr = err;
- cond_resched();
- start = end + 1;
- }
- if (err)
- werr = err;
- return werr;
-}
-
-/*
- * when btree blocks are allocated, they have some corresponding bits set for
- * them in one of two extent_io trees. This is used to make sure all of
- * those extents are on disk for transaction or log commit
- */
-int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages, int mark)
-{
- int ret;
- int ret2;
-
- ret = btrfs_write_marked_extents(root, dirty_pages, mark);
- ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
-
- if (ret)
- return ret;
- if (ret2)
- return ret2;
- return 0;
-}
-
-int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- if (!trans || !trans->transaction) {
- struct inode *btree_inode;
- btree_inode = root->fs_info->btree_inode;
- return filemap_write_and_wait(btree_inode->i_mapping);
- }
- return btrfs_write_and_wait_marked_extents(root,
- &trans->transaction->dirty_pages,
- EXTENT_DIRTY);
-}
-
-/*
- * this is used to update the root pointer in the tree of tree roots.
- *
- * But, in the case of the extent allocation tree, updating the root
- * pointer may allocate blocks which may change the root of the extent
- * allocation tree.
- *
- * So, this loops and repeats and makes sure the cowonly root didn't
- * change while the root pointer was being updated in the metadata.
- */
-static int update_cowonly_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- int ret;
- u64 old_root_bytenr;
- u64 old_root_used;
- struct btrfs_root *tree_root = root->fs_info->tree_root;
-
- old_root_used = btrfs_root_used(&root->root_item);
- btrfs_write_dirty_block_groups(trans, root);
-
- while (1) {
- old_root_bytenr = btrfs_root_bytenr(&root->root_item);
- if (old_root_bytenr == root->node->start &&
- old_root_used == btrfs_root_used(&root->root_item))
- break;
-
- btrfs_set_root_node(&root->root_item, root->node);
- ret = btrfs_update_root(trans, tree_root,
- &root->root_key,
- &root->root_item);
- if (ret)
- return ret;
-
- old_root_used = btrfs_root_used(&root->root_item);
- ret = btrfs_write_dirty_block_groups(trans, root);
- if (ret)
- return ret;
- }
-
- if (root != root->fs_info->extent_root)
- switch_commit_root(root);
-
- return 0;
-}
-
-/*
- * update all the cowonly tree roots on disk
- *
- * The error handling in this function may not be obvious. Any of the
- * failures will cause the file system to go offline. We still need
- * to clean up the delayed refs.
- */
-static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct list_head *next;
- struct extent_buffer *eb;
- int ret;
-
- ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
- if (ret)
- return ret;
-
- eb = btrfs_lock_root_node(fs_info->tree_root);
- ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
- 0, &eb);
- btrfs_tree_unlock(eb);
- free_extent_buffer(eb);
-
- if (ret)
- return ret;
-
- ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
- if (ret)
- return ret;
-
- while (!list_empty(&fs_info->dirty_cowonly_roots)) {
- next = fs_info->dirty_cowonly_roots.next;
- list_del_init(next);
- root = list_entry(next, struct btrfs_root, dirty_list);
-
- ret = update_cowonly_root(trans, root);
- if (ret)
- return ret;
- }
-
- down_write(&fs_info->extent_commit_sem);
- switch_commit_root(fs_info->extent_root);
- up_write(&fs_info->extent_commit_sem);
-
- return 0;
-}
-
-/*
- * dead roots are old snapshots that need to be deleted.  This adds the given
- * root to the list of dead roots that need to be deleted.
- */
-int btrfs_add_dead_root(struct btrfs_root *root)
-{
- spin_lock(&root->fs_info->trans_lock);
- list_add(&root->root_list, &root->fs_info->dead_roots);
- spin_unlock(&root->fs_info->trans_lock);
- return 0;
-}
-
-/*
- * update all the fs tree roots on disk that were modified in this transaction
- */
-static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_root *gang[8];
- struct btrfs_fs_info *fs_info = root->fs_info;
- int i;
- int ret;
- int err = 0;
-
- spin_lock(&fs_info->fs_roots_radix_lock);
- while (1) {
- ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
- (void **)gang, 0,
- ARRAY_SIZE(gang),
- BTRFS_ROOT_TRANS_TAG);
- if (ret == 0)
- break;
- for (i = 0; i < ret; i++) {
- root = gang[i];
- radix_tree_tag_clear(&fs_info->fs_roots_radix,
- (unsigned long)root->root_key.objectid,
- BTRFS_ROOT_TRANS_TAG);
- spin_unlock(&fs_info->fs_roots_radix_lock);
-
- btrfs_free_log(trans, root);
- btrfs_update_reloc_root(trans, root);
- btrfs_orphan_commit_root(trans, root);
-
- btrfs_save_ino_cache(root, trans);
-
- /* see comments in should_cow_block() */
- root->force_cow = 0;
- smp_wmb();
-
- if (root->commit_root != root->node) {
- mutex_lock(&root->fs_commit_mutex);
- switch_commit_root(root);
- btrfs_unpin_free_ino(root);
- mutex_unlock(&root->fs_commit_mutex);
-
- btrfs_set_root_node(&root->root_item,
- root->node);
- }
-
- err = btrfs_update_root(trans, fs_info->tree_root,
- &root->root_key,
- &root->root_item);
- spin_lock(&fs_info->fs_roots_radix_lock);
- if (err)
- break;
- }
- }
- spin_unlock(&fs_info->fs_roots_radix_lock);
- return err;
-}
-
-/*
- * defrag a given btree. If cacheonly == 1, this won't read from the disk,
- * otherwise every leaf in the btree is read and defragged.
- */
-int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
-{
- struct btrfs_fs_info *info = root->fs_info;
- struct btrfs_trans_handle *trans;
- int ret;
- unsigned long nr;
-
- if (xchg(&root->defrag_running, 1))
- return 0;
-
- while (1) {
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- ret = btrfs_defrag_leaves(trans, root, cacheonly);
-
- nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(info->tree_root, nr);
- cond_resched();
-
- if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
- break;
- }
- root->defrag_running = 0;
- return ret;
-}
-
-/*
- * new snapshots need to be created at a very specific time in the
- * transaction commit. This does the actual creation
- */
-static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info,
- struct btrfs_pending_snapshot *pending)
-{
- struct btrfs_key key;
- struct btrfs_root_item *new_root_item;
- struct btrfs_root *tree_root = fs_info->tree_root;
- struct btrfs_root *root = pending->root;
- struct btrfs_root *parent_root;
- struct btrfs_block_rsv *rsv;
- struct inode *parent_inode;
- struct dentry *parent;
- struct dentry *dentry;
- struct extent_buffer *tmp;
- struct extent_buffer *old;
- int ret;
- u64 to_reserve = 0;
- u64 index = 0;
- u64 objectid;
- u64 root_flags;
-
- rsv = trans->block_rsv;
-
- new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
- if (!new_root_item) {
- ret = pending->error = -ENOMEM;
- goto fail;
- }
-
- ret = btrfs_find_free_objectid(tree_root, &objectid);
- if (ret) {
- pending->error = ret;
- goto fail;
- }
-
- btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
-
- if (to_reserve > 0) {
- ret = btrfs_block_rsv_add_noflush(root, &pending->block_rsv,
- to_reserve);
- if (ret) {
- pending->error = ret;
- goto fail;
- }
- }
-
- key.objectid = objectid;
- key.offset = (u64)-1;
- key.type = BTRFS_ROOT_ITEM_KEY;
-
- trans->block_rsv = &pending->block_rsv;
-
- dentry = pending->dentry;
- parent = dget_parent(dentry);
- parent_inode = parent->d_inode;
- parent_root = BTRFS_I(parent_inode)->root;
- record_root_in_trans(trans, parent_root);
-
- /*
- * insert the directory item
- */
- ret = btrfs_set_inode_index(parent_inode, &index);
- BUG_ON(ret); /* -ENOMEM */
- ret = btrfs_insert_dir_item(trans, parent_root,
- dentry->d_name.name, dentry->d_name.len,
- parent_inode, &key,
- BTRFS_FT_DIR, index);
- if (ret == -EEXIST) {
- pending->error = -EEXIST;
- dput(parent);
- goto fail;
- } else if (ret) {
- goto abort_trans_dput;
- }
-
- btrfs_i_size_write(parent_inode, parent_inode->i_size +
- dentry->d_name.len * 2);
- ret = btrfs_update_inode(trans, parent_root, parent_inode);
- if (ret)
- goto abort_trans_dput;
-
- /*
- * pull in the delayed directory update
- * and the delayed inode item
- * otherwise we corrupt the FS during
- * snapshot
- */
- ret = btrfs_run_delayed_items(trans, root);
- if (ret) { /* Transaction aborted */
- dput(parent);
- goto fail;
- }
-
- record_root_in_trans(trans, root);
- btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
- memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
- btrfs_check_and_init_root_item(new_root_item);
-
- root_flags = btrfs_root_flags(new_root_item);
- if (pending->readonly)
- root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
- else
- root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
- btrfs_set_root_flags(new_root_item, root_flags);
-
- old = btrfs_lock_root_node(root);
- ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
- if (ret) {
- btrfs_tree_unlock(old);
- free_extent_buffer(old);
- goto abort_trans_dput;
- }
-
- btrfs_set_lock_blocking(old);
-
- ret = btrfs_copy_root(trans, root, old, &tmp, objectid);
- /* clean up in any case */
- btrfs_tree_unlock(old);
- free_extent_buffer(old);
- if (ret)
- goto abort_trans_dput;
-
- /* see comments in should_cow_block() */
- root->force_cow = 1;
- smp_wmb();
-
- btrfs_set_root_node(new_root_item, tmp);
- /* record when the snapshot was created in key.offset */
- key.offset = trans->transid;
- ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
- btrfs_tree_unlock(tmp);
- free_extent_buffer(tmp);
- if (ret)
- goto abort_trans_dput;
-
- /*
- * insert root back/forward references
- */
- ret = btrfs_add_root_ref(trans, tree_root, objectid,
- parent_root->root_key.objectid,
- btrfs_ino(parent_inode), index,
- dentry->d_name.name, dentry->d_name.len);
- dput(parent);
- if (ret)
- goto fail;
-
- key.offset = (u64)-1;
- pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
- if (IS_ERR(pending->snap)) {
- ret = PTR_ERR(pending->snap);
- goto abort_trans;
- }
-
- ret = btrfs_reloc_post_snapshot(trans, pending);
- if (ret)
- goto abort_trans;
- ret = 0;
-fail:
- kfree(new_root_item);
- trans->block_rsv = rsv;
- btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1);
- return ret;
-
-abort_trans_dput:
- dput(parent);
-abort_trans:
- btrfs_abort_transaction(trans, root, ret);
- goto fail;
-}
-
-/*
- * create all the snapshots we've scheduled for creation
- */
-static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
-{
- struct btrfs_pending_snapshot *pending;
- struct list_head *head = &trans->transaction->pending_snapshots;
-
- list_for_each_entry(pending, head, list)
- create_pending_snapshot(trans, fs_info, pending);
- return 0;
-}
-
-static void update_super_roots(struct btrfs_root *root)
-{
- struct btrfs_root_item *root_item;
- struct btrfs_super_block *super;
-
- super = root->fs_info->super_copy;
-
- root_item = &root->fs_info->chunk_root->root_item;
- super->chunk_root = root_item->bytenr;
- super->chunk_root_generation = root_item->generation;
- super->chunk_root_level = root_item->level;
-
- root_item = &root->fs_info->tree_root->root_item;
- super->root = root_item->bytenr;
- super->generation = root_item->generation;
- super->root_level = root_item->level;
- if (btrfs_test_opt(root, SPACE_CACHE))
- super->cache_generation = root_item->generation;
-}
-
-int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
-{
- int ret = 0;
- spin_lock(&info->trans_lock);
- if (info->running_transaction)
- ret = info->running_transaction->in_commit;
- spin_unlock(&info->trans_lock);
- return ret;
-}
-
-int btrfs_transaction_blocked(struct btrfs_fs_info *info)
-{
- int ret = 0;
- spin_lock(&info->trans_lock);
- if (info->running_transaction)
- ret = info->running_transaction->blocked;
- spin_unlock(&info->trans_lock);
- return ret;
-}
-
-/*
- * wait for the current transaction commit to start and block subsequent
- * transaction joins
- */
-static void wait_current_trans_commit_start(struct btrfs_root *root,
- struct btrfs_transaction *trans)
-{
- wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit);
-}
-
-/*
- * wait for the current transaction to start and then become unblocked.
- * caller holds ref.
- */
-static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
- struct btrfs_transaction *trans)
-{
- wait_event(root->fs_info->transaction_wait,
- trans->commit_done || (trans->in_commit && !trans->blocked));
-}
-
-/*
- * commit transactions asynchronously. once btrfs_commit_transaction_async
- * returns, any subsequent transaction will not be allowed to join.
- */
-struct btrfs_async_commit {
- struct btrfs_trans_handle *newtrans;
- struct btrfs_root *root;
- struct delayed_work work;
-};
-
-static void do_async_commit(struct work_struct *work)
-{
- struct btrfs_async_commit *ac =
- container_of(work, struct btrfs_async_commit, work.work);
-
- btrfs_commit_transaction(ac->newtrans, ac->root);
- kfree(ac);
-}
-
-int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- int wait_for_unblock)
-{
- struct btrfs_async_commit *ac;
- struct btrfs_transaction *cur_trans;
-
- ac = kmalloc(sizeof(*ac), GFP_NOFS);
- if (!ac)
- return -ENOMEM;
-
- INIT_DELAYED_WORK(&ac->work, do_async_commit);
- ac->root = root;
- ac->newtrans = btrfs_join_transaction(root);
- if (IS_ERR(ac->newtrans)) {
- int err = PTR_ERR(ac->newtrans);
- kfree(ac);
- return err;
- }
-
- /* take transaction reference */
- cur_trans = trans->transaction;
- atomic_inc(&cur_trans->use_count);
-
- btrfs_end_transaction(trans, root);
- schedule_delayed_work(&ac->work, 0);
-
- /* wait for transaction to start and unblock */
- if (wait_for_unblock)
- wait_current_trans_commit_start_and_unblock(root, cur_trans);
- else
- wait_current_trans_commit_start(root, cur_trans);
-
- if (current->journal_info == trans)
- current->journal_info = NULL;
-
- put_transaction(cur_trans);
- return 0;
-}
-
-
-static void cleanup_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_transaction *cur_trans = trans->transaction;
-
- WARN_ON(trans->use_count > 1);
-
- spin_lock(&root->fs_info->trans_lock);
- list_del_init(&cur_trans->list);
- spin_unlock(&root->fs_info->trans_lock);
-
- btrfs_cleanup_one_transaction(trans->transaction, root);
-
- put_transaction(cur_trans);
- put_transaction(cur_trans);
-
- trace_btrfs_transaction_commit(root);
-
- btrfs_scrub_continue(root);
-
- if (current->journal_info == trans)
- current->journal_info = NULL;
-
- kmem_cache_free(btrfs_trans_handle_cachep, trans);
-}
-
-/*
- * btrfs_transaction state sequence:
- * in_commit = 0, blocked = 0 (initial)
- * in_commit = 1, blocked = 1
- * blocked = 0
- * commit_done = 1
- */
-int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- unsigned long joined = 0;
- struct btrfs_transaction *cur_trans = trans->transaction;
- struct btrfs_transaction *prev_trans = NULL;
- DEFINE_WAIT(wait);
- int ret = -EIO;
- int should_grow = 0;
- unsigned long now = get_seconds();
- int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);
-
- btrfs_run_ordered_operations(root, 0);
-
- btrfs_trans_release_metadata(trans, root);
- trans->block_rsv = NULL;
-
- if (cur_trans->aborted)
- goto cleanup_transaction;
-
- /* make a pass through all the delayed refs we have so far;
- * any running procs may add more while we are here
- */
- ret = btrfs_run_delayed_refs(trans, root, 0);
- if (ret)
- goto cleanup_transaction;
-
- cur_trans = trans->transaction;
-
- /*
- * set the flushing flag so procs in this transaction have to
- * start sending their work down.
- */
- cur_trans->delayed_refs.flushing = 1;
-
- ret = btrfs_run_delayed_refs(trans, root, 0);
- if (ret)
- goto cleanup_transaction;
-
- spin_lock(&cur_trans->commit_lock);
- if (cur_trans->in_commit) {
- spin_unlock(&cur_trans->commit_lock);
- atomic_inc(&cur_trans->use_count);
- ret = btrfs_end_transaction(trans, root);
-
- wait_for_commit(root, cur_trans);
-
- put_transaction(cur_trans);
-
- return ret;
- }
-
- trans->transaction->in_commit = 1;
- trans->transaction->blocked = 1;
- spin_unlock(&cur_trans->commit_lock);
- wake_up(&root->fs_info->transaction_blocked_wait);
-
- spin_lock(&root->fs_info->trans_lock);
- if (cur_trans->list.prev != &root->fs_info->trans_list) {
- prev_trans = list_entry(cur_trans->list.prev,
- struct btrfs_transaction, list);
- if (!prev_trans->commit_done) {
- atomic_inc(&prev_trans->use_count);
- spin_unlock(&root->fs_info->trans_lock);
-
- wait_for_commit(root, prev_trans);
-
- put_transaction(prev_trans);
- } else {
- spin_unlock(&root->fs_info->trans_lock);
- }
- } else {
- spin_unlock(&root->fs_info->trans_lock);
- }
-
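- /*
- * if the transaction started less than a second ago, let it grow:
- * the loop below gives new writers a brief chance to join
- */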
- if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
- should_grow = 1;
-
- do {
- int snap_pending = 0;
-
- joined = cur_trans->num_joined;
- if (!list_empty(&trans->transaction->pending_snapshots))
- snap_pending = 1;
-
- WARN_ON(cur_trans != trans->transaction);
-
- if (flush_on_commit || snap_pending) {
- btrfs_start_delalloc_inodes(root, 1);
- btrfs_wait_ordered_extents(root, 0, 1);
- }
-
- ret = btrfs_run_delayed_items(trans, root);
- if (ret)
- goto cleanup_transaction;
-
- /*
- * rename doesn't use btrfs_join_transaction, so, once we
- * set the transaction to blocked above, we aren't going
- * to get any new ordered operations. We can safely run
- * it here and know for sure that nothing new will be added
- * to the list
- */
- btrfs_run_ordered_operations(root, 1);
-
- prepare_to_wait(&cur_trans->writer_wait, &wait,
- TASK_UNINTERRUPTIBLE);
-
- if (atomic_read(&cur_trans->num_writers) > 1)
- schedule_timeout(MAX_SCHEDULE_TIMEOUT);
- else if (should_grow)
- schedule_timeout(1);
-
- finish_wait(&cur_trans->writer_wait, &wait);
- } while (atomic_read(&cur_trans->num_writers) > 1 ||
- (should_grow && cur_trans->num_joined != joined));
-
- /*
- * Ok now we need to make sure to block out any other joins while we
- * commit the transaction. We could have started a join before setting
- * no_join so make sure to wait for num_writers to drop to 1 again.
- */
- spin_lock(&root->fs_info->trans_lock);
- root->fs_info->trans_no_join = 1;
- spin_unlock(&root->fs_info->trans_lock);
- wait_event(cur_trans->writer_wait,
- atomic_read(&cur_trans->num_writers) == 1);
-
- /*
- * the reloc mutex makes sure that we stop
- * the balancing code from coming in and moving
- * extents around in the middle of the commit
- */
- mutex_lock(&root->fs_info->reloc_mutex);
-
- ret = btrfs_run_delayed_items(trans, root);
- if (ret) {
- mutex_unlock(&root->fs_info->reloc_mutex);
- goto cleanup_transaction;
- }
-
- ret = create_pending_snapshots(trans, root->fs_info);
- if (ret) {
- mutex_unlock(&root->fs_info->reloc_mutex);
- goto cleanup_transaction;
- }
-
- ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
- if (ret) {
- mutex_unlock(&root->fs_info->reloc_mutex);
- goto cleanup_transaction;
- }
-
- /*
- * make sure none of the code above managed to slip in a
- * delayed item
- */
- btrfs_assert_delayed_root_empty(root);
-
- WARN_ON(cur_trans != trans->transaction);
-
- btrfs_scrub_pause(root);
- /* btrfs_commit_tree_roots is responsible for getting the
- * various roots consistent with each other. Every pointer
- * in the tree of tree roots has to point to the most up to date
- * root for every subvolume and other tree. So, we have to keep
- * the tree logging code from jumping in and changing any
- * of the trees.
- *
- * At this point in the commit, there can't be any tree-log
- * writers, but a little lower down we drop the trans mutex
- * and let new people in. By holding the tree_log_mutex
- * from now until after the super is written, we avoid races
- * with the tree-log code.
- */
- mutex_lock(&root->fs_info->tree_log_mutex);
-
- ret = commit_fs_roots(trans, root);
- if (ret) {
- mutex_unlock(&root->fs_info->tree_log_mutex);
- mutex_unlock(&root->fs_info->reloc_mutex);
- goto cleanup_transaction;
- }
-
- /* commit_fs_roots gets rid of all the tree log roots; it is now
- * safe to free the root of the tree log roots
- */
- btrfs_free_log_root_tree(trans, root->fs_info);
-
- ret = commit_cowonly_roots(trans, root);
- if (ret) {
- mutex_unlock(&root->fs_info->tree_log_mutex);
- mutex_unlock(&root->fs_info->reloc_mutex);
- goto cleanup_transaction;
- }
-
- btrfs_prepare_extent_commit(trans, root);
-
- cur_trans = root->fs_info->running_transaction;
-
- btrfs_set_root_node(&root->fs_info->tree_root->root_item,
- root->fs_info->tree_root->node);
- switch_commit_root(root->fs_info->tree_root);
-
- btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
- root->fs_info->chunk_root->node);
- switch_commit_root(root->fs_info->chunk_root);
-
- update_super_roots(root);
-
- if (!root->fs_info->log_root_recovering) {
- btrfs_set_super_log_root(root->fs_info->super_copy, 0);
- btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
- }
-
- memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy,
- sizeof(*root->fs_info->super_copy));
-
- trans->transaction->blocked = 0;
- spin_lock(&root->fs_info->trans_lock);
- root->fs_info->running_transaction = NULL;
- root->fs_info->trans_no_join = 0;
- spin_unlock(&root->fs_info->trans_lock);
- mutex_unlock(&root->fs_info->reloc_mutex);
-
- wake_up(&root->fs_info->transaction_wait);
-
- ret = btrfs_write_and_wait_transaction(trans, root);
- if (ret) {
- btrfs_error(root->fs_info, ret,
- "Error while writing out transaction.");
- mutex_unlock(&root->fs_info->tree_log_mutex);
- goto cleanup_transaction;
- }
-
- ret = write_ctree_super(trans, root, 0);
- if (ret) {
- mutex_unlock(&root->fs_info->tree_log_mutex);
- goto cleanup_transaction;
- }
-
- /*
- * the super is written, we can safely allow the tree-loggers
- * to go about their business
- */
- mutex_unlock(&root->fs_info->tree_log_mutex);
-
- btrfs_finish_extent_commit(trans, root);
-
- cur_trans->commit_done = 1;
-
- root->fs_info->last_trans_committed = cur_trans->transid;
-
- wake_up(&cur_trans->commit_wait);
-
- spin_lock(&root->fs_info->trans_lock);
- list_del_init(&cur_trans->list);
- spin_unlock(&root->fs_info->trans_lock);
-
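- /*
- * drop both references on the transaction: the one held by this
- * handle and the one that kept it on the running transaction list
- */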
- put_transaction(cur_trans);
- put_transaction(cur_trans);
-
- trace_btrfs_transaction_commit(root);
-
- btrfs_scrub_continue(root);
-
- if (current->journal_info == trans)
- current->journal_info = NULL;
-
- kmem_cache_free(btrfs_trans_handle_cachep, trans);
-
- if (current != root->fs_info->transaction_kthread)
- btrfs_run_delayed_iputs(root);
-
- return ret;
-
-cleanup_transaction:
- btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n");
-// WARN_ON(1);
- if (current->journal_info == trans)
- current->journal_info = NULL;
- cleanup_transaction(trans, root);
-
- return ret;
-}
-
-/*
- * interface function to delete all the snapshots we have scheduled for deletion
- */
-int btrfs_clean_old_snapshots(struct btrfs_root *root)
-{
- LIST_HEAD(list);
- struct btrfs_fs_info *fs_info = root->fs_info;
-
- spin_lock(&fs_info->trans_lock);
- list_splice_init(&fs_info->dead_roots, &list);
- spin_unlock(&fs_info->trans_lock);
-
- while (!list_empty(&list)) {
- int ret;
-
- root = list_entry(list.next, struct btrfs_root, root_list);
- list_del(&root->root_list);
-
- btrfs_kill_all_delayed_nodes(root);
-
- if (btrfs_header_backref_rev(root->node) <
- BTRFS_MIXED_BACKREF_REV)
- ret = btrfs_drop_snapshot(root, NULL, 0, 0);
- else
- ret = btrfs_drop_snapshot(root, NULL, 1, 0);
- BUG_ON(ret < 0);
- }
- return 0;
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/transaction.h b/ANDROID_3.4.5/fs/btrfs/transaction.h
deleted file mode 100644
index fe27379e..00000000
--- a/ANDROID_3.4.5/fs/btrfs/transaction.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __BTRFS_TRANSACTION__
-#define __BTRFS_TRANSACTION__
-#include "btrfs_inode.h"
-#include "delayed-ref.h"
-
-struct btrfs_transaction {
- u64 transid;
- /*
- * total writers in this transaction, it must be zero before the
- * transaction can end
- */
- atomic_t num_writers;
- atomic_t use_count;
-
- unsigned long num_joined;
-
- spinlock_t commit_lock;
- int in_commit;
- int commit_done;
- int blocked;
- struct list_head list;
- struct extent_io_tree dirty_pages;
- unsigned long start_time;
- wait_queue_head_t writer_wait;
- wait_queue_head_t commit_wait;
- struct list_head pending_snapshots;
- struct btrfs_delayed_ref_root delayed_refs;
- int aborted;
-};
-
-struct btrfs_trans_handle {
- u64 transid;
- u64 bytes_reserved;
- unsigned long use_count;
- unsigned long blocks_reserved;
- unsigned long blocks_used;
- unsigned long delayed_ref_updates;
- struct btrfs_transaction *transaction;
- struct btrfs_block_rsv *block_rsv;
- struct btrfs_block_rsv *orig_rsv;
- int aborted;
-};
-
-struct btrfs_pending_snapshot {
- struct dentry *dentry;
- struct btrfs_root *root;
- struct btrfs_root *snap;
- /* block reservation for the operation */
- struct btrfs_block_rsv block_rsv;
- /* extra metadata reservation for relocation */
- int error;
- bool readonly;
- struct list_head list;
-};
-
-static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
- struct inode *inode)
-{
- BTRFS_I(inode)->last_trans = trans->transaction->transid;
- BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
-}
-
-int btrfs_end_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
- int num_items);
-struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
-struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
-struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root);
-int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
-int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-
-int btrfs_add_dead_root(struct btrfs_root *root);
-int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
-int btrfs_clean_old_snapshots(struct btrfs_root *root);
-int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- int wait_for_unblock);
-int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-void btrfs_throttle(struct btrfs_root *root);
-int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages, int mark);
-int btrfs_write_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages, int mark);
-int btrfs_wait_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages, int mark);
-int btrfs_transaction_blocked(struct btrfs_fs_info *info);
-int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
-void put_transaction(struct btrfs_transaction *transaction);
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/tree-defrag.c b/ANDROID_3.4.5/fs/btrfs/tree-defrag.c
deleted file mode 100644
index 3b580ee8..00000000
--- a/ANDROID_3.4.5/fs/btrfs/tree-defrag.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/sched.h>
-#include "ctree.h"
-#include "disk-io.h"
-#include "print-tree.h"
-#include "transaction.h"
-#include "locking.h"
-
-/* defrag all the leaves in a given btree. If cache_only == 1, don't read
- * things from disk, otherwise read all the leaves and try to get key order to
- * better reflect disk order
- */
-
-int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int cache_only)
-{
- struct btrfs_path *path = NULL;
- struct btrfs_key key;
- int ret = 0;
- int wret;
- int level;
- int is_extent = 0;
- int next_key_ret = 0;
- u64 last_ret = 0;
- u64 min_trans = 0;
-
- if (cache_only)
- goto out;
-
- if (root->fs_info->extent_root == root) {
- /*
- * there's recursion here right now in the tree locking,
- * we can't defrag the extent root without deadlock
- */
- goto out;
- }
-
- if (root->ref_cows == 0 && !is_extent)
- goto out;
-
- if (btrfs_test_opt(root, SSD))
- goto out;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- level = btrfs_header_level(root->node);
-
- if (level == 0)
- goto out;
-
- if (root->defrag_progress.objectid == 0) {
- struct extent_buffer *root_node;
- u32 nritems;
-
- root_node = btrfs_lock_root_node(root);
- btrfs_set_lock_blocking(root_node);
- nritems = btrfs_header_nritems(root_node);
- root->defrag_max.objectid = 0;
- /* from above we know this is not a leaf */
- btrfs_node_key_to_cpu(root_node, &root->defrag_max,
- nritems - 1);
- btrfs_tree_unlock(root_node);
- free_extent_buffer(root_node);
- memset(&key, 0, sizeof(key));
- } else {
- memcpy(&key, &root->defrag_progress, sizeof(key));
- }
-
- path->keep_locks = 1;
- if (cache_only)
- min_trans = root->defrag_trans_start;
-
- ret = btrfs_search_forward(root, &key, NULL, path,
- cache_only, min_trans);
- if (ret < 0)
- goto out;
- if (ret > 0) {
- ret = 0;
- goto out;
- }
- btrfs_release_path(path);
- wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
-
- if (wret < 0) {
- ret = wret;
- goto out;
- }
- if (!path->nodes[1]) {
- ret = 0;
- goto out;
- }
- path->slots[1] = btrfs_header_nritems(path->nodes[1]);
- next_key_ret = btrfs_find_next_key(root, path, &key, 1, cache_only,
- min_trans);
- ret = btrfs_realloc_node(trans, root,
- path->nodes[1], 0,
- cache_only, &last_ret,
- &root->defrag_progress);
- if (ret) {
- WARN_ON(ret == -EAGAIN);
- goto out;
- }
- if (next_key_ret == 0) {
- memcpy(&root->defrag_progress, &key, sizeof(key));
- ret = -EAGAIN;
- }
-out:
- if (path)
- btrfs_free_path(path);
- if (ret == -EAGAIN) {
- if (root->defrag_max.objectid > root->defrag_progress.objectid)
- goto done;
- if (root->defrag_max.type > root->defrag_progress.type)
- goto done;
- if (root->defrag_max.offset > root->defrag_progress.offset)
- goto done;
- ret = 0;
- }
-done:
- if (ret != -EAGAIN) {
- memset(&root->defrag_progress, 0,
- sizeof(root->defrag_progress));
- root->defrag_trans_start = trans->transid;
- }
- return ret;
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/tree-log.c b/ANDROID_3.4.5/fs/btrfs/tree-log.c
deleted file mode 100644
index dce89da9..00000000
--- a/ANDROID_3.4.5/fs/btrfs/tree-log.c
+++ /dev/null
@@ -1,3398 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include "ctree.h"
-#include "transaction.h"
-#include "disk-io.h"
-#include "locking.h"
-#include "print-tree.h"
-#include "compat.h"
-#include "tree-log.h"
-
-/* magic values for the inode_only field in btrfs_log_inode:
- *
- * LOG_INODE_ALL means to log everything
- * LOG_INODE_EXISTS means to log just enough to recreate the inode
- * during log replay
- */
-#define LOG_INODE_ALL 0
-#define LOG_INODE_EXISTS 1
-
-/*
- * directory trouble cases
- *
- * 1) on rename or unlink, if the inode being unlinked isn't in the fsync
- * log, we must force a full commit before doing an fsync of the directory
- * where the unlink was done.
- * ---> record transid of last unlink/rename per directory
- *
- * mkdir foo/some_dir
- * normal commit
- * rename foo/some_dir foo2/some_dir
- * mkdir foo/some_dir
- * fsync foo/some_dir/some_file
- *
- * The fsync above will unlink the original some_dir without recording
- * it in its new location (foo2). After a crash, some_dir will be gone
- * unless the fsync of some_file forces a full commit
- *
- * 2) we must log any new names for any file or dir that is in the fsync
- * log. ---> check inode while renaming/linking.
- *
- * 2a) we must log any new names for any file or dir during rename
- * when the directory they are being removed from was logged.
- * ---> check inode and old parent dir during rename
- *
- * 2a is actually the more important variant. With the extra logging
- * a crash might unlink the old name without recreating the new one
- *
- * 3) after a crash, we must go through any directories with a link count
- * of zero and redo the rm -rf
- *
- * mkdir f1/foo
- * normal commit
- * rm -rf f1/foo
- * fsync(f1)
- *
- * The directory f1 was fully removed from the FS, but fsync was never
- * called on f1, only its parent dir. After a crash the rm -rf must
- * be replayed. This must be able to recurse down the entire
- * directory tree. The inode link count fixup code takes care of the
- * ugly details.
- */
-
-/*
- * stages for the tree walking. The first
- * stage (0) is to only pin down the blocks we find
- * the second stage (1) is to make sure that all the inodes
- * we find in the log are created in the subvolume.
- *
- * The last stage is to deal with directories and links and extents
- * and all the other fun semantics
- */
-#define LOG_WALK_PIN_ONLY 0
-#define LOG_WALK_REPLAY_INODES 1
-#define LOG_WALK_REPLAY_ALL 2
-
-static int btrfs_log_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- int inode_only);
-static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, u64 objectid);
-static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_root *log,
- struct btrfs_path *path,
- u64 dirid, int del_all);
-
-/*
- * tree logging is a special write ahead log used to make sure that
- * fsyncs and O_SYNCs can happen without doing full tree commits.
- *
- * Full tree commits are expensive because they require commonly
- * modified blocks to be recowed, creating many dirty pages in the
- * extent tree and a 4x-6x higher write load than ext3.
- *
- * Instead of doing a tree commit on every fsync, we use the
- * key ranges and transaction ids to find items for a given file or directory
- * that have changed in this transaction. Those items are copied into
- * a special tree (one per subvolume root), that tree is written to disk
- * and then the fsync is considered complete.
- *
- * After a crash, items are copied out of the log-tree back into the
- * subvolume tree. Any file data extents found are recorded in the extent
- * allocation tree, and the log-tree freed.
- *
- * The log tree is read three times: once to pin down all the extents it is
- * using in ram, once to create all the inodes logged in the tree
- * and once to do all the other items.
- */
-
-/*
- * start a sub transaction and setup the log tree
- * this increments the log tree writer count to make the people
- * syncing the tree wait for us to finish
- */
-static int start_log_trans(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- int ret;
- int err = 0;
-
- mutex_lock(&root->log_mutex);
- if (root->log_root) {
- if (!root->log_start_pid) {
- root->log_start_pid = current->pid;
- root->log_multiple_pids = false;
- } else if (root->log_start_pid != current->pid) {
- root->log_multiple_pids = true;
- }
-
- root->log_batch++;
- atomic_inc(&root->log_writers);
- mutex_unlock(&root->log_mutex);
- return 0;
- }
- root->log_multiple_pids = false;
- root->log_start_pid = current->pid;
- mutex_lock(&root->fs_info->tree_log_mutex);
- if (!root->fs_info->log_root_tree) {
- ret = btrfs_init_log_root_tree(trans, root->fs_info);
- if (ret)
- err = ret;
- }
- if (err == 0 && !root->log_root) {
- ret = btrfs_add_log_tree(trans, root);
- if (ret)
- err = ret;
- }
- mutex_unlock(&root->fs_info->tree_log_mutex);
- root->log_batch++;
- atomic_inc(&root->log_writers);
- mutex_unlock(&root->log_mutex);
- return err;
-}
-
-/*
- * returns 0 if there was a log transaction running and we were able
- * to join, or returns -ENOENT if there were no transactions
- * in progress
- */
-static int join_running_log_trans(struct btrfs_root *root)
-{
- int ret = -ENOENT;
-
- smp_mb();
- if (!root->log_root)
- return -ENOENT;
-
- mutex_lock(&root->log_mutex);
- if (root->log_root) {
- ret = 0;
- atomic_inc(&root->log_writers);
- }
- mutex_unlock(&root->log_mutex);
- return ret;
-}
-
-/*
- * This either makes the current running log transaction wait
- * until you call btrfs_end_log_trans() or it makes any future
- * log transactions wait until you call btrfs_end_log_trans()
- */
-int btrfs_pin_log_trans(struct btrfs_root *root)
-{
- int ret = -ENOENT;
-
- mutex_lock(&root->log_mutex);
- atomic_inc(&root->log_writers);
- mutex_unlock(&root->log_mutex);
- return ret;
-}
-
-/*
- * indicate we're done making changes to the log tree
- * and wake up anyone waiting to do a sync
- */
-void btrfs_end_log_trans(struct btrfs_root *root)
-{
- if (atomic_dec_and_test(&root->log_writers)) {
- smp_mb();
- if (waitqueue_active(&root->log_writer_wait))
- wake_up(&root->log_writer_wait);
- }
-}
-
-
-/*
- * the walk control struct is used to pass state down the chain when
- * processing the log tree. The stage field tells us which part
- * of the log tree processing we are currently doing. The others
- * are state fields used for that specific part
- */
-struct walk_control {
- /* should we free the extent on disk when done? This is used
- * at transaction commit time while freeing a log tree
- */
- int free;
-
- /* should we write out the extent buffer? This is used
- * while flushing the log tree to disk during a sync
- */
- int write;
-
- /* should we wait for the extent buffer io to finish? Also used
- * while flushing the log tree to disk for a sync
- */
- int wait;
-
- /* pin only walk, we record which extents on disk belong to the
- * log trees
- */
- int pin;
-
- /* what stage of the replay code we're currently in */
- int stage;
-
- /* the root we are currently replaying */
- struct btrfs_root *replay_dest;
-
- /* the trans handle for the current replay */
- struct btrfs_trans_handle *trans;
-
- /* the function that gets used to process blocks we find in the
- * tree. Note the extent_buffer might not be up to date when it is
- * passed in, and it must be checked or read if you need the data
- * inside it
- */
- int (*process_func)(struct btrfs_root *log, struct extent_buffer *eb,
- struct walk_control *wc, u64 gen);
-};
-
-/*
- * process_func used to pin down extents, write them or wait on them
- */
-static int process_one_buffer(struct btrfs_root *log,
- struct extent_buffer *eb,
- struct walk_control *wc, u64 gen)
-{
- if (wc->pin)
- btrfs_pin_extent_for_log_replay(wc->trans,
- log->fs_info->extent_root,
- eb->start, eb->len);
-
- if (btrfs_buffer_uptodate(eb, gen, 0)) {
- if (wc->write)
- btrfs_write_tree_block(eb);
- if (wc->wait)
- btrfs_wait_tree_block_writeback(eb);
- }
- return 0;
-}
-
-/*
- * Item overwrite used by replay and tree logging. eb, slot and key all refer
- * to the src data we are copying out.
- *
- * root is the tree we are copying into, and path is a scratch
- * path for use in this function (it should be released on entry and
- * will be released on exit).
- *
- * If the key is already in the destination tree the existing item is
- * overwritten. If the existing item isn't big enough, it is extended.
- * If it is too large, it is truncated.
- *
- * If the key isn't in the destination yet, a new item is inserted.
- */
-static noinline int overwrite_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *eb, int slot,
- struct btrfs_key *key)
-{
- int ret;
- u32 item_size;
- u64 saved_i_size = 0;
- int save_old_i_size = 0;
- unsigned long src_ptr;
- unsigned long dst_ptr;
- int overwrite_root = 0;
-
- if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
- overwrite_root = 1;
-
- item_size = btrfs_item_size_nr(eb, slot);
- src_ptr = btrfs_item_ptr_offset(eb, slot);
-
- /* look for the key in the destination tree */
- ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
- if (ret == 0) {
- char *src_copy;
- char *dst_copy;
- u32 dst_size = btrfs_item_size_nr(path->nodes[0],
- path->slots[0]);
- if (dst_size != item_size)
- goto insert;
-
- if (item_size == 0) {
- btrfs_release_path(path);
- return 0;
- }
- dst_copy = kmalloc(item_size, GFP_NOFS);
- src_copy = kmalloc(item_size, GFP_NOFS);
- if (!dst_copy || !src_copy) {
- btrfs_release_path(path);
- kfree(dst_copy);
- kfree(src_copy);
- return -ENOMEM;
- }
-
- read_extent_buffer(eb, src_copy, src_ptr, item_size);
-
- dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
- read_extent_buffer(path->nodes[0], dst_copy, dst_ptr,
- item_size);
- ret = memcmp(dst_copy, src_copy, item_size);
-
- kfree(dst_copy);
- kfree(src_copy);
- /*
- * they have the same contents, just return, this saves
- * us from cowing blocks in the destination tree and doing
- * extra writes that may not have been done by a previous
- * sync
- */
- if (ret == 0) {
- btrfs_release_path(path);
- return 0;
- }
-
- }
-insert:
- btrfs_release_path(path);
- /* try to insert the key into the destination tree */
- ret = btrfs_insert_empty_item(trans, root, path,
- key, item_size);
-
- /* make sure any existing item is the correct size */
- if (ret == -EEXIST) {
- u32 found_size;
- found_size = btrfs_item_size_nr(path->nodes[0],
- path->slots[0]);
- if (found_size > item_size)
- btrfs_truncate_item(trans, root, path, item_size, 1);
- else if (found_size < item_size)
- btrfs_extend_item(trans, root, path,
- item_size - found_size);
- } else if (ret) {
- return ret;
- }
- dst_ptr = btrfs_item_ptr_offset(path->nodes[0],
- path->slots[0]);
-
- /* don't overwrite an existing inode if the generation number
- * was logged as zero. This is done when the tree logging code
- * is just logging an inode to make sure it exists after recovery.
- *
- * Also, don't overwrite i_size on directories during replay.
- * log replay inserts and removes directory items based on the
- * state of the tree found in the subvolume, and i_size is modified
- * as it goes
- */
- if (key->type == BTRFS_INODE_ITEM_KEY && ret == -EEXIST) {
- struct btrfs_inode_item *src_item;
- struct btrfs_inode_item *dst_item;
-
- src_item = (struct btrfs_inode_item *)src_ptr;
- dst_item = (struct btrfs_inode_item *)dst_ptr;
-
- if (btrfs_inode_generation(eb, src_item) == 0)
- goto no_copy;
-
- if (overwrite_root &&
- S_ISDIR(btrfs_inode_mode(eb, src_item)) &&
- S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) {
- save_old_i_size = 1;
- saved_i_size = btrfs_inode_size(path->nodes[0],
- dst_item);
- }
- }
-
- copy_extent_buffer(path->nodes[0], eb, dst_ptr,
- src_ptr, item_size);
-
- if (save_old_i_size) {
- struct btrfs_inode_item *dst_item;
- dst_item = (struct btrfs_inode_item *)dst_ptr;
- btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size);
- }
-
- /* make sure the generation is filled in */
- if (key->type == BTRFS_INODE_ITEM_KEY) {
- struct btrfs_inode_item *dst_item;
- dst_item = (struct btrfs_inode_item *)dst_ptr;
- if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) {
- btrfs_set_inode_generation(path->nodes[0], dst_item,
- trans->transid);
- }
- }
-no_copy:
- btrfs_mark_buffer_dirty(path->nodes[0]);
- btrfs_release_path(path);
- return 0;
-}
-
-/*
- * simple helper to read an inode off the disk from a given root
- * This can only be called for subvolume roots and not for the log
- */
-static noinline struct inode *read_one_inode(struct btrfs_root *root,
- u64 objectid)
-{
- struct btrfs_key key;
- struct inode *inode;
-
- key.objectid = objectid;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
- inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
- if (IS_ERR(inode)) {
- inode = NULL;
- } else if (is_bad_inode(inode)) {
- iput(inode);
- inode = NULL;
- }
- return inode;
-}
-
-/* replays a single extent in 'eb' at 'slot' with 'key' into the
- * subvolume 'root'. path is released on entry and should be released
- * on exit.
- *
- * extents in the log tree have not been allocated out of the extent
- * tree yet. So, this completes the allocation, taking a reference
- * as required if the extent already exists or creating a new extent
- * if it isn't in the extent allocation tree yet.
- *
- * The extent is inserted into the file, dropping any existing extents
- * from the file that overlap the new one.
- */
-static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *eb, int slot,
- struct btrfs_key *key)
-{
- int found_type;
- u64 mask = root->sectorsize - 1;
- u64 extent_end;
- u64 alloc_hint;
- u64 start = key->offset;
- u64 saved_nbytes;
- struct btrfs_file_extent_item *item;
- struct inode *inode = NULL;
- unsigned long size;
- int ret = 0;
-
- item = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
- found_type = btrfs_file_extent_type(eb, item);
-
- if (found_type == BTRFS_FILE_EXTENT_REG ||
- found_type == BTRFS_FILE_EXTENT_PREALLOC)
- extent_end = start + btrfs_file_extent_num_bytes(eb, item);
- else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
- size = btrfs_file_extent_inline_len(eb, item);
- extent_end = (start + size + mask) & ~mask;
- } else {
- ret = 0;
- goto out;
- }
-
- inode = read_one_inode(root, key->objectid);
- if (!inode) {
- ret = -EIO;
- goto out;
- }
-
- /*
- * first check to see if we already have this extent in the
- * file. This must be done before the btrfs_drop_extents run
- * so we don't try to drop this extent.
- */
- ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
- start, 0);
-
- if (ret == 0 &&
- (found_type == BTRFS_FILE_EXTENT_REG ||
- found_type == BTRFS_FILE_EXTENT_PREALLOC)) {
- struct btrfs_file_extent_item cmp1;
- struct btrfs_file_extent_item cmp2;
- struct btrfs_file_extent_item *existing;
- struct extent_buffer *leaf;
-
- leaf = path->nodes[0];
- existing = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
-
- read_extent_buffer(eb, &cmp1, (unsigned long)item,
- sizeof(cmp1));
- read_extent_buffer(leaf, &cmp2, (unsigned long)existing,
- sizeof(cmp2));
-
- /*
- * we already have a pointer to this exact extent,
- * we don't have to do anything
- */
- if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) {
- btrfs_release_path(path);
- goto out;
- }
- }
- btrfs_release_path(path);
-
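- /*
- * dropping and re-inserting extents below changes i_blocks, so save
- * the current byte count and restore it once this extent is replayed
- */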
- saved_nbytes = inode_get_bytes(inode);
- /* drop any overlapping extents */
- ret = btrfs_drop_extents(trans, inode, start, extent_end,
- &alloc_hint, 1);
- BUG_ON(ret);
-
- if (found_type == BTRFS_FILE_EXTENT_REG ||
- found_type == BTRFS_FILE_EXTENT_PREALLOC) {
- u64 offset;
- unsigned long dest_offset;
- struct btrfs_key ins;
-
- ret = btrfs_insert_empty_item(trans, root, path, key,
- sizeof(*item));
- BUG_ON(ret);
- dest_offset = btrfs_item_ptr_offset(path->nodes[0],
- path->slots[0]);
- copy_extent_buffer(path->nodes[0], eb, dest_offset,
- (unsigned long)item, sizeof(*item));
-
- ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
- ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
- ins.type = BTRFS_EXTENT_ITEM_KEY;
- offset = key->offset - btrfs_file_extent_offset(eb, item);
-
- if (ins.objectid > 0) {
- u64 csum_start;
- u64 csum_end;
- LIST_HEAD(ordered_sums);
- /*
- * is this extent already allocated in the extent
- * allocation tree? If so, just add a reference
- */
- ret = btrfs_lookup_extent(root, ins.objectid,
- ins.offset);
- if (ret == 0) {
- ret = btrfs_inc_extent_ref(trans, root,
- ins.objectid, ins.offset,
- 0, root->root_key.objectid,
- key->objectid, offset, 0);
- BUG_ON(ret);
- } else {
- /*
- * insert the extent pointer in the extent
- * allocation tree
- */
- ret = btrfs_alloc_logged_file_extent(trans,
- root, root->root_key.objectid,
- key->objectid, offset, &ins);
- BUG_ON(ret);
- }
- btrfs_release_path(path);
-
- if (btrfs_file_extent_compression(eb, item)) {
- csum_start = ins.objectid;
- csum_end = csum_start + ins.offset;
- } else {
- csum_start = ins.objectid +
- btrfs_file_extent_offset(eb, item);
- csum_end = csum_start +
- btrfs_file_extent_num_bytes(eb, item);
- }
-
- ret = btrfs_lookup_csums_range(root->log_root,
- csum_start, csum_end - 1,
- &ordered_sums, 0);
- BUG_ON(ret);
- while (!list_empty(&ordered_sums)) {
- struct btrfs_ordered_sum *sums;
- sums = list_entry(ordered_sums.next,
- struct btrfs_ordered_sum,
- list);
- ret = btrfs_csum_file_blocks(trans,
- root->fs_info->csum_root,
- sums);
- BUG_ON(ret);
- list_del(&sums->list);
- kfree(sums);
- }
- } else {
- btrfs_release_path(path);
- }
- } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
- /* inline extents are easy, we just overwrite them */
- ret = overwrite_item(trans, root, path, eb, slot, key);
- BUG_ON(ret);
- }
-
- inode_set_bytes(inode, saved_nbytes);
- btrfs_update_inode(trans, root, inode);
-out:
- if (inode)
- iput(inode);
- return ret;
-}
-
-/*
- * when cleaning up conflicts between the directory names in the
- * subvolume, directory names in the log and directory names in the
- * inode back references, we may have to unlink inodes from directories.
- *
- * This is a helper function to do the unlink of a specific directory
- * item
- */
-static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct inode *dir,
- struct btrfs_dir_item *di)
-{
- struct inode *inode;
- char *name;
- int name_len;
- struct extent_buffer *leaf;
- struct btrfs_key location;
- int ret;
-
- leaf = path->nodes[0];
-
- btrfs_dir_item_key_to_cpu(leaf, di, &location);
- name_len = btrfs_dir_name_len(leaf, di);
- name = kmalloc(name_len, GFP_NOFS);
- if (!name)
- return -ENOMEM;
-
- read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
- btrfs_release_path(path);
-
- inode = read_one_inode(root, location.objectid);
- if (!inode) {
- kfree(name);
- return -EIO;
- }
-
- ret = link_to_fixup_dir(trans, root, path, location.objectid);
- BUG_ON(ret);
-
- ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
- BUG_ON(ret);
- kfree(name);
-
- iput(inode);
-
- btrfs_run_delayed_items(trans, root);
- return ret;
-}
-
-/*
- * helper function to see if a given name and sequence number found
- * in an inode back reference are already in a directory and correctly
- * point to this inode
- */
-static noinline int inode_in_dir(struct btrfs_root *root,
- struct btrfs_path *path,
- u64 dirid, u64 objectid, u64 index,
- const char *name, int name_len)
-{
- struct btrfs_dir_item *di;
- struct btrfs_key location;
- int match = 0;
-
- di = btrfs_lookup_dir_index_item(NULL, root, path, dirid,
- index, name, name_len, 0);
- if (di && !IS_ERR(di)) {
- btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
- if (location.objectid != objectid)
- goto out;
- } else
- goto out;
- btrfs_release_path(path);
-
- di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0);
- if (di && !IS_ERR(di)) {
- btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
- if (location.objectid != objectid)
- goto out;
- } else
- goto out;
- match = 1;
-out:
- btrfs_release_path(path);
- return match;
-}
-
-/*
- * helper function to check a log tree for a named back reference in
- * an inode. This is used to decide if a back reference that is
- * found in the subvolume conflicts with what we find in the log.
- *
- * inode backreferences may have multiple refs in a single item,
- * during replay we process one reference at a time, and we don't
- * want to delete valid links to a file from the subvolume if that
- * link is also in the log.
- */
-static noinline int backref_in_log(struct btrfs_root *log,
- struct btrfs_key *key,
- char *name, int namelen)
-{
- struct btrfs_path *path;
- struct btrfs_inode_ref *ref;
- unsigned long ptr;
- unsigned long ptr_end;
- unsigned long name_ptr;
- int found_name_len;
- int item_size;
- int ret;
- int match = 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
- if (ret != 0)
- goto out;
-
- item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
- ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
- ptr_end = ptr + item_size;
- while (ptr < ptr_end) {
- ref = (struct btrfs_inode_ref *)ptr;
- found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref);
- if (found_name_len == namelen) {
- name_ptr = (unsigned long)(ref + 1);
- ret = memcmp_extent_buffer(path->nodes[0], name,
- name_ptr, namelen);
- if (ret == 0) {
- match = 1;
- goto out;
- }
- }
- ptr = (unsigned long)(ref + 1) + found_name_len;
- }
-out:
- btrfs_free_path(path);
- return match;
-}
-
-
-/*
- * replay one inode back reference item found in the log tree.
- * eb, slot and key refer to the buffer and key found in the log tree.
- * root is the destination we are replaying into, and path is for temp
- * use by this function. (it should be released on return).
- */
-static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_root *log,
- struct btrfs_path *path,
- struct extent_buffer *eb, int slot,
- struct btrfs_key *key)
-{
- struct btrfs_inode_ref *ref;
- struct btrfs_dir_item *di;
- struct inode *dir;
- struct inode *inode;
- unsigned long ref_ptr;
- unsigned long ref_end;
- char *name;
- int namelen;
- int ret;
- int search_done = 0;
-
- /*
- * it is possible that we didn't log all the parent directories
- * for a given inode. If we don't find the dir, just don't
- * copy the back ref in. The link count fixup code will take
- * care of the rest
- */
- dir = read_one_inode(root, key->offset);
- if (!dir)
- return -ENOENT;
-
- inode = read_one_inode(root, key->objectid);
- if (!inode) {
- iput(dir);
- return -EIO;
- }
-
- ref_ptr = btrfs_item_ptr_offset(eb, slot);
- ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
-
-again:
- ref = (struct btrfs_inode_ref *)ref_ptr;
-
- namelen = btrfs_inode_ref_name_len(eb, ref);
- name = kmalloc(namelen, GFP_NOFS);
- BUG_ON(!name);
-
- read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen);
-
- /* if we already have a perfect match, we're done */
- if (inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
- btrfs_inode_ref_index(eb, ref),
- name, namelen)) {
- goto out;
- }
-
- /*
- * look for a conflicting back reference in the metadata.
- * if we find one we have to unlink that name of the file
- * before we add our new link. Later on, we overwrite any
- * existing back reference, and we don't want to create
- * dangling pointers in the directory.
- */
-
- if (search_done)
- goto insert;
-
- ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
- if (ret == 0) {
- char *victim_name;
- int victim_name_len;
- struct btrfs_inode_ref *victim_ref;
- unsigned long ptr;
- unsigned long ptr_end;
- struct extent_buffer *leaf = path->nodes[0];
-
- /* are we trying to overwrite a back ref for the root directory?
- * if so, just jump out, we're done
- */
- if (key->objectid == key->offset)
- goto out_nowrite;
-
- /* check all the names in this back reference to see
- * if they are in the log. if so, we allow them to stay
- * otherwise they must be unlinked as a conflict
- */
- ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
- ptr_end = ptr + btrfs_item_size_nr(leaf, path->slots[0]);
- while (ptr < ptr_end) {
- victim_ref = (struct btrfs_inode_ref *)ptr;
- victim_name_len = btrfs_inode_ref_name_len(leaf,
- victim_ref);
- victim_name = kmalloc(victim_name_len, GFP_NOFS);
- BUG_ON(!victim_name);
-
- read_extent_buffer(leaf, victim_name,
- (unsigned long)(victim_ref + 1),
- victim_name_len);
-
- if (!backref_in_log(log, key, victim_name,
- victim_name_len)) {
- btrfs_inc_nlink(inode);
- btrfs_release_path(path);
-
- ret = btrfs_unlink_inode(trans, root, dir,
- inode, victim_name,
- victim_name_len);
- btrfs_run_delayed_items(trans, root);
- }
- kfree(victim_name);
- ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
- }
- BUG_ON(ret);
-
- /*
- * NOTE: we have searched the root tree and checked the
- * corresponding ref, so it does not need to be checked again.
- */
- search_done = 1;
- }
- btrfs_release_path(path);
-
- /* look for a conflicting sequence number */
- di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
- btrfs_inode_ref_index(eb, ref),
- name, namelen, 0);
- if (di && !IS_ERR(di)) {
- ret = drop_one_dir_item(trans, root, path, dir, di);
- BUG_ON(ret);
- }
- btrfs_release_path(path);
-
- /* look for a conflicting name */
- di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
- name, namelen, 0);
- if (di && !IS_ERR(di)) {
- ret = drop_one_dir_item(trans, root, path, dir, di);
- BUG_ON(ret);
- }
- btrfs_release_path(path);
-
-insert:
- /* insert our name */
- ret = btrfs_add_link(trans, dir, inode, name, namelen, 0,
- btrfs_inode_ref_index(eb, ref));
- BUG_ON(ret);
-
- btrfs_update_inode(trans, root, inode);
-
-out:
- ref_ptr = (unsigned long)(ref + 1) + namelen;
- kfree(name);
- if (ref_ptr < ref_end)
- goto again;
-
- /* finally write the back reference in the inode */
- ret = overwrite_item(trans, root, path, eb, slot, key);
- BUG_ON(ret);
-
-out_nowrite:
- btrfs_release_path(path);
- iput(dir);
- iput(inode);
- return 0;
-}
-
-static int insert_orphan_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 offset)
-{
- int ret;
- ret = btrfs_find_orphan_item(root, offset);
- if (ret > 0)
- ret = btrfs_insert_orphan_item(trans, root, offset);
- return ret;
-}
-
-
-/*
- * There are a few corners where the link count of the file can't
- * be properly maintained during replay. So, instead of adding
- * lots of complexity to the log code, we just scan the backrefs
- * for any file that has been through replay.
- *
- * The scan will update the link count on the inode to reflect the
- * number of back refs found. If it goes down to zero, the iput
- * will free the inode.
- */
-static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode)
-{
- struct btrfs_path *path;
- int ret;
- struct btrfs_key key;
- u64 nlink = 0;
- unsigned long ptr;
- unsigned long ptr_end;
- int name_len;
- u64 ino = btrfs_ino(inode);
-
- key.objectid = ino;
- key.type = BTRFS_INODE_REF_KEY;
- key.offset = (u64)-1;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- while (1) {
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- break;
- if (ret > 0) {
- if (path->slots[0] == 0)
- break;
- path->slots[0]--;
- }
- btrfs_item_key_to_cpu(path->nodes[0], &key,
- path->slots[0]);
- if (key.objectid != ino ||
- key.type != BTRFS_INODE_REF_KEY)
- break;
- ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
- ptr_end = ptr + btrfs_item_size_nr(path->nodes[0],
- path->slots[0]);
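- /* a single INODE_REF item can pack several back refs; count every name */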
- while (ptr < ptr_end) {
- struct btrfs_inode_ref *ref;
-
- ref = (struct btrfs_inode_ref *)ptr;
- name_len = btrfs_inode_ref_name_len(path->nodes[0],
- ref);
- ptr = (unsigned long)(ref + 1) + name_len;
- nlink++;
- }
-
- if (key.offset == 0)
- break;
- key.offset--;
- btrfs_release_path(path);
- }
- btrfs_release_path(path);
- if (nlink != inode->i_nlink) {
- set_nlink(inode, nlink);
- btrfs_update_inode(trans, root, inode);
- }
- BTRFS_I(inode)->index_cnt = (u64)-1;
-
- if (inode->i_nlink == 0) {
- if (S_ISDIR(inode->i_mode)) {
- ret = replay_dir_deletes(trans, root, NULL, path,
- ino, 1);
- BUG_ON(ret);
- }
- ret = insert_orphan_item(trans, root, ino);
- BUG_ON(ret);
- }
- btrfs_free_path(path);
-
- return 0;
-}
-
-static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path)
-{
- int ret;
- struct btrfs_key key;
- struct inode *inode;
-
- key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
- key.type = BTRFS_ORPHAN_ITEM_KEY;
- key.offset = (u64)-1;
- while (1) {
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0)
- break;
-
- if (ret == 1) {
- if (path->slots[0] == 0)
- break;
- path->slots[0]--;
- }
-
- btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
- if (key.objectid != BTRFS_TREE_LOG_FIXUP_OBJECTID ||
- key.type != BTRFS_ORPHAN_ITEM_KEY)
- break;
-
- ret = btrfs_del_item(trans, root, path);
- if (ret)
- goto out;
-
- btrfs_release_path(path);
- inode = read_one_inode(root, key.offset);
- if (!inode)
- return -EIO;
-
- ret = fixup_inode_link_count(trans, root, inode);
- BUG_ON(ret);
-
- iput(inode);
-
- /*
- * fixup on a directory may create new entries,
- * make sure we always look for the highest possible
- * offset
- */
- key.offset = (u64)-1;
- }
- ret = 0;
-out:
- btrfs_release_path(path);
- return ret;
-}
-
-
-/*
- * record a given inode in the fixup dir so we can check its link
- * count when replay is done. The link count is incremented here
- * so the inode won't go away until we check it
- */
-static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 objectid)
-{
- struct btrfs_key key;
- int ret = 0;
- struct inode *inode;
-
- inode = read_one_inode(root, objectid);
- if (!inode)
- return -EIO;
-
- key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
- btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
- key.offset = objectid;
-
- ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
-
- btrfs_release_path(path);
- if (ret == 0) {
- btrfs_inc_nlink(inode);
- btrfs_update_inode(trans, root, inode);
- } else if (ret == -EEXIST) {
- ret = 0;
- } else {
- BUG();
- }
- iput(inode);
-
- return ret;
-}
-
-/*
- * when replaying the log for a directory, we only insert names
- * for inodes that actually exist. This means an fsync on a directory
- * does not implicitly fsync all the new files in it
- */
-static noinline int insert_one_name(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 dirid, u64 index,
- char *name, int name_len, u8 type,
- struct btrfs_key *location)
-{
- struct inode *inode;
- struct inode *dir;
- int ret;
-
- inode = read_one_inode(root, location->objectid);
- if (!inode)
- return -ENOENT;
-
- dir = read_one_inode(root, dirid);
- if (!dir) {
- iput(inode);
- return -EIO;
- }
- ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index);
-
- /* FIXME, put inode into FIXUP list */
-
- iput(inode);
- iput(dir);
- return ret;
-}
-
-/*
- * take a single entry in a log directory item and replay it into
- * the subvolume.
- *
- * if a conflicting item exists in the subdirectory already,
- * the inode it points to is unlinked and put into the link count
- * fix up tree.
- *
- * If a name from the log points to a file or directory that does
- * not exist in the FS, it is skipped. fsyncs on directories
- * do not force down inodes inside that directory, just changes to the
- * names or unlinks in a directory.
- */
-static noinline int replay_one_name(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *eb,
- struct btrfs_dir_item *di,
- struct btrfs_key *key)
-{
- char *name;
- int name_len;
- struct btrfs_dir_item *dst_di;
- struct btrfs_key found_key;
- struct btrfs_key log_key;
- struct inode *dir;
- u8 log_type;
- int exists;
- int ret;
-
- dir = read_one_inode(root, key->objectid);
- if (!dir)
- return -EIO;
-
- name_len = btrfs_dir_name_len(eb, di);
- name = kmalloc(name_len, GFP_NOFS);
- if (!name)
- return -ENOMEM;
-
- log_type = btrfs_dir_type(eb, di);
- read_extent_buffer(eb, name, (unsigned long)(di + 1),
- name_len);
-
- btrfs_dir_item_key_to_cpu(eb, di, &log_key);
- exists = btrfs_lookup_inode(trans, root, path, &log_key, 0);
- if (exists == 0)
- exists = 1;
- else
- exists = 0;
- btrfs_release_path(path);
-
- if (key->type == BTRFS_DIR_ITEM_KEY) {
- dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid,
- name, name_len, 1);
- } else if (key->type == BTRFS_DIR_INDEX_KEY) {
- dst_di = btrfs_lookup_dir_index_item(trans, root, path,
- key->objectid,
- key->offset, name,
- name_len, 1);
- } else {
- BUG();
- }
- if (IS_ERR_OR_NULL(dst_di)) {
- /* we need a sequence number to insert, so we only
- * do inserts for the BTRFS_DIR_INDEX_KEY types
- */
- if (key->type != BTRFS_DIR_INDEX_KEY)
- goto out;
- goto insert;
- }
-
- btrfs_dir_item_key_to_cpu(path->nodes[0], dst_di, &found_key);
- /* the existing item matches the logged item */
- if (found_key.objectid == log_key.objectid &&
- found_key.type == log_key.type &&
- found_key.offset == log_key.offset &&
- btrfs_dir_type(path->nodes[0], dst_di) == log_type) {
- goto out;
- }
-
- /*
- * don't drop the conflicting directory entry if the inode
- * for the new entry doesn't exist
- */
- if (!exists)
- goto out;
-
- ret = drop_one_dir_item(trans, root, path, dir, dst_di);
- BUG_ON(ret);
-
- if (key->type == BTRFS_DIR_INDEX_KEY)
- goto insert;
-out:
- btrfs_release_path(path);
- kfree(name);
- iput(dir);
- return 0;
-
-insert:
- btrfs_release_path(path);
- ret = insert_one_name(trans, root, path, key->objectid, key->offset,
- name, name_len, log_type, &log_key);
-
- BUG_ON(ret && ret != -ENOENT);
- goto out;
-}
-
-/*
- * find all the names in a directory item and reconcile them into
- * the subvolume. Only BTRFS_DIR_ITEM_KEY types will have more than
- * one name in a directory item, but the same code gets used for
- * both directory index types
- */
-static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct extent_buffer *eb, int slot,
- struct btrfs_key *key)
-{
- int ret;
- u32 item_size = btrfs_item_size_nr(eb, slot);
- struct btrfs_dir_item *di;
- int name_len;
- unsigned long ptr;
- unsigned long ptr_end;
-
- ptr = btrfs_item_ptr_offset(eb, slot);
- ptr_end = ptr + item_size;
- while (ptr < ptr_end) {
- di = (struct btrfs_dir_item *)ptr;
- if (verify_dir_item(root, eb, di))
- return -EIO;
- name_len = btrfs_dir_name_len(eb, di);
- ret = replay_one_name(trans, root, path, eb, di, key);
- BUG_ON(ret);
- ptr = (unsigned long)(di + 1);
- ptr += name_len;
- }
- return 0;
-}
-
-/*
- * directory replay has two parts. There are the standard directory
- * items in the log copied from the subvolume, and range items
- * created in the log while the subvolume was logged.
- *
- * The range items tell us which parts of the key space the log
- * is authoritative for. During replay, if a key in the subvolume
- * directory is in a logged range item, but not actually in the log,
- * that means it was deleted from the directory before the fsync
- * and should be removed.
- */
-static noinline int find_dir_range(struct btrfs_root *root,
- struct btrfs_path *path,
- u64 dirid, int key_type,
- u64 *start_ret, u64 *end_ret)
-{
- struct btrfs_key key;
- u64 found_end;
- struct btrfs_dir_log_item *item;
- int ret;
- int nritems;
-
- if (*start_ret == (u64)-1)
- return 1;
-
- key.objectid = dirid;
- key.type = key_type;
- key.offset = *start_ret;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- if (ret > 0) {
- if (path->slots[0] == 0)
- goto out;
- path->slots[0]--;
- }
- if (ret != 0)
- btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
-
- if (key.type != key_type || key.objectid != dirid) {
- ret = 1;
- goto next;
- }
- item = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_dir_log_item);
- found_end = btrfs_dir_log_end(path->nodes[0], item);
-
- if (*start_ret >= key.offset && *start_ret <= found_end) {
- ret = 0;
- *start_ret = key.offset;
- *end_ret = found_end;
- goto out;
- }
- ret = 1;
-next:
- /* check the next slot in the tree to see if it is a valid item */
- nritems = btrfs_header_nritems(path->nodes[0]);
- if (path->slots[0] >= nritems) {
- ret = btrfs_next_leaf(root, path);
- if (ret)
- goto out;
- } else {
- path->slots[0]++;
- }
-
- btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
-
- if (key.type != key_type || key.objectid != dirid) {
- ret = 1;
- goto out;
- }
- item = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_dir_log_item);
- found_end = btrfs_dir_log_end(path->nodes[0], item);
- *start_ret = key.offset;
- *end_ret = found_end;
- ret = 0;
-out:
- btrfs_release_path(path);
- return ret;
-}
-
-/*
- * this looks for a given directory item in the log. If the directory
- * item is not in the log, the item is removed and the inode it points
- * to is unlinked
- */
-static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_root *log,
- struct btrfs_path *path,
- struct btrfs_path *log_path,
- struct inode *dir,
- struct btrfs_key *dir_key)
-{
- int ret;
- struct extent_buffer *eb;
- int slot;
- u32 item_size;
- struct btrfs_dir_item *di;
- struct btrfs_dir_item *log_di;
- int name_len;
- unsigned long ptr;
- unsigned long ptr_end;
- char *name;
- struct inode *inode;
- struct btrfs_key location;
-
-again:
- eb = path->nodes[0];
- slot = path->slots[0];
- item_size = btrfs_item_size_nr(eb, slot);
- ptr = btrfs_item_ptr_offset(eb, slot);
- ptr_end = ptr + item_size;
- while (ptr < ptr_end) {
- di = (struct btrfs_dir_item *)ptr;
- if (verify_dir_item(root, eb, di)) {
- ret = -EIO;
- goto out;
- }
-
- name_len = btrfs_dir_name_len(eb, di);
- name = kmalloc(name_len, GFP_NOFS);
- if (!name) {
- ret = -ENOMEM;
- goto out;
- }
- read_extent_buffer(eb, name, (unsigned long)(di + 1),
- name_len);
- log_di = NULL;
- if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) {
- log_di = btrfs_lookup_dir_item(trans, log, log_path,
- dir_key->objectid,
- name, name_len, 0);
- } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) {
- log_di = btrfs_lookup_dir_index_item(trans, log,
- log_path,
- dir_key->objectid,
- dir_key->offset,
- name, name_len, 0);
- }
- if (IS_ERR_OR_NULL(log_di)) {
- btrfs_dir_item_key_to_cpu(eb, di, &location);
- btrfs_release_path(path);
- btrfs_release_path(log_path);
- inode = read_one_inode(root, location.objectid);
- if (!inode) {
- kfree(name);
- return -EIO;
- }
-
- ret = link_to_fixup_dir(trans, root,
- path, location.objectid);
- BUG_ON(ret);
- btrfs_inc_nlink(inode);
- ret = btrfs_unlink_inode(trans, root, dir, inode,
- name, name_len);
- BUG_ON(ret);
-
- btrfs_run_delayed_items(trans, root);
-
- kfree(name);
- iput(inode);
-
- /* there might still be more names under this key
- * check and repeat if required
- */
- ret = btrfs_search_slot(NULL, root, dir_key, path,
- 0, 0);
- if (ret == 0)
- goto again;
- ret = 0;
- goto out;
- }
- btrfs_release_path(log_path);
- kfree(name);
-
- ptr = (unsigned long)(di + 1);
- ptr += name_len;
- }
- ret = 0;
-out:
- btrfs_release_path(path);
- btrfs_release_path(log_path);
- return ret;
-}
-
-/*
- * deletion replay happens before we copy any new directory items
- * out of the log or out of backreferences from inodes. It
- * scans the log to find ranges of keys that the log is authoritative for,
- * and then scans the directory to find items in those ranges that are
- * not present in the log.
- *
- * Anything we don't find in the log is unlinked and removed from the
- * directory.
- */
-static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_root *log,
- struct btrfs_path *path,
- u64 dirid, int del_all)
-{
- u64 range_start;
- u64 range_end;
- int key_type = BTRFS_DIR_LOG_ITEM_KEY;
- int ret = 0;
- struct btrfs_key dir_key;
- struct btrfs_key found_key;
- struct btrfs_path *log_path;
- struct inode *dir;
-
- dir_key.objectid = dirid;
- dir_key.type = BTRFS_DIR_ITEM_KEY;
- log_path = btrfs_alloc_path();
- if (!log_path)
- return -ENOMEM;
-
- dir = read_one_inode(root, dirid);
- /* it isn't an error if the inode isn't there, that can happen
- * because we replay the deletes before we copy in the inode item
- * from the log
- */
- if (!dir) {
- btrfs_free_path(log_path);
- return 0;
- }
-again:
- range_start = 0;
- range_end = 0;
- while (1) {
- if (del_all)
- range_end = (u64)-1;
- else {
- ret = find_dir_range(log, path, dirid, key_type,
- &range_start, &range_end);
- if (ret != 0)
- break;
- }
-
- dir_key.offset = range_start;
- while (1) {
- int nritems;
- ret = btrfs_search_slot(NULL, root, &dir_key, path,
- 0, 0);
- if (ret < 0)
- goto out;
-
- nritems = btrfs_header_nritems(path->nodes[0]);
- if (path->slots[0] >= nritems) {
- ret = btrfs_next_leaf(root, path);
- if (ret)
- break;
- }
- btrfs_item_key_to_cpu(path->nodes[0], &found_key,
- path->slots[0]);
- if (found_key.objectid != dirid ||
- found_key.type != dir_key.type)
- goto next_type;
-
- if (found_key.offset > range_end)
- break;
-
- ret = check_item_in_log(trans, root, log, path,
- log_path, dir,
- &found_key);
- BUG_ON(ret);
- if (found_key.offset == (u64)-1)
- break;
- dir_key.offset = found_key.offset + 1;
- }
- btrfs_release_path(path);
- if (range_end == (u64)-1)
- break;
- range_start = range_end + 1;
- }
-
-next_type:
- ret = 0;
- if (key_type == BTRFS_DIR_LOG_ITEM_KEY) {
- key_type = BTRFS_DIR_LOG_INDEX_KEY;
- dir_key.type = BTRFS_DIR_INDEX_KEY;
- btrfs_release_path(path);
- goto again;
- }
-out:
- btrfs_release_path(path);
- btrfs_free_path(log_path);
- iput(dir);
- return ret;
-}
-
-/*
- * the process_func used to replay items from the log tree. This
- * gets called in two different stages. The first stage just looks
- * for inodes and makes sure they are all copied into the subvolume.
- *
- * The second stage copies all the other item types from the log into
- * the subvolume. The two stage approach is slower, but gets rid of
- * lots of complexity around inodes referencing other inodes that exist
- * only in the log (references come from either directory items or inode
- * back refs).
- */
-static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
- struct walk_control *wc, u64 gen)
-{
- int nritems;
- struct btrfs_path *path;
- struct btrfs_root *root = wc->replay_dest;
- struct btrfs_key key;
- int level;
- int i;
- int ret;
-
- btrfs_read_buffer(eb, gen);
-
- level = btrfs_header_level(eb);
-
- if (level != 0)
- return 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- nritems = btrfs_header_nritems(eb);
- for (i = 0; i < nritems; i++) {
- btrfs_item_key_to_cpu(eb, &key, i);
-
- /* inode keys are done during the first stage */
- if (key.type == BTRFS_INODE_ITEM_KEY &&
- wc->stage == LOG_WALK_REPLAY_INODES) {
- struct btrfs_inode_item *inode_item;
- u32 mode;
-
- inode_item = btrfs_item_ptr(eb, i,
- struct btrfs_inode_item);
- mode = btrfs_inode_mode(eb, inode_item);
- if (S_ISDIR(mode)) {
- ret = replay_dir_deletes(wc->trans,
- root, log, path, key.objectid, 0);
- BUG_ON(ret);
- }
- ret = overwrite_item(wc->trans, root, path,
- eb, i, &key);
- BUG_ON(ret);
-
- /* for regular files, make sure corresponding
-			 * orphan item exists. extents past the new EOF
- * will be truncated later by orphan cleanup.
- */
- if (S_ISREG(mode)) {
- ret = insert_orphan_item(wc->trans, root,
- key.objectid);
- BUG_ON(ret);
- }
-
- ret = link_to_fixup_dir(wc->trans, root,
- path, key.objectid);
- BUG_ON(ret);
- }
- if (wc->stage < LOG_WALK_REPLAY_ALL)
- continue;
-
- /* these keys are simply copied */
- if (key.type == BTRFS_XATTR_ITEM_KEY) {
- ret = overwrite_item(wc->trans, root, path,
- eb, i, &key);
- BUG_ON(ret);
- } else if (key.type == BTRFS_INODE_REF_KEY) {
- ret = add_inode_ref(wc->trans, root, log, path,
- eb, i, &key);
- BUG_ON(ret && ret != -ENOENT);
- } else if (key.type == BTRFS_EXTENT_DATA_KEY) {
- ret = replay_one_extent(wc->trans, root, path,
- eb, i, &key);
- BUG_ON(ret);
- } else if (key.type == BTRFS_DIR_ITEM_KEY ||
- key.type == BTRFS_DIR_INDEX_KEY) {
- ret = replay_one_dir_item(wc->trans, root, path,
- eb, i, &key);
- BUG_ON(ret);
- }
- }
- btrfs_free_path(path);
- return 0;
-}
-
-static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, int *level,
- struct walk_control *wc)
-{
- u64 root_owner;
- u64 bytenr;
- u64 ptr_gen;
- struct extent_buffer *next;
- struct extent_buffer *cur;
- struct extent_buffer *parent;
- u32 blocksize;
- int ret = 0;
-
- WARN_ON(*level < 0);
- WARN_ON(*level >= BTRFS_MAX_LEVEL);
-
- while (*level > 0) {
- WARN_ON(*level < 0);
- WARN_ON(*level >= BTRFS_MAX_LEVEL);
- cur = path->nodes[*level];
-
- if (btrfs_header_level(cur) != *level)
- WARN_ON(1);
-
- if (path->slots[*level] >=
- btrfs_header_nritems(cur))
- break;
-
- bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
- ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
- blocksize = btrfs_level_size(root, *level - 1);
-
- parent = path->nodes[*level];
- root_owner = btrfs_header_owner(parent);
-
- next = btrfs_find_create_tree_block(root, bytenr, blocksize);
- if (!next)
- return -ENOMEM;
-
- if (*level == 1) {
- ret = wc->process_func(root, next, wc, ptr_gen);
- if (ret)
- return ret;
-
- path->slots[*level]++;
- if (wc->free) {
- btrfs_read_buffer(next, ptr_gen);
-
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
- clean_tree_block(trans, root, next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
-
- WARN_ON(root_owner !=
- BTRFS_TREE_LOG_OBJECTID);
- ret = btrfs_free_and_pin_reserved_extent(root,
- bytenr, blocksize);
- BUG_ON(ret); /* -ENOMEM or logic errors */
- }
- free_extent_buffer(next);
- continue;
- }
- btrfs_read_buffer(next, ptr_gen);
-
- WARN_ON(*level <= 0);
- if (path->nodes[*level-1])
- free_extent_buffer(path->nodes[*level-1]);
- path->nodes[*level-1] = next;
- *level = btrfs_header_level(next);
- path->slots[*level] = 0;
- cond_resched();
- }
- WARN_ON(*level < 0);
- WARN_ON(*level >= BTRFS_MAX_LEVEL);
-
- path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
-
- cond_resched();
- return 0;
-}
-
-static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path, int *level,
- struct walk_control *wc)
-{
- u64 root_owner;
- int i;
- int slot;
- int ret;
-
- for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
- slot = path->slots[i];
- if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
- path->slots[i]++;
- *level = i;
- WARN_ON(*level == 0);
- return 0;
- } else {
- struct extent_buffer *parent;
- if (path->nodes[*level] == root->node)
- parent = path->nodes[*level];
- else
- parent = path->nodes[*level + 1];
-
- root_owner = btrfs_header_owner(parent);
- ret = wc->process_func(root, path->nodes[*level], wc,
- btrfs_header_generation(path->nodes[*level]));
- if (ret)
- return ret;
-
- if (wc->free) {
- struct extent_buffer *next;
-
- next = path->nodes[*level];
-
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
- clean_tree_block(trans, root, next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
-
- WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
- ret = btrfs_free_and_pin_reserved_extent(root,
- path->nodes[*level]->start,
- path->nodes[*level]->len);
- BUG_ON(ret);
- }
- free_extent_buffer(path->nodes[*level]);
- path->nodes[*level] = NULL;
- *level = i + 1;
- }
- }
- return 1;
-}
-
-/*
- * drop the reference count on the tree rooted at 'log'. This traverses
- * the tree freeing any blocks that have a ref count of zero after being
- * decremented.
- */
-static int walk_log_tree(struct btrfs_trans_handle *trans,
- struct btrfs_root *log, struct walk_control *wc)
-{
- int ret = 0;
- int wret;
- int level;
- struct btrfs_path *path;
- int i;
- int orig_level;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- level = btrfs_header_level(log->node);
- orig_level = level;
- path->nodes[level] = log->node;
- extent_buffer_get(log->node);
- path->slots[level] = 0;
-
- while (1) {
- wret = walk_down_log_tree(trans, log, path, &level, wc);
- if (wret > 0)
- break;
- if (wret < 0) {
- ret = wret;
- goto out;
- }
-
- wret = walk_up_log_tree(trans, log, path, &level, wc);
- if (wret > 0)
- break;
- if (wret < 0) {
- ret = wret;
- goto out;
- }
- }
-
- /* was the root node processed? if not, catch it here */
- if (path->nodes[orig_level]) {
- ret = wc->process_func(log, path->nodes[orig_level], wc,
- btrfs_header_generation(path->nodes[orig_level]));
- if (ret)
- goto out;
- if (wc->free) {
- struct extent_buffer *next;
-
- next = path->nodes[orig_level];
-
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
- clean_tree_block(trans, log, next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
-
- WARN_ON(log->root_key.objectid !=
- BTRFS_TREE_LOG_OBJECTID);
- ret = btrfs_free_and_pin_reserved_extent(log, next->start,
- next->len);
- BUG_ON(ret); /* -ENOMEM or logic errors */
- }
- }
-
-out:
- for (i = 0; i <= orig_level; i++) {
- if (path->nodes[i]) {
- free_extent_buffer(path->nodes[i]);
- path->nodes[i] = NULL;
- }
- }
- btrfs_free_path(path);
- return ret;
-}
-
-/*
- * helper function to update the item for a given subvolume's log root
- * in the tree of log roots
- */
-static int update_log_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *log)
-{
- int ret;
-
- if (log->log_transid == 1) {
- /* insert root item on the first sync */
- ret = btrfs_insert_root(trans, log->fs_info->log_root_tree,
- &log->root_key, &log->root_item);
- } else {
- ret = btrfs_update_root(trans, log->fs_info->log_root_tree,
- &log->root_key, &log->root_item);
- }
- return ret;
-}
-
-static int wait_log_commit(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, unsigned long transid)
-{
- DEFINE_WAIT(wait);
- int index = transid % 2;
-
- /*
- * we only allow two pending log transactions at a time,
- * so we know that if ours is more than 2 older than the
- * current transaction, we're done
- */
- do {
- prepare_to_wait(&root->log_commit_wait[index],
- &wait, TASK_UNINTERRUPTIBLE);
- mutex_unlock(&root->log_mutex);
-
- if (root->fs_info->last_trans_log_full_commit !=
- trans->transid && root->log_transid < transid + 2 &&
- atomic_read(&root->log_commit[index]))
- schedule();
-
- finish_wait(&root->log_commit_wait[index], &wait);
- mutex_lock(&root->log_mutex);
- } while (root->fs_info->last_trans_log_full_commit !=
- trans->transid && root->log_transid < transid + 2 &&
- atomic_read(&root->log_commit[index]));
- return 0;
-}
-
-static void wait_for_writer(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- DEFINE_WAIT(wait);
- while (root->fs_info->last_trans_log_full_commit !=
- trans->transid && atomic_read(&root->log_writers)) {
- prepare_to_wait(&root->log_writer_wait,
- &wait, TASK_UNINTERRUPTIBLE);
- mutex_unlock(&root->log_mutex);
- if (root->fs_info->last_trans_log_full_commit !=
- trans->transid && atomic_read(&root->log_writers))
- schedule();
- mutex_lock(&root->log_mutex);
- finish_wait(&root->log_writer_wait, &wait);
- }
-}
-
-/*
- * btrfs_sync_log sends a given tree log down to the disk and
- * updates the super blocks to record it. When this call is done,
- * you know that any inodes previously logged are safely on disk only
- * if it returns 0.
- *
- * Any other return value means you need to call btrfs_commit_transaction.
- * Some of the edge cases for fsyncing directories that have had unlinks
- * or renames done in the past mean that sometimes the only safe
- * fsync is to commit the whole FS. When btrfs_sync_log returns -EAGAIN,
- * that has happened.
- */
-int btrfs_sync_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- int index1;
- int index2;
- int mark;
- int ret;
- struct btrfs_root *log = root->log_root;
- struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
- unsigned long log_transid = 0;
-
- mutex_lock(&root->log_mutex);
- index1 = root->log_transid % 2;
- if (atomic_read(&root->log_commit[index1])) {
- wait_log_commit(trans, root, root->log_transid);
- mutex_unlock(&root->log_mutex);
- return 0;
- }
- atomic_set(&root->log_commit[index1], 1);
-
- /* wait for previous tree log sync to complete */
- if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
- wait_log_commit(trans, root, root->log_transid - 1);
- while (1) {
- unsigned long batch = root->log_batch;
- /* when we're on an ssd, just kick the log commit out */
- if (!btrfs_test_opt(root, SSD) && root->log_multiple_pids) {
- mutex_unlock(&root->log_mutex);
- schedule_timeout_uninterruptible(1);
- mutex_lock(&root->log_mutex);
- }
- wait_for_writer(trans, root);
- if (batch == root->log_batch)
- break;
- }
-
- /* bail out if we need to do a full commit */
- if (root->fs_info->last_trans_log_full_commit == trans->transid) {
- ret = -EAGAIN;
- mutex_unlock(&root->log_mutex);
- goto out;
- }
-
- log_transid = root->log_transid;
- if (log_transid % 2 == 0)
- mark = EXTENT_DIRTY;
- else
- mark = EXTENT_NEW;
-
- /* we start IO on all the marked extents here, but we don't actually
- * wait for them until later.
- */
- ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- mutex_unlock(&root->log_mutex);
- goto out;
- }
-
- btrfs_set_root_node(&log->root_item, log->node);
-
- root->log_batch = 0;
- root->log_transid++;
- log->log_transid = root->log_transid;
- root->log_start_pid = 0;
- smp_mb();
- /*
-	 * IO has been started, blocks of the log tree have the WRITTEN flag set
- * in their headers. new modifications of the log will be written to
- * new positions. so it's safe to allow log writers to go in.
- */
- mutex_unlock(&root->log_mutex);
-
- mutex_lock(&log_root_tree->log_mutex);
- log_root_tree->log_batch++;
- atomic_inc(&log_root_tree->log_writers);
- mutex_unlock(&log_root_tree->log_mutex);
-
- ret = update_log_root(trans, log);
-
- mutex_lock(&log_root_tree->log_mutex);
- if (atomic_dec_and_test(&log_root_tree->log_writers)) {
- smp_mb();
- if (waitqueue_active(&log_root_tree->log_writer_wait))
- wake_up(&log_root_tree->log_writer_wait);
- }
-
- if (ret) {
- if (ret != -ENOSPC) {
- btrfs_abort_transaction(trans, root, ret);
- mutex_unlock(&log_root_tree->log_mutex);
- goto out;
- }
- root->fs_info->last_trans_log_full_commit = trans->transid;
- btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
- mutex_unlock(&log_root_tree->log_mutex);
- ret = -EAGAIN;
- goto out;
- }
-
- index2 = log_root_tree->log_transid % 2;
- if (atomic_read(&log_root_tree->log_commit[index2])) {
- btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
- wait_log_commit(trans, log_root_tree,
- log_root_tree->log_transid);
- mutex_unlock(&log_root_tree->log_mutex);
- ret = 0;
- goto out;
- }
- atomic_set(&log_root_tree->log_commit[index2], 1);
-
- if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) {
- wait_log_commit(trans, log_root_tree,
- log_root_tree->log_transid - 1);
- }
-
- wait_for_writer(trans, log_root_tree);
-
- /*
- * now that we've moved on to the tree of log tree roots,
- * check the full commit flag again
- */
- if (root->fs_info->last_trans_log_full_commit == trans->transid) {
- btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
- mutex_unlock(&log_root_tree->log_mutex);
- ret = -EAGAIN;
- goto out_wake_log_root;
- }
-
- ret = btrfs_write_and_wait_marked_extents(log_root_tree,
- &log_root_tree->dirty_log_pages,
- EXTENT_DIRTY | EXTENT_NEW);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- mutex_unlock(&log_root_tree->log_mutex);
- goto out_wake_log_root;
- }
- btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
-
- btrfs_set_super_log_root(root->fs_info->super_for_commit,
- log_root_tree->node->start);
- btrfs_set_super_log_root_level(root->fs_info->super_for_commit,
- btrfs_header_level(log_root_tree->node));
-
- log_root_tree->log_batch = 0;
- log_root_tree->log_transid++;
- smp_mb();
-
- mutex_unlock(&log_root_tree->log_mutex);
-
- /*
-	 * nobody else is going to jump in and write the ctree
- * super here because the log_commit atomic below is protecting
- * us. We must be called with a transaction handle pinning
- * the running transaction open, so a full commit can't hop
- * in and cause problems either.
- */
- btrfs_scrub_pause_super(root);
- write_ctree_super(trans, root->fs_info->tree_root, 1);
- btrfs_scrub_continue_super(root);
- ret = 0;
-
- mutex_lock(&root->log_mutex);
- if (root->last_log_commit < log_transid)
- root->last_log_commit = log_transid;
- mutex_unlock(&root->log_mutex);
-
-out_wake_log_root:
- atomic_set(&log_root_tree->log_commit[index2], 0);
- smp_mb();
- if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
- wake_up(&log_root_tree->log_commit_wait[index2]);
-out:
- atomic_set(&root->log_commit[index1], 0);
- smp_mb();
- if (waitqueue_active(&root->log_commit_wait[index1]))
- wake_up(&root->log_commit_wait[index1]);
- return ret;
-}
-
-static void free_log_tree(struct btrfs_trans_handle *trans,
- struct btrfs_root *log)
-{
- int ret;
- u64 start;
- u64 end;
- struct walk_control wc = {
- .free = 1,
- .process_func = process_one_buffer
- };
-
- ret = walk_log_tree(trans, log, &wc);
- BUG_ON(ret);
-
- while (1) {
- ret = find_first_extent_bit(&log->dirty_log_pages,
- 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
- if (ret)
- break;
-
- clear_extent_bits(&log->dirty_log_pages, start, end,
- EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
- }
-
- free_extent_buffer(log->node);
- kfree(log);
-}
-
-/*
- * free all the extents used by the tree log. This should be called
- * at commit time of the full transaction
- */
-int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
-{
- if (root->log_root) {
- free_log_tree(trans, root->log_root);
- root->log_root = NULL;
- }
- return 0;
-}
-
-int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info)
-{
- if (fs_info->log_root_tree) {
- free_log_tree(trans, fs_info->log_root_tree);
- fs_info->log_root_tree = NULL;
- }
- return 0;
-}
-
-/*
- * If both a file and directory are logged, and unlinks or renames are
- * mixed in, we have a few interesting corners:
- *
- * create file X in dir Y
- * link file X to X.link in dir Y
- * fsync file X
- * unlink file X but leave X.link
- * fsync dir Y
- *
- * After a crash we would expect only X.link to exist. But file X
- * didn't get fsync'd again so the log has back refs for X and X.link.
- *
- * We solve this by removing directory entries and inode backrefs from the
- * log when a file that was logged in the current transaction is
- * unlinked. Any later fsync will include the updated log entries, and
- * we'll be able to reconstruct the proper directory items from backrefs.
- *
- * This optimization allows us to avoid relogging the entire inode
- * or the entire directory.
- */
-int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- const char *name, int name_len,
- struct inode *dir, u64 index)
-{
- struct btrfs_root *log;
- struct btrfs_dir_item *di;
- struct btrfs_path *path;
- int ret;
- int err = 0;
- int bytes_del = 0;
- u64 dir_ino = btrfs_ino(dir);
-
- if (BTRFS_I(dir)->logged_trans < trans->transid)
- return 0;
-
- ret = join_running_log_trans(root);
- if (ret)
- return 0;
-
- mutex_lock(&BTRFS_I(dir)->log_mutex);
-
- log = root->log_root;
- path = btrfs_alloc_path();
- if (!path) {
- err = -ENOMEM;
- goto out_unlock;
- }
-
- di = btrfs_lookup_dir_item(trans, log, path, dir_ino,
- name, name_len, -1);
- if (IS_ERR(di)) {
- err = PTR_ERR(di);
- goto fail;
- }
- if (di) {
- ret = btrfs_delete_one_dir_name(trans, log, path, di);
- bytes_del += name_len;
- BUG_ON(ret);
- }
- btrfs_release_path(path);
- di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
- index, name, name_len, -1);
- if (IS_ERR(di)) {
- err = PTR_ERR(di);
- goto fail;
- }
- if (di) {
- ret = btrfs_delete_one_dir_name(trans, log, path, di);
- bytes_del += name_len;
- BUG_ON(ret);
- }
-
- /* update the directory size in the log to reflect the names
- * we have removed
- */
- if (bytes_del) {
- struct btrfs_key key;
-
- key.objectid = dir_ino;
- key.offset = 0;
- key.type = BTRFS_INODE_ITEM_KEY;
- btrfs_release_path(path);
-
- ret = btrfs_search_slot(trans, log, &key, path, 0, 1);
- if (ret < 0) {
- err = ret;
- goto fail;
- }
- if (ret == 0) {
- struct btrfs_inode_item *item;
- u64 i_size;
-
- item = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_inode_item);
- i_size = btrfs_inode_size(path->nodes[0], item);
- if (i_size > bytes_del)
- i_size -= bytes_del;
- else
- i_size = 0;
- btrfs_set_inode_size(path->nodes[0], item, i_size);
- btrfs_mark_buffer_dirty(path->nodes[0]);
- } else
- ret = 0;
- btrfs_release_path(path);
- }
-fail:
- btrfs_free_path(path);
-out_unlock:
- mutex_unlock(&BTRFS_I(dir)->log_mutex);
- if (ret == -ENOSPC) {
- root->fs_info->last_trans_log_full_commit = trans->transid;
- ret = 0;
- } else if (ret < 0)
- btrfs_abort_transaction(trans, root, ret);
-
- btrfs_end_log_trans(root);
-
- return err;
-}
-
-/* see comments for btrfs_del_dir_entries_in_log */
-int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- const char *name, int name_len,
- struct inode *inode, u64 dirid)
-{
- struct btrfs_root *log;
- u64 index;
- int ret;
-
- if (BTRFS_I(inode)->logged_trans < trans->transid)
- return 0;
-
- ret = join_running_log_trans(root);
- if (ret)
- return 0;
- log = root->log_root;
- mutex_lock(&BTRFS_I(inode)->log_mutex);
-
- ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
- dirid, &index);
- mutex_unlock(&BTRFS_I(inode)->log_mutex);
- if (ret == -ENOSPC) {
- root->fs_info->last_trans_log_full_commit = trans->transid;
- ret = 0;
- } else if (ret < 0 && ret != -ENOENT)
- btrfs_abort_transaction(trans, root, ret);
- btrfs_end_log_trans(root);
-
- return ret;
-}
-
-/*
- * creates a range item in the log for 'dirid'. first_offset and
- * last_offset tell us which parts of the key space the log should
- * be considered authoritative for.
- */
-static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
- struct btrfs_root *log,
- struct btrfs_path *path,
- int key_type, u64 dirid,
- u64 first_offset, u64 last_offset)
-{
- int ret;
- struct btrfs_key key;
- struct btrfs_dir_log_item *item;
-
- key.objectid = dirid;
- key.offset = first_offset;
- if (key_type == BTRFS_DIR_ITEM_KEY)
- key.type = BTRFS_DIR_LOG_ITEM_KEY;
- else
- key.type = BTRFS_DIR_LOG_INDEX_KEY;
- ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item));
- if (ret)
- return ret;
-
- item = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_dir_log_item);
- btrfs_set_dir_log_end(path->nodes[0], item, last_offset);
- btrfs_mark_buffer_dirty(path->nodes[0]);
- btrfs_release_path(path);
- return 0;
-}
-
-/*
- * log all the items included in the current transaction for a given
- * directory. This also creates the range items in the log tree required
- * to replay anything deleted before the fsync
- */
-static noinline int log_dir_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- struct btrfs_path *path,
- struct btrfs_path *dst_path, int key_type,
- u64 min_offset, u64 *last_offset_ret)
-{
- struct btrfs_key min_key;
- struct btrfs_key max_key;
- struct btrfs_root *log = root->log_root;
- struct extent_buffer *src;
- int err = 0;
- int ret;
- int i;
- int nritems;
- u64 first_offset = min_offset;
- u64 last_offset = (u64)-1;
- u64 ino = btrfs_ino(inode);
-
- log = root->log_root;
- max_key.objectid = ino;
- max_key.offset = (u64)-1;
- max_key.type = key_type;
-
- min_key.objectid = ino;
- min_key.type = key_type;
- min_key.offset = min_offset;
-
- path->keep_locks = 1;
-
- ret = btrfs_search_forward(root, &min_key, &max_key,
- path, 0, trans->transid);
-
- /*
- * we didn't find anything from this transaction, see if there
- * is anything at all
- */
- if (ret != 0 || min_key.objectid != ino || min_key.type != key_type) {
- min_key.objectid = ino;
- min_key.type = key_type;
- min_key.offset = (u64)-1;
- btrfs_release_path(path);
- ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
- if (ret < 0) {
- btrfs_release_path(path);
- return ret;
- }
- ret = btrfs_previous_item(root, path, ino, key_type);
-
- /* if ret == 0 there are items for this type,
- * create a range to tell us the last key of this type.
- * otherwise, there are no items in this directory after
- * *min_offset, and we create a range to indicate that.
- */
- if (ret == 0) {
- struct btrfs_key tmp;
- btrfs_item_key_to_cpu(path->nodes[0], &tmp,
- path->slots[0]);
- if (key_type == tmp.type)
- first_offset = max(min_offset, tmp.offset) + 1;
- }
- goto done;
- }
-
- /* go backward to find any previous key */
- ret = btrfs_previous_item(root, path, ino, key_type);
- if (ret == 0) {
- struct btrfs_key tmp;
- btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
- if (key_type == tmp.type) {
- first_offset = tmp.offset;
- ret = overwrite_item(trans, log, dst_path,
- path->nodes[0], path->slots[0],
- &tmp);
- if (ret) {
- err = ret;
- goto done;
- }
- }
- }
- btrfs_release_path(path);
-
- /* find the first key from this transaction again */
- ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
- if (ret != 0) {
- WARN_ON(1);
- goto done;
- }
-
- /*
- * we have a block from this transaction, log every item in it
- * from our directory
- */
- while (1) {
- struct btrfs_key tmp;
- src = path->nodes[0];
- nritems = btrfs_header_nritems(src);
- for (i = path->slots[0]; i < nritems; i++) {
- btrfs_item_key_to_cpu(src, &min_key, i);
-
- if (min_key.objectid != ino || min_key.type != key_type)
- goto done;
- ret = overwrite_item(trans, log, dst_path, src, i,
- &min_key);
- if (ret) {
- err = ret;
- goto done;
- }
- }
- path->slots[0] = nritems;
-
- /*
- * look ahead to the next item and see if it is also
- * from this directory and from this transaction
- */
- ret = btrfs_next_leaf(root, path);
- if (ret == 1) {
- last_offset = (u64)-1;
- goto done;
- }
- btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
- if (tmp.objectid != ino || tmp.type != key_type) {
- last_offset = (u64)-1;
- goto done;
- }
- if (btrfs_header_generation(path->nodes[0]) != trans->transid) {
- ret = overwrite_item(trans, log, dst_path,
- path->nodes[0], path->slots[0],
- &tmp);
- if (ret)
- err = ret;
- else
- last_offset = tmp.offset;
- goto done;
- }
- }
-done:
- btrfs_release_path(path);
- btrfs_release_path(dst_path);
-
- if (err == 0) {
- *last_offset_ret = last_offset;
- /*
- * insert the log range keys to indicate where the log
- * is valid
- */
- ret = insert_dir_log_key(trans, log, path, key_type,
- ino, first_offset, last_offset);
- if (ret)
- err = ret;
- }
- return err;
-}
-
-/*
- * logging directories is very similar to logging inodes. We find all the items
- * from the current transaction and write them to the log.
- *
- * The recovery code scans the directory in the subvolume, and if it finds a
- * key in the range logged that is not present in the log tree, then it means
- * that dir entry was unlinked during the transaction.
- *
- * In order for that scan to work, we must include one key smaller than
- * the smallest logged by this transaction and one key larger than the largest
- * key logged by this transaction.
- */
-static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- struct btrfs_path *path,
- struct btrfs_path *dst_path)
-{
- u64 min_key;
- u64 max_key;
- int ret;
- int key_type = BTRFS_DIR_ITEM_KEY;
-
-again:
- min_key = 0;
- max_key = 0;
- while (1) {
- ret = log_dir_items(trans, root, inode, path,
- dst_path, key_type, min_key,
- &max_key);
- if (ret)
- return ret;
- if (max_key == (u64)-1)
- break;
- min_key = max_key + 1;
- }
-
- if (key_type == BTRFS_DIR_ITEM_KEY) {
- key_type = BTRFS_DIR_INDEX_KEY;
- goto again;
- }
- return 0;
-}
-
-/*
- * a helper function to drop items from the log before we relog an
- * inode. max_key_type indicates the highest item type to remove.
- * This cannot be run for file data extents because it does not
- * free the extents they point to.
- */
-static int drop_objectid_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *log,
- struct btrfs_path *path,
- u64 objectid, int max_key_type)
-{
- int ret;
- struct btrfs_key key;
- struct btrfs_key found_key;
-
- key.objectid = objectid;
- key.type = max_key_type;
- key.offset = (u64)-1;
-
- while (1) {
- ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
- BUG_ON(ret == 0);
- if (ret < 0)
- break;
-
- if (path->slots[0] == 0)
- break;
-
- path->slots[0]--;
- btrfs_item_key_to_cpu(path->nodes[0], &found_key,
- path->slots[0]);
-
- if (found_key.objectid != objectid)
- break;
-
- ret = btrfs_del_item(trans, log, path);
- if (ret)
- break;
- btrfs_release_path(path);
- }
- btrfs_release_path(path);
- return ret;
-}
-
-static noinline int copy_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *log,
- struct btrfs_path *dst_path,
- struct extent_buffer *src,
- int start_slot, int nr, int inode_only)
-{
- unsigned long src_offset;
- unsigned long dst_offset;
- struct btrfs_file_extent_item *extent;
- struct btrfs_inode_item *inode_item;
- int ret;
- struct btrfs_key *ins_keys;
- u32 *ins_sizes;
- char *ins_data;
- int i;
- struct list_head ordered_sums;
-
- INIT_LIST_HEAD(&ordered_sums);
-
- ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
- nr * sizeof(u32), GFP_NOFS);
- if (!ins_data)
- return -ENOMEM;
-
- ins_sizes = (u32 *)ins_data;
- ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
-
- for (i = 0; i < nr; i++) {
- ins_sizes[i] = btrfs_item_size_nr(src, i + start_slot);
- btrfs_item_key_to_cpu(src, ins_keys + i, i + start_slot);
- }
- ret = btrfs_insert_empty_items(trans, log, dst_path,
- ins_keys, ins_sizes, nr);
- if (ret) {
- kfree(ins_data);
- return ret;
- }
-
- for (i = 0; i < nr; i++, dst_path->slots[0]++) {
- dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0],
- dst_path->slots[0]);
-
- src_offset = btrfs_item_ptr_offset(src, start_slot + i);
-
- copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
- src_offset, ins_sizes[i]);
-
- if (inode_only == LOG_INODE_EXISTS &&
- ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
- inode_item = btrfs_item_ptr(dst_path->nodes[0],
- dst_path->slots[0],
- struct btrfs_inode_item);
- btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0);
-
-			/* set the generation to zero so the recovery code
-			 * can tell the difference between a logging
- * just to say 'this inode exists' and a logging
- * to say 'update this inode with these values'
- */
- btrfs_set_inode_generation(dst_path->nodes[0],
- inode_item, 0);
- }
- /* take a reference on file data extents so that truncates
- * or deletes of this inode don't have to relog the inode
- * again
- */
- if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY) {
- int found_type;
- extent = btrfs_item_ptr(src, start_slot + i,
- struct btrfs_file_extent_item);
-
- if (btrfs_file_extent_generation(src, extent) < trans->transid)
- continue;
-
- found_type = btrfs_file_extent_type(src, extent);
- if (found_type == BTRFS_FILE_EXTENT_REG ||
- found_type == BTRFS_FILE_EXTENT_PREALLOC) {
- u64 ds, dl, cs, cl;
- ds = btrfs_file_extent_disk_bytenr(src,
- extent);
- /* ds == 0 is a hole */
- if (ds == 0)
- continue;
-
- dl = btrfs_file_extent_disk_num_bytes(src,
- extent);
- cs = btrfs_file_extent_offset(src, extent);
- cl = btrfs_file_extent_num_bytes(src,
- extent);
- if (btrfs_file_extent_compression(src,
- extent)) {
- cs = 0;
- cl = dl;
- }
-
- ret = btrfs_lookup_csums_range(
- log->fs_info->csum_root,
- ds + cs, ds + cs + cl - 1,
- &ordered_sums, 0);
- BUG_ON(ret);
- }
- }
- }
-
- btrfs_mark_buffer_dirty(dst_path->nodes[0]);
- btrfs_release_path(dst_path);
- kfree(ins_data);
-
- /*
- * we have to do this after the loop above to avoid changing the
- * log tree while trying to change the log tree.
- */
- ret = 0;
- while (!list_empty(&ordered_sums)) {
- struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
- struct btrfs_ordered_sum,
- list);
- if (!ret)
- ret = btrfs_csum_file_blocks(trans, log, sums);
- list_del(&sums->list);
- kfree(sums);
- }
- return ret;
-}
-
-/* log a single inode in the tree log.
- * At least one parent directory for this inode must exist in the tree
- * or be logged already.
- *
- * Any items from this inode changed by the current transaction are copied
- * to the log tree. An extra reference is taken on any extents in this
- * file, allowing us to avoid a whole pile of corner cases around logging
- * blocks that have been removed from the tree.
- *
- * See LOG_INODE_ALL and related defines for a description of what inode_only
- * does.
- *
- * This handles both files and directories.
- */
-static int btrfs_log_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- int inode_only)
-{
- struct btrfs_path *path;
- struct btrfs_path *dst_path;
- struct btrfs_key min_key;
- struct btrfs_key max_key;
- struct btrfs_root *log = root->log_root;
- struct extent_buffer *src = NULL;
- int err = 0;
- int ret;
- int nritems;
- int ins_start_slot = 0;
- int ins_nr;
- u64 ino = btrfs_ino(inode);
-
- log = root->log_root;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- dst_path = btrfs_alloc_path();
- if (!dst_path) {
- btrfs_free_path(path);
- return -ENOMEM;
- }
-
- min_key.objectid = ino;
- min_key.type = BTRFS_INODE_ITEM_KEY;
- min_key.offset = 0;
-
- max_key.objectid = ino;
-
- /* today the code can only do partial logging of directories */
- if (!S_ISDIR(inode->i_mode))
- inode_only = LOG_INODE_ALL;
-
- if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode))
- max_key.type = BTRFS_XATTR_ITEM_KEY;
- else
- max_key.type = (u8)-1;
- max_key.offset = (u64)-1;
-
- ret = btrfs_commit_inode_delayed_items(trans, inode);
- if (ret) {
- btrfs_free_path(path);
- btrfs_free_path(dst_path);
- return ret;
- }
-
- mutex_lock(&BTRFS_I(inode)->log_mutex);
-
- /*
- * a brute force approach to making sure we get the most uptodate
- * copies of everything.
- */
- if (S_ISDIR(inode->i_mode)) {
- int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
-
- if (inode_only == LOG_INODE_EXISTS)
- max_key_type = BTRFS_XATTR_ITEM_KEY;
- ret = drop_objectid_items(trans, log, path, ino, max_key_type);
- } else {
- ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0);
- }
- if (ret) {
- err = ret;
- goto out_unlock;
- }
- path->keep_locks = 1;
-
- while (1) {
- ins_nr = 0;
- ret = btrfs_search_forward(root, &min_key, &max_key,
- path, 0, trans->transid);
- if (ret != 0)
- break;
-again:
- /* note, ins_nr might be > 0 here, cleanup outside the loop */
- if (min_key.objectid != ino)
- break;
- if (min_key.type > max_key.type)
- break;
-
- src = path->nodes[0];
- if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
- ins_nr++;
- goto next_slot;
- } else if (!ins_nr) {
- ins_start_slot = path->slots[0];
- ins_nr = 1;
- goto next_slot;
- }
-
- ret = copy_items(trans, log, dst_path, src, ins_start_slot,
- ins_nr, inode_only);
- if (ret) {
- err = ret;
- goto out_unlock;
- }
- ins_nr = 1;
- ins_start_slot = path->slots[0];
-next_slot:
-
- nritems = btrfs_header_nritems(path->nodes[0]);
- path->slots[0]++;
- if (path->slots[0] < nritems) {
- btrfs_item_key_to_cpu(path->nodes[0], &min_key,
- path->slots[0]);
- goto again;
- }
- if (ins_nr) {
- ret = copy_items(trans, log, dst_path, src,
- ins_start_slot,
- ins_nr, inode_only);
- if (ret) {
- err = ret;
- goto out_unlock;
- }
- ins_nr = 0;
- }
- btrfs_release_path(path);
-
- if (min_key.offset < (u64)-1)
- min_key.offset++;
- else if (min_key.type < (u8)-1)
- min_key.type++;
- else if (min_key.objectid < (u64)-1)
- min_key.objectid++;
- else
- break;
- }
- if (ins_nr) {
- ret = copy_items(trans, log, dst_path, src,
- ins_start_slot,
- ins_nr, inode_only);
- if (ret) {
- err = ret;
- goto out_unlock;
- }
- ins_nr = 0;
- }
- WARN_ON(ins_nr);
- if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
- btrfs_release_path(path);
- btrfs_release_path(dst_path);
- ret = log_directory_changes(trans, root, inode, path, dst_path);
- if (ret) {
- err = ret;
- goto out_unlock;
- }
- }
- BTRFS_I(inode)->logged_trans = trans->transid;
-out_unlock:
- mutex_unlock(&BTRFS_I(inode)->log_mutex);
-
- btrfs_free_path(path);
- btrfs_free_path(dst_path);
- return err;
-}
-
-/*
- * follow the dentry parent pointers up the chain and see if any
- * of the directories in it require a full commit before they can
- * be logged. Returns zero if nothing special needs to be done or 1 if
- * a full commit is required.
- */
-static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
- struct inode *inode,
- struct dentry *parent,
- struct super_block *sb,
- u64 last_committed)
-{
- int ret = 0;
- struct btrfs_root *root;
- struct dentry *old_parent = NULL;
-
- /*
- * for regular files, if its inode is already on disk, we don't
- * have to worry about the parents at all. This is because
- * we can use the last_unlink_trans field to record renames
- * and other fun in this file.
- */
- if (S_ISREG(inode->i_mode) &&
- BTRFS_I(inode)->generation <= last_committed &&
- BTRFS_I(inode)->last_unlink_trans <= last_committed)
- goto out;
-
- if (!S_ISDIR(inode->i_mode)) {
- if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
- goto out;
- inode = parent->d_inode;
- }
-
- while (1) {
- BTRFS_I(inode)->logged_trans = trans->transid;
- smp_mb();
-
- if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
- root = BTRFS_I(inode)->root;
-
- /*
- * make sure any commits to the log are forced
- * to be full commits
- */
- root->fs_info->last_trans_log_full_commit =
- trans->transid;
- ret = 1;
- break;
- }
-
- if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
- break;
-
- if (IS_ROOT(parent))
- break;
-
- parent = dget_parent(parent);
- dput(old_parent);
- old_parent = parent;
- inode = parent->d_inode;
-
- }
- dput(old_parent);
-out:
- return ret;
-}
-
-static int inode_in_log(struct btrfs_trans_handle *trans,
- struct inode *inode)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret = 0;
-
- mutex_lock(&root->log_mutex);
- if (BTRFS_I(inode)->logged_trans == trans->transid &&
- BTRFS_I(inode)->last_sub_trans <= root->last_log_commit)
- ret = 1;
- mutex_unlock(&root->log_mutex);
- return ret;
-}
-
-
-/*
- * helper function around btrfs_log_inode to make sure newly created
- * parent directories also end up in the log. A minimal inode and backref
- * only logging is done for any parent directories that are older than
- * the last committed transaction
- */
-int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- struct dentry *parent, int exists_only)
-{
- int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
- struct super_block *sb;
- struct dentry *old_parent = NULL;
- int ret = 0;
- u64 last_committed = root->fs_info->last_trans_committed;
-
- sb = inode->i_sb;
-
- if (btrfs_test_opt(root, NOTREELOG)) {
- ret = 1;
- goto end_no_trans;
- }
-
- if (root->fs_info->last_trans_log_full_commit >
- root->fs_info->last_trans_committed) {
- ret = 1;
- goto end_no_trans;
- }
-
- if (root != BTRFS_I(inode)->root ||
- btrfs_root_refs(&root->root_item) == 0) {
- ret = 1;
- goto end_no_trans;
- }
-
- ret = check_parent_dirs_for_sync(trans, inode, parent,
- sb, last_committed);
- if (ret)
- goto end_no_trans;
-
- if (inode_in_log(trans, inode)) {
- ret = BTRFS_NO_LOG_SYNC;
- goto end_no_trans;
- }
-
- ret = start_log_trans(trans, root);
- if (ret)
- goto end_trans;
-
- ret = btrfs_log_inode(trans, root, inode, inode_only);
- if (ret)
- goto end_trans;
-
- /*
- * for regular files, if its inode is already on disk, we don't
- * have to worry about the parents at all. This is because
- * we can use the last_unlink_trans field to record renames
- * and other fun in this file.
- */
- if (S_ISREG(inode->i_mode) &&
- BTRFS_I(inode)->generation <= last_committed &&
- BTRFS_I(inode)->last_unlink_trans <= last_committed) {
- ret = 0;
- goto end_trans;
- }
-
- inode_only = LOG_INODE_EXISTS;
- while (1) {
- if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
- break;
-
- inode = parent->d_inode;
- if (root != BTRFS_I(inode)->root)
- break;
-
- if (BTRFS_I(inode)->generation >
- root->fs_info->last_trans_committed) {
- ret = btrfs_log_inode(trans, root, inode, inode_only);
- if (ret)
- goto end_trans;
- }
- if (IS_ROOT(parent))
- break;
-
- parent = dget_parent(parent);
- dput(old_parent);
- old_parent = parent;
- }
- ret = 0;
-end_trans:
- dput(old_parent);
- if (ret < 0) {
- BUG_ON(ret != -ENOSPC);
- root->fs_info->last_trans_log_full_commit = trans->transid;
- ret = 1;
- }
- btrfs_end_log_trans(root);
-end_no_trans:
- return ret;
-}
-
-/*
- * it is not safe to log a dentry if the chunk root has added new
- * chunks. This returns 0 if the dentry was logged, and 1 otherwise.
- * If this returns 1, you must commit the transaction to safely get your
- * data on disk.
- */
-int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct dentry *dentry)
-{
- struct dentry *parent = dget_parent(dentry);
- int ret;
-
- ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0);
- dput(parent);
-
- return ret;
-}
-
-/*
- * should be called during mount to recover and replay any log trees
- * from the FS
- */
-int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
-{
- int ret;
- struct btrfs_path *path;
- struct btrfs_trans_handle *trans;
- struct btrfs_key key;
- struct btrfs_key found_key;
- struct btrfs_key tmp_key;
- struct btrfs_root *log;
- struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
- struct walk_control wc = {
- .process_func = process_one_buffer,
- .stage = 0,
- };
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- fs_info->log_root_recovering = 1;
-
- trans = btrfs_start_transaction(fs_info->tree_root, 0);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto error;
- }
-
- wc.trans = trans;
- wc.pin = 1;
-
- ret = walk_log_tree(trans, log_root_tree, &wc);
- if (ret) {
- btrfs_error(fs_info, ret, "Failed to pin buffers while "
- "recovering log root tree.");
- goto error;
- }
-
-again:
- key.objectid = BTRFS_TREE_LOG_OBJECTID;
- key.offset = (u64)-1;
- btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
-
- while (1) {
- ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
-
- if (ret < 0) {
- btrfs_error(fs_info, ret,
- "Couldn't find tree log root.");
- goto error;
- }
- if (ret > 0) {
- if (path->slots[0] == 0)
- break;
- path->slots[0]--;
- }
- btrfs_item_key_to_cpu(path->nodes[0], &found_key,
- path->slots[0]);
- btrfs_release_path(path);
- if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID)
- break;
-
- log = btrfs_read_fs_root_no_radix(log_root_tree,
- &found_key);
- if (IS_ERR(log)) {
- ret = PTR_ERR(log);
- btrfs_error(fs_info, ret,
- "Couldn't read tree log root.");
- goto error;
- }
-
- tmp_key.objectid = found_key.offset;
- tmp_key.type = BTRFS_ROOT_ITEM_KEY;
- tmp_key.offset = (u64)-1;
-
- wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
- if (IS_ERR(wc.replay_dest)) {
- ret = PTR_ERR(wc.replay_dest);
- btrfs_error(fs_info, ret, "Couldn't read target root "
- "for tree log recovery.");
- goto error;
- }
-
- wc.replay_dest->log_root = log;
- btrfs_record_root_in_trans(trans, wc.replay_dest);
- ret = walk_log_tree(trans, log, &wc);
- BUG_ON(ret);
-
- if (wc.stage == LOG_WALK_REPLAY_ALL) {
- ret = fixup_inode_link_counts(trans, wc.replay_dest,
- path);
- BUG_ON(ret);
- }
-
- key.offset = found_key.offset - 1;
- wc.replay_dest->log_root = NULL;
- free_extent_buffer(log->node);
- free_extent_buffer(log->commit_root);
- kfree(log);
-
- if (found_key.offset == 0)
- break;
- }
- btrfs_release_path(path);
-
- /* step one is to pin it all, step two is to replay just inodes */
- if (wc.pin) {
- wc.pin = 0;
- wc.process_func = replay_one_buffer;
- wc.stage = LOG_WALK_REPLAY_INODES;
- goto again;
- }
- /* step three is to replay everything */
- if (wc.stage < LOG_WALK_REPLAY_ALL) {
- wc.stage++;
- goto again;
- }
-
- btrfs_free_path(path);
-
- free_extent_buffer(log_root_tree->node);
- log_root_tree->log_root = NULL;
- fs_info->log_root_recovering = 0;
-
- /* step 4: commit the transaction, which also unpins the blocks */
- btrfs_commit_transaction(trans, fs_info->tree_root);
-
- kfree(log_root_tree);
- return 0;
-
-error:
- btrfs_free_path(path);
- return ret;
-}
-
-/*
- * there are some corner cases where we want to force a full
- * commit instead of allowing a directory to be logged.
- *
- * They revolve around files that were unlinked from the directory, and
- * this function updates the parent directory so that a full commit is
- * properly done if it is fsync'd later after the unlinks are done.
- */
-void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
- struct inode *dir, struct inode *inode,
- int for_rename)
-{
- /*
- * when we're logging a file, if it hasn't been renamed
- * or unlinked, and its inode is fully committed on disk,
- * we don't have to worry about walking up the directory chain
- * to log its parents.
- *
- * So, we use the last_unlink_trans field to put this transid
- * into the file. When the file is logged we check it and
- * don't log the parents if the file is fully on disk.
- */
- if (S_ISREG(inode->i_mode))
- BTRFS_I(inode)->last_unlink_trans = trans->transid;
-
- /*
-	 * if this directory was already logged, any new
- * names for this file/dir will get recorded
- */
- smp_mb();
- if (BTRFS_I(dir)->logged_trans == trans->transid)
- return;
-
- /*
- * if the inode we're about to unlink was logged,
- * the log will be properly updated for any new names
- */
- if (BTRFS_I(inode)->logged_trans == trans->transid)
- return;
-
- /*
- * when renaming files across directories, if the directory
-	 * we're unlinking from gets fsync'd later on, there's
- * no way to find the destination directory later and fsync it
- * properly. So, we have to be conservative and force commits
- * so the new name gets discovered.
- */
- if (for_rename)
- goto record;
-
- /* we can safely do the unlink without any special recording */
- return;
-
-record:
- BTRFS_I(dir)->last_unlink_trans = trans->transid;
-}
-
-/*
- * Call this after adding a new name for a file and it will properly
- * update the log to reflect the new name.
- *
- * It will return zero if all goes well, and it will return 1 if a
- * full transaction commit is required.
- */
-int btrfs_log_new_name(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *old_dir,
- struct dentry *parent)
-{
- struct btrfs_root * root = BTRFS_I(inode)->root;
-
- /*
- * this will force the logging code to walk the dentry chain
- * up for the file
- */
- if (S_ISREG(inode->i_mode))
- BTRFS_I(inode)->last_unlink_trans = trans->transid;
-
- /*
- * if this inode hasn't been logged and the directory we're renaming it
- * from hasn't been logged, we don't need to log it
- */
- if (BTRFS_I(inode)->logged_trans <=
- root->fs_info->last_trans_committed &&
- (!old_dir || BTRFS_I(old_dir)->logged_trans <=
- root->fs_info->last_trans_committed))
- return 0;
-
- return btrfs_log_inode_parent(trans, root, inode, parent, 1);
-}
-
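The decision in btrfs_log_new_name reduces to comparing the inode's (and the old directory's) logged_trans against the last committed transaction. Below is a minimal user-space sketch of just that comparison; struct fake_inode and can_skip_log are illustrative stand-ins, not kernel structures:

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/* illustrative stand-ins for the fields the decision actually reads */
struct fake_inode {
	uint64_t logged_trans;	/* transaction the inode was last logged in */
};

/*
 * Returns true when a rename/link can rely on the existing state:
 * neither the inode nor the old directory has been logged since the
 * last committed transaction, so there is nothing stale to update.
 */
static bool can_skip_log(const struct fake_inode *inode,
			 const struct fake_inode *old_dir,
			 uint64_t last_trans_committed)
{
	return inode->logged_trans <= last_trans_committed &&
	       (!old_dir || old_dir->logged_trans <= last_trans_committed);
}

int main(void)
{
	struct fake_inode file = { .logged_trans = 7 };
	struct fake_inode dir  = { .logged_trans = 5 };

	/* transaction 6 already committed: the file was logged later (7) */
	printf("skip log: %s\n", can_skip_log(&file, &dir, 6) ? "yes" : "no");
	return 0;
}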
diff --git a/ANDROID_3.4.5/fs/btrfs/tree-log.h b/ANDROID_3.4.5/fs/btrfs/tree-log.h
deleted file mode 100644
index 862ac813..00000000
--- a/ANDROID_3.4.5/fs/btrfs/tree-log.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __TREE_LOG_
-#define __TREE_LOG_
-
-/* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
-#define BTRFS_NO_LOG_SYNC 256
-
-int btrfs_sync_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
-int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
-int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info);
-int btrfs_recover_log_trees(struct btrfs_root *tree_root);
-int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct dentry *dentry);
-int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- const char *name, int name_len,
- struct inode *dir, u64 index);
-int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- const char *name, int name_len,
- struct inode *inode, u64 dirid);
-void btrfs_end_log_trans(struct btrfs_root *root);
-int btrfs_pin_log_trans(struct btrfs_root *root);
-int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- struct dentry *parent, int exists_only);
-void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
- struct inode *dir, struct inode *inode,
- int for_rename);
-int btrfs_log_new_name(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *old_dir,
- struct dentry *parent);
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/ulist.c b/ANDROID_3.4.5/fs/btrfs/ulist.c
deleted file mode 100644
index 12f5147b..00000000
--- a/ANDROID_3.4.5/fs/btrfs/ulist.c
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Copyright (C) 2011 STRATO AG
- * written by Arne Jansen <sensille@gmx.net>
- * Distributed under the GNU GPL license version 2.
- */
-
-#include <linux/slab.h>
-#include <linux/module.h>
-#include "ulist.h"
-
-/*
- * ulist is a generic data structure to hold a collection of unique u64
- * values. The only operations it supports are adding to the list and
- * enumerating it.
- * It is possible to store an auxiliary value along with the key.
- *
- * The implementation is preliminary and can probably be sped up
- * significantly. A first step would be to store the values in an rbtree
- * as soon as ULIST_SIZE is exceeded.
- *
- * A sample usage for ulists is the enumeration of directed graphs without
- * visiting a node twice. The pseudo-code could look like this:
- *
- * ulist = ulist_alloc();
- * ulist_add(ulist, root);
- * elem = NULL;
- *
- * while ((elem = ulist_next(ulist, elem))) {
- * for (all child nodes n in elem)
- * ulist_add(ulist, n);
- * do something useful with the node;
- * }
- * ulist_free(ulist);
- *
- * This assumes the graph nodes are addressable by u64. This stems from the
- * usage for tree enumeration in btrfs, where the logical addresses are
- * 64 bit.
- *
- * It is also useful for tree enumeration which could be done elegantly
- * recursively, but is not possible due to kernel stack limitations. The
- * loop would be similar to the above.
- */
-
-/**
- * ulist_init - freshly initialize a ulist
- * @ulist: the ulist to initialize
- *
- * Note: don't use this function to init an already used ulist, use
- * ulist_reinit instead.
- */
-void ulist_init(struct ulist *ulist)
-{
- ulist->nnodes = 0;
- ulist->nodes = ulist->int_nodes;
- ulist->nodes_alloced = ULIST_SIZE;
-}
-EXPORT_SYMBOL(ulist_init);
-
-/**
- * ulist_fini - free up additionally allocated memory for the ulist
- * @ulist: the ulist from which to free the additional memory
- *
- * This is useful in cases where the base 'struct ulist' has been statically
- * allocated.
- */
-void ulist_fini(struct ulist *ulist)
-{
- /*
- * The first ULIST_SIZE elements are stored inline in struct ulist.
- * Only if more elements are alocated they need to be freed.
- */
- if (ulist->nodes_alloced > ULIST_SIZE)
- kfree(ulist->nodes);
- ulist->nodes_alloced = 0; /* in case ulist_fini is called twice */
-}
-EXPORT_SYMBOL(ulist_fini);
-
-/**
- * ulist_reinit - prepare a ulist for reuse
- * @ulist: ulist to be reused
- *
- * Free up all additional memory allocated for the list elements and reinit
- * the ulist.
- */
-void ulist_reinit(struct ulist *ulist)
-{
- ulist_fini(ulist);
- ulist_init(ulist);
-}
-EXPORT_SYMBOL(ulist_reinit);
-
-/**
- * ulist_alloc - dynamically allocate a ulist
- * @gfp_mask: allocation flags to for base allocation
- *
- * The allocated ulist will be returned in an initialized state.
- */
-struct ulist *ulist_alloc(unsigned long gfp_mask)
-{
- struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask);
-
- if (!ulist)
- return NULL;
-
- ulist_init(ulist);
-
- return ulist;
-}
-EXPORT_SYMBOL(ulist_alloc);
-
-/**
- * ulist_free - free dynamically allocated ulist
- * @ulist: ulist to free
- *
- * It is not necessary to call ulist_fini before.
- */
-void ulist_free(struct ulist *ulist)
-{
- if (!ulist)
- return;
- ulist_fini(ulist);
- kfree(ulist);
-}
-EXPORT_SYMBOL(ulist_free);
-
-/**
- * ulist_add - add an element to the ulist
- * @ulist: ulist to add the element to
- * @val: value to add to ulist
- * @aux: auxiliary value to store along with val
- * @gfp_mask: flags to use for allocation
- *
- * Note: locking must be provided by the caller. In case of rwlocks write
- * locking is needed
- *
- * Add an element to a ulist. The @val will only be added if it doesn't
- * already exist. If it is added, the auxiliary value @aux is stored along with
- * it. In case @val already exists in the ulist, @aux is ignored, even if
- * it differs from the already stored value.
- *
- * ulist_add returns 0 if @val already exists in ulist and 1 if @val has been
- * inserted.
- * In case of allocation failure -ENOMEM is returned and the ulist stays
- * unaltered.
- */
-int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
- unsigned long gfp_mask)
-{
- int i;
-
- for (i = 0; i < ulist->nnodes; ++i) {
- if (ulist->nodes[i].val == val)
- return 0;
- }
-
- if (ulist->nnodes >= ulist->nodes_alloced) {
- u64 new_alloced = ulist->nodes_alloced + 128;
- struct ulist_node *new_nodes;
- void *old = NULL;
-
- /*
- * if nodes_alloced == ULIST_SIZE no memory has been allocated
- * yet, so pass NULL to krealloc
- */
- if (ulist->nodes_alloced > ULIST_SIZE)
- old = ulist->nodes;
-
- new_nodes = krealloc(old, sizeof(*new_nodes) * new_alloced,
- gfp_mask);
- if (!new_nodes)
- return -ENOMEM;
-
- if (!old)
- memcpy(new_nodes, ulist->int_nodes,
- sizeof(ulist->int_nodes));
-
- ulist->nodes = new_nodes;
- ulist->nodes_alloced = new_alloced;
- }
- ulist->nodes[ulist->nnodes].val = val;
- ulist->nodes[ulist->nnodes].aux = aux;
- ++ulist->nnodes;
-
- return 1;
-}
-EXPORT_SYMBOL(ulist_add);
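The grow path in ulist_add switches from the inline int_nodes array to heap storage the first time the inline capacity is exceeded, passing NULL to krealloc and copying the inline elements by hand. A small user-space sketch of that pattern with plain realloc follows; the sizes and names (tinylist, INLINE_SIZE, GROW_STEP) are illustrative:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>

#define INLINE_SIZE 4		/* stand-in for ULIST_SIZE */
#define GROW_STEP   8		/* stand-in for the "+ 128" growth step */

struct tinylist {
	size_t nnodes;
	size_t alloced;
	uint64_t *nodes;		/* points at inline_nodes[] until it overflows */
	uint64_t inline_nodes[INLINE_SIZE];
};

static int tinylist_add(struct tinylist *l, uint64_t val)
{
	if (l->nnodes >= l->alloced) {
		size_t new_alloced = l->alloced + GROW_STEP;
		/* pass NULL the first time, mirroring the krealloc call above */
		uint64_t *old = (l->alloced > INLINE_SIZE) ? l->nodes : NULL;
		uint64_t *n = realloc(old, new_alloced * sizeof(*n));

		if (!n)
			return -1;
		if (!old)	/* first spill: copy the inline elements over */
			memcpy(n, l->inline_nodes, sizeof(l->inline_nodes));
		l->nodes = n;
		l->alloced = new_alloced;
	}
	l->nodes[l->nnodes++] = val;
	return 0;
}

int main(void)
{
	struct tinylist l = { .alloced = INLINE_SIZE };
	l.nodes = l.inline_nodes;

	for (uint64_t i = 0; i < 10; i++)
		tinylist_add(&l, i * i);
	printf("%zu elements, storage %s\n", l.nnodes,
	       l.nodes == l.inline_nodes ? "inline" : "heap");
	if (l.alloced > INLINE_SIZE)
		free(l.nodes);
	return 0;
}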
-
-/**
- * ulist_next - iterate ulist
- * @ulist: ulist to iterate
- * @prev: previously returned element or %NULL to start iteration
- *
- * Note: locking must be provided by the caller. In case of rwlocks only read
- * locking is needed
- *
- * This function is used to iterate an ulist. The iteration is started with
- * @prev = %NULL. It returns the next element from the ulist or %NULL when the
- * end is reached. No guarantee is made with respect to the order in which
- * the elements are returned. They might neither be returned in order of
- * addition nor in ascending order.
- * It is allowed to call ulist_add during an enumeration. Newly added items
- * are guaranteed to show up in the running enumeration.
- */
-struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev)
-{
- int next;
-
- if (ulist->nnodes == 0)
- return NULL;
-
- if (!prev)
- return &ulist->nodes[0];
-
- next = (prev - ulist->nodes) + 1;
- if (next < 0 || next >= ulist->nnodes)
- return NULL;
-
- return &ulist->nodes[next];
-}
-EXPORT_SYMBOL(ulist_next);
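The file-header comment sketches graph enumeration in pseudo-code. The program below is a compilable user-space version of that loop, using a toy array-backed unique set in place of the kernel ulist; iterating by index stays valid while new nodes are appended, which is exactly the property ulist_next relies on:

#include <stdio.h>
#include <stdint.h>

#define MAXN 64

/* toy unique-list: linear dedupe, append-only, like the kernel ulist */
struct uset { uint64_t v[MAXN]; int n; };

static int uset_add(struct uset *s, uint64_t val)
{
	for (int i = 0; i < s->n; i++)
		if (s->v[i] == val)
			return 0;		/* already present */
	s->v[s->n++] = val;
	return 1;				/* newly inserted */
}

/* adjacency list of a small directed graph with a cycle (0 -> 1 -> 2 -> 0) */
static const int edges[][2] = { {0, 1}, {1, 2}, {2, 0}, {1, 3} };

int main(void)
{
	struct uset seen = { .n = 0 };

	uset_add(&seen, 0);			/* start at the "root" node */

	/* iterate while appending: newly added nodes show up later in the scan */
	for (int i = 0; i < seen.n; i++) {
		printf("visit %llu\n", (unsigned long long)seen.v[i]);
		for (size_t e = 0; e < sizeof(edges) / sizeof(edges[0]); e++)
			if ((uint64_t)edges[e][0] == seen.v[i])
				uset_add(&seen, (uint64_t)edges[e][1]);
	}
	return 0;
}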
diff --git a/ANDROID_3.4.5/fs/btrfs/ulist.h b/ANDROID_3.4.5/fs/btrfs/ulist.h
deleted file mode 100644
index 2e25dec5..00000000
--- a/ANDROID_3.4.5/fs/btrfs/ulist.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2011 STRATO AG
- * written by Arne Jansen <sensille@gmx.net>
- * Distributed under the GNU GPL license version 2.
- *
- */
-
-#ifndef __ULIST__
-#define __ULIST__
-
-/*
- * ulist is a generic data structure to hold a collection of unique u64
- * values. The only operations it supports is adding to the list and
- * enumerating it.
- * It is possible to store an auxiliary value along with the key.
- *
- * The implementation is preliminary and can probably be sped up
- * significantly. A first step would be to store the values in an rbtree
- * as soon as ULIST_SIZE is exceeded.
- */
-
-/*
- * number of elements statically allocated inside struct ulist
- */
-#define ULIST_SIZE 16
-
-/*
- * element of the list
- */
-struct ulist_node {
- u64 val; /* value to store */
- unsigned long aux; /* auxiliary value saved along with the val */
-};
-
-struct ulist {
- /*
- * number of elements stored in list
- */
- unsigned long nnodes;
-
- /*
- * number of nodes we already have room for
- */
- unsigned long nodes_alloced;
-
- /*
- * pointer to the array storing the elements. The first ULIST_SIZE
- * elements are stored inline. In this case it points to int_nodes.
- * After exceeding ULIST_SIZE, dynamic memory is allocated.
- */
- struct ulist_node *nodes;
-
- /*
- * inline storage space for the first ULIST_SIZE entries
- */
- struct ulist_node int_nodes[ULIST_SIZE];
-};
-
-void ulist_init(struct ulist *ulist);
-void ulist_fini(struct ulist *ulist);
-void ulist_reinit(struct ulist *ulist);
-struct ulist *ulist_alloc(unsigned long gfp_mask);
-void ulist_free(struct ulist *ulist);
-int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
- unsigned long gfp_mask);
-struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev);
-
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/version.h b/ANDROID_3.4.5/fs/btrfs/version.h
deleted file mode 100644
index 9bf3946d..00000000
--- a/ANDROID_3.4.5/fs/btrfs/version.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __BTRFS_VERSION_H
-#define __BTRFS_VERSION_H
-#define BTRFS_BUILD_VERSION "Btrfs"
-#endif
diff --git a/ANDROID_3.4.5/fs/btrfs/volumes.c b/ANDROID_3.4.5/fs/btrfs/volumes.c
deleted file mode 100644
index 1411b995..00000000
--- a/ANDROID_3.4.5/fs/btrfs/volumes.c
+++ /dev/null
@@ -1,4585 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-#include <linux/sched.h>
-#include <linux/bio.h>
-#include <linux/slab.h>
-#include <linux/buffer_head.h>
-#include <linux/blkdev.h>
-#include <linux/random.h>
-#include <linux/iocontext.h>
-#include <linux/capability.h>
-#include <linux/kthread.h>
-#include <asm/div64.h>
-#include "compat.h"
-#include "ctree.h"
-#include "extent_map.h"
-#include "disk-io.h"
-#include "transaction.h"
-#include "print-tree.h"
-#include "volumes.h"
-#include "async-thread.h"
-#include "check-integrity.h"
-
-static int init_first_rw_device(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_device *device);
-static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
-
-static DEFINE_MUTEX(uuid_mutex);
-static LIST_HEAD(fs_uuids);
-
-static void lock_chunks(struct btrfs_root *root)
-{
- mutex_lock(&root->fs_info->chunk_mutex);
-}
-
-static void unlock_chunks(struct btrfs_root *root)
-{
- mutex_unlock(&root->fs_info->chunk_mutex);
-}
-
-static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
-{
- struct btrfs_device *device;
- WARN_ON(fs_devices->opened);
- while (!list_empty(&fs_devices->devices)) {
- device = list_entry(fs_devices->devices.next,
- struct btrfs_device, dev_list);
- list_del(&device->dev_list);
- kfree(device->name);
- kfree(device);
- }
- kfree(fs_devices);
-}
-
-void btrfs_cleanup_fs_uuids(void)
-{
- struct btrfs_fs_devices *fs_devices;
-
- while (!list_empty(&fs_uuids)) {
- fs_devices = list_entry(fs_uuids.next,
- struct btrfs_fs_devices, list);
- list_del(&fs_devices->list);
- free_fs_devices(fs_devices);
- }
-}
-
-static noinline struct btrfs_device *__find_device(struct list_head *head,
- u64 devid, u8 *uuid)
-{
- struct btrfs_device *dev;
-
- list_for_each_entry(dev, head, dev_list) {
- if (dev->devid == devid &&
- (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
- return dev;
- }
- }
- return NULL;
-}
-
-static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
-{
- struct btrfs_fs_devices *fs_devices;
-
- list_for_each_entry(fs_devices, &fs_uuids, list) {
- if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
- return fs_devices;
- }
- return NULL;
-}
-
-static void requeue_list(struct btrfs_pending_bios *pending_bios,
- struct bio *head, struct bio *tail)
-{
-
- struct bio *old_head;
-
- old_head = pending_bios->head;
- pending_bios->head = head;
- if (pending_bios->tail)
- tail->bi_next = old_head;
- else
- pending_bios->tail = tail;
-}
-
-/*
- * we try to collect pending bios for a device so we don't get a large
- * number of procs sending bios down to the same device. This greatly
- * improves the scheduler's ability to collect and merge the bios.
- *
- * But, it also turns into a long list of bios to process and that is sure
- * to eventually make the worker thread block. The solution here is to
- * make some progress and then put this work struct back at the end of
- * the list if the block device is congested. This way, multiple devices
- * can make progress from a single worker thread.
- */
-static noinline void run_scheduled_bios(struct btrfs_device *device)
-{
- struct bio *pending;
- struct backing_dev_info *bdi;
- struct btrfs_fs_info *fs_info;
- struct btrfs_pending_bios *pending_bios;
- struct bio *tail;
- struct bio *cur;
- int again = 0;
- unsigned long num_run;
- unsigned long batch_run = 0;
- unsigned long limit;
- unsigned long last_waited = 0;
- int force_reg = 0;
- int sync_pending = 0;
- struct blk_plug plug;
-
- /*
- * this function runs all the bios we've collected for
- * a particular device. We don't want to wander off to
- * another device without first sending all of these down.
- * So, set up a plug here and finish it off before we return
- */
- blk_start_plug(&plug);
-
- bdi = blk_get_backing_dev_info(device->bdev);
- fs_info = device->dev_root->fs_info;
- limit = btrfs_async_submit_limit(fs_info);
- limit = limit * 2 / 3;
-
-loop:
- spin_lock(&device->io_lock);
-
-loop_lock:
- num_run = 0;
-
- /* take all the bios off the list at once and process them
- * later on (without the lock held). But, remember the
- * tail and other pointers so the bios can be properly reinserted
- * into the list if we hit congestion
- */
- if (!force_reg && device->pending_sync_bios.head) {
- pending_bios = &device->pending_sync_bios;
- force_reg = 1;
- } else {
- pending_bios = &device->pending_bios;
- force_reg = 0;
- }
-
- pending = pending_bios->head;
- tail = pending_bios->tail;
- WARN_ON(pending && !tail);
-
- /*
- * if pending was null this time around, no bios need processing
- * at all and we can stop. Otherwise it'll loop back up again
- * and do an additional check so no bios are missed.
- *
- * device->running_pending is used to synchronize with the
- * schedule_bio code.
- */
- if (device->pending_sync_bios.head == NULL &&
- device->pending_bios.head == NULL) {
- again = 0;
- device->running_pending = 0;
- } else {
- again = 1;
- device->running_pending = 1;
- }
-
- pending_bios->head = NULL;
- pending_bios->tail = NULL;
-
- spin_unlock(&device->io_lock);
-
- while (pending) {
-
- rmb();
- /* we want to work on both lists, but do more bios on the
- * sync list than the regular list
- */
- if ((num_run > 32 &&
- pending_bios != &device->pending_sync_bios &&
- device->pending_sync_bios.head) ||
- (num_run > 64 && pending_bios == &device->pending_sync_bios &&
- device->pending_bios.head)) {
- spin_lock(&device->io_lock);
- requeue_list(pending_bios, pending, tail);
- goto loop_lock;
- }
-
- cur = pending;
- pending = pending->bi_next;
- cur->bi_next = NULL;
- atomic_dec(&fs_info->nr_async_bios);
-
- if (atomic_read(&fs_info->nr_async_bios) < limit &&
- waitqueue_active(&fs_info->async_submit_wait))
- wake_up(&fs_info->async_submit_wait);
-
- BUG_ON(atomic_read(&cur->bi_cnt) == 0);
-
- /*
- * if we're doing the sync list, record that our
- * plug has some sync requests on it
- *
- * If we're doing the regular list and there are
- * sync requests sitting around, unplug before
- * we add more
- */
- if (pending_bios == &device->pending_sync_bios) {
- sync_pending = 1;
- } else if (sync_pending) {
- blk_finish_plug(&plug);
- blk_start_plug(&plug);
- sync_pending = 0;
- }
-
- btrfsic_submit_bio(cur->bi_rw, cur);
- num_run++;
- batch_run++;
- if (need_resched())
- cond_resched();
-
- /*
- * we made progress, there is more work to do and the bdi
- * is now congested. Back off and let other work structs
- * run instead
- */
- if (pending && bdi_write_congested(bdi) && batch_run > 8 &&
- fs_info->fs_devices->open_devices > 1) {
- struct io_context *ioc;
-
- ioc = current->io_context;
-
- /*
- * the main goal here is that we don't want to
- * block if we're going to be able to submit
- * more requests without blocking.
- *
- * This code does two great things, it pokes into
- * the elevator code from a filesystem _and_
- * it makes assumptions about how batching works.
- */
- if (ioc && ioc->nr_batch_requests > 0 &&
- time_before(jiffies, ioc->last_waited + HZ/50UL) &&
- (last_waited == 0 ||
- ioc->last_waited == last_waited)) {
- /*
- * we want to go through our batch of
- * requests and stop. So, we copy out
- * the ioc->last_waited time and test
- * against it before looping
- */
- last_waited = ioc->last_waited;
- if (need_resched())
- cond_resched();
- continue;
- }
- spin_lock(&device->io_lock);
- requeue_list(pending_bios, pending, tail);
- device->running_pending = 1;
-
- spin_unlock(&device->io_lock);
- btrfs_requeue_work(&device->work);
- goto done;
- }
- /* unplug every 64 requests just for good measure */
- if (batch_run % 64 == 0) {
- blk_finish_plug(&plug);
- blk_start_plug(&plug);
- sync_pending = 0;
- }
- }
-
- cond_resched();
- if (again)
- goto loop;
-
- spin_lock(&device->io_lock);
- if (device->pending_bios.head || device->pending_sync_bios.head)
- goto loop_lock;
- spin_unlock(&device->io_lock);
-
-done:
- blk_finish_plug(&plug);
-}
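run_scheduled_bios follows a common pattern: take the spinlock, detach the whole pending list, drop the lock, then process the detached items so submitters never wait behind the actual I/O. The pthread sketch below shows only that detach-then-process idea (not the congestion/requeue handling); the work struct and queue are illustrative:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct work {
	int id;
	struct work *next;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct work *pending;		/* protected by lock */

static void submit(int id)
{
	struct work *w = malloc(sizeof(*w));

	if (!w)
		return;
	w->id = id;
	pthread_mutex_lock(&lock);
	w->next = pending;		/* push; order does not matter here */
	pending = w;
	pthread_mutex_unlock(&lock);
}

/* detach the whole list under the lock, then process it unlocked */
static void drain(void)
{
	struct work *batch;

	pthread_mutex_lock(&lock);
	batch = pending;
	pending = NULL;
	pthread_mutex_unlock(&lock);

	while (batch) {
		struct work *cur = batch;

		batch = batch->next;
		printf("processing work item %d\n", cur->id);
		free(cur);
	}
}

int main(void)
{
	for (int i = 0; i < 5; i++)
		submit(i);
	drain();
	return 0;
}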
-
-static void pending_bios_fn(struct btrfs_work *work)
-{
- struct btrfs_device *device;
-
- device = container_of(work, struct btrfs_device, work);
- run_scheduled_bios(device);
-}
-
-static noinline int device_list_add(const char *path,
- struct btrfs_super_block *disk_super,
- u64 devid, struct btrfs_fs_devices **fs_devices_ret)
-{
- struct btrfs_device *device;
- struct btrfs_fs_devices *fs_devices;
- u64 found_transid = btrfs_super_generation(disk_super);
- char *name;
-
- fs_devices = find_fsid(disk_super->fsid);
- if (!fs_devices) {
- fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
- if (!fs_devices)
- return -ENOMEM;
- INIT_LIST_HEAD(&fs_devices->devices);
- INIT_LIST_HEAD(&fs_devices->alloc_list);
- list_add(&fs_devices->list, &fs_uuids);
- memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
- fs_devices->latest_devid = devid;
- fs_devices->latest_trans = found_transid;
- mutex_init(&fs_devices->device_list_mutex);
- device = NULL;
- } else {
- device = __find_device(&fs_devices->devices, devid,
- disk_super->dev_item.uuid);
- }
- if (!device) {
- if (fs_devices->opened)
- return -EBUSY;
-
- device = kzalloc(sizeof(*device), GFP_NOFS);
- if (!device) {
- /* we can safely leave the fs_devices entry around */
- return -ENOMEM;
- }
- device->devid = devid;
- device->work.func = pending_bios_fn;
- memcpy(device->uuid, disk_super->dev_item.uuid,
- BTRFS_UUID_SIZE);
- spin_lock_init(&device->io_lock);
- device->name = kstrdup(path, GFP_NOFS);
- if (!device->name) {
- kfree(device);
- return -ENOMEM;
- }
- INIT_LIST_HEAD(&device->dev_alloc_list);
-
- /* init readahead state */
- spin_lock_init(&device->reada_lock);
- device->reada_curr_zone = NULL;
- atomic_set(&device->reada_in_flight, 0);
- device->reada_next = 0;
- INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT);
- INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT);
-
- mutex_lock(&fs_devices->device_list_mutex);
- list_add_rcu(&device->dev_list, &fs_devices->devices);
- mutex_unlock(&fs_devices->device_list_mutex);
-
- device->fs_devices = fs_devices;
- fs_devices->num_devices++;
- } else if (!device->name || strcmp(device->name, path)) {
- name = kstrdup(path, GFP_NOFS);
- if (!name)
- return -ENOMEM;
- kfree(device->name);
- device->name = name;
- if (device->missing) {
- fs_devices->missing_devices--;
- device->missing = 0;
- }
- }
-
- if (found_transid > fs_devices->latest_trans) {
- fs_devices->latest_devid = devid;
- fs_devices->latest_trans = found_transid;
- }
- *fs_devices_ret = fs_devices;
- return 0;
-}
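device_list_add, and the open/close paths further down, keep latest_devid and latest_trans pointing at whichever device carries the highest superblock generation. A minimal sketch of that selection over an in-memory array (the fields are illustrative):

#include <stdio.h>
#include <stdint.h>

struct dev { uint64_t devid; uint64_t generation; };

int main(void)
{
	struct dev devs[] = { {1, 100}, {2, 104}, {3, 102} };
	uint64_t latest_devid = 0, latest_trans = 0;

	for (size_t i = 0; i < sizeof(devs) / sizeof(devs[0]); i++) {
		if (!latest_trans || devs[i].generation > latest_trans) {
			latest_trans = devs[i].generation;
			latest_devid = devs[i].devid;
		}
	}
	printf("latest devid %llu at generation %llu\n",
	       (unsigned long long)latest_devid,
	       (unsigned long long)latest_trans);
	return 0;
}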
-
-static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
-{
- struct btrfs_fs_devices *fs_devices;
- struct btrfs_device *device;
- struct btrfs_device *orig_dev;
-
- fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
- if (!fs_devices)
- return ERR_PTR(-ENOMEM);
-
- INIT_LIST_HEAD(&fs_devices->devices);
- INIT_LIST_HEAD(&fs_devices->alloc_list);
- INIT_LIST_HEAD(&fs_devices->list);
- mutex_init(&fs_devices->device_list_mutex);
- fs_devices->latest_devid = orig->latest_devid;
- fs_devices->latest_trans = orig->latest_trans;
- memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
-
- /* We have held the volume lock, it is safe to get the devices. */
- list_for_each_entry(orig_dev, &orig->devices, dev_list) {
- device = kzalloc(sizeof(*device), GFP_NOFS);
- if (!device)
- goto error;
-
- device->name = kstrdup(orig_dev->name, GFP_NOFS);
- if (!device->name) {
- kfree(device);
- goto error;
- }
-
- device->devid = orig_dev->devid;
- device->work.func = pending_bios_fn;
- memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
- spin_lock_init(&device->io_lock);
- INIT_LIST_HEAD(&device->dev_list);
- INIT_LIST_HEAD(&device->dev_alloc_list);
-
- list_add(&device->dev_list, &fs_devices->devices);
- device->fs_devices = fs_devices;
- fs_devices->num_devices++;
- }
- return fs_devices;
-error:
- free_fs_devices(fs_devices);
- return ERR_PTR(-ENOMEM);
-}
-
-void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
-{
- struct btrfs_device *device, *next;
-
- struct block_device *latest_bdev = NULL;
- u64 latest_devid = 0;
- u64 latest_transid = 0;
-
- mutex_lock(&uuid_mutex);
-again:
- /* This is the initialized path, it is safe to release the devices. */
- list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
- if (device->in_fs_metadata) {
- if (!latest_transid ||
- device->generation > latest_transid) {
- latest_devid = device->devid;
- latest_transid = device->generation;
- latest_bdev = device->bdev;
- }
- continue;
- }
-
- if (device->bdev) {
- blkdev_put(device->bdev, device->mode);
- device->bdev = NULL;
- fs_devices->open_devices--;
- }
- if (device->writeable) {
- list_del_init(&device->dev_alloc_list);
- device->writeable = 0;
- fs_devices->rw_devices--;
- }
- list_del_init(&device->dev_list);
- fs_devices->num_devices--;
- kfree(device->name);
- kfree(device);
- }
-
- if (fs_devices->seed) {
- fs_devices = fs_devices->seed;
- goto again;
- }
-
- fs_devices->latest_bdev = latest_bdev;
- fs_devices->latest_devid = latest_devid;
- fs_devices->latest_trans = latest_transid;
-
- mutex_unlock(&uuid_mutex);
-}
-
-static void __free_device(struct work_struct *work)
-{
- struct btrfs_device *device;
-
- device = container_of(work, struct btrfs_device, rcu_work);
-
- if (device->bdev)
- blkdev_put(device->bdev, device->mode);
-
- kfree(device->name);
- kfree(device);
-}
-
-static void free_device(struct rcu_head *head)
-{
- struct btrfs_device *device;
-
- device = container_of(head, struct btrfs_device, rcu);
-
- INIT_WORK(&device->rcu_work, __free_device);
- schedule_work(&device->rcu_work);
-}
-
-static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
-{
- struct btrfs_device *device;
-
- if (--fs_devices->opened > 0)
- return 0;
-
- mutex_lock(&fs_devices->device_list_mutex);
- list_for_each_entry(device, &fs_devices->devices, dev_list) {
- struct btrfs_device *new_device;
-
- if (device->bdev)
- fs_devices->open_devices--;
-
- if (device->writeable) {
- list_del_init(&device->dev_alloc_list);
- fs_devices->rw_devices--;
- }
-
- if (device->can_discard)
- fs_devices->num_can_discard--;
-
- new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
- BUG_ON(!new_device); /* -ENOMEM */
- memcpy(new_device, device, sizeof(*new_device));
- new_device->name = kstrdup(device->name, GFP_NOFS);
- BUG_ON(device->name && !new_device->name); /* -ENOMEM */
- new_device->bdev = NULL;
- new_device->writeable = 0;
- new_device->in_fs_metadata = 0;
- new_device->can_discard = 0;
- list_replace_rcu(&device->dev_list, &new_device->dev_list);
-
- call_rcu(&device->rcu, free_device);
- }
- mutex_unlock(&fs_devices->device_list_mutex);
-
- WARN_ON(fs_devices->open_devices);
- WARN_ON(fs_devices->rw_devices);
- fs_devices->opened = 0;
- fs_devices->seeding = 0;
-
- return 0;
-}
-
-int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
-{
- struct btrfs_fs_devices *seed_devices = NULL;
- int ret;
-
- mutex_lock(&uuid_mutex);
- ret = __btrfs_close_devices(fs_devices);
- if (!fs_devices->opened) {
- seed_devices = fs_devices->seed;
- fs_devices->seed = NULL;
- }
- mutex_unlock(&uuid_mutex);
-
- while (seed_devices) {
- fs_devices = seed_devices;
- seed_devices = fs_devices->seed;
- __btrfs_close_devices(fs_devices);
- free_fs_devices(fs_devices);
- }
- return ret;
-}
-
-static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
- fmode_t flags, void *holder)
-{
- struct request_queue *q;
- struct block_device *bdev;
- struct list_head *head = &fs_devices->devices;
- struct btrfs_device *device;
- struct block_device *latest_bdev = NULL;
- struct buffer_head *bh;
- struct btrfs_super_block *disk_super;
- u64 latest_devid = 0;
- u64 latest_transid = 0;
- u64 devid;
- int seeding = 1;
- int ret = 0;
-
- flags |= FMODE_EXCL;
-
- list_for_each_entry(device, head, dev_list) {
- if (device->bdev)
- continue;
- if (!device->name)
- continue;
-
- bdev = blkdev_get_by_path(device->name, flags, holder);
- if (IS_ERR(bdev)) {
- printk(KERN_INFO "open %s failed\n", device->name);
- goto error;
- }
- filemap_write_and_wait(bdev->bd_inode->i_mapping);
- invalidate_bdev(bdev);
- set_blocksize(bdev, 4096);
-
- bh = btrfs_read_dev_super(bdev);
- if (!bh)
- goto error_close;
-
- disk_super = (struct btrfs_super_block *)bh->b_data;
- devid = btrfs_stack_device_id(&disk_super->dev_item);
- if (devid != device->devid)
- goto error_brelse;
-
- if (memcmp(device->uuid, disk_super->dev_item.uuid,
- BTRFS_UUID_SIZE))
- goto error_brelse;
-
- device->generation = btrfs_super_generation(disk_super);
- if (!latest_transid || device->generation > latest_transid) {
- latest_devid = devid;
- latest_transid = device->generation;
- latest_bdev = bdev;
- }
-
- if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
- device->writeable = 0;
- } else {
- device->writeable = !bdev_read_only(bdev);
- seeding = 0;
- }
-
- q = bdev_get_queue(bdev);
- if (blk_queue_discard(q)) {
- device->can_discard = 1;
- fs_devices->num_can_discard++;
- }
-
- device->bdev = bdev;
- device->in_fs_metadata = 0;
- device->mode = flags;
-
- if (!blk_queue_nonrot(bdev_get_queue(bdev)))
- fs_devices->rotating = 1;
-
- fs_devices->open_devices++;
- if (device->writeable) {
- fs_devices->rw_devices++;
- list_add(&device->dev_alloc_list,
- &fs_devices->alloc_list);
- }
- brelse(bh);
- continue;
-
-error_brelse:
- brelse(bh);
-error_close:
- blkdev_put(bdev, flags);
-error:
- continue;
- }
- if (fs_devices->open_devices == 0) {
- ret = -EINVAL;
- goto out;
- }
- fs_devices->seeding = seeding;
- fs_devices->opened = 1;
- fs_devices->latest_bdev = latest_bdev;
- fs_devices->latest_devid = latest_devid;
- fs_devices->latest_trans = latest_transid;
- fs_devices->total_rw_bytes = 0;
-out:
- return ret;
-}
-
-int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
- fmode_t flags, void *holder)
-{
- int ret;
-
- mutex_lock(&uuid_mutex);
- if (fs_devices->opened) {
- fs_devices->opened++;
- ret = 0;
- } else {
- ret = __btrfs_open_devices(fs_devices, flags, holder);
- }
- mutex_unlock(&uuid_mutex);
- return ret;
-}
-
-int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
- struct btrfs_fs_devices **fs_devices_ret)
-{
- struct btrfs_super_block *disk_super;
- struct block_device *bdev;
- struct buffer_head *bh;
- int ret;
- u64 devid;
- u64 transid;
-
- flags |= FMODE_EXCL;
- bdev = blkdev_get_by_path(path, flags, holder);
-
- if (IS_ERR(bdev)) {
- ret = PTR_ERR(bdev);
- goto error;
- }
-
- mutex_lock(&uuid_mutex);
- ret = set_blocksize(bdev, 4096);
- if (ret)
- goto error_close;
- bh = btrfs_read_dev_super(bdev);
- if (!bh) {
- ret = -EINVAL;
- goto error_close;
- }
- disk_super = (struct btrfs_super_block *)bh->b_data;
- devid = btrfs_stack_device_id(&disk_super->dev_item);
- transid = btrfs_super_generation(disk_super);
- if (disk_super->label[0])
- printk(KERN_INFO "device label %s ", disk_super->label);
- else
- printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
- printk(KERN_CONT "devid %llu transid %llu %s\n",
- (unsigned long long)devid, (unsigned long long)transid, path);
- ret = device_list_add(path, disk_super, devid, fs_devices_ret);
-
- brelse(bh);
-error_close:
- mutex_unlock(&uuid_mutex);
- blkdev_put(bdev, flags);
-error:
- return ret;
-}
-
-/* helper to account the used device space in the range */
-int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
- u64 end, u64 *length)
-{
- struct btrfs_key key;
- struct btrfs_root *root = device->dev_root;
- struct btrfs_dev_extent *dev_extent;
- struct btrfs_path *path;
- u64 extent_end;
- int ret;
- int slot;
- struct extent_buffer *l;
-
- *length = 0;
-
- if (start >= device->total_bytes)
- return 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->reada = 2;
-
- key.objectid = device->devid;
- key.offset = start;
- key.type = BTRFS_DEV_EXTENT_KEY;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- if (ret > 0) {
- ret = btrfs_previous_item(root, path, key.objectid, key.type);
- if (ret < 0)
- goto out;
- }
-
- while (1) {
- l = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(l)) {
- ret = btrfs_next_leaf(root, path);
- if (ret == 0)
- continue;
- if (ret < 0)
- goto out;
-
- break;
- }
- btrfs_item_key_to_cpu(l, &key, slot);
-
- if (key.objectid < device->devid)
- goto next;
-
- if (key.objectid > device->devid)
- break;
-
- if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
- goto next;
-
- dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
- extent_end = key.offset + btrfs_dev_extent_length(l,
- dev_extent);
- if (key.offset <= start && extent_end > end) {
- *length = end - start + 1;
- break;
- } else if (key.offset <= start && extent_end > start)
- *length += extent_end - start;
- else if (key.offset > start && extent_end <= end)
- *length += extent_end - key.offset;
- else if (key.offset > start && key.offset <= end) {
- *length += end - key.offset + 1;
- break;
- } else if (key.offset > end)
- break;
-
-next:
- path->slots[0]++;
- }
- ret = 0;
-out:
- btrfs_free_path(path);
- return ret;
-}
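The four overlap cases in the loop above amount to clamping each dev extent against the inclusive query range [start, end] and summing the intersections. A user-space sketch of the same accounting over an in-memory array (the kernel walks the device tree instead):

#include <stdio.h>
#include <stdint.h>

struct extent { uint64_t offset; uint64_t length; };

/* sum of bytes of each extent that fall inside the inclusive range [start, end] */
static uint64_t account_range(const struct extent *e, size_t n,
			      uint64_t start, uint64_t end)
{
	uint64_t total = 0;

	for (size_t i = 0; i < n; i++) {
		uint64_t lo = e[i].offset;
		uint64_t hi = e[i].offset + e[i].length;	/* exclusive */

		if (lo > end || hi <= start)
			continue;			/* no overlap */
		if (lo < start)
			lo = start;
		if (hi > end + 1)
			hi = end + 1;
		total += hi - lo;
	}
	return total;
}

int main(void)
{
	struct extent extents[] = { {0, 100}, {150, 50}, {300, 100} };

	/* query [50, 199]: 50 bytes of the first extent + all 50 of the second */
	printf("used in range: %llu\n", (unsigned long long)
	       account_range(extents, 3, 50, 199));
	return 0;
}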
-
-/*
- * find_free_dev_extent - find free space in the specified device
- * @device: the device which we search the free space in
- * @num_bytes: the size of the free space that we need
- * @start: store the start of the free space.
- * @len: the size of the free space that we find, or the size of the max
- * free space if we don't find suitable free space
- *
- * this uses a pretty simple search, the expectation is that it is
- * called very infrequently and that a given device has a small number
- * of extents
- *
- * @start is used to store the start of the free space if we find it. But if we
- * don't find suitable free space, it will be used to store the start position
- * of the max free space.
- *
- * @len is used to store the size of the free space that we find.
- * But if we don't find suitable free space, it is used to store the size of
- * the max free space.
- */
-int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
- u64 *start, u64 *len)
-{
- struct btrfs_key key;
- struct btrfs_root *root = device->dev_root;
- struct btrfs_dev_extent *dev_extent;
- struct btrfs_path *path;
- u64 hole_size;
- u64 max_hole_start;
- u64 max_hole_size;
- u64 extent_end;
- u64 search_start;
- u64 search_end = device->total_bytes;
- int ret;
- int slot;
- struct extent_buffer *l;
-
- /* FIXME use last free of some kind */
-
- /* we don't want to overwrite the superblock on the drive,
- * so we make sure to start at an offset of at least 1MB
- */
- search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
-
- max_hole_start = search_start;
- max_hole_size = 0;
- hole_size = 0;
-
- if (search_start >= search_end) {
- ret = -ENOSPC;
- goto error;
- }
-
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto error;
- }
- path->reada = 2;
-
- key.objectid = device->devid;
- key.offset = search_start;
- key.type = BTRFS_DEV_EXTENT_KEY;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- if (ret > 0) {
- ret = btrfs_previous_item(root, path, key.objectid, key.type);
- if (ret < 0)
- goto out;
- }
-
- while (1) {
- l = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(l)) {
- ret = btrfs_next_leaf(root, path);
- if (ret == 0)
- continue;
- if (ret < 0)
- goto out;
-
- break;
- }
- btrfs_item_key_to_cpu(l, &key, slot);
-
- if (key.objectid < device->devid)
- goto next;
-
- if (key.objectid > device->devid)
- break;
-
- if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
- goto next;
-
- if (key.offset > search_start) {
- hole_size = key.offset - search_start;
-
- if (hole_size > max_hole_size) {
- max_hole_start = search_start;
- max_hole_size = hole_size;
- }
-
- /*
- * If this free space is greater than what we need,
- * it must be the max free space that we have found
- * until now, so max_hole_start must point to the start
- * of this free space and the length of this free space
- * is stored in max_hole_size. Thus, we return
- * max_hole_start and max_hole_size and go back to the
- * caller.
- */
- if (hole_size >= num_bytes) {
- ret = 0;
- goto out;
- }
- }
-
- dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
- extent_end = key.offset + btrfs_dev_extent_length(l,
- dev_extent);
- if (extent_end > search_start)
- search_start = extent_end;
-next:
- path->slots[0]++;
- cond_resched();
- }
-
- /*
- * At this point, search_start should be the end of
- * allocated dev extents, and when shrinking the device,
- * search_end may be smaller than search_start.
- */
- if (search_end > search_start)
- hole_size = search_end - search_start;
-
- if (hole_size > max_hole_size) {
- max_hole_start = search_start;
- max_hole_size = hole_size;
- }
-
- /* See above. */
- if (hole_size < num_bytes)
- ret = -ENOSPC;
- else
- ret = 0;
-
-out:
- btrfs_free_path(path);
-error:
- *start = max_hole_start;
- if (len)
- *len = max_hole_size;
- return ret;
-}
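Stripped of the btree walk, find_free_dev_extent scans allocated extents in offset order, measures the hole in front of each one, and remembers the largest hole so it can report it even when nothing is big enough. A self-contained sketch of that scan, assuming the extents are already sorted by offset (the types are illustrative):

#include <stdio.h>
#include <stdint.h>
#include <errno.h>

struct extent { uint64_t offset; uint64_t length; };

/*
 * Find a hole of at least num_bytes between search_start and dev_size.
 * On success *start is the hole's offset; on -ENOSPC it points at the
 * largest hole found, whose size is returned in *len.
 */
static int find_free(const struct extent *e, size_t n, uint64_t dev_size,
		     uint64_t search_start, uint64_t num_bytes,
		     uint64_t *start, uint64_t *len)
{
	uint64_t max_start = search_start, max_size = 0;

	for (size_t i = 0; i < n; i++) {
		if (e[i].offset > search_start) {
			uint64_t hole = e[i].offset - search_start;

			if (hole > max_size) {
				max_start = search_start;
				max_size = hole;
			}
			if (hole >= num_bytes)
				goto done;
		}
		if (e[i].offset + e[i].length > search_start)
			search_start = e[i].offset + e[i].length;
	}
	/* trailing hole up to the end of the device */
	if (dev_size > search_start && dev_size - search_start > max_size) {
		max_start = search_start;
		max_size = dev_size - search_start;
	}
done:
	*start = max_start;
	*len = max_size;
	return max_size >= num_bytes ? 0 : -ENOSPC;
}

int main(void)
{
	struct extent used[] = { {0, 64}, {64, 32}, {128, 64} };
	uint64_t start, len;
	int ret = find_free(used, 3, 1024, 0, 256, &start, &len);

	printf("ret=%d start=%llu len=%llu\n", ret,
	       (unsigned long long)start, (unsigned long long)len);
	return 0;
}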
-
-static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
- struct btrfs_device *device,
- u64 start)
-{
- int ret;
- struct btrfs_path *path;
- struct btrfs_root *root = device->dev_root;
- struct btrfs_key key;
- struct btrfs_key found_key;
- struct extent_buffer *leaf = NULL;
- struct btrfs_dev_extent *extent = NULL;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = device->devid;
- key.offset = start;
- key.type = BTRFS_DEV_EXTENT_KEY;
-again:
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret > 0) {
- ret = btrfs_previous_item(root, path, key.objectid,
- BTRFS_DEV_EXTENT_KEY);
- if (ret)
- goto out;
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- extent = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_dev_extent);
- BUG_ON(found_key.offset > start || found_key.offset +
- btrfs_dev_extent_length(leaf, extent) < start);
- key = found_key;
- btrfs_release_path(path);
- goto again;
- } else if (ret == 0) {
- leaf = path->nodes[0];
- extent = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_dev_extent);
- } else {
- btrfs_error(root->fs_info, ret, "Slot search failed");
- goto out;
- }
-
- if (device->bytes_used > 0) {
- u64 len = btrfs_dev_extent_length(leaf, extent);
- device->bytes_used -= len;
- spin_lock(&root->fs_info->free_chunk_lock);
- root->fs_info->free_chunk_space += len;
- spin_unlock(&root->fs_info->free_chunk_lock);
- }
- ret = btrfs_del_item(trans, root, path);
- if (ret) {
- btrfs_error(root->fs_info, ret,
- "Failed to remove dev extent item");
- }
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
- struct btrfs_device *device,
- u64 chunk_tree, u64 chunk_objectid,
- u64 chunk_offset, u64 start, u64 num_bytes)
-{
- int ret;
- struct btrfs_path *path;
- struct btrfs_root *root = device->dev_root;
- struct btrfs_dev_extent *extent;
- struct extent_buffer *leaf;
- struct btrfs_key key;
-
- WARN_ON(!device->in_fs_metadata);
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = device->devid;
- key.offset = start;
- key.type = BTRFS_DEV_EXTENT_KEY;
- ret = btrfs_insert_empty_item(trans, root, path, &key,
- sizeof(*extent));
- if (ret)
- goto out;
-
- leaf = path->nodes[0];
- extent = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_dev_extent);
- btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
- btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
- btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
-
- write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
- BTRFS_UUID_SIZE);
-
- btrfs_set_dev_extent_length(leaf, extent, num_bytes);
- btrfs_mark_buffer_dirty(leaf);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-static noinline int find_next_chunk(struct btrfs_root *root,
- u64 objectid, u64 *offset)
-{
- struct btrfs_path *path;
- int ret;
- struct btrfs_key key;
- struct btrfs_chunk *chunk;
- struct btrfs_key found_key;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = objectid;
- key.offset = (u64)-1;
- key.type = BTRFS_CHUNK_ITEM_KEY;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto error;
-
- BUG_ON(ret == 0); /* Corruption */
-
- ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
- if (ret) {
- *offset = 0;
- } else {
- btrfs_item_key_to_cpu(path->nodes[0], &found_key,
- path->slots[0]);
- if (found_key.objectid != objectid)
- *offset = 0;
- else {
- chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_chunk);
- *offset = found_key.offset +
- btrfs_chunk_length(path->nodes[0], chunk);
- }
- }
- ret = 0;
-error:
- btrfs_free_path(path);
- return ret;
-}
-
-static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
-{
- int ret;
- struct btrfs_key key;
- struct btrfs_key found_key;
- struct btrfs_path *path;
-
- root = root->fs_info->chunk_root;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
- key.type = BTRFS_DEV_ITEM_KEY;
- key.offset = (u64)-1;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto error;
-
- BUG_ON(ret == 0); /* Corruption */
-
- ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
- BTRFS_DEV_ITEM_KEY);
- if (ret) {
- *objectid = 1;
- } else {
- btrfs_item_key_to_cpu(path->nodes[0], &found_key,
- path->slots[0]);
- *objectid = found_key.offset + 1;
- }
- ret = 0;
-error:
- btrfs_free_path(path);
- return ret;
-}
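find_next_chunk and find_next_devid both allocate new ids the same way: look up the highest existing key of the relevant type and return one past it, falling back to a default when nothing exists yet. A trivial sketch of that rule over an array (illustrative, with no holes reused):

#include <stdio.h>
#include <stdint.h>

/* next free devid: one past the highest existing id, or 1 if none exist */
static uint64_t next_devid(const uint64_t *ids, size_t n)
{
	uint64_t max = 0;

	for (size_t i = 0; i < n; i++)
		if (ids[i] > max)
			max = ids[i];
	return max + 1;
}

int main(void)
{
	uint64_t existing[] = { 1, 2, 5 };

	printf("next devid: %llu\n",
	       (unsigned long long)next_devid(existing, 3));
	return 0;
}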
-
-/*
- * the device information is stored in the chunk root
- * the btrfs_device struct should be fully filled in
- */
-int btrfs_add_device(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_device *device)
-{
- int ret;
- struct btrfs_path *path;
- struct btrfs_dev_item *dev_item;
- struct extent_buffer *leaf;
- struct btrfs_key key;
- unsigned long ptr;
-
- root = root->fs_info->chunk_root;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
- key.type = BTRFS_DEV_ITEM_KEY;
- key.offset = device->devid;
-
- ret = btrfs_insert_empty_item(trans, root, path, &key,
- sizeof(*dev_item));
- if (ret)
- goto out;
-
- leaf = path->nodes[0];
- dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
-
- btrfs_set_device_id(leaf, dev_item, device->devid);
- btrfs_set_device_generation(leaf, dev_item, 0);
- btrfs_set_device_type(leaf, dev_item, device->type);
- btrfs_set_device_io_align(leaf, dev_item, device->io_align);
- btrfs_set_device_io_width(leaf, dev_item, device->io_width);
- btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
- btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
- btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
- btrfs_set_device_group(leaf, dev_item, 0);
- btrfs_set_device_seek_speed(leaf, dev_item, 0);
- btrfs_set_device_bandwidth(leaf, dev_item, 0);
- btrfs_set_device_start_offset(leaf, dev_item, 0);
-
- ptr = (unsigned long)btrfs_device_uuid(dev_item);
- write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
- ptr = (unsigned long)btrfs_device_fsid(dev_item);
- write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
- btrfs_mark_buffer_dirty(leaf);
-
- ret = 0;
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-static int btrfs_rm_dev_item(struct btrfs_root *root,
- struct btrfs_device *device)
-{
- int ret;
- struct btrfs_path *path;
- struct btrfs_key key;
- struct btrfs_trans_handle *trans;
-
- root = root->fs_info->chunk_root;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- btrfs_free_path(path);
- return PTR_ERR(trans);
- }
- key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
- key.type = BTRFS_DEV_ITEM_KEY;
- key.offset = device->devid;
- lock_chunks(root);
-
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0)
- goto out;
-
- if (ret > 0) {
- ret = -ENOENT;
- goto out;
- }
-
- ret = btrfs_del_item(trans, root, path);
- if (ret)
- goto out;
-out:
- btrfs_free_path(path);
- unlock_chunks(root);
- btrfs_commit_transaction(trans, root);
- return ret;
-}
-
-int btrfs_rm_device(struct btrfs_root *root, char *device_path)
-{
- struct btrfs_device *device;
- struct btrfs_device *next_device;
- struct block_device *bdev;
- struct buffer_head *bh = NULL;
- struct btrfs_super_block *disk_super;
- struct btrfs_fs_devices *cur_devices;
- u64 all_avail;
- u64 devid;
- u64 num_devices;
- u8 *dev_uuid;
- int ret = 0;
- bool clear_super = false;
-
- mutex_lock(&uuid_mutex);
-
- all_avail = root->fs_info->avail_data_alloc_bits |
- root->fs_info->avail_system_alloc_bits |
- root->fs_info->avail_metadata_alloc_bits;
-
- if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
- root->fs_info->fs_devices->num_devices <= 4) {
- printk(KERN_ERR "btrfs: unable to go below four devices "
- "on raid10\n");
- ret = -EINVAL;
- goto out;
- }
-
- if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
- root->fs_info->fs_devices->num_devices <= 2) {
- printk(KERN_ERR "btrfs: unable to go below two "
- "devices on raid1\n");
- ret = -EINVAL;
- goto out;
- }
-
- if (strcmp(device_path, "missing") == 0) {
- struct list_head *devices;
- struct btrfs_device *tmp;
-
- device = NULL;
- devices = &root->fs_info->fs_devices->devices;
- /*
- * It is safe to read the devices since the volume_mutex
- * is held.
- */
- list_for_each_entry(tmp, devices, dev_list) {
- if (tmp->in_fs_metadata && !tmp->bdev) {
- device = tmp;
- break;
- }
- }
- bdev = NULL;
- bh = NULL;
- disk_super = NULL;
- if (!device) {
- printk(KERN_ERR "btrfs: no missing devices found to "
- "remove\n");
- goto out;
- }
- } else {
- bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL,
- root->fs_info->bdev_holder);
- if (IS_ERR(bdev)) {
- ret = PTR_ERR(bdev);
- goto out;
- }
-
- set_blocksize(bdev, 4096);
- invalidate_bdev(bdev);
- bh = btrfs_read_dev_super(bdev);
- if (!bh) {
- ret = -EINVAL;
- goto error_close;
- }
- disk_super = (struct btrfs_super_block *)bh->b_data;
- devid = btrfs_stack_device_id(&disk_super->dev_item);
- dev_uuid = disk_super->dev_item.uuid;
- device = btrfs_find_device(root, devid, dev_uuid,
- disk_super->fsid);
- if (!device) {
- ret = -ENOENT;
- goto error_brelse;
- }
- }
-
- if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) {
- printk(KERN_ERR "btrfs: unable to remove the only writeable "
- "device\n");
- ret = -EINVAL;
- goto error_brelse;
- }
-
- if (device->writeable) {
- lock_chunks(root);
- list_del_init(&device->dev_alloc_list);
- unlock_chunks(root);
- root->fs_info->fs_devices->rw_devices--;
- clear_super = true;
- }
-
- ret = btrfs_shrink_device(device, 0);
- if (ret)
- goto error_undo;
-
- ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
- if (ret)
- goto error_undo;
-
- spin_lock(&root->fs_info->free_chunk_lock);
- root->fs_info->free_chunk_space = device->total_bytes -
- device->bytes_used;
- spin_unlock(&root->fs_info->free_chunk_lock);
-
- device->in_fs_metadata = 0;
- btrfs_scrub_cancel_dev(root, device);
-
- /*
- * the device list mutex makes sure that we don't change
- * the device list while someone else is writing out all
- * the device supers.
- */
-
- cur_devices = device->fs_devices;
- mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
- list_del_rcu(&device->dev_list);
-
- device->fs_devices->num_devices--;
-
- if (device->missing)
- root->fs_info->fs_devices->missing_devices--;
-
- next_device = list_entry(root->fs_info->fs_devices->devices.next,
- struct btrfs_device, dev_list);
- if (device->bdev == root->fs_info->sb->s_bdev)
- root->fs_info->sb->s_bdev = next_device->bdev;
- if (device->bdev == root->fs_info->fs_devices->latest_bdev)
- root->fs_info->fs_devices->latest_bdev = next_device->bdev;
-
- if (device->bdev)
- device->fs_devices->open_devices--;
-
- call_rcu(&device->rcu, free_device);
- mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
-
- num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
- btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices);
-
- if (cur_devices->open_devices == 0) {
- struct btrfs_fs_devices *fs_devices;
- fs_devices = root->fs_info->fs_devices;
- while (fs_devices) {
- if (fs_devices->seed == cur_devices)
- break;
- fs_devices = fs_devices->seed;
- }
- fs_devices->seed = cur_devices->seed;
- cur_devices->seed = NULL;
- lock_chunks(root);
- __btrfs_close_devices(cur_devices);
- unlock_chunks(root);
- free_fs_devices(cur_devices);
- }
-
- /*
- * at this point, the device is zero sized. We want to
- * remove it from the devices list and zero out the old super
- */
- if (clear_super) {
- /* make sure this device isn't detected as part of
- * the FS anymore
- */
- memset(&disk_super->magic, 0, sizeof(disk_super->magic));
- set_buffer_dirty(bh);
- sync_dirty_buffer(bh);
- }
-
- ret = 0;
-
-error_brelse:
- brelse(bh);
-error_close:
- if (bdev)
- blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
-out:
- mutex_unlock(&uuid_mutex);
- return ret;
-error_undo:
- if (device->writeable) {
- lock_chunks(root);
- list_add(&device->dev_alloc_list,
- &root->fs_info->fs_devices->alloc_list);
- unlock_chunks(root);
- root->fs_info->fs_devices->rw_devices++;
- }
- goto error_brelse;
-}
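The profile checks at the top of btrfs_rm_device encode minimum device counts: with the thresholds used here, removal is refused when RAID10 would be left with four or fewer devices, or RAID1 with two or fewer. A small sketch of that gate; the flag values are illustrative, not the on-disk BTRFS_BLOCK_GROUP_* bits:

#include <stdio.h>
#include <stdint.h>
#include <errno.h>

#define RAID1	(1u << 0)	/* illustrative flags only */
#define RAID10	(1u << 1)

static int can_remove_device(uint32_t profiles_in_use, unsigned int num_devices)
{
	if ((profiles_in_use & RAID10) && num_devices <= 4)
		return -EINVAL;	/* would go below four devices on raid10 */
	if ((profiles_in_use & RAID1) && num_devices <= 2)
		return -EINVAL;	/* would go below two devices on raid1 */
	return 0;
}

int main(void)
{
	printf("raid10, 4 devices: %d\n", can_remove_device(RAID10, 4));
	printf("raid1,  3 devices: %d\n", can_remove_device(RAID1, 3));
	return 0;
}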
-
-/*
- * does all the dirty work required for changing the file system's UUID.
- */
-static int btrfs_prepare_sprout(struct btrfs_root *root)
-{
- struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
- struct btrfs_fs_devices *old_devices;
- struct btrfs_fs_devices *seed_devices;
- struct btrfs_super_block *disk_super = root->fs_info->super_copy;
- struct btrfs_device *device;
- u64 super_flags;
-
- BUG_ON(!mutex_is_locked(&uuid_mutex));
- if (!fs_devices->seeding)
- return -EINVAL;
-
- seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
- if (!seed_devices)
- return -ENOMEM;
-
- old_devices = clone_fs_devices(fs_devices);
- if (IS_ERR(old_devices)) {
- kfree(seed_devices);
- return PTR_ERR(old_devices);
- }
-
- list_add(&old_devices->list, &fs_uuids);
-
- memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
- seed_devices->opened = 1;
- INIT_LIST_HEAD(&seed_devices->devices);
- INIT_LIST_HEAD(&seed_devices->alloc_list);
- mutex_init(&seed_devices->device_list_mutex);
-
- mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
- list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
- synchronize_rcu);
- mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
-
- list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
- list_for_each_entry(device, &seed_devices->devices, dev_list) {
- device->fs_devices = seed_devices;
- }
-
- fs_devices->seeding = 0;
- fs_devices->num_devices = 0;
- fs_devices->open_devices = 0;
- fs_devices->seed = seed_devices;
-
- generate_random_uuid(fs_devices->fsid);
- memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
- memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
- super_flags = btrfs_super_flags(disk_super) &
- ~BTRFS_SUPER_FLAG_SEEDING;
- btrfs_set_super_flags(disk_super, super_flags);
-
- return 0;
-}
-
-/*
- * store the expected generation for seed devices in device items.
- */
-static int btrfs_finish_sprout(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_dev_item *dev_item;
- struct btrfs_device *device;
- struct btrfs_key key;
- u8 fs_uuid[BTRFS_UUID_SIZE];
- u8 dev_uuid[BTRFS_UUID_SIZE];
- u64 devid;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- root = root->fs_info->chunk_root;
- key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
- key.offset = 0;
- key.type = BTRFS_DEV_ITEM_KEY;
-
- while (1) {
- ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
- if (ret < 0)
- goto error;
-
- leaf = path->nodes[0];
-next_slot:
- if (path->slots[0] >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(root, path);
- if (ret > 0)
- break;
- if (ret < 0)
- goto error;
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- btrfs_release_path(path);
- continue;
- }
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
- key.type != BTRFS_DEV_ITEM_KEY)
- break;
-
- dev_item = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_dev_item);
- devid = btrfs_device_id(leaf, dev_item);
- read_extent_buffer(leaf, dev_uuid,
- (unsigned long)btrfs_device_uuid(dev_item),
- BTRFS_UUID_SIZE);
- read_extent_buffer(leaf, fs_uuid,
- (unsigned long)btrfs_device_fsid(dev_item),
- BTRFS_UUID_SIZE);
- device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
- BUG_ON(!device); /* Logic error */
-
- if (device->fs_devices->seeding) {
- btrfs_set_device_generation(leaf, dev_item,
- device->generation);
- btrfs_mark_buffer_dirty(leaf);
- }
-
- path->slots[0]++;
- goto next_slot;
- }
- ret = 0;
-error:
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
-{
- struct request_queue *q;
- struct btrfs_trans_handle *trans;
- struct btrfs_device *device;
- struct block_device *bdev;
- struct list_head *devices;
- struct super_block *sb = root->fs_info->sb;
- u64 total_bytes;
- int seeding_dev = 0;
- int ret = 0;
-
- if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
- return -EINVAL;
-
- bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
- root->fs_info->bdev_holder);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
-
- if (root->fs_info->fs_devices->seeding) {
- seeding_dev = 1;
- down_write(&sb->s_umount);
- mutex_lock(&uuid_mutex);
- }
-
- filemap_write_and_wait(bdev->bd_inode->i_mapping);
-
- devices = &root->fs_info->fs_devices->devices;
- /*
- * we have the volume lock, so we don't need the extra
- * device list mutex while reading the list here.
- */
- list_for_each_entry(device, devices, dev_list) {
- if (device->bdev == bdev) {
- ret = -EEXIST;
- goto error;
- }
- }
-
- device = kzalloc(sizeof(*device), GFP_NOFS);
- if (!device) {
- /* we can safely leave the fs_devices entry around */
- ret = -ENOMEM;
- goto error;
- }
-
- device->name = kstrdup(device_path, GFP_NOFS);
- if (!device->name) {
- kfree(device);
- ret = -ENOMEM;
- goto error;
- }
-
- ret = find_next_devid(root, &device->devid);
- if (ret) {
- kfree(device->name);
- kfree(device);
- goto error;
- }
-
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- kfree(device->name);
- kfree(device);
- ret = PTR_ERR(trans);
- goto error;
- }
-
- lock_chunks(root);
-
- q = bdev_get_queue(bdev);
- if (blk_queue_discard(q))
- device->can_discard = 1;
- device->writeable = 1;
- device->work.func = pending_bios_fn;
- generate_random_uuid(device->uuid);
- spin_lock_init(&device->io_lock);
- device->generation = trans->transid;
- device->io_width = root->sectorsize;
- device->io_align = root->sectorsize;
- device->sector_size = root->sectorsize;
- device->total_bytes = i_size_read(bdev->bd_inode);
- device->disk_total_bytes = device->total_bytes;
- device->dev_root = root->fs_info->dev_root;
- device->bdev = bdev;
- device->in_fs_metadata = 1;
- device->mode = FMODE_EXCL;
- set_blocksize(device->bdev, 4096);
-
- if (seeding_dev) {
- sb->s_flags &= ~MS_RDONLY;
- ret = btrfs_prepare_sprout(root);
- BUG_ON(ret); /* -ENOMEM */
- }
-
- device->fs_devices = root->fs_info->fs_devices;
-
- /*
- * we don't want write_supers to jump in here with our device
- * half setup
- */
- mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
- list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices);
- list_add(&device->dev_alloc_list,
- &root->fs_info->fs_devices->alloc_list);
- root->fs_info->fs_devices->num_devices++;
- root->fs_info->fs_devices->open_devices++;
- root->fs_info->fs_devices->rw_devices++;
- if (device->can_discard)
- root->fs_info->fs_devices->num_can_discard++;
- root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
-
- spin_lock(&root->fs_info->free_chunk_lock);
- root->fs_info->free_chunk_space += device->total_bytes;
- spin_unlock(&root->fs_info->free_chunk_lock);
-
- if (!blk_queue_nonrot(bdev_get_queue(bdev)))
- root->fs_info->fs_devices->rotating = 1;
-
- total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
- btrfs_set_super_total_bytes(root->fs_info->super_copy,
- total_bytes + device->total_bytes);
-
- total_bytes = btrfs_super_num_devices(root->fs_info->super_copy);
- btrfs_set_super_num_devices(root->fs_info->super_copy,
- total_bytes + 1);
- mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
-
- if (seeding_dev) {
- ret = init_first_rw_device(trans, root, device);
- if (ret)
- goto error_trans;
- ret = btrfs_finish_sprout(trans, root);
- if (ret)
- goto error_trans;
- } else {
- ret = btrfs_add_device(trans, root, device);
- if (ret)
- goto error_trans;
- }
-
- /*
- * we've got more storage, clear any full flags on the space
- * infos
- */
- btrfs_clear_space_info_full(root->fs_info);
-
- unlock_chunks(root);
- ret = btrfs_commit_transaction(trans, root);
-
- if (seeding_dev) {
- mutex_unlock(&uuid_mutex);
- up_write(&sb->s_umount);
-
- if (ret) /* transaction commit */
- return ret;
-
- ret = btrfs_relocate_sys_chunks(root);
- if (ret < 0)
- btrfs_error(root->fs_info, ret,
- "Failed to relocate sys chunks after "
- "device initialization. This can be fixed "
- "using the \"btrfs balance\" command.");
- }
-
- return ret;
-
-error_trans:
- unlock_chunks(root);
- btrfs_abort_transaction(trans, root, ret);
- btrfs_end_transaction(trans, root);
- kfree(device->name);
- kfree(device);
-error:
- blkdev_put(bdev, FMODE_EXCL);
- if (seeding_dev) {
- mutex_unlock(&uuid_mutex);
- up_write(&sb->s_umount);
- }
- return ret;
-}
-
-static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
- struct btrfs_device *device)
-{
- int ret;
- struct btrfs_path *path;
- struct btrfs_root *root;
- struct btrfs_dev_item *dev_item;
- struct extent_buffer *leaf;
- struct btrfs_key key;
-
- root = device->dev_root->fs_info->chunk_root;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
- key.type = BTRFS_DEV_ITEM_KEY;
- key.offset = device->devid;
-
- ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
- if (ret < 0)
- goto out;
-
- if (ret > 0) {
- ret = -ENOENT;
- goto out;
- }
-
- leaf = path->nodes[0];
- dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
-
- btrfs_set_device_id(leaf, dev_item, device->devid);
- btrfs_set_device_type(leaf, dev_item, device->type);
- btrfs_set_device_io_align(leaf, dev_item, device->io_align);
- btrfs_set_device_io_width(leaf, dev_item, device->io_width);
- btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
- btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
- btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
- btrfs_mark_buffer_dirty(leaf);
-
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
- struct btrfs_device *device, u64 new_size)
-{
- struct btrfs_super_block *super_copy =
- device->dev_root->fs_info->super_copy;
- u64 old_total = btrfs_super_total_bytes(super_copy);
- u64 diff = new_size - device->total_bytes;
-
- if (!device->writeable)
- return -EACCES;
- if (new_size <= device->total_bytes)
- return -EINVAL;
-
- btrfs_set_super_total_bytes(super_copy, old_total + diff);
- device->fs_devices->total_rw_bytes += diff;
-
- device->total_bytes = new_size;
- device->disk_total_bytes = new_size;
- btrfs_clear_space_info_full(device->dev_root->fs_info);
-
- return btrfs_update_device(trans, device);
-}
-
-int btrfs_grow_device(struct btrfs_trans_handle *trans,
- struct btrfs_device *device, u64 new_size)
-{
- int ret;
- lock_chunks(device->dev_root);
- ret = __btrfs_grow_device(trans, device, new_size);
- unlock_chunks(device->dev_root);
- return ret;
-}
-
-static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 chunk_tree, u64 chunk_objectid,
- u64 chunk_offset)
-{
- int ret;
- struct btrfs_path *path;
- struct btrfs_key key;
-
- root = root->fs_info->chunk_root;
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- key.objectid = chunk_objectid;
- key.offset = chunk_offset;
- key.type = BTRFS_CHUNK_ITEM_KEY;
-
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0)
- goto out;
- else if (ret > 0) { /* Logic error or corruption */
- btrfs_error(root->fs_info, -ENOENT,
- "Failed lookup while freeing chunk.");
- ret = -ENOENT;
- goto out;
- }
-
- ret = btrfs_del_item(trans, root, path);
- if (ret < 0)
- btrfs_error(root->fs_info, ret,
- "Failed to delete chunk item.");
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
- chunk_offset)
-{
- struct btrfs_super_block *super_copy = root->fs_info->super_copy;
- struct btrfs_disk_key *disk_key;
- struct btrfs_chunk *chunk;
- u8 *ptr;
- int ret = 0;
- u32 num_stripes;
- u32 array_size;
- u32 len = 0;
- u32 cur;
- struct btrfs_key key;
-
- array_size = btrfs_super_sys_array_size(super_copy);
-
- ptr = super_copy->sys_chunk_array;
- cur = 0;
-
- while (cur < array_size) {
- disk_key = (struct btrfs_disk_key *)ptr;
- btrfs_disk_key_to_cpu(&key, disk_key);
-
- len = sizeof(*disk_key);
-
- if (key.type == BTRFS_CHUNK_ITEM_KEY) {
- chunk = (struct btrfs_chunk *)(ptr + len);
- num_stripes = btrfs_stack_chunk_num_stripes(chunk);
- len += btrfs_chunk_item_size(num_stripes);
- } else {
- ret = -EIO;
- break;
- }
- if (key.objectid == chunk_objectid &&
- key.offset == chunk_offset) {
- memmove(ptr, ptr + len, array_size - (cur + len));
- array_size -= len;
- btrfs_set_super_sys_array_size(super_copy, array_size);
- } else {
- ptr += len;
- cur += len;
- }
- }
- return ret;
-}
-
-static int btrfs_relocate_chunk(struct btrfs_root *root,
- u64 chunk_tree, u64 chunk_objectid,
- u64 chunk_offset)
-{
- struct extent_map_tree *em_tree;
- struct btrfs_root *extent_root;
- struct btrfs_trans_handle *trans;
- struct extent_map *em;
- struct map_lookup *map;
- int ret;
- int i;
-
- root = root->fs_info->chunk_root;
- extent_root = root->fs_info->extent_root;
- em_tree = &root->fs_info->mapping_tree.map_tree;
-
- ret = btrfs_can_relocate(extent_root, chunk_offset);
- if (ret)
- return -ENOSPC;
-
- /* step one, relocate all the extents inside this chunk */
- ret = btrfs_relocate_block_group(extent_root, chunk_offset);
- if (ret)
- return ret;
-
- trans = btrfs_start_transaction(root, 0);
- BUG_ON(IS_ERR(trans));
-
- lock_chunks(root);
-
- /*
- * step two, delete the device extents and the
- * chunk tree entries
- */
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, chunk_offset, 1);
- read_unlock(&em_tree->lock);
-
- BUG_ON(!em || em->start > chunk_offset ||
- em->start + em->len < chunk_offset);
- map = (struct map_lookup *)em->bdev;
-
- for (i = 0; i < map->num_stripes; i++) {
- ret = btrfs_free_dev_extent(trans, map->stripes[i].dev,
- map->stripes[i].physical);
- BUG_ON(ret);
-
- if (map->stripes[i].dev) {
- ret = btrfs_update_device(trans, map->stripes[i].dev);
- BUG_ON(ret);
- }
- }
- ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
- chunk_offset);
-
- BUG_ON(ret);
-
- trace_btrfs_chunk_free(root, map, chunk_offset, em->len);
-
- if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
- ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
- BUG_ON(ret);
- }
-
- ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
- BUG_ON(ret);
-
- write_lock(&em_tree->lock);
- remove_extent_mapping(em_tree, em);
- write_unlock(&em_tree->lock);
-
- kfree(map);
- em->bdev = NULL;
-
- /* once for the tree */
- free_extent_map(em);
- /* once for us */
- free_extent_map(em);
-
- unlock_chunks(root);
- btrfs_end_transaction(trans, root);
- return 0;
-}
-
-static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
-{
- struct btrfs_root *chunk_root = root->fs_info->chunk_root;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_chunk *chunk;
- struct btrfs_key key;
- struct btrfs_key found_key;
- u64 chunk_tree = chunk_root->root_key.objectid;
- u64 chunk_type;
- bool retried = false;
- int failed = 0;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
-again:
- key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
- key.offset = (u64)-1;
- key.type = BTRFS_CHUNK_ITEM_KEY;
-
- while (1) {
- ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
- if (ret < 0)
- goto error;
- BUG_ON(ret == 0); /* Corruption */
-
- ret = btrfs_previous_item(chunk_root, path, key.objectid,
- key.type);
- if (ret < 0)
- goto error;
- if (ret > 0)
- break;
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
- chunk = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_chunk);
- chunk_type = btrfs_chunk_type(leaf, chunk);
- btrfs_release_path(path);
-
- if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
- ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
- found_key.objectid,
- found_key.offset);
- if (ret == -ENOSPC)
- failed++;
- else if (ret)
- BUG();
- }
-
- if (found_key.offset == 0)
- break;
- key.offset = found_key.offset - 1;
- }
- ret = 0;
- if (failed && !retried) {
- failed = 0;
- retried = true;
- goto again;
- } else if (failed && retried) {
- WARN_ON(1);
- ret = -ENOSPC;
- }
-error:
- btrfs_free_path(path);
- return ret;
-}
-
-static int insert_balance_item(struct btrfs_root *root,
- struct btrfs_balance_control *bctl)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_balance_item *item;
- struct btrfs_disk_balance_args disk_bargs;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_key key;
- int ret, err;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- btrfs_free_path(path);
- return PTR_ERR(trans);
- }
-
- key.objectid = BTRFS_BALANCE_OBJECTID;
- key.type = BTRFS_BALANCE_ITEM_KEY;
- key.offset = 0;
-
- ret = btrfs_insert_empty_item(trans, root, path, &key,
- sizeof(*item));
- if (ret)
- goto out;
-
- leaf = path->nodes[0];
- item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
-
- memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
-
- btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data);
- btrfs_set_balance_data(leaf, item, &disk_bargs);
- btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta);
- btrfs_set_balance_meta(leaf, item, &disk_bargs);
- btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
- btrfs_set_balance_sys(leaf, item, &disk_bargs);
-
- btrfs_set_balance_flags(leaf, item, bctl->flags);
-
- btrfs_mark_buffer_dirty(leaf);
-out:
- btrfs_free_path(path);
- err = btrfs_commit_transaction(trans, root);
- if (err && !ret)
- ret = err;
- return ret;
-}
-
-static int del_balance_item(struct btrfs_root *root)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_path *path;
- struct btrfs_key key;
- int ret, err;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- btrfs_free_path(path);
- return PTR_ERR(trans);
- }
-
- key.objectid = BTRFS_BALANCE_OBJECTID;
- key.type = BTRFS_BALANCE_ITEM_KEY;
- key.offset = 0;
-
- ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0)
- goto out;
- if (ret > 0) {
- ret = -ENOENT;
- goto out;
- }
-
- ret = btrfs_del_item(trans, root, path);
-out:
- btrfs_free_path(path);
- err = btrfs_commit_transaction(trans, root);
- if (err && !ret)
- ret = err;
- return ret;
-}
-
-/*
- * This is a heuristic used to reduce the number of chunks balanced on
- * resume after balance was interrupted.
- */
-static void update_balance_args(struct btrfs_balance_control *bctl)
-{
- /*
- * Turn on soft mode for chunk types that were being converted.
- */
- if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
- bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
- if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
- bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
- if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
- bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;
-
- /*
- * Turn on the usage filter if it is not already in use. The idea is
- * that chunks that we have already balanced should be
- * reasonably full. Don't do it for chunks that are being
- * converted - that will keep us from relocating unconverted
- * (albeit full) chunks.
- */
- if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
- !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
- bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
- bctl->data.usage = 90;
- }
- if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
- !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
- bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
- bctl->sys.usage = 90;
- }
- if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
- !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
- bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
- bctl->meta.usage = 90;
- }
-}
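The effect of this heuristic is easiest to see with concrete flags. Below is a minimal userspace sketch of the same transformation; the flag values are illustrative stand-ins, not the kernel's BTRFS_BALANCE_ARGS_* definitions.

#include <stdio.h>
#include <stdint.h>

/* Stand-in flag values for illustration; not the kernel's definitions. */
#define ARGS_CONVERT 0x1ULL
#define ARGS_SOFT    0x2ULL
#define ARGS_USAGE   0x4ULL

int main(void)
{
	uint64_t data = ARGS_CONVERT;	/* balance was converting data chunks */
	uint64_t meta = 0;		/* metadata had no filters */

	/* same rules update_balance_args() applies on resume */
	if (data & ARGS_CONVERT)
		data |= ARGS_SOFT;	/* only touch still-unconverted chunks */
	if (!(meta & ARGS_USAGE) && !(meta & ARGS_CONVERT))
		meta |= ARGS_USAGE;	/* the kernel code also sets usage = 90 */

	printf("data 0x%llx, meta 0x%llx\n",
	       (unsigned long long)data, (unsigned long long)meta);
	return 0;
}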
-
-/*
- * Should be called with both balance and volume mutexes held to
- * serialize other volume operations (add_dev/rm_dev/resize) with
- * restriper. Same goes for unset_balance_control.
- */
-static void set_balance_control(struct btrfs_balance_control *bctl)
-{
- struct btrfs_fs_info *fs_info = bctl->fs_info;
-
- BUG_ON(fs_info->balance_ctl);
-
- spin_lock(&fs_info->balance_lock);
- fs_info->balance_ctl = bctl;
- spin_unlock(&fs_info->balance_lock);
-}
-
-static void unset_balance_control(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_balance_control *bctl = fs_info->balance_ctl;
-
- BUG_ON(!fs_info->balance_ctl);
-
- spin_lock(&fs_info->balance_lock);
- fs_info->balance_ctl = NULL;
- spin_unlock(&fs_info->balance_lock);
-
- kfree(bctl);
-}
-
-/*
- * Balance filters. Return 1 if chunk should be filtered out
- * (should not be balanced).
- */
-static int chunk_profiles_filter(u64 chunk_type,
- struct btrfs_balance_args *bargs)
-{
- chunk_type = chunk_to_extended(chunk_type) &
- BTRFS_EXTENDED_PROFILE_MASK;
-
- if (bargs->profiles & chunk_type)
- return 0;
-
- return 1;
-}
-
-static u64 div_factor_fine(u64 num, int factor)
-{
- if (factor <= 0)
- return 0;
- if (factor >= 100)
- return num;
-
- num *= factor;
- do_div(num, 100);
- return num;
-}
-
-static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
- struct btrfs_balance_args *bargs)
-{
- struct btrfs_block_group_cache *cache;
- u64 chunk_used, user_thresh;
- int ret = 1;
-
- cache = btrfs_lookup_block_group(fs_info, chunk_offset);
- chunk_used = btrfs_block_group_used(&cache->item);
-
- user_thresh = div_factor_fine(cache->key.offset, bargs->usage);
- if (chunk_used < user_thresh)
- ret = 0;
-
- btrfs_put_block_group(cache);
- return ret;
-}
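To make the threshold arithmetic concrete, here is a small userspace rendition of div_factor_fine() with one worked value; the 1 GiB chunk length is only an example, and do_div() is replaced by plain division.

#include <stdio.h>
#include <stdint.h>

/* Userspace copy of div_factor_fine(); do_div() becomes plain division. */
static uint64_t div_factor_fine(uint64_t num, int factor)
{
	if (factor <= 0)
		return 0;
	if (factor >= 100)
		return num;
	return num * factor / 100;
}

int main(void)
{
	uint64_t chunk_len = 1ULL << 30;		/* 1 GiB chunk, for example */
	uint64_t thresh = div_factor_fine(chunk_len, 90);

	/*
	 * 966367641 bytes (~921.6 MiB): a chunk using less than this is
	 * kept by the usage filter and gets balanced; a fuller chunk is
	 * skipped.
	 */
	printf("usage=90 threshold: %llu bytes\n", (unsigned long long)thresh);
	return 0;
}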
-
-static int chunk_devid_filter(struct extent_buffer *leaf,
- struct btrfs_chunk *chunk,
- struct btrfs_balance_args *bargs)
-{
- struct btrfs_stripe *stripe;
- int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
- int i;
-
- for (i = 0; i < num_stripes; i++) {
- stripe = btrfs_stripe_nr(chunk, i);
- if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
- return 0;
- }
-
- return 1;
-}
-
-/* [pstart, pend) */
-static int chunk_drange_filter(struct extent_buffer *leaf,
- struct btrfs_chunk *chunk,
- u64 chunk_offset,
- struct btrfs_balance_args *bargs)
-{
- struct btrfs_stripe *stripe;
- int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
- u64 stripe_offset;
- u64 stripe_length;
- int factor;
- int i;
-
- if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
- return 0;
-
- if (btrfs_chunk_type(leaf, chunk) & (BTRFS_BLOCK_GROUP_DUP |
- BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10))
- factor = 2;
- else
- factor = 1;
- factor = num_stripes / factor;
-
- for (i = 0; i < num_stripes; i++) {
- stripe = btrfs_stripe_nr(chunk, i);
- if (btrfs_stripe_devid(leaf, stripe) != bargs->devid)
- continue;
-
- stripe_offset = btrfs_stripe_offset(leaf, stripe);
- stripe_length = btrfs_chunk_length(leaf, chunk);
- do_div(stripe_length, factor);
-
- if (stripe_offset < bargs->pend &&
- stripe_offset + stripe_length > bargs->pstart)
- return 0;
- }
-
- return 1;
-}
-
-/* [vstart, vend) */
-static int chunk_vrange_filter(struct extent_buffer *leaf,
- struct btrfs_chunk *chunk,
- u64 chunk_offset,
- struct btrfs_balance_args *bargs)
-{
- if (chunk_offset < bargs->vend &&
- chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
- /* at least part of the chunk is inside this vrange */
- return 0;
-
- return 1;
-}
-
-static int chunk_soft_convert_filter(u64 chunk_type,
- struct btrfs_balance_args *bargs)
-{
- if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
- return 0;
-
- chunk_type = chunk_to_extended(chunk_type) &
- BTRFS_EXTENDED_PROFILE_MASK;
-
- if (bargs->target == chunk_type)
- return 1;
-
- return 0;
-}
-
-static int should_balance_chunk(struct btrfs_root *root,
- struct extent_buffer *leaf,
- struct btrfs_chunk *chunk, u64 chunk_offset)
-{
- struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
- struct btrfs_balance_args *bargs = NULL;
- u64 chunk_type = btrfs_chunk_type(leaf, chunk);
-
- /* type filter */
- if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
- (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) {
- return 0;
- }
-
- if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
- bargs = &bctl->data;
- else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
- bargs = &bctl->sys;
- else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
- bargs = &bctl->meta;
-
- /* profiles filter */
- if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) &&
- chunk_profiles_filter(chunk_type, bargs)) {
- return 0;
- }
-
- /* usage filter */
- if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
- chunk_usage_filter(bctl->fs_info, chunk_offset, bargs)) {
- return 0;
- }
-
- /* devid filter */
- if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) &&
- chunk_devid_filter(leaf, chunk, bargs)) {
- return 0;
- }
-
- /* drange filter, makes sense only with devid filter */
- if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
- chunk_drange_filter(leaf, chunk, chunk_offset, bargs)) {
- return 0;
- }
-
- /* vrange filter */
- if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) &&
- chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) {
- return 0;
- }
-
- /* soft profile changing mode */
- if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
- chunk_soft_convert_filter(chunk_type, bargs)) {
- return 0;
- }
-
- return 1;
-}
-
-static u64 div_factor(u64 num, int factor)
-{
- if (factor == 10)
- return num;
- num *= factor;
- do_div(num, 10);
- return num;
-}
-
-static int __btrfs_balance(struct btrfs_fs_info *fs_info)
-{
- struct btrfs_balance_control *bctl = fs_info->balance_ctl;
- struct btrfs_root *chunk_root = fs_info->chunk_root;
- struct btrfs_root *dev_root = fs_info->dev_root;
- struct list_head *devices;
- struct btrfs_device *device;
- u64 old_size;
- u64 size_to_free;
- struct btrfs_chunk *chunk;
- struct btrfs_path *path;
- struct btrfs_key key;
- struct btrfs_key found_key;
- struct btrfs_trans_handle *trans;
- struct extent_buffer *leaf;
- int slot;
- int ret;
- int enospc_errors = 0;
- bool counting = true;
-
- /* step one, make some room on all the devices */
- devices = &fs_info->fs_devices->devices;
- list_for_each_entry(device, devices, dev_list) {
- old_size = device->total_bytes;
- size_to_free = div_factor(old_size, 1);
- size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
- if (!device->writeable ||
- device->total_bytes - device->bytes_used > size_to_free)
- continue;
-
- ret = btrfs_shrink_device(device, old_size - size_to_free);
- if (ret == -ENOSPC)
- break;
- BUG_ON(ret);
-
- trans = btrfs_start_transaction(dev_root, 0);
- BUG_ON(IS_ERR(trans));
-
- ret = btrfs_grow_device(trans, device, old_size);
- BUG_ON(ret);
-
- btrfs_end_transaction(trans, dev_root);
- }
-
- /* step two, relocate all the chunks */
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto error;
- }
-
- /* zero out stat counters */
- spin_lock(&fs_info->balance_lock);
- memset(&bctl->stat, 0, sizeof(bctl->stat));
- spin_unlock(&fs_info->balance_lock);
-again:
- key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
- key.offset = (u64)-1;
- key.type = BTRFS_CHUNK_ITEM_KEY;
-
- while (1) {
- if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
- atomic_read(&fs_info->balance_cancel_req)) {
- ret = -ECANCELED;
- goto error;
- }
-
- ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
- if (ret < 0)
- goto error;
-
- /*
- * this shouldn't happen; it means the last relocate
- * failed
- */
- if (ret == 0)
- BUG(); /* FIXME break ? */
-
- ret = btrfs_previous_item(chunk_root, path, 0,
- BTRFS_CHUNK_ITEM_KEY);
- if (ret) {
- ret = 0;
- break;
- }
-
- leaf = path->nodes[0];
- slot = path->slots[0];
- btrfs_item_key_to_cpu(leaf, &found_key, slot);
-
- if (found_key.objectid != key.objectid)
- break;
-
- /* chunk zero is special */
- if (found_key.offset == 0)
- break;
-
- chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
-
- if (!counting) {
- spin_lock(&fs_info->balance_lock);
- bctl->stat.considered++;
- spin_unlock(&fs_info->balance_lock);
- }
-
- ret = should_balance_chunk(chunk_root, leaf, chunk,
- found_key.offset);
- btrfs_release_path(path);
- if (!ret)
- goto loop;
-
- if (counting) {
- spin_lock(&fs_info->balance_lock);
- bctl->stat.expected++;
- spin_unlock(&fs_info->balance_lock);
- goto loop;
- }
-
- ret = btrfs_relocate_chunk(chunk_root,
- chunk_root->root_key.objectid,
- found_key.objectid,
- found_key.offset);
- if (ret && ret != -ENOSPC)
- goto error;
- if (ret == -ENOSPC) {
- enospc_errors++;
- } else {
- spin_lock(&fs_info->balance_lock);
- bctl->stat.completed++;
- spin_unlock(&fs_info->balance_lock);
- }
-loop:
- key.offset = found_key.offset - 1;
- }
-
- if (counting) {
- btrfs_release_path(path);
- counting = false;
- goto again;
- }
-error:
- btrfs_free_path(path);
- if (enospc_errors) {
- printk(KERN_INFO "btrfs: %d enospc errors during balance\n",
- enospc_errors);
- if (!ret)
- ret = -ENOSPC;
- }
-
- return ret;
-}
-
-/**
- * alloc_profile_is_valid - see if a given profile is valid and reduced
- * @flags: profile to validate
- * @extended: if true @flags is treated as an extended profile
- */
-static int alloc_profile_is_valid(u64 flags, int extended)
-{
- u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK :
- BTRFS_BLOCK_GROUP_PROFILE_MASK);
-
- flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
-
- /* 1) check that all other bits are zeroed */
- if (flags & ~mask)
- return 0;
-
- /* 2) see if profile is reduced */
- if (flags == 0)
- return !extended; /* "0" is valid for usual profiles */
-
- /* true if exactly one bit set */
- return (flags & (flags - 1)) == 0;
-}
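The final return relies on the standard single-bit test: x & (x - 1) clears the lowest set bit, so it is zero exactly when at most one bit remains. A tiny self-contained illustration with arbitrary values:

#include <assert.h>

/* Zero or exactly one bit set <=> clearing the lowest set bit leaves 0. */
static int at_most_one_bit(unsigned long long x)
{
	return (x & (x - 1)) == 0;
}

int main(void)
{
	assert(at_most_one_bit(0x0));	/* handled earlier as the "0" case */
	assert(at_most_one_bit(0x8));	/* single profile bit: reduced */
	assert(!at_most_one_bit(0xC));	/* two bits set: not reduced */
	return 0;
}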
-
-static inline int balance_need_close(struct btrfs_fs_info *fs_info)
-{
- /* cancel requested || normal exit path */
- return atomic_read(&fs_info->balance_cancel_req) ||
- (atomic_read(&fs_info->balance_pause_req) == 0 &&
- atomic_read(&fs_info->balance_cancel_req) == 0);
-}
-
-static void __cancel_balance(struct btrfs_fs_info *fs_info)
-{
- int ret;
-
- unset_balance_control(fs_info);
- ret = del_balance_item(fs_info->tree_root);
- BUG_ON(ret);
-}
-
-void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
- struct btrfs_ioctl_balance_args *bargs);
-
-/*
- * Should be called with both balance and volume mutexes held
- */
-int btrfs_balance(struct btrfs_balance_control *bctl,
- struct btrfs_ioctl_balance_args *bargs)
-{
- struct btrfs_fs_info *fs_info = bctl->fs_info;
- u64 allowed;
- int mixed = 0;
- int ret;
-
- if (btrfs_fs_closing(fs_info) ||
- atomic_read(&fs_info->balance_pause_req) ||
- atomic_read(&fs_info->balance_cancel_req)) {
- ret = -EINVAL;
- goto out;
- }
-
- allowed = btrfs_super_incompat_flags(fs_info->super_copy);
- if (allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
- mixed = 1;
-
- /*
- * In case of mixed groups, both data and meta should be picked,
- * and identical options should be given for both of them.
- */
- allowed = BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA;
- if (mixed && (bctl->flags & allowed)) {
- if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
- !(bctl->flags & BTRFS_BALANCE_METADATA) ||
- memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
- printk(KERN_ERR "btrfs: with mixed groups data and "
- "metadata balance options must be the same\n");
- ret = -EINVAL;
- goto out;
- }
- }
-
- allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
- if (fs_info->fs_devices->num_devices == 1)
- allowed |= BTRFS_BLOCK_GROUP_DUP;
- else if (fs_info->fs_devices->num_devices < 4)
- allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
- else
- allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10);
-
- if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
- (!alloc_profile_is_valid(bctl->data.target, 1) ||
- (bctl->data.target & ~allowed))) {
- printk(KERN_ERR "btrfs: unable to start balance with target "
- "data profile %llu\n",
- (unsigned long long)bctl->data.target);
- ret = -EINVAL;
- goto out;
- }
- if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
- (!alloc_profile_is_valid(bctl->meta.target, 1) ||
- (bctl->meta.target & ~allowed))) {
- printk(KERN_ERR "btrfs: unable to start balance with target "
- "metadata profile %llu\n",
- (unsigned long long)bctl->meta.target);
- ret = -EINVAL;
- goto out;
- }
- if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
- (!alloc_profile_is_valid(bctl->sys.target, 1) ||
- (bctl->sys.target & ~allowed))) {
- printk(KERN_ERR "btrfs: unable to start balance with target "
- "system profile %llu\n",
- (unsigned long long)bctl->sys.target);
- ret = -EINVAL;
- goto out;
- }
-
- /* allow dup'ed data chunks only in mixed mode */
- if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
- (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) {
- printk(KERN_ERR "btrfs: dup for data is not allowed\n");
- ret = -EINVAL;
- goto out;
- }
-
- /* allow to reduce meta or sys integrity only if force set */
- allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10;
- if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
- (fs_info->avail_system_alloc_bits & allowed) &&
- !(bctl->sys.target & allowed)) ||
- ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
- (fs_info->avail_metadata_alloc_bits & allowed) &&
- !(bctl->meta.target & allowed))) {
- if (bctl->flags & BTRFS_BALANCE_FORCE) {
- printk(KERN_INFO "btrfs: force reducing metadata "
- "integrity\n");
- } else {
- printk(KERN_ERR "btrfs: balance will reduce metadata "
- "integrity, use force if you want this\n");
- ret = -EINVAL;
- goto out;
- }
- }
-
- ret = insert_balance_item(fs_info->tree_root, bctl);
- if (ret && ret != -EEXIST)
- goto out;
-
- if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
- BUG_ON(ret == -EEXIST);
- set_balance_control(bctl);
- } else {
- BUG_ON(ret != -EEXIST);
- spin_lock(&fs_info->balance_lock);
- update_balance_args(bctl);
- spin_unlock(&fs_info->balance_lock);
- }
-
- atomic_inc(&fs_info->balance_running);
- mutex_unlock(&fs_info->balance_mutex);
-
- ret = __btrfs_balance(fs_info);
-
- mutex_lock(&fs_info->balance_mutex);
- atomic_dec(&fs_info->balance_running);
-
- if (bargs) {
- memset(bargs, 0, sizeof(*bargs));
- update_ioctl_balance_args(fs_info, 0, bargs);
- }
-
- if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
- balance_need_close(fs_info)) {
- __cancel_balance(fs_info);
- }
-
- wake_up(&fs_info->balance_wait_q);
-
- return ret;
-out:
- if (bctl->flags & BTRFS_BALANCE_RESUME)
- __cancel_balance(fs_info);
- else
- kfree(bctl);
- return ret;
-}
-
-static int balance_kthread(void *data)
-{
- struct btrfs_balance_control *bctl =
- (struct btrfs_balance_control *)data;
- struct btrfs_fs_info *fs_info = bctl->fs_info;
- int ret = 0;
-
- mutex_lock(&fs_info->volume_mutex);
- mutex_lock(&fs_info->balance_mutex);
-
- set_balance_control(bctl);
-
- if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
- printk(KERN_INFO "btrfs: force skipping balance\n");
- } else {
- printk(KERN_INFO "btrfs: continuing balance\n");
- ret = btrfs_balance(bctl, NULL);
- }
-
- mutex_unlock(&fs_info->balance_mutex);
- mutex_unlock(&fs_info->volume_mutex);
- return ret;
-}
-
-int btrfs_recover_balance(struct btrfs_root *tree_root)
-{
- struct task_struct *tsk;
- struct btrfs_balance_control *bctl;
- struct btrfs_balance_item *item;
- struct btrfs_disk_balance_args disk_bargs;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_key key;
- int ret;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
- if (!bctl) {
- ret = -ENOMEM;
- goto out;
- }
-
- key.objectid = BTRFS_BALANCE_OBJECTID;
- key.type = BTRFS_BALANCE_ITEM_KEY;
- key.offset = 0;
-
- ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
- if (ret < 0)
- goto out_bctl;
- if (ret > 0) { /* ret = -ENOENT; */
- ret = 0;
- goto out_bctl;
- }
-
- leaf = path->nodes[0];
- item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
-
- bctl->fs_info = tree_root->fs_info;
- bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME;
-
- btrfs_balance_data(leaf, item, &disk_bargs);
- btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
- btrfs_balance_meta(leaf, item, &disk_bargs);
- btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
- btrfs_balance_sys(leaf, item, &disk_bargs);
- btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
-
- tsk = kthread_run(balance_kthread, bctl, "btrfs-balance");
- if (IS_ERR(tsk))
- ret = PTR_ERR(tsk);
- else
- goto out;
-
-out_bctl:
- kfree(bctl);
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
-{
- int ret = 0;
-
- mutex_lock(&fs_info->balance_mutex);
- if (!fs_info->balance_ctl) {
- mutex_unlock(&fs_info->balance_mutex);
- return -ENOTCONN;
- }
-
- if (atomic_read(&fs_info->balance_running)) {
- atomic_inc(&fs_info->balance_pause_req);
- mutex_unlock(&fs_info->balance_mutex);
-
- wait_event(fs_info->balance_wait_q,
- atomic_read(&fs_info->balance_running) == 0);
-
- mutex_lock(&fs_info->balance_mutex);
- /* we are good with balance_ctl ripped off from under us */
- BUG_ON(atomic_read(&fs_info->balance_running));
- atomic_dec(&fs_info->balance_pause_req);
- } else {
- ret = -ENOTCONN;
- }
-
- mutex_unlock(&fs_info->balance_mutex);
- return ret;
-}
-
-int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
-{
- mutex_lock(&fs_info->balance_mutex);
- if (!fs_info->balance_ctl) {
- mutex_unlock(&fs_info->balance_mutex);
- return -ENOTCONN;
- }
-
- atomic_inc(&fs_info->balance_cancel_req);
- /*
- * if we are running, just wait and return; the balance item
- * is deleted in btrfs_balance in this case
- */
- if (atomic_read(&fs_info->balance_running)) {
- mutex_unlock(&fs_info->balance_mutex);
- wait_event(fs_info->balance_wait_q,
- atomic_read(&fs_info->balance_running) == 0);
- mutex_lock(&fs_info->balance_mutex);
- } else {
- /* __cancel_balance needs volume_mutex */
- mutex_unlock(&fs_info->balance_mutex);
- mutex_lock(&fs_info->volume_mutex);
- mutex_lock(&fs_info->balance_mutex);
-
- if (fs_info->balance_ctl)
- __cancel_balance(fs_info);
-
- mutex_unlock(&fs_info->volume_mutex);
- }
-
- BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
- atomic_dec(&fs_info->balance_cancel_req);
- mutex_unlock(&fs_info->balance_mutex);
- return 0;
-}
-
-/*
- * shrinking a device means finding all of the device extents past
- * the new size, and then following the back refs to the chunks.
- * The chunk relocation code actually frees the device extent
- */
-int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root = device->dev_root;
- struct btrfs_dev_extent *dev_extent = NULL;
- struct btrfs_path *path;
- u64 length;
- u64 chunk_tree;
- u64 chunk_objectid;
- u64 chunk_offset;
- int ret;
- int slot;
- int failed = 0;
- bool retried = false;
- struct extent_buffer *l;
- struct btrfs_key key;
- struct btrfs_super_block *super_copy = root->fs_info->super_copy;
- u64 old_total = btrfs_super_total_bytes(super_copy);
- u64 old_size = device->total_bytes;
- u64 diff = device->total_bytes - new_size;
-
- if (new_size >= device->total_bytes)
- return -EINVAL;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- path->reada = 2;
-
- lock_chunks(root);
-
- device->total_bytes = new_size;
- if (device->writeable) {
- device->fs_devices->total_rw_bytes -= diff;
- spin_lock(&root->fs_info->free_chunk_lock);
- root->fs_info->free_chunk_space -= diff;
- spin_unlock(&root->fs_info->free_chunk_lock);
- }
- unlock_chunks(root);
-
-again:
- key.objectid = device->devid;
- key.offset = (u64)-1;
- key.type = BTRFS_DEV_EXTENT_KEY;
-
- do {
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto done;
-
- ret = btrfs_previous_item(root, path, 0, key.type);
- if (ret < 0)
- goto done;
- if (ret) {
- ret = 0;
- btrfs_release_path(path);
- break;
- }
-
- l = path->nodes[0];
- slot = path->slots[0];
- btrfs_item_key_to_cpu(l, &key, path->slots[0]);
-
- if (key.objectid != device->devid) {
- btrfs_release_path(path);
- break;
- }
-
- dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
- length = btrfs_dev_extent_length(l, dev_extent);
-
- if (key.offset + length <= new_size) {
- btrfs_release_path(path);
- break;
- }
-
- chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
- chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
- chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
- btrfs_release_path(path);
-
- ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
- chunk_offset);
- if (ret && ret != -ENOSPC)
- goto done;
- if (ret == -ENOSPC)
- failed++;
- } while (key.offset-- > 0);
-
- if (failed && !retried) {
- failed = 0;
- retried = true;
- goto again;
- } else if (failed && retried) {
- ret = -ENOSPC;
- lock_chunks(root);
-
- device->total_bytes = old_size;
- if (device->writeable)
- device->fs_devices->total_rw_bytes += diff;
- spin_lock(&root->fs_info->free_chunk_lock);
- root->fs_info->free_chunk_space += diff;
- spin_unlock(&root->fs_info->free_chunk_lock);
- unlock_chunks(root);
- goto done;
- }
-
- /* Shrinking succeeded, else we would be at "done". */
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto done;
- }
-
- lock_chunks(root);
-
- device->disk_total_bytes = new_size;
- /* Now btrfs_update_device() will change the on-disk size. */
- ret = btrfs_update_device(trans, device);
- if (ret) {
- unlock_chunks(root);
- btrfs_end_transaction(trans, root);
- goto done;
- }
- WARN_ON(diff > old_total);
- btrfs_set_super_total_bytes(super_copy, old_total - diff);
- unlock_chunks(root);
- btrfs_end_transaction(trans, root);
-done:
- btrfs_free_path(path);
- return ret;
-}
-
-static int btrfs_add_system_chunk(struct btrfs_root *root,
- struct btrfs_key *key,
- struct btrfs_chunk *chunk, int item_size)
-{
- struct btrfs_super_block *super_copy = root->fs_info->super_copy;
- struct btrfs_disk_key disk_key;
- u32 array_size;
- u8 *ptr;
-
- array_size = btrfs_super_sys_array_size(super_copy);
- if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
- return -EFBIG;
-
- ptr = super_copy->sys_chunk_array + array_size;
- btrfs_cpu_key_to_disk(&disk_key, key);
- memcpy(ptr, &disk_key, sizeof(disk_key));
- ptr += sizeof(disk_key);
- memcpy(ptr, chunk, item_size);
- item_size += sizeof(disk_key);
- btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
- return 0;
-}
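For orientation, the superblock array appended to here (and walked by btrfs_del_sys_chunk() earlier) is a packed sequence of key/chunk pairs. The diagram below only restates the two memcpy() calls; it is not taken from this file.

/*
 * sys_chunk_array layout after the append (packed, no padding):
 *
 *   +----------+---------------------------+----------+-------------
 *   | disk_key | chunk incl. its stripe[]  | disk_key | chunk ...
 *   +----------+---------------------------+----------+-------------
 *   ^ super_copy->sys_chunk_array
 *
 * array_size grows by sizeof(disk_key) + the chunk item size per entry
 * and must stay within BTRFS_SYSTEM_CHUNK_ARRAY_SIZE.
 */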
-
-/*
- * sort the devices in descending order by max_avail, total_avail
- */
-static int btrfs_cmp_device_info(const void *a, const void *b)
-{
- const struct btrfs_device_info *di_a = a;
- const struct btrfs_device_info *di_b = b;
-
- if (di_a->max_avail > di_b->max_avail)
- return -1;
- if (di_a->max_avail < di_b->max_avail)
- return 1;
- if (di_a->total_avail > di_b->total_avail)
- return -1;
- if (di_a->total_avail < di_b->total_avail)
- return 1;
- return 0;
-}
-
-static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root,
- struct map_lookup **map_ret,
- u64 *num_bytes_out, u64 *stripe_size_out,
- u64 start, u64 type)
-{
- struct btrfs_fs_info *info = extent_root->fs_info;
- struct btrfs_fs_devices *fs_devices = info->fs_devices;
- struct list_head *cur;
- struct map_lookup *map = NULL;
- struct extent_map_tree *em_tree;
- struct extent_map *em;
- struct btrfs_device_info *devices_info = NULL;
- u64 total_avail;
- int num_stripes; /* total number of stripes to allocate */
- int sub_stripes; /* sub_stripes info for map */
- int dev_stripes; /* stripes per dev */
- int devs_max; /* max devs to use */
- int devs_min; /* min devs needed */
- int devs_increment; /* ndevs has to be a multiple of this */
- int ncopies; /* how many copies of the data are kept */
- int ret;
- u64 max_stripe_size;
- u64 max_chunk_size;
- u64 stripe_size;
- u64 num_bytes;
- int ndevs;
- int i;
- int j;
-
- BUG_ON(!alloc_profile_is_valid(type, 0));
-
- if (list_empty(&fs_devices->alloc_list))
- return -ENOSPC;
-
- sub_stripes = 1;
- dev_stripes = 1;
- devs_increment = 1;
- ncopies = 1;
- devs_max = 0; /* 0 == as many as possible */
- devs_min = 1;
-
- /*
- * define the properties of each RAID type.
- * FIXME: move this to a global table and use it in all RAID
- * calculation code
- */
- if (type & (BTRFS_BLOCK_GROUP_DUP)) {
- dev_stripes = 2;
- ncopies = 2;
- devs_max = 1;
- } else if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
- devs_min = 2;
- } else if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
- devs_increment = 2;
- ncopies = 2;
- devs_max = 2;
- devs_min = 2;
- } else if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
- sub_stripes = 2;
- devs_increment = 2;
- ncopies = 2;
- devs_min = 4;
- } else {
- devs_max = 1;
- }
-
- if (type & BTRFS_BLOCK_GROUP_DATA) {
- max_stripe_size = 1024 * 1024 * 1024;
- max_chunk_size = 10 * max_stripe_size;
- } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
- /* for larger filesystems, use larger metadata chunks */
- if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024)
- max_stripe_size = 1024 * 1024 * 1024;
- else
- max_stripe_size = 256 * 1024 * 1024;
- max_chunk_size = max_stripe_size;
- } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
- max_stripe_size = 32 * 1024 * 1024;
- max_chunk_size = 2 * max_stripe_size;
- } else {
- printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n",
- type);
- BUG_ON(1);
- }
-
- /* we don't want a chunk larger than 10% of writeable space */
- max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
- max_chunk_size);
-
- devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
- GFP_NOFS);
- if (!devices_info)
- return -ENOMEM;
-
- cur = fs_devices->alloc_list.next;
-
- /*
- * in the first pass through the devices list, we gather information
- * about the available holes on each device.
- */
- ndevs = 0;
- while (cur != &fs_devices->alloc_list) {
- struct btrfs_device *device;
- u64 max_avail;
- u64 dev_offset;
-
- device = list_entry(cur, struct btrfs_device, dev_alloc_list);
-
- cur = cur->next;
-
- if (!device->writeable) {
- printk(KERN_ERR
- "btrfs: read-only device in alloc_list\n");
- WARN_ON(1);
- continue;
- }
-
- if (!device->in_fs_metadata)
- continue;
-
- if (device->total_bytes > device->bytes_used)
- total_avail = device->total_bytes - device->bytes_used;
- else
- total_avail = 0;
-
- /* If there is no space on this device, skip it. */
- if (total_avail == 0)
- continue;
-
- ret = find_free_dev_extent(device,
- max_stripe_size * dev_stripes,
- &dev_offset, &max_avail);
- if (ret && ret != -ENOSPC)
- goto error;
-
- if (ret == 0)
- max_avail = max_stripe_size * dev_stripes;
-
- if (max_avail < BTRFS_STRIPE_LEN * dev_stripes)
- continue;
-
- devices_info[ndevs].dev_offset = dev_offset;
- devices_info[ndevs].max_avail = max_avail;
- devices_info[ndevs].total_avail = total_avail;
- devices_info[ndevs].dev = device;
- ++ndevs;
- }
-
- /*
- * now sort the devices by hole size / available space
- */
- sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
- btrfs_cmp_device_info, NULL);
-
- /* round down to number of usable stripes */
- ndevs -= ndevs % devs_increment;
-
- if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) {
- ret = -ENOSPC;
- goto error;
- }
-
- if (devs_max && ndevs > devs_max)
- ndevs = devs_max;
- /*
- * the primary goal is to maximize the number of stripes, so use as many
- * devices as possible, even if the stripes are not maximum sized.
- */
- stripe_size = devices_info[ndevs-1].max_avail;
- num_stripes = ndevs * dev_stripes;
-
- if (stripe_size * ndevs > max_chunk_size * ncopies) {
- stripe_size = max_chunk_size * ncopies;
- do_div(stripe_size, ndevs);
- }
-
- do_div(stripe_size, dev_stripes);
-
- /* align to BTRFS_STRIPE_LEN */
- do_div(stripe_size, BTRFS_STRIPE_LEN);
- stripe_size *= BTRFS_STRIPE_LEN;
-
- map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
- if (!map) {
- ret = -ENOMEM;
- goto error;
- }
- map->num_stripes = num_stripes;
-
- for (i = 0; i < ndevs; ++i) {
- for (j = 0; j < dev_stripes; ++j) {
- int s = i * dev_stripes + j;
- map->stripes[s].dev = devices_info[i].dev;
- map->stripes[s].physical = devices_info[i].dev_offset +
- j * stripe_size;
- }
- }
- map->sector_size = extent_root->sectorsize;
- map->stripe_len = BTRFS_STRIPE_LEN;
- map->io_align = BTRFS_STRIPE_LEN;
- map->io_width = BTRFS_STRIPE_LEN;
- map->type = type;
- map->sub_stripes = sub_stripes;
-
- *map_ret = map;
- num_bytes = stripe_size * (num_stripes / ncopies);
-
- *stripe_size_out = stripe_size;
- *num_bytes_out = num_bytes;
-
- trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes);
-
- em = alloc_extent_map();
- if (!em) {
- ret = -ENOMEM;
- goto error;
- }
- em->bdev = (struct block_device *)map;
- em->start = start;
- em->len = num_bytes;
- em->block_start = 0;
- em->block_len = em->len;
-
- em_tree = &extent_root->fs_info->mapping_tree.map_tree;
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- write_unlock(&em_tree->lock);
- free_extent_map(em);
- if (ret)
- goto error;
-
- ret = btrfs_make_block_group(trans, extent_root, 0, type,
- BTRFS_FIRST_CHUNK_TREE_OBJECTID,
- start, num_bytes);
- if (ret)
- goto error;
-
- for (i = 0; i < map->num_stripes; ++i) {
- struct btrfs_device *device;
- u64 dev_offset;
-
- device = map->stripes[i].dev;
- dev_offset = map->stripes[i].physical;
-
- ret = btrfs_alloc_dev_extent(trans, device,
- info->chunk_root->root_key.objectid,
- BTRFS_FIRST_CHUNK_TREE_OBJECTID,
- start, dev_offset, stripe_size);
- if (ret) {
- btrfs_abort_transaction(trans, extent_root, ret);
- goto error;
- }
- }
-
- kfree(devices_info);
- return 0;
-
-error:
- kfree(map);
- kfree(devices_info);
- return ret;
-}
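The FIXME above asks for these per-profile parameters to live in one global table. A hedged sketch of what such a table could look like; the values restate the if/else chain in __btrfs_alloc_chunk() and the struct name is invented for illustration.

/*
 * Sketch only: field values mirror the defaults and per-RAID overrides
 * set in __btrfs_alloc_chunk(); this is not a definition from this file.
 */
struct raid_attr {
	int sub_stripes;	/* sub_stripes recorded in the map */
	int dev_stripes;	/* stripes placed on each device */
	int devs_max;		/* 0 == no limit */
	int devs_min;		/* minimum devices needed */
	int devs_increment;	/* ndevs must be a multiple of this */
	int ncopies;		/* copies of the data */
};

static const struct raid_attr raid_attr_sketch[] = {
	/* sub  dev  max  min  incr  ncopies */
	{   1,   2,   1,   1,   1,    2 },	/* DUP    */
	{   1,   1,   0,   2,   1,    1 },	/* RAID0  */
	{   1,   1,   2,   2,   2,    2 },	/* RAID1  */
	{   2,   1,   0,   4,   2,    2 },	/* RAID10 */
	{   1,   1,   1,   1,   1,    1 },	/* SINGLE */
};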
-
-static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root,
- struct map_lookup *map, u64 chunk_offset,
- u64 chunk_size, u64 stripe_size)
-{
- u64 dev_offset;
- struct btrfs_key key;
- struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
- struct btrfs_device *device;
- struct btrfs_chunk *chunk;
- struct btrfs_stripe *stripe;
- size_t item_size = btrfs_chunk_item_size(map->num_stripes);
- int index = 0;
- int ret;
-
- chunk = kzalloc(item_size, GFP_NOFS);
- if (!chunk)
- return -ENOMEM;
-
- index = 0;
- while (index < map->num_stripes) {
- device = map->stripes[index].dev;
- device->bytes_used += stripe_size;
- ret = btrfs_update_device(trans, device);
- if (ret)
- goto out_free;
- index++;
- }
-
- spin_lock(&extent_root->fs_info->free_chunk_lock);
- extent_root->fs_info->free_chunk_space -= (stripe_size *
- map->num_stripes);
- spin_unlock(&extent_root->fs_info->free_chunk_lock);
-
- index = 0;
- stripe = &chunk->stripe;
- while (index < map->num_stripes) {
- device = map->stripes[index].dev;
- dev_offset = map->stripes[index].physical;
-
- btrfs_set_stack_stripe_devid(stripe, device->devid);
- btrfs_set_stack_stripe_offset(stripe, dev_offset);
- memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
- stripe++;
- index++;
- }
-
- btrfs_set_stack_chunk_length(chunk, chunk_size);
- btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
- btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
- btrfs_set_stack_chunk_type(chunk, map->type);
- btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
- btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
- btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
- btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
- btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);
-
- key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
- key.type = BTRFS_CHUNK_ITEM_KEY;
- key.offset = chunk_offset;
-
- ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
-
- if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
- /*
- * TODO: Cleanup of inserted chunk root in case of
- * failure.
- */
- ret = btrfs_add_system_chunk(chunk_root, &key, chunk,
- item_size);
- }
-
-out_free:
- kfree(chunk);
- return ret;
-}
-
-/*
- * Chunk allocation falls into two parts. The first part does the work
- * that makes the newly allocated chunk usable, but does not modify the
- * chunk tree. The second part does the work that requires modifying
- * the chunk tree. This division is important for the
- * bootstrap process of adding storage to a seed btrfs.
- */
-int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root, u64 type)
-{
- u64 chunk_offset;
- u64 chunk_size;
- u64 stripe_size;
- struct map_lookup *map;
- struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
- int ret;
-
- ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
- &chunk_offset);
- if (ret)
- return ret;
-
- ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
- &stripe_size, chunk_offset, type);
- if (ret)
- return ret;
-
- ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
- chunk_size, stripe_size);
- if (ret)
- return ret;
- return 0;
-}
-
-static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_device *device)
-{
- u64 chunk_offset;
- u64 sys_chunk_offset;
- u64 chunk_size;
- u64 sys_chunk_size;
- u64 stripe_size;
- u64 sys_stripe_size;
- u64 alloc_profile;
- struct map_lookup *map;
- struct map_lookup *sys_map;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_root *extent_root = fs_info->extent_root;
- int ret;
-
- ret = find_next_chunk(fs_info->chunk_root,
- BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset);
- if (ret)
- return ret;
-
- alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
- fs_info->avail_metadata_alloc_bits;
- alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
-
- ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
- &stripe_size, chunk_offset, alloc_profile);
- if (ret)
- return ret;
-
- sys_chunk_offset = chunk_offset + chunk_size;
-
- alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM |
- fs_info->avail_system_alloc_bits;
- alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
-
- ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
- &sys_chunk_size, &sys_stripe_size,
- sys_chunk_offset, alloc_profile);
- if (ret)
- goto abort;
-
- ret = btrfs_add_device(trans, fs_info->chunk_root, device);
- if (ret)
- goto abort;
-
- /*
- * Modifying the chunk tree requires allocating new blocks from both
- * the system block group and the metadata block group, so operations
- * that modify the chunk tree can only be done after both block groups
- * have been created.
- */
- ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset,
- chunk_size, stripe_size);
- if (ret)
- goto abort;
-
- ret = __finish_chunk_alloc(trans, extent_root, sys_map,
- sys_chunk_offset, sys_chunk_size,
- sys_stripe_size);
- if (ret)
- goto abort;
-
- return 0;
-
-abort:
- btrfs_abort_transaction(trans, root, ret);
- return ret;
-}
-
-int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
-{
- struct extent_map *em;
- struct map_lookup *map;
- struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
- int readonly = 0;
- int i;
-
- read_lock(&map_tree->map_tree.lock);
- em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
- read_unlock(&map_tree->map_tree.lock);
- if (!em)
- return 1;
-
- if (btrfs_test_opt(root, DEGRADED)) {
- free_extent_map(em);
- return 0;
- }
-
- map = (struct map_lookup *)em->bdev;
- for (i = 0; i < map->num_stripes; i++) {
- if (!map->stripes[i].dev->writeable) {
- readonly = 1;
- break;
- }
- }
- free_extent_map(em);
- return readonly;
-}
-
-void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
-{
- extent_map_tree_init(&tree->map_tree);
-}
-
-void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
-{
- struct extent_map *em;
-
- while (1) {
- write_lock(&tree->map_tree.lock);
- em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
- if (em)
- remove_extent_mapping(&tree->map_tree, em);
- write_unlock(&tree->map_tree.lock);
- if (!em)
- break;
- kfree(em->bdev);
- /* once for us */
- free_extent_map(em);
- /* once for the tree */
- free_extent_map(em);
- }
-}
-
-int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
-{
- struct extent_map *em;
- struct map_lookup *map;
- struct extent_map_tree *em_tree = &map_tree->map_tree;
- int ret;
-
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, logical, len);
- read_unlock(&em_tree->lock);
- BUG_ON(!em);
-
- BUG_ON(em->start > logical || em->start + em->len < logical);
- map = (struct map_lookup *)em->bdev;
- if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
- ret = map->num_stripes;
- else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
- ret = map->sub_stripes;
- else
- ret = 1;
- free_extent_map(em);
- return ret;
-}
-
-static int find_live_mirror(struct map_lookup *map, int first, int num,
- int optimal)
-{
- int i;
- if (map->stripes[optimal].dev->bdev)
- return optimal;
- for (i = first; i < first + num; i++) {
- if (map->stripes[i].dev->bdev)
- return i;
- }
- /* we couldn't find one that doesn't fail. Just return something
- * and the io error handling code will clean up eventually
- */
- return optimal;
-}
-
-static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
- u64 logical, u64 *length,
- struct btrfs_bio **bbio_ret,
- int mirror_num)
-{
- struct extent_map *em;
- struct map_lookup *map;
- struct extent_map_tree *em_tree = &map_tree->map_tree;
- u64 offset;
- u64 stripe_offset;
- u64 stripe_end_offset;
- u64 stripe_nr;
- u64 stripe_nr_orig;
- u64 stripe_nr_end;
- int stripe_index;
- int i;
- int ret = 0;
- int num_stripes;
- int max_errors = 0;
- struct btrfs_bio *bbio = NULL;
-
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, logical, *length);
- read_unlock(&em_tree->lock);
-
- if (!em) {
- printk(KERN_CRIT "unable to find logical %llu len %llu\n",
- (unsigned long long)logical,
- (unsigned long long)*length);
- BUG();
- }
-
- BUG_ON(em->start > logical || em->start + em->len < logical);
- map = (struct map_lookup *)em->bdev;
- offset = logical - em->start;
-
- if (mirror_num > map->num_stripes)
- mirror_num = 0;
-
- stripe_nr = offset;
- /*
- * stripe_nr counts the total number of stripes we have to stride
- * to get to this block
- */
- do_div(stripe_nr, map->stripe_len);
-
- stripe_offset = stripe_nr * map->stripe_len;
- BUG_ON(offset < stripe_offset);
-
- /* stripe_offset is the offset of this block in its stripe */
- stripe_offset = offset - stripe_offset;
-
- if (rw & REQ_DISCARD)
- *length = min_t(u64, em->len - offset, *length);
- else if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
- /* we limit the length of each bio to what fits in a stripe */
- *length = min_t(u64, em->len - offset,
- map->stripe_len - stripe_offset);
- } else {
- *length = em->len - offset;
- }
-
- if (!bbio_ret)
- goto out;
-
- num_stripes = 1;
- stripe_index = 0;
- stripe_nr_orig = stripe_nr;
- stripe_nr_end = (offset + *length + map->stripe_len - 1) &
- (~(map->stripe_len - 1));
- do_div(stripe_nr_end, map->stripe_len);
- stripe_end_offset = stripe_nr_end * map->stripe_len -
- (offset + *length);
- if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
- if (rw & REQ_DISCARD)
- num_stripes = min_t(u64, map->num_stripes,
- stripe_nr_end - stripe_nr_orig);
- stripe_index = do_div(stripe_nr, map->num_stripes);
- } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
- if (rw & (REQ_WRITE | REQ_DISCARD))
- num_stripes = map->num_stripes;
- else if (mirror_num)
- stripe_index = mirror_num - 1;
- else {
- stripe_index = find_live_mirror(map, 0,
- map->num_stripes,
- current->pid % map->num_stripes);
- mirror_num = stripe_index + 1;
- }
-
- } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
- if (rw & (REQ_WRITE | REQ_DISCARD)) {
- num_stripes = map->num_stripes;
- } else if (mirror_num) {
- stripe_index = mirror_num - 1;
- } else {
- mirror_num = 1;
- }
-
- } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
- int factor = map->num_stripes / map->sub_stripes;
-
- stripe_index = do_div(stripe_nr, factor);
- stripe_index *= map->sub_stripes;
-
- if (rw & REQ_WRITE)
- num_stripes = map->sub_stripes;
- else if (rw & REQ_DISCARD)
- num_stripes = min_t(u64, map->sub_stripes *
- (stripe_nr_end - stripe_nr_orig),
- map->num_stripes);
- else if (mirror_num)
- stripe_index += mirror_num - 1;
- else {
- int old_stripe_index = stripe_index;
- stripe_index = find_live_mirror(map, stripe_index,
- map->sub_stripes, stripe_index +
- current->pid % map->sub_stripes);
- mirror_num = stripe_index - old_stripe_index + 1;
- }
- } else {
- /*
- * after this do_div call, stripe_nr is the number of stripes
- * on this device we have to walk to find the data, and
- * stripe_index is the number of our device in the stripe array
- */
- stripe_index = do_div(stripe_nr, map->num_stripes);
- mirror_num = stripe_index + 1;
- }
- BUG_ON(stripe_index >= map->num_stripes);
-
- bbio = kzalloc(btrfs_bio_size(num_stripes), GFP_NOFS);
- if (!bbio) {
- ret = -ENOMEM;
- goto out;
- }
- atomic_set(&bbio->error, 0);
-
- if (rw & REQ_DISCARD) {
- int factor = 0;
- int sub_stripes = 0;
- u64 stripes_per_dev = 0;
- u32 remaining_stripes = 0;
- u32 last_stripe = 0;
-
- if (map->type &
- (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) {
- if (map->type & BTRFS_BLOCK_GROUP_RAID0)
- sub_stripes = 1;
- else
- sub_stripes = map->sub_stripes;
-
- factor = map->num_stripes / sub_stripes;
- stripes_per_dev = div_u64_rem(stripe_nr_end -
- stripe_nr_orig,
- factor,
- &remaining_stripes);
- div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
- last_stripe *= sub_stripes;
- }
-
- for (i = 0; i < num_stripes; i++) {
- bbio->stripes[i].physical =
- map->stripes[stripe_index].physical +
- stripe_offset + stripe_nr * map->stripe_len;
- bbio->stripes[i].dev = map->stripes[stripe_index].dev;
-
- if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
- BTRFS_BLOCK_GROUP_RAID10)) {
- bbio->stripes[i].length = stripes_per_dev *
- map->stripe_len;
-
- if (i / sub_stripes < remaining_stripes)
- bbio->stripes[i].length +=
- map->stripe_len;
-
- /*
- * Special for the first stripe and
- * the last stripe:
- *
- * |-------|...|-------|
- * |----------|
- * off end_off
- */
- if (i < sub_stripes)
- bbio->stripes[i].length -=
- stripe_offset;
-
- if (stripe_index >= last_stripe &&
- stripe_index <= (last_stripe +
- sub_stripes - 1))
- bbio->stripes[i].length -=
- stripe_end_offset;
-
- if (i == sub_stripes - 1)
- stripe_offset = 0;
- } else
- bbio->stripes[i].length = *length;
-
- stripe_index++;
- if (stripe_index == map->num_stripes) {
- /* This could only happen for RAID0/10 */
- stripe_index = 0;
- stripe_nr++;
- }
- }
- } else {
- for (i = 0; i < num_stripes; i++) {
- bbio->stripes[i].physical =
- map->stripes[stripe_index].physical +
- stripe_offset +
- stripe_nr * map->stripe_len;
- bbio->stripes[i].dev =
- map->stripes[stripe_index].dev;
- stripe_index++;
- }
- }
-
- if (rw & REQ_WRITE) {
- if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_DUP)) {
- max_errors = 1;
- }
- }
-
- *bbio_ret = bbio;
- bbio->num_stripes = num_stripes;
- bbio->max_errors = max_errors;
- bbio->mirror_num = mirror_num;
-out:
- free_extent_map(em);
- return ret;
-}
-
-int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
- u64 logical, u64 *length,
- struct btrfs_bio **bbio_ret, int mirror_num)
-{
- return __btrfs_map_block(map_tree, rw, logical, length, bbio_ret,
- mirror_num);
-}
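The stripe arithmetic at the top of __btrfs_map_block() is easier to follow with numbers. A worked example under an assumed RAID0 layout; all values below are illustrative.

/*
 * Assume stripe_len = 64 KiB, RAID0 over num_stripes = 3, and an offset
 * of 150 KiB into the chunk:
 *
 *   stripe_nr     = 150 KiB / 64 KiB           = 2   (full stripes crossed)
 *   stripe_offset = 150 KiB - 2 * 64 KiB       = 22 KiB
 *   stripe_index  = stripe_nr % num_stripes    = 2   (third device)
 *   stripe_nr     = stripe_nr / num_stripes    = 0   (row on that device)
 *
 * so the I/O goes to map->stripes[2] at
 *   physical = stripes[2].physical + 0 * 64 KiB + 22 KiB.
 */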
-
-int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
- u64 chunk_start, u64 physical, u64 devid,
- u64 **logical, int *naddrs, int *stripe_len)
-{
- struct extent_map_tree *em_tree = &map_tree->map_tree;
- struct extent_map *em;
- struct map_lookup *map;
- u64 *buf;
- u64 bytenr;
- u64 length;
- u64 stripe_nr;
- int i, j, nr = 0;
-
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, chunk_start, 1);
- read_unlock(&em_tree->lock);
-
- BUG_ON(!em || em->start != chunk_start);
- map = (struct map_lookup *)em->bdev;
-
- length = em->len;
- if (map->type & BTRFS_BLOCK_GROUP_RAID10)
- do_div(length, map->num_stripes / map->sub_stripes);
- else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
- do_div(length, map->num_stripes);
-
- buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
- BUG_ON(!buf); /* -ENOMEM */
-
- for (i = 0; i < map->num_stripes; i++) {
- if (devid && map->stripes[i].dev->devid != devid)
- continue;
- if (map->stripes[i].physical > physical ||
- map->stripes[i].physical + length <= physical)
- continue;
-
- stripe_nr = physical - map->stripes[i].physical;
- do_div(stripe_nr, map->stripe_len);
-
- if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
- stripe_nr = stripe_nr * map->num_stripes + i;
- do_div(stripe_nr, map->sub_stripes);
- } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
- stripe_nr = stripe_nr * map->num_stripes + i;
- }
- bytenr = chunk_start + stripe_nr * map->stripe_len;
- WARN_ON(nr >= map->num_stripes);
- for (j = 0; j < nr; j++) {
- if (buf[j] == bytenr)
- break;
- }
- if (j == nr) {
- WARN_ON(nr >= map->num_stripes);
- buf[nr++] = bytenr;
- }
- }
-
- *logical = buf;
- *naddrs = nr;
- *stripe_len = map->stripe_len;
-
- free_extent_map(em);
- return 0;
-}
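
The RAID0/RAID10 reverse mapping above is just integer arithmetic on stripe numbers. A minimal user-space sketch of the same calculation, with an invented rmap_stripe() helper and made-up geometry (not kernel code), may make the math easier to follow:

/*
 * Illustrative sketch of the physical -> logical mapping performed by
 * btrfs_rmap_block() for RAID0/RAID10, reduced to plain integer math.
 * All names and values are invented for the example.
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t rmap_stripe(uint64_t chunk_start, uint64_t dev_stripe_phys,
			    uint64_t physical, uint64_t stripe_len,
			    int num_stripes, int sub_stripes, int i, int raid10)
{
	/* which stripe on this device the physical offset falls into */
	uint64_t stripe_nr = (physical - dev_stripe_phys) / stripe_len;

	if (raid10)
		/* sub_stripes mirrored copies share one logical stripe */
		stripe_nr = (stripe_nr * num_stripes + i) / sub_stripes;
	else
		stripe_nr = stripe_nr * num_stripes + i;

	return chunk_start + stripe_nr * stripe_len;
}

int main(void)
{
	/* hypothetical 4-device RAID10 chunk: 64K stripes, 2 sub-stripes */
	uint64_t logical = rmap_stripe(1 << 20, 0, 128 * 1024, 64 * 1024,
				       4, 2, 1, 1);
	printf("logical bytenr: %llu\n", (unsigned long long)logical);
	return 0;
}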
-
-static void btrfs_end_bio(struct bio *bio, int err)
-{
- struct btrfs_bio *bbio = bio->bi_private;
- int is_orig_bio = 0;
-
- if (err)
- atomic_inc(&bbio->error);
-
- if (bio == bbio->orig_bio)
- is_orig_bio = 1;
-
- if (atomic_dec_and_test(&bbio->stripes_pending)) {
- if (!is_orig_bio) {
- bio_put(bio);
- bio = bbio->orig_bio;
- }
- bio->bi_private = bbio->private;
- bio->bi_end_io = bbio->end_io;
- bio->bi_bdev = (struct block_device *)
- (unsigned long)bbio->mirror_num;
- /* only send an error to the higher layers if it is
- * beyond the tolerance of the multi-bio
- */
- if (atomic_read(&bbio->error) > bbio->max_errors) {
- err = -EIO;
- } else {
- /*
- * this bio is actually up to date, we didn't
- * go over the max number of errors
- */
- set_bit(BIO_UPTODATE, &bio->bi_flags);
- err = 0;
- }
- kfree(bbio);
-
- bio_endio(bio, err);
- } else if (!is_orig_bio) {
- bio_put(bio);
- }
-}
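
btrfs_end_bio() completes the original bio only when the last cloned stripe bio finishes, and it reports an error only if more stripes failed than the mapping tolerates. A self-contained sketch of that completion pattern (C11 atomics, invented names, not kernel code):

#include <stdatomic.h>
#include <stdio.h>

struct multi_io {
	atomic_int pending;	/* stripe bios still in flight */
	atomic_int errors;	/* failed stripes so far */
	int max_errors;		/* tolerated failures, e.g. 1 for RAID1 */
};

/* called once per finished stripe; returns 1 when it was the last one */
static int stripe_done(struct multi_io *m, int failed, int *final_status)
{
	if (failed)
		atomic_fetch_add(&m->errors, 1);
	if (atomic_fetch_sub(&m->pending, 1) == 1) {
		/* only the last completion decides success or failure */
		*final_status = atomic_load(&m->errors) > m->max_errors ? -5 /* EIO */ : 0;
		return 1;
	}
	return 0;
}

int main(void)
{
	struct multi_io m;
	int status = 0;

	atomic_init(&m.pending, 2);	/* two mirrored stripes */
	atomic_init(&m.errors, 0);
	m.max_errors = 1;

	stripe_done(&m, 1, &status);		/* first copy fails ... */
	if (stripe_done(&m, 0, &status))	/* ... second copy succeeds */
		printf("final status: %d\n", status);	/* 0: within tolerance */
	return 0;
}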
-
-struct async_sched {
- struct bio *bio;
- int rw;
- struct btrfs_fs_info *info;
- struct btrfs_work work;
-};
-
-/*
- * see run_scheduled_bios for a description of why bios are collected for
- * async submit.
- *
- * This will add one bio to the pending list for a device and make sure
- * the work struct is scheduled.
- */
-static noinline void schedule_bio(struct btrfs_root *root,
- struct btrfs_device *device,
- int rw, struct bio *bio)
-{
- int should_queue = 1;
- struct btrfs_pending_bios *pending_bios;
-
- /* don't bother with additional async steps for reads, right now */
- if (!(rw & REQ_WRITE)) {
- bio_get(bio);
- btrfsic_submit_bio(rw, bio);
- bio_put(bio);
- return;
- }
-
- /*
- * nr_async_bios allows us to reliably return congestion to the
- * higher layers. Otherwise, the async bio makes it appear we have
- * made progress against dirty pages when we've really just put it
- * on a queue for later
- */
- atomic_inc(&root->fs_info->nr_async_bios);
- WARN_ON(bio->bi_next);
- bio->bi_next = NULL;
- bio->bi_rw |= rw;
-
- spin_lock(&device->io_lock);
- if (bio->bi_rw & REQ_SYNC)
- pending_bios = &device->pending_sync_bios;
- else
- pending_bios = &device->pending_bios;
-
- if (pending_bios->tail)
- pending_bios->tail->bi_next = bio;
-
- pending_bios->tail = bio;
- if (!pending_bios->head)
- pending_bios->head = bio;
- if (device->running_pending)
- should_queue = 0;
-
- spin_unlock(&device->io_lock);
-
- if (should_queue)
- btrfs_queue_worker(&root->fs_info->submit_workers,
- &device->work);
-}
-
-int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
- int mirror_num, int async_submit)
-{
- struct btrfs_mapping_tree *map_tree;
- struct btrfs_device *dev;
- struct bio *first_bio = bio;
- u64 logical = (u64)bio->bi_sector << 9;
- u64 length = 0;
- u64 map_length;
- int ret;
- int dev_nr = 0;
- int total_devs = 1;
- struct btrfs_bio *bbio = NULL;
-
- length = bio->bi_size;
- map_tree = &root->fs_info->mapping_tree;
- map_length = length;
-
- ret = btrfs_map_block(map_tree, rw, logical, &map_length, &bbio,
- mirror_num);
- if (ret) /* -ENOMEM */
- return ret;
-
- total_devs = bbio->num_stripes;
- if (map_length < length) {
- printk(KERN_CRIT "mapping failed logical %llu bio len %llu "
- "len %llu\n", (unsigned long long)logical,
- (unsigned long long)length,
- (unsigned long long)map_length);
- BUG();
- }
-
- bbio->orig_bio = first_bio;
- bbio->private = first_bio->bi_private;
- bbio->end_io = first_bio->bi_end_io;
- atomic_set(&bbio->stripes_pending, bbio->num_stripes);
-
- while (dev_nr < total_devs) {
- if (dev_nr < total_devs - 1) {
- bio = bio_clone(first_bio, GFP_NOFS);
- BUG_ON(!bio); /* -ENOMEM */
- } else {
- bio = first_bio;
- }
- bio->bi_private = bbio;
- bio->bi_end_io = btrfs_end_bio;
- bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
- dev = bbio->stripes[dev_nr].dev;
- if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
-			pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu "
- "(%s id %llu), size=%u\n", rw,
- (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
- dev->name, dev->devid, bio->bi_size);
- bio->bi_bdev = dev->bdev;
- if (async_submit)
- schedule_bio(root, dev, rw, bio);
- else
- btrfsic_submit_bio(rw, bio);
- } else {
- bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
- bio->bi_sector = logical >> 9;
- bio_endio(bio, -EIO);
- }
- dev_nr++;
- }
- return 0;
-}
-
-struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
- u8 *uuid, u8 *fsid)
-{
- struct btrfs_device *device;
- struct btrfs_fs_devices *cur_devices;
-
- cur_devices = root->fs_info->fs_devices;
- while (cur_devices) {
- if (!fsid ||
- !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
- device = __find_device(&cur_devices->devices,
- devid, uuid);
- if (device)
- return device;
- }
- cur_devices = cur_devices->seed;
- }
- return NULL;
-}
-
-static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
- u64 devid, u8 *dev_uuid)
-{
- struct btrfs_device *device;
- struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
-
- device = kzalloc(sizeof(*device), GFP_NOFS);
- if (!device)
- return NULL;
- list_add(&device->dev_list,
- &fs_devices->devices);
- device->dev_root = root->fs_info->dev_root;
- device->devid = devid;
- device->work.func = pending_bios_fn;
- device->fs_devices = fs_devices;
- device->missing = 1;
- fs_devices->num_devices++;
- fs_devices->missing_devices++;
- spin_lock_init(&device->io_lock);
- INIT_LIST_HEAD(&device->dev_alloc_list);
- memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
- return device;
-}
-
-static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
- struct extent_buffer *leaf,
- struct btrfs_chunk *chunk)
-{
- struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
- struct map_lookup *map;
- struct extent_map *em;
- u64 logical;
- u64 length;
- u64 devid;
- u8 uuid[BTRFS_UUID_SIZE];
- int num_stripes;
- int ret;
- int i;
-
- logical = key->offset;
- length = btrfs_chunk_length(leaf, chunk);
-
- read_lock(&map_tree->map_tree.lock);
- em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
- read_unlock(&map_tree->map_tree.lock);
-
- /* already mapped? */
- if (em && em->start <= logical && em->start + em->len > logical) {
- free_extent_map(em);
- return 0;
- } else if (em) {
- free_extent_map(em);
- }
-
- em = alloc_extent_map();
- if (!em)
- return -ENOMEM;
- num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
- map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
- if (!map) {
- free_extent_map(em);
- return -ENOMEM;
- }
-
- em->bdev = (struct block_device *)map;
- em->start = logical;
- em->len = length;
- em->block_start = 0;
- em->block_len = em->len;
-
- map->num_stripes = num_stripes;
- map->io_width = btrfs_chunk_io_width(leaf, chunk);
- map->io_align = btrfs_chunk_io_align(leaf, chunk);
- map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
- map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
- map->type = btrfs_chunk_type(leaf, chunk);
- map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
- for (i = 0; i < num_stripes; i++) {
- map->stripes[i].physical =
- btrfs_stripe_offset_nr(leaf, chunk, i);
- devid = btrfs_stripe_devid_nr(leaf, chunk, i);
- read_extent_buffer(leaf, uuid, (unsigned long)
- btrfs_stripe_dev_uuid_nr(chunk, i),
- BTRFS_UUID_SIZE);
- map->stripes[i].dev = btrfs_find_device(root, devid, uuid,
- NULL);
- if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
- kfree(map);
- free_extent_map(em);
- return -EIO;
- }
- if (!map->stripes[i].dev) {
- map->stripes[i].dev =
- add_missing_dev(root, devid, uuid);
- if (!map->stripes[i].dev) {
- kfree(map);
- free_extent_map(em);
- return -EIO;
- }
- }
- map->stripes[i].dev->in_fs_metadata = 1;
- }
-
- write_lock(&map_tree->map_tree.lock);
- ret = add_extent_mapping(&map_tree->map_tree, em);
- write_unlock(&map_tree->map_tree.lock);
- BUG_ON(ret); /* Tree corruption */
- free_extent_map(em);
-
- return 0;
-}
-
-static void fill_device_from_item(struct extent_buffer *leaf,
- struct btrfs_dev_item *dev_item,
- struct btrfs_device *device)
-{
- unsigned long ptr;
-
- device->devid = btrfs_device_id(leaf, dev_item);
- device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
- device->total_bytes = device->disk_total_bytes;
- device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
- device->type = btrfs_device_type(leaf, dev_item);
- device->io_align = btrfs_device_io_align(leaf, dev_item);
- device->io_width = btrfs_device_io_width(leaf, dev_item);
- device->sector_size = btrfs_device_sector_size(leaf, dev_item);
-
- ptr = (unsigned long)btrfs_device_uuid(dev_item);
- read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
-}
-
-static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
-{
- struct btrfs_fs_devices *fs_devices;
- int ret;
-
- BUG_ON(!mutex_is_locked(&uuid_mutex));
-
- fs_devices = root->fs_info->fs_devices->seed;
- while (fs_devices) {
- if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
- ret = 0;
- goto out;
- }
- fs_devices = fs_devices->seed;
- }
-
- fs_devices = find_fsid(fsid);
- if (!fs_devices) {
- ret = -ENOENT;
- goto out;
- }
-
- fs_devices = clone_fs_devices(fs_devices);
- if (IS_ERR(fs_devices)) {
- ret = PTR_ERR(fs_devices);
- goto out;
- }
-
- ret = __btrfs_open_devices(fs_devices, FMODE_READ,
- root->fs_info->bdev_holder);
- if (ret) {
- free_fs_devices(fs_devices);
- goto out;
- }
-
- if (!fs_devices->seeding) {
- __btrfs_close_devices(fs_devices);
- free_fs_devices(fs_devices);
- ret = -EINVAL;
- goto out;
- }
-
- fs_devices->seed = root->fs_info->fs_devices->seed;
- root->fs_info->fs_devices->seed = fs_devices;
-out:
- return ret;
-}
-
-static int read_one_dev(struct btrfs_root *root,
- struct extent_buffer *leaf,
- struct btrfs_dev_item *dev_item)
-{
- struct btrfs_device *device;
- u64 devid;
- int ret;
- u8 fs_uuid[BTRFS_UUID_SIZE];
- u8 dev_uuid[BTRFS_UUID_SIZE];
-
- devid = btrfs_device_id(leaf, dev_item);
- read_extent_buffer(leaf, dev_uuid,
- (unsigned long)btrfs_device_uuid(dev_item),
- BTRFS_UUID_SIZE);
- read_extent_buffer(leaf, fs_uuid,
- (unsigned long)btrfs_device_fsid(dev_item),
- BTRFS_UUID_SIZE);
-
- if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) {
- ret = open_seed_devices(root, fs_uuid);
- if (ret && !btrfs_test_opt(root, DEGRADED))
- return ret;
- }
-
- device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
- if (!device || !device->bdev) {
- if (!btrfs_test_opt(root, DEGRADED))
- return -EIO;
-
- if (!device) {
- printk(KERN_WARNING "warning devid %llu missing\n",
- (unsigned long long)devid);
- device = add_missing_dev(root, devid, dev_uuid);
- if (!device)
- return -ENOMEM;
- } else if (!device->missing) {
- /*
-			 * this happens when a device that was properly set up
- * in the device info lists suddenly goes bad.
- * device->bdev is NULL, and so we have to set
- * device->missing to one here
- */
- root->fs_info->fs_devices->missing_devices++;
- device->missing = 1;
- }
- }
-
- if (device->fs_devices != root->fs_info->fs_devices) {
- BUG_ON(device->writeable);
- if (device->generation !=
- btrfs_device_generation(leaf, dev_item))
- return -EINVAL;
- }
-
- fill_device_from_item(leaf, dev_item, device);
- device->dev_root = root->fs_info->dev_root;
- device->in_fs_metadata = 1;
- if (device->writeable) {
- device->fs_devices->total_rw_bytes += device->total_bytes;
- spin_lock(&root->fs_info->free_chunk_lock);
- root->fs_info->free_chunk_space += device->total_bytes -
- device->bytes_used;
- spin_unlock(&root->fs_info->free_chunk_lock);
- }
- ret = 0;
- return ret;
-}
-
-int btrfs_read_sys_array(struct btrfs_root *root)
-{
- struct btrfs_super_block *super_copy = root->fs_info->super_copy;
- struct extent_buffer *sb;
- struct btrfs_disk_key *disk_key;
- struct btrfs_chunk *chunk;
- u8 *ptr;
- unsigned long sb_ptr;
- int ret = 0;
- u32 num_stripes;
- u32 array_size;
- u32 len = 0;
- u32 cur;
- struct btrfs_key key;
-
- sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
- BTRFS_SUPER_INFO_SIZE);
- if (!sb)
- return -ENOMEM;
- btrfs_set_buffer_uptodate(sb);
- btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
- /*
-	 * The sb extent buffer is artificial and just used to read the system array.
-	 * btrfs_set_buffer_uptodate() call does not properly mark all its
- * pages up-to-date when the page is larger: extent does not cover the
- * whole page and consequently check_page_uptodate does not find all
- * the page's extents up-to-date (the hole beyond sb),
- * write_extent_buffer then triggers a WARN_ON.
- *
- * Regular short extents go through mark_extent_buffer_dirty/writeback cycle,
- * but sb spans only this function. Add an explicit SetPageUptodate call
- * to silence the warning eg. on PowerPC 64.
- */
- if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE)
- SetPageUptodate(sb->pages[0]);
-
- write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
- array_size = btrfs_super_sys_array_size(super_copy);
-
- ptr = super_copy->sys_chunk_array;
- sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
- cur = 0;
-
- while (cur < array_size) {
- disk_key = (struct btrfs_disk_key *)ptr;
- btrfs_disk_key_to_cpu(&key, disk_key);
-
- len = sizeof(*disk_key); ptr += len;
- sb_ptr += len;
- cur += len;
-
- if (key.type == BTRFS_CHUNK_ITEM_KEY) {
- chunk = (struct btrfs_chunk *)sb_ptr;
- ret = read_one_chunk(root, &key, sb, chunk);
- if (ret)
- break;
- num_stripes = btrfs_chunk_num_stripes(sb, chunk);
- len = btrfs_chunk_item_size(num_stripes);
- } else {
- ret = -EIO;
- break;
- }
- ptr += len;
- sb_ptr += len;
- cur += len;
- }
- free_extent_buffer(sb);
- return ret;
-}
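
The loop above walks the packed sys_chunk_array with a byte cursor: read a fixed-size key, then skip a variable-sized chunk item. A toy sketch of the same cursor walk over a made-up record format (one type byte followed by one payload-length byte), not the real on-disk layout:

#include <stdio.h>
#include <stddef.h>

int main(void)
{
	/* two records: type 0xe4 with 3 payload bytes, then type 0x01 with 1 */
	unsigned char array[] = { 0xe4, 3, 'a', 'b', 'c', 0x01, 1, 'z' };
	size_t cur = 0, size = sizeof(array);

	while (cur < size) {
		unsigned char type = array[cur];
		unsigned char len  = array[cur + 1];

		/* fixed header first, then the variable-sized payload,
		 * exactly like the disk_key + chunk pairs above */
		cur += 2;
		printf("record type 0x%02x, %u payload bytes at offset %zu\n",
		       type, len, cur);
		cur += len;
	}
	return 0;
}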
-
-int btrfs_read_chunk_tree(struct btrfs_root *root)
-{
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_key key;
- struct btrfs_key found_key;
- int ret;
- int slot;
-
- root = root->fs_info->chunk_root;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- mutex_lock(&uuid_mutex);
- lock_chunks(root);
-
- /* first we search for all of the device items, and then we
- * read in all of the chunk items. This way we can create chunk
-	 * mappings that reference all of the devices that are found
- */
- key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
- key.offset = 0;
- key.type = 0;
-again:
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto error;
- while (1) {
- leaf = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(root, path);
- if (ret == 0)
- continue;
- if (ret < 0)
- goto error;
- break;
- }
- btrfs_item_key_to_cpu(leaf, &found_key, slot);
- if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
- if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
- break;
- if (found_key.type == BTRFS_DEV_ITEM_KEY) {
- struct btrfs_dev_item *dev_item;
- dev_item = btrfs_item_ptr(leaf, slot,
- struct btrfs_dev_item);
- ret = read_one_dev(root, leaf, dev_item);
- if (ret)
- goto error;
- }
- } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
- struct btrfs_chunk *chunk;
- chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
- ret = read_one_chunk(root, &found_key, leaf, chunk);
- if (ret)
- goto error;
- }
- path->slots[0]++;
- }
- if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
- key.objectid = 0;
- btrfs_release_path(path);
- goto again;
- }
- ret = 0;
-error:
- unlock_chunks(root);
- mutex_unlock(&uuid_mutex);
-
- btrfs_free_path(path);
- return ret;
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/volumes.h b/ANDROID_3.4.5/fs/btrfs/volumes.h
deleted file mode 100644
index bb6b03f9..00000000
--- a/ANDROID_3.4.5/fs/btrfs/volumes.h
+++ /dev/null
@@ -1,284 +0,0 @@
-/*
- * Copyright (C) 2007 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __BTRFS_VOLUMES_
-#define __BTRFS_VOLUMES_
-
-#include <linux/bio.h>
-#include <linux/sort.h>
-#include "async-thread.h"
-
-#define BTRFS_STRIPE_LEN (64 * 1024)
-
-struct buffer_head;
-struct btrfs_pending_bios {
- struct bio *head;
- struct bio *tail;
-};
-
-struct btrfs_device {
- struct list_head dev_list;
- struct list_head dev_alloc_list;
- struct btrfs_fs_devices *fs_devices;
- struct btrfs_root *dev_root;
-
- /* regular prio bios */
- struct btrfs_pending_bios pending_bios;
- /* WRITE_SYNC bios */
- struct btrfs_pending_bios pending_sync_bios;
-
- int running_pending;
- u64 generation;
-
- int writeable;
- int in_fs_metadata;
- int missing;
- int can_discard;
-
- spinlock_t io_lock;
-
- struct block_device *bdev;
-
- /* the mode sent to blkdev_get */
- fmode_t mode;
-
- char *name;
-
- /* the internal btrfs device id */
- u64 devid;
-
- /* size of the device */
- u64 total_bytes;
-
- /* size of the disk */
- u64 disk_total_bytes;
-
- /* bytes used */
- u64 bytes_used;
-
- /* optimal io alignment for this device */
- u32 io_align;
-
- /* optimal io width for this device */
- u32 io_width;
-
- /* minimal io size for this device */
- u32 sector_size;
-
- /* type and info about this device */
- u64 type;
-
- /* physical drive uuid (or lvm uuid) */
- u8 uuid[BTRFS_UUID_SIZE];
-
- /* per-device scrub information */
- struct scrub_dev *scrub_device;
-
- struct btrfs_work work;
- struct rcu_head rcu;
- struct work_struct rcu_work;
-
- /* readahead state */
- spinlock_t reada_lock;
- atomic_t reada_in_flight;
- u64 reada_next;
- struct reada_zone *reada_curr_zone;
- struct radix_tree_root reada_zones;
- struct radix_tree_root reada_extents;
-
- /* for sending down flush barriers */
- struct bio *flush_bio;
- struct completion flush_wait;
- int nobarriers;
-
-};
-
-struct btrfs_fs_devices {
- u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
-
- /* the device with this id has the most recent copy of the super */
- u64 latest_devid;
- u64 latest_trans;
- u64 num_devices;
- u64 open_devices;
- u64 rw_devices;
- u64 missing_devices;
- u64 total_rw_bytes;
- u64 num_can_discard;
- struct block_device *latest_bdev;
-
- /* all of the devices in the FS, protected by a mutex
- * so we can safely walk it to write out the supers without
- * worrying about add/remove by the multi-device code
- */
- struct mutex device_list_mutex;
- struct list_head devices;
-
- /* devices not currently being allocated */
- struct list_head alloc_list;
- struct list_head list;
-
- struct btrfs_fs_devices *seed;
- int seeding;
-
- int opened;
-
- /* set when we find or add a device that doesn't have the
- * nonrot flag set
- */
- int rotating;
-};
-
-struct btrfs_bio_stripe {
- struct btrfs_device *dev;
- u64 physical;
- u64 length; /* only used for discard mappings */
-};
-
-struct btrfs_bio;
-typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err);
-
-struct btrfs_bio {
- atomic_t stripes_pending;
- bio_end_io_t *end_io;
- struct bio *orig_bio;
- void *private;
- atomic_t error;
- int max_errors;
- int num_stripes;
- int mirror_num;
- struct btrfs_bio_stripe stripes[];
-};
-
-struct btrfs_device_info {
- struct btrfs_device *dev;
- u64 dev_offset;
- u64 max_avail;
- u64 total_avail;
-};
-
-struct map_lookup {
- u64 type;
- int io_align;
- int io_width;
- int stripe_len;
- int sector_size;
- int num_stripes;
- int sub_stripes;
- struct btrfs_bio_stripe stripes[];
-};
-
-#define map_lookup_size(n) (sizeof(struct map_lookup) + \
- (sizeof(struct btrfs_bio_stripe) * (n)))
-
-/*
- * Restriper's general type filter
- */
-#define BTRFS_BALANCE_DATA (1ULL << 0)
-#define BTRFS_BALANCE_SYSTEM (1ULL << 1)
-#define BTRFS_BALANCE_METADATA (1ULL << 2)
-
-#define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \
- BTRFS_BALANCE_SYSTEM | \
- BTRFS_BALANCE_METADATA)
-
-#define BTRFS_BALANCE_FORCE (1ULL << 3)
-#define BTRFS_BALANCE_RESUME (1ULL << 4)
-
-/*
- * Balance filters
- */
-#define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0)
-#define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1)
-#define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2)
-#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3)
-#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
-
-/*
- * Profile changing flags. When SOFT is set we won't relocate chunk if
- * it already has the target profile (even though it may be
- * half-filled).
- */
-#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8)
-#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9)
-
-struct btrfs_balance_args;
-struct btrfs_balance_progress;
-struct btrfs_balance_control {
- struct btrfs_fs_info *fs_info;
-
- struct btrfs_balance_args data;
- struct btrfs_balance_args meta;
- struct btrfs_balance_args sys;
-
- u64 flags;
-
- struct btrfs_balance_progress stat;
-};
-
-int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
- u64 end, u64 *length);
-
-#define btrfs_bio_size(n) (sizeof(struct btrfs_bio) + \
- (sizeof(struct btrfs_bio_stripe) * (n)))
-
-int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
- struct btrfs_device *device,
- u64 chunk_tree, u64 chunk_objectid,
- u64 chunk_offset, u64 start, u64 num_bytes);
-int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
- u64 logical, u64 *length,
- struct btrfs_bio **bbio_ret, int mirror_num);
-int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
- u64 chunk_start, u64 physical, u64 devid,
- u64 **logical, int *naddrs, int *stripe_len);
-int btrfs_read_sys_array(struct btrfs_root *root);
-int btrfs_read_chunk_tree(struct btrfs_root *root);
-int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root, u64 type);
-void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
-void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
-int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
- int mirror_num, int async_submit);
-int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
- fmode_t flags, void *holder);
-int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
- struct btrfs_fs_devices **fs_devices_ret);
-int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
-void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices);
-int btrfs_add_device(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_device *device);
-int btrfs_rm_device(struct btrfs_root *root, char *device_path);
-void btrfs_cleanup_fs_uuids(void);
-int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len);
-int btrfs_grow_device(struct btrfs_trans_handle *trans,
- struct btrfs_device *device, u64 new_size);
-struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
- u8 *uuid, u8 *fsid);
-int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
-int btrfs_init_new_device(struct btrfs_root *root, char *path);
-int btrfs_balance(struct btrfs_balance_control *bctl,
- struct btrfs_ioctl_balance_args *bargs);
-int btrfs_recover_balance(struct btrfs_root *tree_root);
-int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
-int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
-int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
-int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
- u64 *start, u64 *max_avail);
-#endif
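
map_lookup and btrfs_bio both end in a C99 flexible array member, so map_lookup_size() and btrfs_bio_size() simply add n element sizes to the header size. A stand-alone sketch of that allocation pattern with a stand-in structure (names invented, not the kernel structures):

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct stripe { uint64_t physical; };

struct mapping {
	int num_stripes;
	struct stripe stripes[];	/* flexible array member */
};

#define mapping_size(n) (sizeof(struct mapping) + (n) * sizeof(struct stripe))

int main(void)
{
	int n = 4;
	struct mapping *m = calloc(1, mapping_size(n));	/* kzalloc() analogue */

	if (!m)
		return 1;
	m->num_stripes = n;
	printf("allocated %zu bytes for %d stripes\n", mapping_size(n), n);
	free(m);
	return 0;
}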
diff --git a/ANDROID_3.4.5/fs/btrfs/xattr.c b/ANDROID_3.4.5/fs/btrfs/xattr.c
deleted file mode 100644
index e7a56590..00000000
--- a/ANDROID_3.4.5/fs/btrfs/xattr.c
+++ /dev/null
@@ -1,429 +0,0 @@
-/*
- * Copyright (C) 2007 Red Hat. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/rwsem.h>
-#include <linux/xattr.h>
-#include <linux/security.h>
-#include "ctree.h"
-#include "btrfs_inode.h"
-#include "transaction.h"
-#include "xattr.h"
-#include "disk-io.h"
-
-
-ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
- void *buffer, size_t size)
-{
- struct btrfs_dir_item *di;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- int ret = 0;
- unsigned long data_ptr;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- /* lookup the xattr by name */
- di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), name,
- strlen(name), 0);
- if (!di) {
- ret = -ENODATA;
- goto out;
- } else if (IS_ERR(di)) {
- ret = PTR_ERR(di);
- goto out;
- }
-
- leaf = path->nodes[0];
- /* if size is 0, that means we want the size of the attr */
- if (!size) {
- ret = btrfs_dir_data_len(leaf, di);
- goto out;
- }
-
- /* now get the data out of our dir_item */
- if (btrfs_dir_data_len(leaf, di) > size) {
- ret = -ERANGE;
- goto out;
- }
-
- /*
- * The way things are packed into the leaf is like this
- * |struct btrfs_dir_item|name|data|
- * where name is the xattr name, so security.foo, and data is the
- * content of the xattr. data_ptr points to the location in memory
-	 * where the data starts in the in-memory leaf
- */
- data_ptr = (unsigned long)((char *)(di + 1) +
- btrfs_dir_name_len(leaf, di));
- read_extent_buffer(leaf, buffer, data_ptr,
- btrfs_dir_data_len(leaf, di));
- ret = btrfs_dir_data_len(leaf, di);
-
-out:
- btrfs_free_path(path);
- return ret;
-}
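
The comment in __btrfs_getxattr() describes the |dir_item|name|data| packing inside the leaf. A small sketch of the same offset arithmetic over a plain byte buffer, using a simplified stand-in header rather than the real btrfs_dir_item:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct dir_item_hdr {
	uint16_t name_len;
	uint16_t data_len;
};

int main(void)
{
	unsigned char leaf[64];
	struct dir_item_hdr hdr = { 8, 3 };
	char *data;

	/* pack one item: header, then the name, then the value */
	memcpy(leaf, &hdr, sizeof(hdr));
	memcpy(leaf + sizeof(hdr), "user.foo", 8);
	memcpy(leaf + sizeof(hdr) + 8, "bar", 3);

	/* the value starts right after the header and the name */
	data = (char *)leaf + sizeof(hdr) + hdr.name_len;
	printf("value: %.*s\n", hdr.data_len, data);
	return 0;
}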
-
-static int do_setxattr(struct btrfs_trans_handle *trans,
- struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- struct btrfs_dir_item *di;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_path *path;
- size_t name_len = strlen(name);
- int ret = 0;
-
- if (name_len + size > BTRFS_MAX_XATTR_SIZE(root))
- return -ENOSPC;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- if (flags & XATTR_REPLACE) {
- di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
- name_len, -1);
- if (IS_ERR(di)) {
- ret = PTR_ERR(di);
- goto out;
- } else if (!di) {
- ret = -ENODATA;
- goto out;
- }
- ret = btrfs_delete_one_dir_name(trans, root, path, di);
- if (ret)
- goto out;
- btrfs_release_path(path);
-
- /*
- * remove the attribute
- */
- if (!value)
- goto out;
- }
-
-again:
- ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
- name, name_len, value, size);
- /*
- * If we're setting an xattr to a new value but the new value is say
- * exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting
- * back from split_leaf. This is because it thinks we'll be extending
- * the existing item size, but we're asking for enough space to add the
- * item itself. So if we get EOVERFLOW just set ret to EEXIST and let
- * the rest of the function figure it out.
- */
- if (ret == -EOVERFLOW)
- ret = -EEXIST;
-
- if (ret == -EEXIST) {
- if (flags & XATTR_CREATE)
- goto out;
- /*
- * We can't use the path we already have since we won't have the
- * proper locking for a delete, so release the path and
- * re-lookup to delete the thing.
- */
- btrfs_release_path(path);
- di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
- name, name_len, -1);
- if (IS_ERR(di)) {
- ret = PTR_ERR(di);
- goto out;
- } else if (!di) {
- /* Shouldn't happen but just in case... */
- btrfs_release_path(path);
- goto again;
- }
-
- ret = btrfs_delete_one_dir_name(trans, root, path, di);
- if (ret)
- goto out;
-
- /*
- * We have a value to set, so go back and try to insert it now.
- */
- if (value) {
- btrfs_release_path(path);
- goto again;
- }
- }
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-/*
- * @value: "" sets the attribute to empty, NULL removes it
- */
-int __btrfs_setxattr(struct btrfs_trans_handle *trans,
- struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret;
-
- if (trans)
- return do_setxattr(trans, inode, name, value, size, flags);
-
- trans = btrfs_start_transaction(root, 2);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
-
- ret = do_setxattr(trans, inode, name, value, size, flags);
- if (ret)
- goto out;
-
- inode->i_ctime = CURRENT_TIME;
- ret = btrfs_update_inode(trans, root, inode);
- BUG_ON(ret);
-out:
- btrfs_end_transaction(trans, root);
- return ret;
-}
-
-ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
-{
- struct btrfs_key key, found_key;
- struct inode *inode = dentry->d_inode;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_dir_item *di;
- int ret = 0, slot;
- size_t total_size = 0, size_left = size;
- unsigned long name_ptr;
- size_t name_len;
-
- /*
- * ok we want all objects associated with this id.
- * NOTE: we set key.offset = 0; because we want to start with the
- * first xattr that we find and walk forward
- */
- key.objectid = btrfs_ino(inode);
- btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
- key.offset = 0;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- path->reada = 2;
-
- /* search for our xattrs */
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto err;
-
- while (1) {
- leaf = path->nodes[0];
- slot = path->slots[0];
-
- /* this is where we start walking through the path */
- if (slot >= btrfs_header_nritems(leaf)) {
- /*
- * if we've reached the last slot in this leaf we need
- * to go to the next leaf and reset everything
- */
- ret = btrfs_next_leaf(root, path);
- if (ret < 0)
- goto err;
- else if (ret > 0)
- break;
- continue;
- }
-
- btrfs_item_key_to_cpu(leaf, &found_key, slot);
-
- /* check to make sure this item is what we want */
- if (found_key.objectid != key.objectid)
- break;
- if (btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY)
- break;
-
- di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
- if (verify_dir_item(root, leaf, di))
- continue;
-
- name_len = btrfs_dir_name_len(leaf, di);
- total_size += name_len + 1;
-
- /* we are just looking for how big our buffer needs to be */
- if (!size)
- goto next;
-
- if (!buffer || (name_len + 1) > size_left) {
- ret = -ERANGE;
- goto err;
- }
-
- name_ptr = (unsigned long)(di + 1);
- read_extent_buffer(leaf, buffer, name_ptr, name_len);
- buffer[name_len] = '\0';
-
- size_left -= name_len + 1;
- buffer += name_len + 1;
-next:
- path->slots[0]++;
- }
- ret = total_size;
-
-err:
- btrfs_free_path(path);
-
- return ret;
-}
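
btrfs_listxattr() follows the usual listxattr buffer convention: names are concatenated with '\0' terminators, a zero-sized request only reports the space needed, and a too-small buffer yields -ERANGE. A user-space sketch of that convention with invented names:

#include <stdio.h>
#include <string.h>

static long list_names(const char **names, int count, char *buf, size_t size)
{
	size_t total = 0, left = size;

	for (int i = 0; i < count; i++) {
		size_t len = strlen(names[i]) + 1;	/* include the '\0' */

		total += len;
		if (!size)
			continue;			/* size query only */
		if (!buf || len > left)
			return -34;			/* -ERANGE */
		memcpy(buf, names[i], len);
		buf += len;
		left -= len;
	}
	return (long)total;
}

int main(void)
{
	const char *names[] = { "user.comment", "security.selinux" };
	char buf[64];
	long need = list_names(names, 2, NULL, 0);	/* how much space? */
	long used = list_names(names, 2, buf, sizeof(buf));

	printf("need %ld bytes, wrote %ld bytes\n", need, used);
	return 0;
}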
-
-/*
- * List of handlers for synthetic system.* attributes. All real ondisk
- * attributes are handled directly.
- */
-const struct xattr_handler *btrfs_xattr_handlers[] = {
-#ifdef CONFIG_BTRFS_FS_POSIX_ACL
- &btrfs_xattr_acl_access_handler,
- &btrfs_xattr_acl_default_handler,
-#endif
- NULL,
-};
-
-/*
- * Check if the attribute is in a supported namespace.
- *
- * This is applied after the check for the synthetic attributes in the system
- * namespace.
- */
-static bool btrfs_is_valid_xattr(const char *name)
-{
- return !strncmp(name, XATTR_SECURITY_PREFIX,
- XATTR_SECURITY_PREFIX_LEN) ||
- !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
- !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
- !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
-}
-
-ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size)
-{
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_getxattr(dentry, name, buffer, size);
-
- if (!btrfs_is_valid_xattr(name))
- return -EOPNOTSUPP;
- return __btrfs_getxattr(dentry->d_inode, name, buffer, size);
-}
-
-int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
- size_t size, int flags)
-{
- struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
-
- /*
- * The permission on security.* and system.* is not checked
- * in permission().
- */
- if (btrfs_root_readonly(root))
- return -EROFS;
-
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_setxattr(dentry, name, value, size, flags);
-
- if (!btrfs_is_valid_xattr(name))
- return -EOPNOTSUPP;
-
- if (size == 0)
- value = ""; /* empty EA, do not remove */
-
- return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size,
- flags);
-}
-
-int btrfs_removexattr(struct dentry *dentry, const char *name)
-{
- struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
-
- /*
- * The permission on security.* and system.* is not checked
- * in permission().
- */
- if (btrfs_root_readonly(root))
- return -EROFS;
-
- /*
- * If this is a request for a synthetic attribute in the system.*
- * namespace use the generic infrastructure to resolve a handler
- * for it via sb->s_xattr.
- */
- if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- return generic_removexattr(dentry, name);
-
- if (!btrfs_is_valid_xattr(name))
- return -EOPNOTSUPP;
-
- return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
- XATTR_REPLACE);
-}
-
-int btrfs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
- void *fs_info)
-{
- const struct xattr *xattr;
- struct btrfs_trans_handle *trans = fs_info;
- char *name;
- int err = 0;
-
- for (xattr = xattr_array; xattr->name != NULL; xattr++) {
- name = kmalloc(XATTR_SECURITY_PREFIX_LEN +
- strlen(xattr->name) + 1, GFP_NOFS);
- if (!name) {
- err = -ENOMEM;
- break;
- }
- strcpy(name, XATTR_SECURITY_PREFIX);
- strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name);
- err = __btrfs_setxattr(trans, inode, name,
- xattr->value, xattr->value_len, 0);
- kfree(name);
- if (err < 0)
- break;
- }
- return err;
-}
-
-int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir,
- const struct qstr *qstr)
-{
- return security_inode_init_security(inode, dir, qstr,
- &btrfs_initxattrs, trans);
-}
diff --git a/ANDROID_3.4.5/fs/btrfs/xattr.h b/ANDROID_3.4.5/fs/btrfs/xattr.h
deleted file mode 100644
index b3cc8039..00000000
--- a/ANDROID_3.4.5/fs/btrfs/xattr.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2007 Red Hat. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef __XATTR__
-#define __XATTR__
-
-#include <linux/xattr.h>
-
-extern const struct xattr_handler btrfs_xattr_acl_access_handler;
-extern const struct xattr_handler btrfs_xattr_acl_default_handler;
-extern const struct xattr_handler *btrfs_xattr_handlers[];
-
-extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
- void *buffer, size_t size);
-extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
- struct inode *inode, const char *name,
- const void *value, size_t size, int flags);
-extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size);
-extern int btrfs_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags);
-extern int btrfs_removexattr(struct dentry *dentry, const char *name);
-
-extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir,
- const struct qstr *qstr);
-
-#endif /* __XATTR__ */
diff --git a/ANDROID_3.4.5/fs/btrfs/zlib.c b/ANDROID_3.4.5/fs/btrfs/zlib.c
deleted file mode 100644
index 92c20654..00000000
--- a/ANDROID_3.4.5/fs/btrfs/zlib.c
+++ /dev/null
@@ -1,399 +0,0 @@
-/*
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- *
- * Based on jffs2 zlib code:
- * Copyright © 2001-2007 Red Hat, Inc.
- * Created by David Woodhouse <dwmw2@infradead.org>
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/zlib.h>
-#include <linux/zutil.h>
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/err.h>
-#include <linux/sched.h>
-#include <linux/pagemap.h>
-#include <linux/bio.h>
-#include "compression.h"
-
-struct workspace {
- z_stream inf_strm;
- z_stream def_strm;
- char *buf;
- struct list_head list;
-};
-
-static void zlib_free_workspace(struct list_head *ws)
-{
- struct workspace *workspace = list_entry(ws, struct workspace, list);
-
- vfree(workspace->def_strm.workspace);
- vfree(workspace->inf_strm.workspace);
- kfree(workspace->buf);
- kfree(workspace);
-}
-
-static struct list_head *zlib_alloc_workspace(void)
-{
- struct workspace *workspace;
-
- workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
- if (!workspace)
- return ERR_PTR(-ENOMEM);
-
- workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize(
- MAX_WBITS, MAX_MEM_LEVEL));
- workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
- workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
- if (!workspace->def_strm.workspace ||
- !workspace->inf_strm.workspace || !workspace->buf)
- goto fail;
-
- INIT_LIST_HEAD(&workspace->list);
-
- return &workspace->list;
-fail:
- zlib_free_workspace(&workspace->list);
- return ERR_PTR(-ENOMEM);
-}
-
-static int zlib_compress_pages(struct list_head *ws,
- struct address_space *mapping,
- u64 start, unsigned long len,
- struct page **pages,
- unsigned long nr_dest_pages,
- unsigned long *out_pages,
- unsigned long *total_in,
- unsigned long *total_out,
- unsigned long max_out)
-{
- struct workspace *workspace = list_entry(ws, struct workspace, list);
- int ret;
- char *data_in;
- char *cpage_out;
- int nr_pages = 0;
- struct page *in_page = NULL;
- struct page *out_page = NULL;
- unsigned long bytes_left;
-
- *out_pages = 0;
- *total_out = 0;
- *total_in = 0;
-
- if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
- printk(KERN_WARNING "deflateInit failed\n");
- ret = -1;
- goto out;
- }
-
- workspace->def_strm.total_in = 0;
- workspace->def_strm.total_out = 0;
-
- in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
- data_in = kmap(in_page);
-
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
- if (out_page == NULL) {
- ret = -1;
- goto out;
- }
- cpage_out = kmap(out_page);
- pages[0] = out_page;
- nr_pages = 1;
-
- workspace->def_strm.next_in = data_in;
- workspace->def_strm.next_out = cpage_out;
- workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
- workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
-
- while (workspace->def_strm.total_in < len) {
- ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
- if (ret != Z_OK) {
- printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
- ret);
- zlib_deflateEnd(&workspace->def_strm);
- ret = -1;
- goto out;
- }
-
- /* we're making it bigger, give up */
- if (workspace->def_strm.total_in > 8192 &&
- workspace->def_strm.total_in <
- workspace->def_strm.total_out) {
- ret = -1;
- goto out;
- }
- /* we need another page for writing out. Test this
- * before the total_in so we will pull in a new page for
- * the stream end if required
- */
- if (workspace->def_strm.avail_out == 0) {
- kunmap(out_page);
- if (nr_pages == nr_dest_pages) {
- out_page = NULL;
- ret = -1;
- goto out;
- }
- out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
- if (out_page == NULL) {
- ret = -1;
- goto out;
- }
- cpage_out = kmap(out_page);
- pages[nr_pages] = out_page;
- nr_pages++;
- workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
- workspace->def_strm.next_out = cpage_out;
- }
- /* we're all done */
- if (workspace->def_strm.total_in >= len)
- break;
-
- /* we've read in a full page, get a new one */
- if (workspace->def_strm.avail_in == 0) {
- if (workspace->def_strm.total_out > max_out)
- break;
-
- bytes_left = len - workspace->def_strm.total_in;
- kunmap(in_page);
- page_cache_release(in_page);
-
- start += PAGE_CACHE_SIZE;
- in_page = find_get_page(mapping,
- start >> PAGE_CACHE_SHIFT);
- data_in = kmap(in_page);
- workspace->def_strm.avail_in = min(bytes_left,
- PAGE_CACHE_SIZE);
- workspace->def_strm.next_in = data_in;
- }
- }
- workspace->def_strm.avail_in = 0;
- ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
- zlib_deflateEnd(&workspace->def_strm);
-
- if (ret != Z_STREAM_END) {
- ret = -1;
- goto out;
- }
-
- if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
- ret = -1;
- goto out;
- }
-
- ret = 0;
- *total_out = workspace->def_strm.total_out;
- *total_in = workspace->def_strm.total_in;
-out:
- *out_pages = nr_pages;
- if (out_page)
- kunmap(out_page);
-
- if (in_page) {
- kunmap(in_page);
- page_cache_release(in_page);
- }
- return ret;
-}
-
-static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in,
- u64 disk_start,
- struct bio_vec *bvec,
- int vcnt,
- size_t srclen)
-{
- struct workspace *workspace = list_entry(ws, struct workspace, list);
- int ret = 0, ret2;
- int wbits = MAX_WBITS;
- char *data_in;
- size_t total_out = 0;
- unsigned long page_in_index = 0;
- unsigned long page_out_index = 0;
- unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
- PAGE_CACHE_SIZE;
- unsigned long buf_start;
- unsigned long pg_offset;
-
- data_in = kmap(pages_in[page_in_index]);
- workspace->inf_strm.next_in = data_in;
- workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
- workspace->inf_strm.total_in = 0;
-
- workspace->inf_strm.total_out = 0;
- workspace->inf_strm.next_out = workspace->buf;
- workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
- pg_offset = 0;
-
- /* If it's deflate, and it's got no preset dictionary, then
- we can tell zlib to skip the adler32 check. */
- if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
- ((data_in[0] & 0x0f) == Z_DEFLATED) &&
- !(((data_in[0]<<8) + data_in[1]) % 31)) {
-
- wbits = -((data_in[0] >> 4) + 8);
- workspace->inf_strm.next_in += 2;
- workspace->inf_strm.avail_in -= 2;
- }
-
- if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
- printk(KERN_WARNING "inflateInit failed\n");
- return -1;
- }
- while (workspace->inf_strm.total_in < srclen) {
- ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
- if (ret != Z_OK && ret != Z_STREAM_END)
- break;
-
- buf_start = total_out;
- total_out = workspace->inf_strm.total_out;
-
- /* we didn't make progress in this inflate call, we're done */
- if (buf_start == total_out)
- break;
-
- ret2 = btrfs_decompress_buf2page(workspace->buf, buf_start,
- total_out, disk_start,
- bvec, vcnt,
- &page_out_index, &pg_offset);
- if (ret2 == 0) {
- ret = 0;
- goto done;
- }
-
- workspace->inf_strm.next_out = workspace->buf;
- workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
-
- if (workspace->inf_strm.avail_in == 0) {
- unsigned long tmp;
- kunmap(pages_in[page_in_index]);
- page_in_index++;
- if (page_in_index >= total_pages_in) {
- data_in = NULL;
- break;
- }
- data_in = kmap(pages_in[page_in_index]);
- workspace->inf_strm.next_in = data_in;
- tmp = srclen - workspace->inf_strm.total_in;
- workspace->inf_strm.avail_in = min(tmp,
- PAGE_CACHE_SIZE);
- }
- }
- if (ret != Z_STREAM_END)
- ret = -1;
- else
- ret = 0;
-done:
- zlib_inflateEnd(&workspace->inf_strm);
- if (data_in)
- kunmap(pages_in[page_in_index]);
- return ret;
-}
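
The header test above lets btrfs inflate raw deflate data with negative window bits and skip the adler32 check. A tiny sketch of that same zlib-header check applied to the common 0x78 0x9c header bytes (stand-alone, not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned char hdr[2] = { 0x78, 0x9c };
	int preset_dict = hdr[1] & 0x20;		/* FDICT bit */
	int is_deflate  = (hdr[0] & 0x0f) == 8;		/* CM == Z_DEFLATED */
	int fcheck_ok   = ((hdr[0] << 8) + hdr[1]) % 31 == 0;

	if (is_deflate && !preset_dict && fcheck_ok) {
		int wbits = -((hdr[0] >> 4) + 8);	/* e.g. -15 for a 32K window */
		printf("valid zlib header, raw inflate with wbits=%d\n", wbits);
	} else {
		printf("not a plain zlib header\n");
	}
	return 0;
}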
-
-static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
- struct page *dest_page,
- unsigned long start_byte,
- size_t srclen, size_t destlen)
-{
- struct workspace *workspace = list_entry(ws, struct workspace, list);
- int ret = 0;
- int wbits = MAX_WBITS;
- unsigned long bytes_left = destlen;
- unsigned long total_out = 0;
- char *kaddr;
-
- workspace->inf_strm.next_in = data_in;
- workspace->inf_strm.avail_in = srclen;
- workspace->inf_strm.total_in = 0;
-
- workspace->inf_strm.next_out = workspace->buf;
- workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
- workspace->inf_strm.total_out = 0;
- /* If it's deflate, and it's got no preset dictionary, then
- we can tell zlib to skip the adler32 check. */
- if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
- ((data_in[0] & 0x0f) == Z_DEFLATED) &&
- !(((data_in[0]<<8) + data_in[1]) % 31)) {
-
- wbits = -((data_in[0] >> 4) + 8);
- workspace->inf_strm.next_in += 2;
- workspace->inf_strm.avail_in -= 2;
- }
-
- if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
- printk(KERN_WARNING "inflateInit failed\n");
- return -1;
- }
-
- while (bytes_left > 0) {
- unsigned long buf_start;
- unsigned long buf_offset;
- unsigned long bytes;
- unsigned long pg_offset = 0;
-
- ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
- if (ret != Z_OK && ret != Z_STREAM_END)
- break;
-
- buf_start = total_out;
- total_out = workspace->inf_strm.total_out;
-
- if (total_out == buf_start) {
- ret = -1;
- break;
- }
-
- if (total_out <= start_byte)
- goto next;
-
- if (total_out > start_byte && buf_start < start_byte)
- buf_offset = start_byte - buf_start;
- else
- buf_offset = 0;
-
- bytes = min(PAGE_CACHE_SIZE - pg_offset,
- PAGE_CACHE_SIZE - buf_offset);
- bytes = min(bytes, bytes_left);
-
- kaddr = kmap_atomic(dest_page);
- memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
- kunmap_atomic(kaddr);
-
- pg_offset += bytes;
- bytes_left -= bytes;
-next:
- workspace->inf_strm.next_out = workspace->buf;
- workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
- }
-
- if (ret != Z_STREAM_END && bytes_left != 0)
- ret = -1;
- else
- ret = 0;
-
- zlib_inflateEnd(&workspace->inf_strm);
- return ret;
-}
-
-struct btrfs_compress_op btrfs_zlib_compress = {
- .alloc_workspace = zlib_alloc_workspace,
- .free_workspace = zlib_free_workspace,
- .compress_pages = zlib_compress_pages,
- .decompress_biovec = zlib_decompress_biovec,
- .decompress = zlib_decompress,
-};